xref: /openbmc/openbmc/poky/scripts/lib/buildstats.py (revision fc113eade321128fc43b0b299e81ad07fc1edf3d)
1 #
2 # Copyright (c) 2017, Intel Corporation.
3 #
4 # SPDX-License-Identifier: GPL-2.0-only
5 #
6 """Functionality for analyzing buildstats"""
7 import json
8 import logging
9 import os
10 import re
11 from collections import namedtuple
12 from statistics import mean
13 
14 
# Module-wide logger; getLogger() without a name returns the root logger
log = logging.getLogger()


# Field names, in order, of one row returned by diff_buildstats()
taskdiff_fields = (
    'pkg', 'pkg_op', 'task', 'task_op',
    'value1', 'value2', 'absdiff', 'reldiff',
)
TaskDiff = namedtuple('TaskDiff', taskdiff_fields)
21 
22 
class BSError(Exception):
    """Raised when buildstats data cannot be read or handled"""
26 
27 
class BSTask(dict):
    """Buildstats of one task, stored as a dict

    A new instance starts with 'start_time', 'elapsed_time' and 'status' set
    to None and 'iostat', 'rusage' and 'child_rusage' set to empty dicts; any
    constructor arguments are then applied on top, as for a plain dict.
    """
    def __init__(self, *args, **kwargs):
        self['start_time'] = None
        self['elapsed_time'] = None
        self['status'] = None
        self['iostat'] = {}
        self['rusage'] = {}
        self['child_rusage'] = {}
        # Applied last so that caller-provided data overrides the defaults
        super(BSTask, self).__init__(*args, **kwargs)

    @property
    def cputime(self):
        """Sum of user and system time taken by the task"""
        rusage = self['rusage']['ru_stime'] + self['rusage']['ru_utime']
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return rusage + self['child_rusage']['ru_stime'] + self['child_rusage']['ru_utime']
        else:
            return rusage

    @property
    def walltime(self):
        """Elapsed wall clock time"""
        return self['elapsed_time']

    @property
    def read_bytes(self):
        """Bytes read from the block layer"""
        return self['iostat']['read_bytes']

    @property
    def write_bytes(self):
        """Bytes written to the block layer"""
        return self['iostat']['write_bytes']

    @property
    def read_ops(self):
        """Number of read operations on the block layer"""
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return self['rusage']['ru_inblock'] + self['child_rusage']['ru_inblock']
        else:
            return self['rusage']['ru_inblock']

    @property
    def write_ops(self):
        """Number of write operations on the block layer"""
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return self['rusage']['ru_oublock'] + self['child_rusage']['ru_oublock']
        else:
            return self['rusage']['ru_oublock']

    @classmethod
    def from_file(cls, buildstat_file, fallback_end=0):
        """Read buildstat text file.

        The file is parsed as "key: value" lines; lines without a colon
        (e.g. blank lines) are ignored. fallback_end is an optional end time
        for tasks that are not recorded as finishing.

        Returns the populated task object. Raises BSError if no start time
        was found, or if no end time was found and fallback_end is not set.
        """
        bs_task = cls()
        log.debug("Reading task buildstats from %s", buildstat_file)
        # Both times must be pre-initialized: a file without a 'Started' line
        # has to reach the BSError below instead of failing with an
        # UnboundLocalError when start_time is tested
        start_time = None
        end_time = None
        with open(buildstat_file) as fobj:
            for line in fobj:
                key, sep, val = line.partition(':')
                if not sep:
                    # Not a "key: value" line, nothing to extract
                    continue
                val = val.strip()
                if key == 'Started':
                    start_time = float(val)
                    bs_task['start_time'] = start_time
                elif key == 'Ended':
                    end_time = float(val)
                elif key.startswith('IO '):
                    # Key is "IO <counter>"
                    split = key.split()
                    bs_task['iostat'][split[1]] = int(val)
                elif 'rusage' in key:
                    # The last word of the key is the counter name. A key
                    # starting with "rusage" belongs to the task itself, any
                    # other key containing "rusage" is stored as child usage
                    split = key.split()
                    ru_key = split[-1]
                    if ru_key in ('ru_stime', 'ru_utime'):
                        val = float(val)
                    else:
                        val = int(val)
                    ru_type = 'rusage' if split[0] == 'rusage' else \
                                                      'child_rusage'
                    bs_task[ru_type][ru_key] = val
                elif key == 'Status':
                    bs_task['status'] = val
        # If the task didn't finish, fill in the fallback end time if specified
        if start_time and not end_time and fallback_end:
            end_time = fallback_end
        if start_time and end_time:
            bs_task['elapsed_time'] = end_time - start_time
        else:
            raise BSError("{} looks like a invalid buildstats file".format(buildstat_file))
        return bs_task
119 
120 
class BSTaskAggregate(object):
    """Several runs of one task, exposing the mean of each task statistic"""
    properties = ('cputime', 'walltime', 'read_bytes', 'write_bytes',
                  'read_ops', 'write_ops')

    def __init__(self, tasks=None):
        self._tasks = tasks or []
        # Cache of already computed means, keyed by property name
        self._properties = {}

    def __getattr__(self, name):
        # Only reached for names not found by normal attribute lookup
        if name not in self.properties:
            raise AttributeError("'BSTaskAggregate' has no attribute '{}'".format(name))
        cache = self._properties
        if name not in cache:
            # Computed lazily on first access; the mean is the only
            # aggregate offered for now
            values = [getattr(run, name) for run in self._tasks]
            cache[name] = mean(values)
        return cache[name]

    def append(self, task):
        """Add one more run of the task"""
        assert isinstance(task, BSTask), "Type is '{}' instead of 'BSTask'".format(type(task))
        # Cached means are stale once the set of runs changes
        self._properties = {}
        self._tasks.append(task)
146 
147 
class BSRecipe(object):
    """Buildstats of a single recipe: name, version parts and its tasks"""
    def __init__(self, name, epoch, version, revision):
        self.name = name
        self.epoch = epoch
        self.version = version
        self.revision = revision
        # "version-revision", prefixed with "epoch_" when an epoch is given
        version_revision = "{}-{}".format(version, revision)
        if epoch is not None:
            self.evr = "{}_{}".format(epoch, version_revision)
        else:
            self.evr = version_revision
        # Task name -> task data
        self.tasks = {}

    def aggregate(self, bsrecipe):
        """Merge the task data of another run of the same recipe into this one

        Raises ValueError if the other recipe has a different nevr or a
        different set of task names.
        """
        own_nevr, other_nevr = self.nevr, bsrecipe.nevr
        if own_nevr != other_nevr:
            raise ValueError(
                "Refusing to aggregate buildstats, recipe version differs: "
                "{} vs. {}".format(own_nevr, other_nevr))
        if set(self.tasks) != set(bsrecipe.tasks):
            raise ValueError(
                "Refusing to aggregate buildstats, set of tasks in "
                "{} differ".format(self.name))

        for taskname, taskdata in bsrecipe.tasks.items():
            current = self.tasks[taskname]
            if not isinstance(current, BSTaskAggregate):
                # First aggregation: wrap the existing single task
                current = BSTaskAggregate([current])
                self.tasks[taskname] = current
            current.append(taskdata)

    @property
    def nevr(self):
        """Recipe name and evr joined by a dash"""
        return self.name + '-' + self.evr
178 
179 
class BuildStats(dict):
    """Class representing buildstats of one build

    Maps recipe name to the BSRecipe object of that recipe.
    """

    @property
    def num_tasks(self):
        """Get number of tasks"""
        return sum(len(recipe.tasks) for recipe in self.values())

    @classmethod
    def from_json(cls, bs_json):
        """Create new BuildStats object from JSON object

        bs_json is iterated as a sequence of recipe dicts with the keys
        'name', 'epoch', 'version', 'revision' and 'tasks'. Raises BSError if
        a recipe name occurs more than once.
        """
        buildstats = cls()
        for recipe in bs_json:
            if recipe['name'] in buildstats:
                raise BSError("Cannot handle multiple versions of the same "
                              "package ({})".format(recipe['name']))
            bsrecipe = BSRecipe(recipe['name'], recipe['epoch'],
                                recipe['version'], recipe['revision'])
            for task, data in recipe['tasks'].items():
                bsrecipe.tasks[task] = BSTask(data)

            buildstats[recipe['name']] = bsrecipe

        return buildstats

    @staticmethod
    def from_file_json(path):
        """Load buildstats from a JSON file"""
        with open(path) as fobj:
            bs_json = json.load(fobj)
        return BuildStats.from_json(bs_json)


    @staticmethod
    def split_nevr(nevr):
        """Split name and version information from recipe "nevr" string

        Returns a (name, epoch, version, revision) tuple of strings, where
        epoch is None if the string carries no "<digits>_" epoch prefix.
        Raises BSError if the string cannot be split.
        """
        # Revision is everything after the last dash
        n_e_v, sep, revision = nevr.rpartition('-')
        match = None
        if sep:
            match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
                             n_e_v)
            if not match:
                # If we're not able to parse a version starting with a number,
                # just take the part after last dash
                match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
                                 n_e_v)
        if not match:
            # Report unparsable names as a buildstats error instead of
            # crashing on a failed unpack or on None.group()
            raise BSError("Unable to split name and version from '{}'".format(nevr))
        name = match.group('name')
        version = match.group('version')
        epoch = match.group('epoch')
        return name, epoch, version, revision

    @staticmethod
    def parse_top_build_stats(path):
        """
        Parse the top-level build_stats file for build-wide start and duration.

        Returns a (start, elapsed) tuple; a value that is not present in the
        file is reported as 0. Lines without a colon are ignored.
        """
        start = elapsed = 0
        with open(path) as fobj:
            for line in fobj:
                key, sep, val = line.partition(':')
                if not sep:
                    # Blank or free-form line, carries no "key: value" data
                    continue
                val = val.strip()
                if key == 'Build Started':
                    start = float(val)
                elif key == "Elapsed time":
                    # Only the first word of the value is the number
                    elapsed = float(val.split()[0])
        return start, elapsed

    @classmethod
    def from_dir(cls, path):
        """Load buildstats from a buildstats directory

        Every sub-directory except "reduced_proc_pressure" is treated as one
        recipe and every entry inside it as one task file. Raises BSError if
        the directory has no top-level build_stats file or if the same recipe
        name is found more than once.
        """
        top_stats = os.path.join(path, 'build_stats')
        if not os.path.isfile(top_stats):
            raise BSError("{} does not look like a buildstats directory".format(path))

        log.debug("Reading buildstats directory %s", path)
        buildstats = cls()
        build_started, build_elapsed = cls.parse_top_build_stats(top_stats)
        # Used as end time for tasks that have no recorded end
        build_end = build_started + build_elapsed

        subdirs = os.listdir(path)
        for dirname in subdirs:
            recipe_dir = os.path.join(path, dirname)
            if dirname == "reduced_proc_pressure" or not os.path.isdir(recipe_dir):
                continue
            name, epoch, version, revision = cls.split_nevr(dirname)
            bsrecipe = BSRecipe(name, epoch, version, revision)
            for task in os.listdir(recipe_dir):
                bsrecipe.tasks[task] = BSTask.from_file(
                    os.path.join(recipe_dir, task), build_end)
            if name in buildstats:
                raise BSError("Cannot handle multiple versions of the same "
                              "package ({})".format(name))
            buildstats[name] = bsrecipe

        return buildstats

    def aggregate(self, buildstats):
        """Aggregate other buildstats into this

        Raises ValueError if the two builds do not contain the same set of
        recipe names.
        """
        if set(self.keys()) != set(buildstats.keys()):
            raise ValueError("Refusing to aggregate buildstats, set of "
                             "recipes is different: %s" % (set(self.keys()) ^ set(buildstats.keys())))
        for pkg, data in buildstats.items():
            self[pkg].aggregate(data)
284 
285 
def diff_buildstats(bs1, bs2, stat_attr, min_val=None, min_absdiff=None, only_tasks=[]):
    """Compare one statistic of all tasks between two buildstats

    stat_attr names the task attribute to compare. A task missing on one
    side counts as 0 there. Tasks whose larger value is below min_val, or
    whose absolute difference is below min_absdiff, are left out. A non-empty
    only_tasks restricts the comparison to the listed task names.

    Returns a list of TaskDiff tuples.
    """
    result = []
    for pkg in set(bs1.keys()) | set(bs2.keys()):
        left = bs1[pkg].tasks if pkg in bs1 else {}
        right = bs2[pkg].tasks if pkg in bs2 else {}
        if only_tasks:
            left = {name: data for name, data in left.items() if name in only_tasks}
            right = {name: data for name, data in right.items() if name in only_tasks}

        # '+': no tasks on the left side, '-': no tasks on the right side
        pkg_op = '+' if not left else ('-' if not right else ' ')

        for task in set(left) | set(right):
            on_left = task in left
            on_right = task in right
            val1 = getattr(left[task], stat_attr) if on_left else 0
            val2 = getattr(right[task], stat_attr) if on_right else 0
            # '+': task only on the right side, '-': task only on the left
            if not on_right:
                task_op = '-'
            elif not on_left:
                task_op = '+'
            else:
                task_op = ' '

            # Relative change in percent, infinite when the base value is 0
            reldiff = float('inf') if val1 == 0 else 100 * (val2 - val1) / val1

            biggest = max(val1, val2) if min_val else None
            if min_val and biggest < min_val:
                log.debug("Filtering out %s:%s (%s)", pkg, task,
                          max(val1, val2))
                continue
            absdiff = val2 - val1
            if min_absdiff and abs(absdiff) < min_absdiff:
                log.debug("Filtering out %s:%s (difference of %s)", pkg, task,
                          val2-val1)
                continue
            result.append(TaskDiff(pkg, pkg_op, task, task_op, val1, val2,
                                   absdiff, reldiff))
    return result
333 
334 
class BSVerDiff(object):
    """Recipe version differences between two buildstats

    Recipes are sorted into the dicts new, dropped, echanged (epoch
    differs), vchanged (version differs), rchanged (revision differs) and
    unchanged. The instance is falsy when there is no difference at all.
    """
    def __init__(self, bs1, bs2):
        RecipeVerDiff = namedtuple('RecipeVerDiff', 'left right')

        recipes1 = set(bs1.keys())
        recipes2 = set(bs2.keys())

        # Recipes present on one side only, in sorted name order
        self.new = {name: bs2[name] for name in sorted(recipes2 - recipes1)}
        self.dropped = {name: bs1[name] for name in sorted(recipes1 - recipes2)}
        self.echanged = {}
        self.vchanged = {}
        self.rchanged = {}
        self.unchanged = {}
        self.empty_diff = False

        # Recipes on both sides go to the bucket of the first differing
        # field, checked in the order epoch, version, revision
        for recipe in recipes2 & recipes1:
            left, right = bs1[recipe], bs2[recipe]
            pair = RecipeVerDiff(left, right)
            if left.epoch != right.epoch:
                bucket = self.echanged
            elif left.version != right.version:
                bucket = self.vchanged
            elif left.revision != right.revision:
                bucket = self.rchanged
            else:
                bucket = self.unchanged
            bucket[recipe] = pair

        # No difference: both sides have the same number of recipes and all
        # of them are unchanged
        if len(recipes1) == len(recipes2) == len(self.unchanged):
            self.empty_diff = True

    def __bool__(self):
        return not self.empty_diff
369