xref: /openbmc/qemu/scripts/simplebench/simplebench.py (revision b4b9a0e32f93c0700f46617524317b0580126592)
1#!/usr/bin/env python
2#
3# Simple benchmarking framework
4#
5# Copyright (c) 2019 Virtuozzo International GmbH.
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19#
20
21import statistics
22import subprocess
23import time
24
25
def do_drop_caches():
    """Sync filesystems and ask the kernel to drop its caches

    Runs 'sync' followed by writing 3 to /proc/sys/vm/drop_caches through
    a shell. Raises subprocess.CalledProcessError if the shell command
    exits with non-zero status (check=True).
    """
    command = 'sync; echo 3 > /proc/sys/vm/drop_caches'
    subprocess.run(command, check=True, shell=True)
29
30
def bench_one(test_func, test_env, test_case, count=5, initial_run=True,
              slow_limit=100, drop_caches=False):
    """Benchmark one test-case

    test_func   -- benchmarking function with prototype
                   test_func(env, case), which takes test_env and test_case
                   arguments and on success returns dict with 'seconds' or
                   'iops' (or both) fields, specifying the benchmark result.
                   If both 'iops' and 'seconds' provided, the 'iops' is
                   considered the main, and 'seconds' is just an additional
                   info. On failure test_func should return {'error': str}.
                   Returned dict may contain any other additional fields.
    test_env    -- test environment - opaque first argument for test_func
    test_case   -- test case - opaque second argument for test_func
    count       -- how many times to call test_func, to calculate average
    initial_run -- do an initial run of test_func, which is not included
                   in the result
    slow_limit  -- stop after a slow run (one that exceeds slow_limit
                   seconds); the slow run itself is still recorded.
                   (initial run is not measured)
    drop_caches -- drop caches before each run (including the initial
                   run); when false, caches are left untouched

    Returns dict with the following fields:
        'runs':     list of test_func results
        'dimension': dimension of results, may be 'seconds' or 'iops'
                    (exists only if at least one run succeeded)
        'average':  average value (iops or seconds) per run (exists only if at
                    least one run succeeded)
        'stdev':    standard deviation of results
                    (exists only if at least one run succeeded)
        'n-failed': number of failed runs (exists only if at least one run
                    failed)
    """
    if initial_run:
        print('  #initial run:')
        # Honor the caller's choice: dropping caches needs privileges and
        # must not happen unless explicitly requested.
        if drop_caches:
            do_drop_caches()
        print('   ', test_func(test_env, test_case))

    runs = []
    for i in range(count):
        # The timer starts before the optional cache drop, so the
        # slow_limit check covers the whole iteration.
        t = time.time()

        print('  #run {}'.format(i+1))
        if drop_caches:
            do_drop_caches()
        res = test_func(test_env, test_case)
        print('   ', res)
        runs.append(res)

        if time.time() - t > slow_limit:
            print('    - run is too slow, stop here')
            break

    # The loop may have stopped early, so count only the runs actually done.
    count = len(runs)

    result = {'runs': runs}

    # A run is successful if it reports at least one known measurement.
    succeeded = [r for r in runs if ('seconds' in r or 'iops' in r)]
    if succeeded:
        # The first successful run selects the dimension; all other
        # successful runs must report results in the same dimension.
        if 'iops' in succeeded[0]:
            assert all('iops' in r for r in succeeded)
            dim = 'iops'
        else:
            assert all('seconds' in r for r in succeeded)
            assert all('iops' not in r for r in succeeded)
            dim = 'seconds'
        result['dimension'] = dim
        result['average'] = statistics.mean(r[dim] for r in succeeded)
        if len(succeeded) == 1:
            # statistics.stdev() needs at least two data points.
            result['stdev'] = 0
        else:
            result['stdev'] = statistics.stdev(r[dim] for r in succeeded)

    if len(succeeded) < count:
        result['n-failed'] = count - len(succeeded)

    return result
104
105
def bench(test_func, test_envs, test_cases, *args, **vargs):
    """Run bench_one for every (environment, case) pair

    test_func -- benchmarking function, see bench_one for description
    test_envs -- list of test environments, see bench_one; each item must
                 provide an 'id' entry
    test_cases -- list of test cases, see bench_one; each item must
                  provide an 'id' entry
    args, vargs -- additional arguments passed through to bench_one

    Returns dict with the following fields:
        'envs':  test_envs
        'cases': test_cases
        'tab':   nested dict, where tab[case_id][env_id] is the bench_one
                 result for that test case in that test environment
    """
    table = {}
    total = len(test_envs) * len(test_cases)
    seq = 0
    for env in test_envs:
        for case in test_cases:
            seq += 1
            progress = 'Testing {}/{}: {} :: {}'.format(seq, total,
                                                        env['id'], case['id'])
            print(progress)
            # One row per test case, created on first use.
            row = table.setdefault(case['id'], {})
            row[env['id']] = bench_one(test_func, env, case, *args, **vargs)

    print('Done')
    return {
        'envs': test_envs,
        'cases': test_cases,
        'tab': table
    }
141