1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4# SPDX-License-Identifier:     BSD-3-Clause
5# Copyright (C) 2015 Enea Software AB
6# Author: Thomas Lundström <thomas.lundstrom@enea.com>
7
8# The script measures interrupt latency together with different types of
9# system load. This is done using the programs cyclictest and stress.
10#
11# The main output is:
12#
13#   Best case (min) latency
14#     This has very limited value, but is presented since it can be done
15#     easily
16#
17#   Average latency
18#     This value is of interrest for throughput oriented systems. Limited
19#     value for a real-time system. Also presented because it is easy to do.
20#
21#   Worst case (max) latency
22#     This is the interesting number for a real-time system. The number
23#     presented is (of cource) the largest number observed. The challenge
24#     is to know how the observed worst case relates to the actual worst case.
25#
26#     To get an indication of the confidence, the following method is used:
27#     1) Instead of one long run, the measurement is made as a set of shorter
28#        runs. The number of runs must be a power of 2 for reasons that will
29#        shorlty be obvious
30#
31#     2) First, a list of the max values are created.
32#
33#     3) The smallest value in that list is recorded.
34#
35#     4) Then a new list is create by taking the max value of each pair of
36#        values in the original list. In this list the smallest value is
37#        recorded.
38#
39#     5) Step 3 is repeated until there is only one value in the list. See
40#        example below:
41#
42#        Samples:
43#          |  44 |     |     |     |     |
44#          |  77 |  77 |     |     |     |
45#          | 118 |     |     |     |     |
46#          | 119 | 119 | 119 |     |     |
47#          | 138 |     |     |     |     |
48#          |  57 | 138 |     |     |     |
49#          | 175 |     |     |     |     |
50#          | 130 | 175 | 175 | 175 |     |
51#          |  54 |     |     |     |     |
52#          | 150 | 150 |     |     |     |
53#          |  47 |     |     |     |     |
54#          |  59 |  59 | 150 |     |     |
55#          | 199 |     |     |     |     |
56#          | 115 | 199 |     |     |     |
57#          | 177 |     |     |     |     |
58#          | 129 | 177 | 199 | 199 | 199 |
59#
60#        Smallest value:
61#          |  44 |  59 | 119 | 175 | 199 |
62#
63#     6) The generated list of smallest values is analyzed. In this case, it
64#        can be observed that the values are increasing significantly through
65#        the entire list, which leads to the conclusion that the number of
66#        samples is too small.
67#        If instead the list had been (167, 191, 196, 199, 199), there had
68#        been a very small, or no, increase at the end of the list. We might
69#        then suspect that the number of samples is probably large enough.
70#        There is however no guarantee for that.
71#
72#     Steps 1-2 are done in run_cyclictest_suite
73#     Steps 3-5 are done in gen_minmax_list.
74#     Step 6 needs to be done manually since there is (yet) no well defined
75#     FAIL criterion and a theoretically solid PASS criterion may never be
76#     available.
77
78import multiprocessing
79import os
80import re
81import signal
82import subprocess
83import time
84import traceback
85
86# See comment on the function set_hung_tmo
87has_hung_task_detection = True
88
89#-------------------------------------------------------------------------------
90
91class TestFail(Exception):
92        def __init__(self, msg):
93                self.msg = msg
94
95        def __str__(self):
96                return "Test failure: (" + self.msg + ")"
97
98#-------------------------------------------------------------------------------
99
100def tc_name(sub_name):
101        return "rt_bmark.intlat." + sub_name
102
103#-------------------------------------------------------------------------------
104# log() does the same job as print except that a '#' is added at the beginning
105# of each line. This causes TEFEL to ignore it
106
107def log(*msg):
108        tmp = "".join(map(str, msg)) # 'map(str, ...' allows numbers
109        for line in tmp.splitlines():
110                print("#", line)
111
112#-------------------------------------------------------------------------------
113# Like log(), but with a timestamp added
114
115def log_ts(*msg):
116        ts = time.localtime()
117        stamp = "%2d:%02d:%02d: " % (ts.tm_hour, ts.tm_min, ts.tm_sec)
118        log(stamp, *msg)
119
120#-------------------------------------------------------------------------------
121
122def log_test_header(seq_no, nr_of_tests, name):
123        log("=" * 78)
124        log()
125        log("  Test case (%d/%d): %s" % (seq_no, nr_of_tests, tc_name(name)))
126        log()
127        log("." * 78)
128        log()
129
130#-------------------------------------------------------------------------------
131
132def start_stress(*args):
133        stress_cmd         = [ "stress-ng" ]
134        added_stress_types = []
135        req_stress_types   = set(args)
136        cpu_cnt            = str(multiprocessing.cpu_count())
137
138        # The function cond_add_stress appends the options to the stress
139        # command if the stress type is in the set of requested stress types
140
141        def cond_add_stress(stress_type, options):
142                if stress_type in req_stress_types:
143                        req_stress_types.remove(stress_type)
144                        added_stress_types.append(stress_type)
145                        stress_cmd.extend(options)
146
147        #----------
148
149        cond_add_stress("io",  ["-i", cpu_cnt])
150        cond_add_stress("cpu", ["-c", cpu_cnt])
151        cond_add_stress("hdd", ["-d", cpu_cnt, "--hdd-bytes", "20M"])
152        cond_add_stress("vm",  ["-m", cpu_cnt, "--vm-bytes", "10M"])
153
154        unknown = ", ".join(req_stress_types)
155        if unknown != "":
156            raise TestFail("Unknown stress type(s): %s" % unknown)
157
158        if not added_stress_types:
159                log("No stress requested")
160                return None
161
162        added          = "+".join(added_stress_types)
163        stress_cmd_str = " ".join(stress_cmd)
164
165        log("Starting stress(", added, ")")
166        log("  Command: '", stress_cmd_str, "'")
167        log()
168
169        # start_new_session causes stress to be executed in a separate
170        # session, => it gets a new process group (incl. children). It
171        # can then be terminated using os.killpg in end_stress without
172        # terminating this script.
173
174        p = subprocess.Popen(stress_cmd, start_new_session=True)
175
176        return p
177
178#-------------------------------------------------------------------------------
179
180def end_stress(p):
181        if p is None:
182                # The value None indicates that no stress scenario was started
183                return
184
185        if p.poll() is not None:
186                raise TestFail("stress prematurely terminated.")
187
188        os.killpg(os.getpgid(p.pid), signal.SIGTERM)
189        log("Terminated stress")
190
191#-------------------------------------------------------------------------------
192
193def us2hms_str(us):
194        s = (us+500000) // 1000000 # Round microseconds to s
195        m = s//60
196        s -= 60*m;
197        h = m//60
198        m -= 60*h
199
200        return "%d:%02d:%02d" % (h, m, s)
201
202#-------------------------------------------------------------------------------
203# Sometime the hung task supervision is triggered during execution of
204# cyclictest (cyclictest starves stress). To avoid that, the supervision
205# is temporarily disabled
206
207def set_hung_tmo(new_tmo):
208        global has_hung_task_detection
209
210        tmo_file = "/proc/sys/kernel/hung_task_timeout_secs"
211
212        if not has_hung_task_detection:
213                return
214
215        if not os.access(tmo_file, os.W_OK):
216                log("Hung task detection not supported")
217                log("  (File ", tmo_file, " not found)")
218                has_hung_task_detection = False
219                return
220
221        orig_tmo = int(subprocess.check_output(["cat", tmo_file]).strip())
222        if new_tmo != orig_tmo:
223                cmd = ( "echo " + str(new_tmo) + " > " + tmo_file )
224                subprocess.check_output(cmd, shell=True)
225                log("Changed timeout for detection of hung tasks: ",
226                    orig_tmo, " -> ", new_tmo)
227
228        return orig_tmo
229
230#-------------------------------------------------------------------------------
231
232def gen_minmax_list(max_list):
233        res = [min(max_list)]
234
235        while True:
236                tmp = max_list
237                max_list = []
238                while tmp:
239                        max_list.append(max(tmp.pop(0), tmp.pop(0)))
240
241                res.append(min(max_list))
242
243                if len(max_list) < 2:
244                        return res
245
246#-------------------------------------------------------------------------------
247# Parameters for cyclictest:
248#
249# On the -S option (from cyclictest.c):
250#  -S implies options -a -t -n and same priority of all threads
251#    -a: One thread per core
252#    -n: use clock_nanosleep instead of posix interval timers
253#    -t: (without argument) Set number of threads to the number
254#         of cpus
255
256interval_core_0 = 100     # Timer interval on core 0 [us]
257interval_delta  = 20      # Interval increment for each core [us]
258loop_count      = 30000   # Number of loops (on core 0).
259
260cmd = ("cyclictest",
261       "-S",             # Standard SMP testing. See below
262       "-p", "99",       # RT priority 99
263       "-q",             # Quiet mode, i.e. print only a summary
264       "-i", str(interval_core_0),
265       "-d", str(interval_delta),
266       "-l", str(loop_count)
267       )
268rex = re.compile(r"C:\s*(\d+).*Min:\s*(\d+).*Avg:\s*(\d+).*Max:\s*(\d+)")
269
270def run_cyclictest_once():
271        res = subprocess.check_output(cmd)
272
273        # minlist and maxlist are lists with the extremes for each core
274        # avg_cnt is the sum of cycles for all cores
275        # avg_sum is the sum of (cycle count*average) for each core
276        #     Since cyclictest runs different number of cycles on
277        #     different cores, avg_sum/avg_cnt gives a more accurate
278        #     value of the overall average than just taking the average
279        #     of each core's averages
280
281        minlist = []
282        maxlist = []
283        avg_sum = 0
284        avg_cnt = 0
285
286        for line in res.splitlines():
287                m = rex.search(line.decode('utf-8'))
288                if m is not None:
289                        minlist.append(int(m.group(2)))
290                        maxlist.append(int(m.group(4)))
291                        n = int(m.group(1))
292                        avg_sum += n * int(m.group(3))
293                        avg_cnt += n
294
295        return min(minlist), [avg_sum, avg_cnt], max(maxlist)
296
297#-------------------------------------------------------------------------------
298# A precondition for the tracking of min-max values is that
299# the suite size os a power of 2.
300
301N          = 5
302suite_size = 2**N
303
304est_exec_time_once  = interval_core_0 * loop_count
305est_exec_time_suite = suite_size * est_exec_time_once
306
307def run_cyclictest_suite():
308        log("Starting cyclictest")
309        log("  Command          : ", " ".join(cmd))
310        log("  Number of cycles : ", loop_count*suite_size,
311            " (", suite_size, " sets of ", loop_count, " cycles)")
312        log("  Exec. time (est) : ", us2hms_str(est_exec_time_suite))
313        log()
314
315        orig_tmo = set_hung_tmo(0) # 0 => Disable
316
317        # float('inf') emulates infinity. At least in the sense that it is
318        # guaranteed to be larger than any actual number.
319        ack_min = float('inf')
320        ack_avg = [0, 0]
321
322        log()
323        log_ts("Start of execution")
324        t = time.time()
325        max_list = []
326
327        for i in range(0, suite_size):
328                tmp_min, tmp_avg, tmp_max = run_cyclictest_once()
329
330                msg = "%2d/%2d:" % (i+1, suite_size)
331                msg += " min: %4d" % tmp_min
332                msg += " avg: %5.1f" % (float(tmp_avg[0])/tmp_avg[1])
333                msg += " max: %4d" % tmp_max
334                log_ts(msg)
335
336                # Track minimum value
337                if tmp_min < ack_min:
338                        ack_min = tmp_min
339
340                # Track smallest max value
341                max_list.append(tmp_max)
342
343                ack_avg[0] += tmp_avg[0]
344                ack_avg[1] += tmp_avg[1]
345
346        t = time.time()-t
347        log_ts("Cyclictest completed. Actual execution time:",
348               us2hms_str(t*1000000))
349        log()
350        set_hung_tmo(orig_tmo)
351
352        return ack_min, float(ack_avg[0])/ack_avg[1], gen_minmax_list(max_list)
353
354#-------------------------------------------------------------------------------
355
356class cyclictest_runner:
357        def run_test(self, seq_no, nr_of_tests, name, stressparams):
358
359                try:
360                        log_test_header(seq_no, nr_of_tests, name)
361
362                        p = start_stress(*stressparams)
363
364                        bm_min, bm_avg, bm_max_list = run_cyclictest_suite()
365
366                        end_stress(p)
367
368                        bm_max = bm_max_list[-1]
369
370                        log()
371                        log("Min: %d us" % bm_min)
372                        log("Avg: %.1f us" % bm_avg)
373                        log("Max: %d us" % bm_max)
374                        log()
375                        log("Max list: ", bm_max_list)
376                        log()
377                        log("PASS")
378
379                        print()
380                        print(tc_name(name), "[Min/us,Avg/us,Max/us]:",)
381                        print("%d,%.1f,%d" % (bm_min,bm_avg, bm_max))
382                        print("PASS:", tc_name(name))
383                        print()
384
385                except Exception:
386                        log()
387                        log("Exception!")
388                        log()
389                        log("Traceback:", traceback.format_exc())
390                        log()
391                        log("WD: ", os.getcwd())
392                        log()
393                        log("FAIL")
394                        print()
395                        print("FAIL:", tc_name(name))
396                        print()
397
398#-------------------------------------------------------------------------------
399
400runner = cyclictest_runner()
401
402tests = (("no_stress", []),
403         ("cpu",  ["cpu"]),
404         ("hdd",  ["hdd"]),
405         ("io",   ["io"]),
406         ("vm",   ["vm"]),
407         ("full", ["io", "cpu", "hdd", "vm"]),
408         )
409
410nr_of_tests = len(tests)
411for seq_no, params in enumerate(tests, start=1):
412        runner.run_test(seq_no, nr_of_tests, *params)
413