xref: /openbmc/linux/tools/perf/bench/synthesize.c (revision 53e8558837be58c1d44d50ad87247a8c56c95c13)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Benchmark synthesis of perf events such as at the start of a 'perf
4  * record'. Synthesis is done on the current process and the 'dummy' event
5  * handlers are invoked that support dump_trace but otherwise do nothing.
6  *
7  * Copyright 2019 Google LLC.
8  */
9 #include <stdio.h>
10 #include "bench.h"
11 #include "../util/debug.h"
12 #include "../util/session.h"
13 #include "../util/stat.h"
14 #include "../util/synthetic-events.h"
15 #include "../util/target.h"
16 #include "../util/thread_map.h"
17 #include "../util/tool.h"
18 #include "../util/util.h"
19 #include <linux/atomic.h>
20 #include <linux/err.h>
21 #include <linux/time64.h>
22 #include <subcmd/parse-options.h>
23 
24 static unsigned int min_threads = 1;
25 static unsigned int max_threads = UINT_MAX;
26 static unsigned int single_iterations = 10000;
27 static unsigned int multi_iterations = 10;
28 static bool run_st;
29 static bool run_mt;
30 
31 static const struct option options[] = {
32 	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
33 	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
34 	OPT_UINTEGER('m', "min-threads", &min_threads,
35 		"Minimum number of threads in multithreaded bench"),
36 	OPT_UINTEGER('M', "max-threads", &max_threads,
37 		"Maximum number of threads in multithreaded bench"),
38 	OPT_UINTEGER('i', "single-iterations", &single_iterations,
39 		"Number of iterations used to compute single-threaded average"),
40 	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
41 		"Number of iterations used to compute multi-threaded average"),
42 	OPT_END()
43 };
44 
45 static const char *const bench_usage[] = {
46 	"perf bench internals synthesize <options>",
47 	NULL
48 };
49 
50 static atomic_t event_count;
51 
52 static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
53 				     union perf_event *event __maybe_unused,
54 				     struct perf_sample *sample __maybe_unused,
55 				     struct machine *machine __maybe_unused)
56 {
57 	atomic_inc(&event_count);
58 	return 0;
59 }
60 
61 static int do_run_single_threaded(struct perf_session *session,
62 				struct perf_thread_map *threads,
63 				struct target *target, bool data_mmap)
64 {
65 	const unsigned int nr_threads_synthesize = 1;
66 	struct timeval start, end, diff;
67 	u64 runtime_us;
68 	unsigned int i;
69 	double time_average, time_stddev, event_average, event_stddev;
70 	int err;
71 	struct stats time_stats, event_stats;
72 
73 	init_stats(&time_stats);
74 	init_stats(&event_stats);
75 
76 	for (i = 0; i < single_iterations; i++) {
77 		atomic_set(&event_count, 0);
78 		gettimeofday(&start, NULL);
79 		err = __machine__synthesize_threads(&session->machines.host,
80 						NULL,
81 						target, threads,
82 						process_synthesized_event,
83 						true, data_mmap,
84 						nr_threads_synthesize);
85 		if (err)
86 			return err;
87 
88 		gettimeofday(&end, NULL);
89 		timersub(&end, &start, &diff);
90 		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
91 		update_stats(&time_stats, runtime_us);
92 		update_stats(&event_stats, atomic_read(&event_count));
93 	}
94 
95 	time_average = avg_stats(&time_stats);
96 	time_stddev = stddev_stats(&time_stats);
97 	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
98 		data_mmap ? "data " : "", time_average, time_stddev);
99 
100 	event_average = avg_stats(&event_stats);
101 	event_stddev = stddev_stats(&event_stats);
102 	printf("  Average num. events: %.3f (+- %.3f)\n",
103 		event_average, event_stddev);
104 
105 	printf("  Average time per event %.3f usec\n",
106 		time_average / event_average);
107 	return 0;
108 }
109 
110 static int run_single_threaded(void)
111 {
112 	struct perf_session *session;
113 	struct target target = {
114 		.pid = "self",
115 	};
116 	struct perf_thread_map *threads;
117 	int err;
118 
119 	perf_set_singlethreaded();
120 	session = perf_session__new(NULL, NULL);
121 	if (IS_ERR(session)) {
122 		pr_err("Session creation failed.\n");
123 		return PTR_ERR(session);
124 	}
125 	threads = thread_map__new_by_pid(getpid());
126 	if (!threads) {
127 		pr_err("Thread map creation failed.\n");
128 		err = -ENOMEM;
129 		goto err_out;
130 	}
131 
132 	puts(
133 "Computing performance of single threaded perf event synthesis by\n"
134 "synthesizing events on the perf process itself:");
135 
136 	err = do_run_single_threaded(session, threads, &target, false);
137 	if (err)
138 		goto err_out;
139 
140 	err = do_run_single_threaded(session, threads, &target, true);
141 
142 err_out:
143 	if (threads)
144 		perf_thread_map__put(threads);
145 
146 	perf_session__delete(session);
147 	return err;
148 }
149 
150 static int do_run_multi_threaded(struct target *target,
151 				unsigned int nr_threads_synthesize)
152 {
153 	struct timeval start, end, diff;
154 	u64 runtime_us;
155 	unsigned int i;
156 	double time_average, time_stddev, event_average, event_stddev;
157 	int err;
158 	struct stats time_stats, event_stats;
159 	struct perf_session *session;
160 
161 	init_stats(&time_stats);
162 	init_stats(&event_stats);
163 	for (i = 0; i < multi_iterations; i++) {
164 		session = perf_session__new(NULL, NULL);
165 		if (IS_ERR(session))
166 			return PTR_ERR(session);
167 
168 		atomic_set(&event_count, 0);
169 		gettimeofday(&start, NULL);
170 		err = __machine__synthesize_threads(&session->machines.host,
171 						NULL,
172 						target, NULL,
173 						process_synthesized_event,
174 						true, false,
175 						nr_threads_synthesize);
176 		if (err) {
177 			perf_session__delete(session);
178 			return err;
179 		}
180 
181 		gettimeofday(&end, NULL);
182 		timersub(&end, &start, &diff);
183 		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
184 		update_stats(&time_stats, runtime_us);
185 		update_stats(&event_stats, atomic_read(&event_count));
186 		perf_session__delete(session);
187 	}
188 
189 	time_average = avg_stats(&time_stats);
190 	time_stddev = stddev_stats(&time_stats);
191 	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
192 		time_average, time_stddev);
193 
194 	event_average = avg_stats(&event_stats);
195 	event_stddev = stddev_stats(&event_stats);
196 	printf("    Average num. events: %.3f (+- %.3f)\n",
197 		event_average, event_stddev);
198 
199 	printf("    Average time per event %.3f usec\n",
200 		time_average / event_average);
201 	return 0;
202 }
203 
204 static int run_multi_threaded(void)
205 {
206 	struct target target = {
207 		.cpu_list = "0"
208 	};
209 	unsigned int nr_threads_synthesize;
210 	int err;
211 
212 	if (max_threads == UINT_MAX)
213 		max_threads = sysconf(_SC_NPROCESSORS_ONLN);
214 
215 	puts(
216 "Computing performance of multi threaded perf event synthesis by\n"
217 "synthesizing events on CPU 0:");
218 
219 	for (nr_threads_synthesize = min_threads;
220 	     nr_threads_synthesize <= max_threads;
221 	     nr_threads_synthesize++) {
222 		if (nr_threads_synthesize == 1)
223 			perf_set_singlethreaded();
224 		else
225 			perf_set_multithreaded();
226 
227 		printf("  Number of synthesis threads: %u\n",
228 			nr_threads_synthesize);
229 
230 		err = do_run_multi_threaded(&target, nr_threads_synthesize);
231 		if (err)
232 			return err;
233 	}
234 	perf_set_singlethreaded();
235 	return 0;
236 }
237 
238 int bench_synthesize(int argc, const char **argv)
239 {
240 	int err = 0;
241 
242 	argc = parse_options(argc, argv, options, bench_usage, 0);
243 	if (argc) {
244 		usage_with_options(bench_usage, options);
245 		exit(EXIT_FAILURE);
246 	}
247 
248 	/*
249 	 * If neither single threaded or multi-threaded are specified, default
250 	 * to running just single threaded.
251 	 */
252 	if (!run_st && !run_mt)
253 		run_st = true;
254 
255 	if (run_st)
256 		err = run_single_threaded();
257 
258 	if (!err && run_mt)
259 		err = run_multi_threaded();
260 
261 	return err;
262 }
263