xref: /openbmc/linux/tools/perf/bench/mem-functions.c (revision 4da722ca19f30f7db250db808d1ab1703607a932)
1 /*
2  * mem-memcpy.c
3  *
4  * Simple memcpy() and memset() benchmarks
5  *
6  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
7  */
8 
9 #include "debug.h"
10 #include "../perf.h"
11 #include "../util/util.h"
12 #include <subcmd/parse-options.h>
13 #include "../util/header.h"
14 #include "../util/cloexec.h"
15 #include "../util/string2.h"
16 #include "bench.h"
17 #include "mem-memcpy-arch.h"
18 #include "mem-memset-arch.h"
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/time.h>
24 #include <errno.h>
25 #include <linux/time64.h>
26 
/* Scaling step between the units printed by print_bps() (bytes, KB, MB, GB). */
#define K 1024

/*
 * Benchmark tunables. The initial values are the defaults; the command-line
 * option table points at these variables so parsing can overwrite them.
 */
static const char *size_str = "1MB";		/* -s/--size */
static const char *function_str = "all";	/* -f/--function */
static int nr_loops = 1;			/* -l/--nr_loops */
static bool use_cycles;				/* -c/--cycles */

/* File descriptor of the cycles event, assigned by init_cycles(). */
static int cycles_fd;
34 
35 static const struct option options[] = {
36 	OPT_STRING('s', "size", &size_str, "1MB",
37 		    "Specify the size of the memory buffers. "
38 		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
39 
40 	OPT_STRING('f', "function", &function_str, "all",
41 		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
42 
43 	OPT_INTEGER('l', "nr_loops", &nr_loops,
44 		    "Specify the number of loops to run. (default: 1)"),
45 
46 	OPT_BOOLEAN('c', "cycles", &use_cycles,
47 		    "Use a cycles event instead of gettimeofday() to measure performance"),
48 
49 	OPT_END()
50 };
51 
/* Pointer types for the routines under test; same shape as memcpy()/memset(). */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);
typedef void *(*memset_t)(void *dst, int c, size_t len);

/*
 * One benchmarkable routine. Tables of these end with an entry whose
 * name is NULL.
 */
struct function {
	const char *name;	/* compared against the -f/--function argument */
	const char *desc;	/* description printed next to the name */
	union {
		memcpy_t memcpy;	/* set in the memcpy table */
		memset_t memset;	/* set in the memset table */
	} fn;
};
63 
64 static struct perf_event_attr cycle_attr = {
65 	.type		= PERF_TYPE_HARDWARE,
66 	.config		= PERF_COUNT_HW_CPU_CYCLES
67 };
68 
69 static int init_cycles(void)
70 {
71 	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
72 
73 	if (cycles_fd < 0 && errno == ENOSYS) {
74 		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
75 		return -1;
76 	}
77 
78 	return cycles_fd;
79 }
80 
81 static u64 get_cycles(void)
82 {
83 	int ret;
84 	u64 clk;
85 
86 	ret = read(cycles_fd, &clk, sizeof(u64));
87 	BUG_ON(ret != sizeof(u64));
88 
89 	return clk;
90 }
91 
92 static double timeval2double(struct timeval *ts)
93 {
94 	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
95 }
96 
/*
 * Print a throughput figure, given in bytes per second, as bytes/sec, KB/sec,
 * MB/sec or GB/sec depending on its magnitude (each unit is K times the
 * previous one).
 *
 * The argument is evaluated exactly once and converted to double, so it may
 * be any arithmetic expression.
 */
#define print_bps(x) do {						\
		double bps_ = (x);					\
									\
		if (bps_ < K)						\
			printf(" %14lf bytes/sec\n", bps_);		\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/sec\n", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/sec\n", bps_ / K / K);	\
		else							\
			printf(" %14lf GB/sec\n", bps_ / K / K / K);	\
	} while (0)
107 
108 struct bench_mem_info {
109 	const struct function *functions;
110 	u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
111 	double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
112 	const char *const *usage;
113 	bool alloc_src;
114 };
115 
116 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
117 {
118 	const struct function *r = &info->functions[r_idx];
119 	double result_bps = 0.0;
120 	u64 result_cycles = 0;
121 	void *src = NULL, *dst = zalloc(size);
122 
123 	printf("# function '%s' (%s)\n", r->name, r->desc);
124 
125 	if (dst == NULL)
126 		goto out_alloc_failed;
127 
128 	if (info->alloc_src) {
129 		src = zalloc(size);
130 		if (src == NULL)
131 			goto out_alloc_failed;
132 	}
133 
134 	if (bench_format == BENCH_FORMAT_DEFAULT)
135 		printf("# Copying %s bytes ...\n\n", size_str);
136 
137 	if (use_cycles) {
138 		result_cycles = info->do_cycles(r, size, src, dst);
139 	} else {
140 		result_bps = info->do_gettimeofday(r, size, src, dst);
141 	}
142 
143 	switch (bench_format) {
144 	case BENCH_FORMAT_DEFAULT:
145 		if (use_cycles) {
146 			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
147 		} else {
148 			print_bps(result_bps);
149 		}
150 		break;
151 
152 	case BENCH_FORMAT_SIMPLE:
153 		if (use_cycles) {
154 			printf("%lf\n", (double)result_cycles/size_total);
155 		} else {
156 			printf("%lf\n", result_bps);
157 		}
158 		break;
159 
160 	default:
161 		BUG_ON(1);
162 		break;
163 	}
164 
165 out_free:
166 	free(src);
167 	free(dst);
168 	return;
169 out_alloc_failed:
170 	printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
171 	goto out_free;
172 }
173 
174 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
175 {
176 	int i;
177 	size_t size;
178 	double size_total;
179 
180 	argc = parse_options(argc, argv, options, info->usage, 0);
181 
182 	if (use_cycles) {
183 		i = init_cycles();
184 		if (i < 0) {
185 			fprintf(stderr, "Failed to open cycles counter\n");
186 			return i;
187 		}
188 	}
189 
190 	size = (size_t)perf_atoll((char *)size_str);
191 	size_total = (double)size * nr_loops;
192 
193 	if ((s64)size <= 0) {
194 		fprintf(stderr, "Invalid size:%s\n", size_str);
195 		return 1;
196 	}
197 
198 	if (!strncmp(function_str, "all", 3)) {
199 		for (i = 0; info->functions[i].name; i++)
200 			__bench_mem_function(info, i, size, size_total);
201 		return 0;
202 	}
203 
204 	for (i = 0; info->functions[i].name; i++) {
205 		if (!strcmp(info->functions[i].name, function_str))
206 			break;
207 	}
208 	if (!info->functions[i].name) {
209 		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
210 			printf("Unknown function: %s\n", function_str);
211 		printf("Available functions:\n");
212 		for (i = 0; info->functions[i].name; i++) {
213 			printf("\t%s ... %s\n",
214 			       info->functions[i].name, info->functions[i].desc);
215 		}
216 		return 1;
217 	}
218 
219 	__bench_mem_function(info, i, size, size_total);
220 
221 	return 0;
222 }
223 
224 static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
225 {
226 	u64 cycle_start = 0ULL, cycle_end = 0ULL;
227 	memcpy_t fn = r->fn.memcpy;
228 	int i;
229 
230 	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
231 	memset(src, 0, size);
232 
233 	/*
234 	 * We prefault the freshly allocated memory range here,
235 	 * to not measure page fault overhead:
236 	 */
237 	fn(dst, src, size);
238 
239 	cycle_start = get_cycles();
240 	for (i = 0; i < nr_loops; ++i)
241 		fn(dst, src, size);
242 	cycle_end = get_cycles();
243 
244 	return cycle_end - cycle_start;
245 }
246 
247 static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
248 {
249 	struct timeval tv_start, tv_end, tv_diff;
250 	memcpy_t fn = r->fn.memcpy;
251 	int i;
252 
253 	/*
254 	 * We prefault the freshly allocated memory range here,
255 	 * to not measure page fault overhead:
256 	 */
257 	fn(dst, src, size);
258 
259 	BUG_ON(gettimeofday(&tv_start, NULL));
260 	for (i = 0; i < nr_loops; ++i)
261 		fn(dst, src, size);
262 	BUG_ON(gettimeofday(&tv_end, NULL));
263 
264 	timersub(&tv_end, &tv_start, &tv_diff);
265 
266 	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
267 }
268 
269 struct function memcpy_functions[] = {
270 	{ .name		= "default",
271 	  .desc		= "Default memcpy() provided by glibc",
272 	  .fn.memcpy	= memcpy },
273 
274 #ifdef HAVE_ARCH_X86_64_SUPPORT
275 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
276 # include "mem-memcpy-x86-64-asm-def.h"
277 # undef MEMCPY_FN
278 #endif
279 
280 	{ .name = NULL, }
281 };
282 
/* NULL-terminated usage strings given to parse_options() for the memcpy benchmark. */
static const char * const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>",
	NULL,
};
287 
288 int bench_mem_memcpy(int argc, const char **argv)
289 {
290 	struct bench_mem_info info = {
291 		.functions		= memcpy_functions,
292 		.do_cycles		= do_memcpy_cycles,
293 		.do_gettimeofday	= do_memcpy_gettimeofday,
294 		.usage			= bench_mem_memcpy_usage,
295 		.alloc_src              = true,
296 	};
297 
298 	return bench_mem_common(argc, argv, &info);
299 }
300 
301 static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
302 {
303 	u64 cycle_start = 0ULL, cycle_end = 0ULL;
304 	memset_t fn = r->fn.memset;
305 	int i;
306 
307 	/*
308 	 * We prefault the freshly allocated memory range here,
309 	 * to not measure page fault overhead:
310 	 */
311 	fn(dst, -1, size);
312 
313 	cycle_start = get_cycles();
314 	for (i = 0; i < nr_loops; ++i)
315 		fn(dst, i, size);
316 	cycle_end = get_cycles();
317 
318 	return cycle_end - cycle_start;
319 }
320 
321 static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
322 {
323 	struct timeval tv_start, tv_end, tv_diff;
324 	memset_t fn = r->fn.memset;
325 	int i;
326 
327 	/*
328 	 * We prefault the freshly allocated memory range here,
329 	 * to not measure page fault overhead:
330 	 */
331 	fn(dst, -1, size);
332 
333 	BUG_ON(gettimeofday(&tv_start, NULL));
334 	for (i = 0; i < nr_loops; ++i)
335 		fn(dst, i, size);
336 	BUG_ON(gettimeofday(&tv_end, NULL));
337 
338 	timersub(&tv_end, &tv_start, &tv_diff);
339 
340 	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
341 }
342 
/* NULL-terminated usage strings given to parse_options() for the memset benchmark. */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>",
	NULL,
};
347 
348 static const struct function memset_functions[] = {
349 	{ .name		= "default",
350 	  .desc		= "Default memset() provided by glibc",
351 	  .fn.memset	= memset },
352 
353 #ifdef HAVE_ARCH_X86_64_SUPPORT
354 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
355 # include "mem-memset-x86-64-asm-def.h"
356 # undef MEMSET_FN
357 #endif
358 
359 	{ .name = NULL, }
360 };
361 
362 int bench_mem_memset(int argc, const char **argv)
363 {
364 	struct bench_mem_info info = {
365 		.functions		= memset_functions,
366 		.do_cycles		= do_memset_cycles,
367 		.do_gettimeofday	= do_memset_gettimeofday,
368 		.usage			= bench_mem_memset_usage,
369 	};
370 
371 	return bench_mem_common(argc, argv, &info);
372 }
373