1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * mem-memcpy.c 4 * 5 * Simple memcpy() and memset() benchmarks 6 * 7 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 8 */ 9 10 #include "debug.h" 11 #include "../perf.h" 12 #include <subcmd/parse-options.h> 13 #include "../util/header.h" 14 #include "../util/cloexec.h" 15 #include "../util/string2.h" 16 #include "bench.h" 17 #include "mem-memcpy-arch.h" 18 #include "mem-memset-arch.h" 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <sys/time.h> 24 #include <errno.h> 25 #include <linux/time64.h> 26 #include <linux/zalloc.h> 27 28 #define K 1024 29 30 static const char *size_str = "1MB"; 31 static const char *function_str = "all"; 32 static int nr_loops = 1; 33 static bool use_cycles; 34 static int cycles_fd; 35 36 static const struct option options[] = { 37 OPT_STRING('s', "size", &size_str, "1MB", 38 "Specify the size of the memory buffers. " 39 "Available units: B, KB, MB, GB and TB (case insensitive)"), 40 41 OPT_STRING('f', "function", &function_str, "all", 42 "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), 43 44 OPT_INTEGER('l', "nr_loops", &nr_loops, 45 "Specify the number of loops to run. (default: 1)"), 46 47 OPT_BOOLEAN('c', "cycles", &use_cycles, 48 "Use a cycles event instead of gettimeofday() to measure performance"), 49 50 OPT_END() 51 }; 52 53 typedef void *(*memcpy_t)(void *, const void *, size_t); 54 typedef void *(*memset_t)(void *, int, size_t); 55 56 struct function { 57 const char *name; 58 const char *desc; 59 union { 60 memcpy_t memcpy; 61 memset_t memset; 62 } fn; 63 }; 64 65 static struct perf_event_attr cycle_attr = { 66 .type = PERF_TYPE_HARDWARE, 67 .config = PERF_COUNT_HW_CPU_CYCLES 68 }; 69 70 static int init_cycles(void) 71 { 72 cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); 73 74 if (cycles_fd < 0 && errno == ENOSYS) { 75 pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 76 return -1; 77 } 78 79 return cycles_fd; 80 } 81 82 static u64 get_cycles(void) 83 { 84 int ret; 85 u64 clk; 86 87 ret = read(cycles_fd, &clk, sizeof(u64)); 88 BUG_ON(ret != sizeof(u64)); 89 90 return clk; 91 } 92 93 static double timeval2double(struct timeval *ts) 94 { 95 return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; 96 } 97 98 #define print_bps(x) do { \ 99 if (x < K) \ 100 printf(" %14lf bytes/sec\n", x); \ 101 else if (x < K * K) \ 102 printf(" %14lfd KB/sec\n", x / K); \ 103 else if (x < K * K * K) \ 104 printf(" %14lf MB/sec\n", x / K / K); \ 105 else \ 106 printf(" %14lf GB/sec\n", x / K / K / K); \ 107 } while (0) 108 109 struct bench_mem_info { 110 const struct function *functions; 111 u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); 112 double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); 113 const char *const *usage; 114 bool alloc_src; 115 }; 116 117 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) 118 { 119 const struct function *r = &info->functions[r_idx]; 120 double result_bps = 0.0; 121 u64 result_cycles = 0; 122 void *src = NULL, *dst = zalloc(size); 123 124 printf("# function '%s' (%s)\n", r->name, r->desc); 125 126 if (dst == NULL) 127 goto out_alloc_failed; 128 129 if (info->alloc_src) { 130 src = zalloc(size); 131 if (src == NULL) 132 goto out_alloc_failed; 133 } 134 135 if (bench_format == BENCH_FORMAT_DEFAULT) 136 printf("# Copying %s bytes ...\n\n", size_str); 137 138 if (use_cycles) { 139 result_cycles = info->do_cycles(r, size, src, dst); 140 } else { 141 result_bps = info->do_gettimeofday(r, size, src, dst); 142 } 143 144 switch (bench_format) { 145 case BENCH_FORMAT_DEFAULT: 146 if (use_cycles) { 147 printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); 148 } else { 149 print_bps(result_bps); 150 } 151 break; 152 153 case BENCH_FORMAT_SIMPLE: 154 if (use_cycles) { 155 printf("%lf\n", (double)result_cycles/size_total); 156 } else { 157 printf("%lf\n", result_bps); 158 } 159 break; 160 161 default: 162 BUG_ON(1); 163 break; 164 } 165 166 out_free: 167 free(src); 168 free(dst); 169 return; 170 out_alloc_failed: 171 printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str); 172 goto out_free; 173 } 174 175 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) 176 { 177 int i; 178 size_t size; 179 double size_total; 180 181 argc = parse_options(argc, argv, options, info->usage, 0); 182 183 if (use_cycles) { 184 i = init_cycles(); 185 if (i < 0) { 186 fprintf(stderr, "Failed to open cycles counter\n"); 187 return i; 188 } 189 } 190 191 size = (size_t)perf_atoll((char *)size_str); 192 size_total = (double)size * nr_loops; 193 194 if ((s64)size <= 0) { 195 fprintf(stderr, "Invalid size:%s\n", size_str); 196 return 1; 197 } 198 199 if (!strncmp(function_str, "all", 3)) { 200 for (i = 0; info->functions[i].name; i++) 201 __bench_mem_function(info, i, size, size_total); 202 return 0; 203 } 204 205 for (i = 0; info->functions[i].name; i++) { 206 if (!strcmp(info->functions[i].name, function_str)) 207 break; 208 } 209 if (!info->functions[i].name) { 210 if (strcmp(function_str, "help") && strcmp(function_str, "h")) 211 printf("Unknown function: %s\n", function_str); 212 printf("Available functions:\n"); 213 for (i = 0; info->functions[i].name; i++) { 214 printf("\t%s ... %s\n", 215 info->functions[i].name, info->functions[i].desc); 216 } 217 return 1; 218 } 219 220 __bench_mem_function(info, i, size, size_total); 221 222 return 0; 223 } 224 225 static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) 226 { 227 u64 cycle_start = 0ULL, cycle_end = 0ULL; 228 memcpy_t fn = r->fn.memcpy; 229 int i; 230 231 /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 232 memset(src, 0, size); 233 234 /* 235 * We prefault the freshly allocated memory range here, 236 * to not measure page fault overhead: 237 */ 238 fn(dst, src, size); 239 240 cycle_start = get_cycles(); 241 for (i = 0; i < nr_loops; ++i) 242 fn(dst, src, size); 243 cycle_end = get_cycles(); 244 245 return cycle_end - cycle_start; 246 } 247 248 static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) 249 { 250 struct timeval tv_start, tv_end, tv_diff; 251 memcpy_t fn = r->fn.memcpy; 252 int i; 253 254 /* 255 * We prefault the freshly allocated memory range here, 256 * to not measure page fault overhead: 257 */ 258 fn(dst, src, size); 259 260 BUG_ON(gettimeofday(&tv_start, NULL)); 261 for (i = 0; i < nr_loops; ++i) 262 fn(dst, src, size); 263 BUG_ON(gettimeofday(&tv_end, NULL)); 264 265 timersub(&tv_end, &tv_start, &tv_diff); 266 267 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 268 } 269 270 struct function memcpy_functions[] = { 271 { .name = "default", 272 .desc = "Default memcpy() provided by glibc", 273 .fn.memcpy = memcpy }, 274 275 #ifdef HAVE_ARCH_X86_64_SUPPORT 276 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 277 # include "mem-memcpy-x86-64-asm-def.h" 278 # undef MEMCPY_FN 279 #endif 280 281 { .name = NULL, } 282 }; 283 284 static const char * const bench_mem_memcpy_usage[] = { 285 "perf bench mem memcpy <options>", 286 NULL 287 }; 288 289 int bench_mem_memcpy(int argc, const char **argv) 290 { 291 struct bench_mem_info info = { 292 .functions = memcpy_functions, 293 .do_cycles = do_memcpy_cycles, 294 .do_gettimeofday = do_memcpy_gettimeofday, 295 .usage = bench_mem_memcpy_usage, 296 .alloc_src = true, 297 }; 298 299 return bench_mem_common(argc, argv, &info); 300 } 301 302 static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 303 { 304 u64 cycle_start = 0ULL, cycle_end = 0ULL; 305 memset_t fn = r->fn.memset; 306 int i; 307 308 /* 309 * We prefault the freshly allocated memory range here, 310 * to not measure page fault overhead: 311 */ 312 fn(dst, -1, size); 313 314 cycle_start = get_cycles(); 315 for (i = 0; i < nr_loops; ++i) 316 fn(dst, i, size); 317 cycle_end = get_cycles(); 318 319 return cycle_end - cycle_start; 320 } 321 322 static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 323 { 324 struct timeval tv_start, tv_end, tv_diff; 325 memset_t fn = r->fn.memset; 326 int i; 327 328 /* 329 * We prefault the freshly allocated memory range here, 330 * to not measure page fault overhead: 331 */ 332 fn(dst, -1, size); 333 334 BUG_ON(gettimeofday(&tv_start, NULL)); 335 for (i = 0; i < nr_loops; ++i) 336 fn(dst, i, size); 337 BUG_ON(gettimeofday(&tv_end, NULL)); 338 339 timersub(&tv_end, &tv_start, &tv_diff); 340 341 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 342 } 343 344 static const char * const bench_mem_memset_usage[] = { 345 "perf bench mem memset <options>", 346 NULL 347 }; 348 349 static const struct function memset_functions[] = { 350 { .name = "default", 351 .desc = "Default memset() provided by glibc", 352 .fn.memset = memset }, 353 354 #ifdef HAVE_ARCH_X86_64_SUPPORT 355 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 356 # include "mem-memset-x86-64-asm-def.h" 357 # undef MEMSET_FN 358 #endif 359 360 { .name = NULL, } 361 }; 362 363 int bench_mem_memset(int argc, const char **argv) 364 { 365 struct bench_mem_info info = { 366 .functions = memset_functions, 367 .do_cycles = do_memset_cycles, 368 .do_gettimeofday = do_memset_gettimeofday, 369 .usage = bench_mem_memset_usage, 370 }; 371 372 return bench_mem_common(argc, argv, &info); 373 } 374