1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * mem-memcpy.c 4 * 5 * Simple memcpy() and memset() benchmarks 6 * 7 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 8 */ 9 10 #include "debug.h" 11 #include "../perf-sys.h" 12 #include <subcmd/parse-options.h> 13 #include "../util/header.h" 14 #include "../util/cloexec.h" 15 #include "../util/string2.h" 16 #include "bench.h" 17 #include "mem-memcpy-arch.h" 18 #include "mem-memset-arch.h" 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <unistd.h> 24 #include <sys/time.h> 25 #include <errno.h> 26 #include <linux/time64.h> 27 #include <linux/zalloc.h> 28 29 #define K 1024 30 31 static const char *size_str = "1MB"; 32 static const char *function_str = "all"; 33 static int nr_loops = 1; 34 static bool use_cycles; 35 static int cycles_fd; 36 37 static const struct option options[] = { 38 OPT_STRING('s', "size", &size_str, "1MB", 39 "Specify the size of the memory buffers. " 40 "Available units: B, KB, MB, GB and TB (case insensitive)"), 41 42 OPT_STRING('f', "function", &function_str, "all", 43 "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), 44 45 OPT_INTEGER('l', "nr_loops", &nr_loops, 46 "Specify the number of loops to run. (default: 1)"), 47 48 OPT_BOOLEAN('c', "cycles", &use_cycles, 49 "Use a cycles event instead of gettimeofday() to measure performance"), 50 51 OPT_END() 52 }; 53 54 typedef void *(*memcpy_t)(void *, const void *, size_t); 55 typedef void *(*memset_t)(void *, int, size_t); 56 57 struct function { 58 const char *name; 59 const char *desc; 60 union { 61 memcpy_t memcpy; 62 memset_t memset; 63 } fn; 64 }; 65 66 static struct perf_event_attr cycle_attr = { 67 .type = PERF_TYPE_HARDWARE, 68 .config = PERF_COUNT_HW_CPU_CYCLES 69 }; 70 71 static int init_cycles(void) 72 { 73 cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); 74 75 if (cycles_fd < 0 && errno == ENOSYS) { 76 pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 77 return -1; 78 } 79 80 return cycles_fd; 81 } 82 83 static u64 get_cycles(void) 84 { 85 int ret; 86 u64 clk; 87 88 ret = read(cycles_fd, &clk, sizeof(u64)); 89 BUG_ON(ret != sizeof(u64)); 90 91 return clk; 92 } 93 94 static double timeval2double(struct timeval *ts) 95 { 96 return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; 97 } 98 99 #define print_bps(x) do { \ 100 if (x < K) \ 101 printf(" %14lf bytes/sec\n", x); \ 102 else if (x < K * K) \ 103 printf(" %14lfd KB/sec\n", x / K); \ 104 else if (x < K * K * K) \ 105 printf(" %14lf MB/sec\n", x / K / K); \ 106 else \ 107 printf(" %14lf GB/sec\n", x / K / K / K); \ 108 } while (0) 109 110 struct bench_mem_info { 111 const struct function *functions; 112 u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); 113 double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); 114 const char *const *usage; 115 bool alloc_src; 116 }; 117 118 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) 119 { 120 const struct function *r = &info->functions[r_idx]; 121 double result_bps = 0.0; 122 u64 result_cycles = 0; 123 void *src = NULL, *dst = zalloc(size); 124 125 printf("# function '%s' (%s)\n", r->name, r->desc); 126 127 if (dst == NULL) 128 goto out_alloc_failed; 129 130 if (info->alloc_src) { 131 src = zalloc(size); 132 if (src == NULL) 133 goto out_alloc_failed; 134 } 135 136 if (bench_format == BENCH_FORMAT_DEFAULT) 137 printf("# Copying %s bytes ...\n\n", size_str); 138 139 if (use_cycles) { 140 result_cycles = info->do_cycles(r, size, src, dst); 141 } else { 142 result_bps = info->do_gettimeofday(r, size, src, dst); 143 } 144 145 switch (bench_format) { 146 case BENCH_FORMAT_DEFAULT: 147 if (use_cycles) { 148 printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); 149 } else { 150 print_bps(result_bps); 151 } 152 break; 153 154 case BENCH_FORMAT_SIMPLE: 155 if (use_cycles) { 156 printf("%lf\n", (double)result_cycles/size_total); 157 } else { 158 printf("%lf\n", result_bps); 159 } 160 break; 161 162 default: 163 BUG_ON(1); 164 break; 165 } 166 167 out_free: 168 free(src); 169 free(dst); 170 return; 171 out_alloc_failed: 172 printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str); 173 goto out_free; 174 } 175 176 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) 177 { 178 int i; 179 size_t size; 180 double size_total; 181 182 argc = parse_options(argc, argv, options, info->usage, 0); 183 184 if (use_cycles) { 185 i = init_cycles(); 186 if (i < 0) { 187 fprintf(stderr, "Failed to open cycles counter\n"); 188 return i; 189 } 190 } 191 192 size = (size_t)perf_atoll((char *)size_str); 193 size_total = (double)size * nr_loops; 194 195 if ((s64)size <= 0) { 196 fprintf(stderr, "Invalid size:%s\n", size_str); 197 return 1; 198 } 199 200 if (!strncmp(function_str, "all", 3)) { 201 for (i = 0; info->functions[i].name; i++) 202 __bench_mem_function(info, i, size, size_total); 203 return 0; 204 } 205 206 for (i = 0; info->functions[i].name; i++) { 207 if (!strcmp(info->functions[i].name, function_str)) 208 break; 209 } 210 if (!info->functions[i].name) { 211 if (strcmp(function_str, "help") && strcmp(function_str, "h")) 212 printf("Unknown function: %s\n", function_str); 213 printf("Available functions:\n"); 214 for (i = 0; info->functions[i].name; i++) { 215 printf("\t%s ... %s\n", 216 info->functions[i].name, info->functions[i].desc); 217 } 218 return 1; 219 } 220 221 __bench_mem_function(info, i, size, size_total); 222 223 return 0; 224 } 225 226 static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) 227 { 228 /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 229 memset(src, 0, size); 230 231 /* 232 * We prefault the freshly allocated memory range here, 233 * to not measure page fault overhead: 234 */ 235 fn(dst, src, size); 236 } 237 238 static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) 239 { 240 u64 cycle_start = 0ULL, cycle_end = 0ULL; 241 memcpy_t fn = r->fn.memcpy; 242 int i; 243 244 memcpy_prefault(fn, size, src, dst); 245 246 cycle_start = get_cycles(); 247 for (i = 0; i < nr_loops; ++i) 248 fn(dst, src, size); 249 cycle_end = get_cycles(); 250 251 return cycle_end - cycle_start; 252 } 253 254 static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) 255 { 256 struct timeval tv_start, tv_end, tv_diff; 257 memcpy_t fn = r->fn.memcpy; 258 int i; 259 260 memcpy_prefault(fn, size, src, dst); 261 262 BUG_ON(gettimeofday(&tv_start, NULL)); 263 for (i = 0; i < nr_loops; ++i) 264 fn(dst, src, size); 265 BUG_ON(gettimeofday(&tv_end, NULL)); 266 267 timersub(&tv_end, &tv_start, &tv_diff); 268 269 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 270 } 271 272 struct function memcpy_functions[] = { 273 { .name = "default", 274 .desc = "Default memcpy() provided by glibc", 275 .fn.memcpy = memcpy }, 276 277 #ifdef HAVE_ARCH_X86_64_SUPPORT 278 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 279 # include "mem-memcpy-x86-64-asm-def.h" 280 # undef MEMCPY_FN 281 #endif 282 283 { .name = NULL, } 284 }; 285 286 static const char * const bench_mem_memcpy_usage[] = { 287 "perf bench mem memcpy <options>", 288 NULL 289 }; 290 291 int bench_mem_memcpy(int argc, const char **argv) 292 { 293 struct bench_mem_info info = { 294 .functions = memcpy_functions, 295 .do_cycles = do_memcpy_cycles, 296 .do_gettimeofday = do_memcpy_gettimeofday, 297 .usage = bench_mem_memcpy_usage, 298 .alloc_src = true, 299 }; 300 301 return bench_mem_common(argc, argv, &info); 302 } 303 304 static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 305 { 306 u64 cycle_start = 0ULL, cycle_end = 0ULL; 307 memset_t fn = r->fn.memset; 308 int i; 309 310 /* 311 * We prefault the freshly allocated memory range here, 312 * to not measure page fault overhead: 313 */ 314 fn(dst, -1, size); 315 316 cycle_start = get_cycles(); 317 for (i = 0; i < nr_loops; ++i) 318 fn(dst, i, size); 319 cycle_end = get_cycles(); 320 321 return cycle_end - cycle_start; 322 } 323 324 static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 325 { 326 struct timeval tv_start, tv_end, tv_diff; 327 memset_t fn = r->fn.memset; 328 int i; 329 330 /* 331 * We prefault the freshly allocated memory range here, 332 * to not measure page fault overhead: 333 */ 334 fn(dst, -1, size); 335 336 BUG_ON(gettimeofday(&tv_start, NULL)); 337 for (i = 0; i < nr_loops; ++i) 338 fn(dst, i, size); 339 BUG_ON(gettimeofday(&tv_end, NULL)); 340 341 timersub(&tv_end, &tv_start, &tv_diff); 342 343 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 344 } 345 346 static const char * const bench_mem_memset_usage[] = { 347 "perf bench mem memset <options>", 348 NULL 349 }; 350 351 static const struct function memset_functions[] = { 352 { .name = "default", 353 .desc = "Default memset() provided by glibc", 354 .fn.memset = memset }, 355 356 #ifdef HAVE_ARCH_X86_64_SUPPORT 357 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 358 # include "mem-memset-x86-64-asm-def.h" 359 # undef MEMSET_FN 360 #endif 361 362 { .name = NULL, } 363 }; 364 365 int bench_mem_memset(int argc, const char **argv) 366 { 367 struct bench_mem_info info = { 368 .functions = memset_functions, 369 .do_cycles = do_memset_cycles, 370 .do_gettimeofday = do_memset_gettimeofday, 371 .usage = bench_mem_memset_usage, 372 }; 373 374 return bench_mem_common(argc, argv, &info); 375 } 376