1 /* 2 * mem-memcpy.c 3 * 4 * Simple memcpy() and memset() benchmarks 5 * 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 7 */ 8 9 #include "debug.h" 10 #include "../perf.h" 11 #include "../util/util.h" 12 #include <subcmd/parse-options.h> 13 #include "../util/header.h" 14 #include "../util/cloexec.h" 15 #include "bench.h" 16 #include "mem-memcpy-arch.h" 17 #include "mem-memset-arch.h" 18 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include <sys/time.h> 23 #include <errno.h> 24 #include <linux/time64.h> 25 26 #define K 1024 27 28 static const char *size_str = "1MB"; 29 static const char *function_str = "all"; 30 static int nr_loops = 1; 31 static bool use_cycles; 32 static int cycles_fd; 33 34 static const struct option options[] = { 35 OPT_STRING('s', "size", &size_str, "1MB", 36 "Specify the size of the memory buffers. " 37 "Available units: B, KB, MB, GB and TB (case insensitive)"), 38 39 OPT_STRING('f', "function", &function_str, "all", 40 "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), 41 42 OPT_INTEGER('l', "nr_loops", &nr_loops, 43 "Specify the number of loops to run. (default: 1)"), 44 45 OPT_BOOLEAN('c', "cycles", &use_cycles, 46 "Use a cycles event instead of gettimeofday() to measure performance"), 47 48 OPT_END() 49 }; 50 51 typedef void *(*memcpy_t)(void *, const void *, size_t); 52 typedef void *(*memset_t)(void *, int, size_t); 53 54 struct function { 55 const char *name; 56 const char *desc; 57 union { 58 memcpy_t memcpy; 59 memset_t memset; 60 } fn; 61 }; 62 63 static struct perf_event_attr cycle_attr = { 64 .type = PERF_TYPE_HARDWARE, 65 .config = PERF_COUNT_HW_CPU_CYCLES 66 }; 67 68 static int init_cycles(void) 69 { 70 cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); 71 72 if (cycles_fd < 0 && errno == ENOSYS) { 73 pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 74 return -1; 75 } 76 77 return cycles_fd; 78 } 79 80 static u64 get_cycles(void) 81 { 82 int ret; 83 u64 clk; 84 85 ret = read(cycles_fd, &clk, sizeof(u64)); 86 BUG_ON(ret != sizeof(u64)); 87 88 return clk; 89 } 90 91 static double timeval2double(struct timeval *ts) 92 { 93 return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; 94 } 95 96 #define print_bps(x) do { \ 97 if (x < K) \ 98 printf(" %14lf bytes/sec\n", x); \ 99 else if (x < K * K) \ 100 printf(" %14lfd KB/sec\n", x / K); \ 101 else if (x < K * K * K) \ 102 printf(" %14lf MB/sec\n", x / K / K); \ 103 else \ 104 printf(" %14lf GB/sec\n", x / K / K / K); \ 105 } while (0) 106 107 struct bench_mem_info { 108 const struct function *functions; 109 u64 (*do_cycles)(const struct function *r, size_t size); 110 double (*do_gettimeofday)(const struct function *r, size_t size); 111 const char *const *usage; 112 }; 113 114 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) 115 { 116 const struct function *r = &info->functions[r_idx]; 117 double result_bps = 0.0; 118 u64 result_cycles = 0; 119 120 printf("# function '%s' (%s)\n", r->name, r->desc); 121 122 if (bench_format == BENCH_FORMAT_DEFAULT) 123 printf("# Copying %s bytes ...\n\n", size_str); 124 125 if (use_cycles) { 126 result_cycles = info->do_cycles(r, size); 127 } else { 128 result_bps = info->do_gettimeofday(r, size); 129 } 130 131 switch (bench_format) { 132 case BENCH_FORMAT_DEFAULT: 133 if (use_cycles) { 134 printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); 135 } else { 136 print_bps(result_bps); 137 } 138 break; 139 140 case BENCH_FORMAT_SIMPLE: 141 if (use_cycles) { 142 printf("%lf\n", (double)result_cycles/size_total); 143 } else { 144 printf("%lf\n", result_bps); 145 } 146 break; 147 148 default: 149 BUG_ON(1); 150 break; 151 } 152 } 153 154 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) 155 { 156 int i; 157 size_t size; 158 double size_total; 159 160 argc = parse_options(argc, argv, options, info->usage, 0); 161 162 if (use_cycles) { 163 i = init_cycles(); 164 if (i < 0) { 165 fprintf(stderr, "Failed to open cycles counter\n"); 166 return i; 167 } 168 } 169 170 size = (size_t)perf_atoll((char *)size_str); 171 size_total = (double)size * nr_loops; 172 173 if ((s64)size <= 0) { 174 fprintf(stderr, "Invalid size:%s\n", size_str); 175 return 1; 176 } 177 178 if (!strncmp(function_str, "all", 3)) { 179 for (i = 0; info->functions[i].name; i++) 180 __bench_mem_function(info, i, size, size_total); 181 return 0; 182 } 183 184 for (i = 0; info->functions[i].name; i++) { 185 if (!strcmp(info->functions[i].name, function_str)) 186 break; 187 } 188 if (!info->functions[i].name) { 189 if (strcmp(function_str, "help") && strcmp(function_str, "h")) 190 printf("Unknown function: %s\n", function_str); 191 printf("Available functions:\n"); 192 for (i = 0; info->functions[i].name; i++) { 193 printf("\t%s ... %s\n", 194 info->functions[i].name, info->functions[i].desc); 195 } 196 return 1; 197 } 198 199 __bench_mem_function(info, i, size, size_total); 200 201 return 0; 202 } 203 204 static void memcpy_alloc_mem(void **dst, void **src, size_t size) 205 { 206 *dst = zalloc(size); 207 if (!*dst) 208 die("memory allocation failed - maybe size is too large?\n"); 209 210 *src = zalloc(size); 211 if (!*src) 212 die("memory allocation failed - maybe size is too large?\n"); 213 214 /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 215 memset(*src, 0, size); 216 } 217 218 static u64 do_memcpy_cycles(const struct function *r, size_t size) 219 { 220 u64 cycle_start = 0ULL, cycle_end = 0ULL; 221 void *src = NULL, *dst = NULL; 222 memcpy_t fn = r->fn.memcpy; 223 int i; 224 225 memcpy_alloc_mem(&dst, &src, size); 226 227 /* 228 * We prefault the freshly allocated memory range here, 229 * to not measure page fault overhead: 230 */ 231 fn(dst, src, size); 232 233 cycle_start = get_cycles(); 234 for (i = 0; i < nr_loops; ++i) 235 fn(dst, src, size); 236 cycle_end = get_cycles(); 237 238 free(src); 239 free(dst); 240 return cycle_end - cycle_start; 241 } 242 243 static double do_memcpy_gettimeofday(const struct function *r, size_t size) 244 { 245 struct timeval tv_start, tv_end, tv_diff; 246 memcpy_t fn = r->fn.memcpy; 247 void *src = NULL, *dst = NULL; 248 int i; 249 250 memcpy_alloc_mem(&dst, &src, size); 251 252 /* 253 * We prefault the freshly allocated memory range here, 254 * to not measure page fault overhead: 255 */ 256 fn(dst, src, size); 257 258 BUG_ON(gettimeofday(&tv_start, NULL)); 259 for (i = 0; i < nr_loops; ++i) 260 fn(dst, src, size); 261 BUG_ON(gettimeofday(&tv_end, NULL)); 262 263 timersub(&tv_end, &tv_start, &tv_diff); 264 265 free(src); 266 free(dst); 267 268 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 269 } 270 271 struct function memcpy_functions[] = { 272 { .name = "default", 273 .desc = "Default memcpy() provided by glibc", 274 .fn.memcpy = memcpy }, 275 276 #ifdef HAVE_ARCH_X86_64_SUPPORT 277 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 278 # include "mem-memcpy-x86-64-asm-def.h" 279 # undef MEMCPY_FN 280 #endif 281 282 { .name = NULL, } 283 }; 284 285 static const char * const bench_mem_memcpy_usage[] = { 286 "perf bench mem memcpy <options>", 287 NULL 288 }; 289 290 int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) 291 { 292 struct bench_mem_info info = { 293 .functions = memcpy_functions, 294 .do_cycles = do_memcpy_cycles, 295 .do_gettimeofday = do_memcpy_gettimeofday, 296 .usage = bench_mem_memcpy_usage, 297 }; 298 299 return bench_mem_common(argc, argv, &info); 300 } 301 302 static void memset_alloc_mem(void **dst, size_t size) 303 { 304 *dst = zalloc(size); 305 if (!*dst) 306 die("memory allocation failed - maybe size is too large?\n"); 307 } 308 309 static u64 do_memset_cycles(const struct function *r, size_t size) 310 { 311 u64 cycle_start = 0ULL, cycle_end = 0ULL; 312 memset_t fn = r->fn.memset; 313 void *dst = NULL; 314 int i; 315 316 memset_alloc_mem(&dst, size); 317 318 /* 319 * We prefault the freshly allocated memory range here, 320 * to not measure page fault overhead: 321 */ 322 fn(dst, -1, size); 323 324 cycle_start = get_cycles(); 325 for (i = 0; i < nr_loops; ++i) 326 fn(dst, i, size); 327 cycle_end = get_cycles(); 328 329 free(dst); 330 return cycle_end - cycle_start; 331 } 332 333 static double do_memset_gettimeofday(const struct function *r, size_t size) 334 { 335 struct timeval tv_start, tv_end, tv_diff; 336 memset_t fn = r->fn.memset; 337 void *dst = NULL; 338 int i; 339 340 memset_alloc_mem(&dst, size); 341 342 /* 343 * We prefault the freshly allocated memory range here, 344 * to not measure page fault overhead: 345 */ 346 fn(dst, -1, size); 347 348 BUG_ON(gettimeofday(&tv_start, NULL)); 349 for (i = 0; i < nr_loops; ++i) 350 fn(dst, i, size); 351 BUG_ON(gettimeofday(&tv_end, NULL)); 352 353 timersub(&tv_end, &tv_start, &tv_diff); 354 355 free(dst); 356 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 357 } 358 359 static const char * const bench_mem_memset_usage[] = { 360 "perf bench mem memset <options>", 361 NULL 362 }; 363 364 static const struct function memset_functions[] = { 365 { .name = "default", 366 .desc = "Default memset() provided by glibc", 367 .fn.memset = memset }, 368 369 #ifdef HAVE_ARCH_X86_64_SUPPORT 370 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 371 # include "mem-memset-x86-64-asm-def.h" 372 # undef MEMSET_FN 373 #endif 374 375 { .name = NULL, } 376 }; 377 378 int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) 379 { 380 struct bench_mem_info info = { 381 .functions = memset_functions, 382 .do_cycles = do_memset_cycles, 383 .do_gettimeofday = do_memset_gettimeofday, 384 .usage = bench_mem_memset_usage, 385 }; 386 387 return bench_mem_common(argc, argv, &info); 388 } 389