1 /* 2 * mem-memcpy.c 3 * 4 * Simple memcpy() and memset() benchmarks 5 * 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 7 */ 8 9 #include "debug.h" 10 #include "../perf.h" 11 #include "../util/util.h" 12 #include <subcmd/parse-options.h> 13 #include "../util/header.h" 14 #include "../util/cloexec.h" 15 #include "bench.h" 16 #include "mem-memcpy-arch.h" 17 #include "mem-memset-arch.h" 18 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include <sys/time.h> 23 #include <errno.h> 24 25 #define K 1024 26 27 static const char *size_str = "1MB"; 28 static const char *function_str = "all"; 29 static int nr_loops = 1; 30 static bool use_cycles; 31 static int cycles_fd; 32 33 static const struct option options[] = { 34 OPT_STRING('s', "size", &size_str, "1MB", 35 "Specify the size of the memory buffers. " 36 "Available units: B, KB, MB, GB and TB (case insensitive)"), 37 38 OPT_STRING('f', "function", &function_str, "all", 39 "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), 40 41 OPT_INTEGER('l', "nr_loops", &nr_loops, 42 "Specify the number of loops to run. (default: 1)"), 43 44 OPT_BOOLEAN('c', "cycles", &use_cycles, 45 "Use a cycles event instead of gettimeofday() to measure performance"), 46 47 OPT_END() 48 }; 49 50 typedef void *(*memcpy_t)(void *, const void *, size_t); 51 typedef void *(*memset_t)(void *, int, size_t); 52 53 struct function { 54 const char *name; 55 const char *desc; 56 union { 57 memcpy_t memcpy; 58 memset_t memset; 59 } fn; 60 }; 61 62 static struct perf_event_attr cycle_attr = { 63 .type = PERF_TYPE_HARDWARE, 64 .config = PERF_COUNT_HW_CPU_CYCLES 65 }; 66 67 static int init_cycles(void) 68 { 69 cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); 70 71 if (cycles_fd < 0 && errno == ENOSYS) { 72 pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 73 return -1; 74 } 75 76 return cycles_fd; 77 } 78 79 static u64 get_cycles(void) 80 { 81 int ret; 82 u64 clk; 83 84 ret = read(cycles_fd, &clk, sizeof(u64)); 85 BUG_ON(ret != sizeof(u64)); 86 87 return clk; 88 } 89 90 static double timeval2double(struct timeval *ts) 91 { 92 return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; 93 } 94 95 #define print_bps(x) do { \ 96 if (x < K) \ 97 printf(" %14lf bytes/sec\n", x); \ 98 else if (x < K * K) \ 99 printf(" %14lfd KB/sec\n", x / K); \ 100 else if (x < K * K * K) \ 101 printf(" %14lf MB/sec\n", x / K / K); \ 102 else \ 103 printf(" %14lf GB/sec\n", x / K / K / K); \ 104 } while (0) 105 106 struct bench_mem_info { 107 const struct function *functions; 108 u64 (*do_cycles)(const struct function *r, size_t size); 109 double (*do_gettimeofday)(const struct function *r, size_t size); 110 const char *const *usage; 111 }; 112 113 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) 114 { 115 const struct function *r = &info->functions[r_idx]; 116 double result_bps = 0.0; 117 u64 result_cycles = 0; 118 119 printf("# function '%s' (%s)\n", r->name, r->desc); 120 121 if (bench_format == BENCH_FORMAT_DEFAULT) 122 printf("# Copying %s bytes ...\n\n", size_str); 123 124 if (use_cycles) { 125 result_cycles = info->do_cycles(r, size); 126 } else { 127 result_bps = info->do_gettimeofday(r, size); 128 } 129 130 switch (bench_format) { 131 case BENCH_FORMAT_DEFAULT: 132 if (use_cycles) { 133 printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); 134 } else { 135 print_bps(result_bps); 136 } 137 break; 138 139 case BENCH_FORMAT_SIMPLE: 140 if (use_cycles) { 141 printf("%lf\n", (double)result_cycles/size_total); 142 } else { 143 printf("%lf\n", result_bps); 144 } 145 break; 146 147 default: 148 BUG_ON(1); 149 break; 150 } 151 } 152 153 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) 154 { 155 int i; 156 size_t size; 157 double size_total; 158 159 argc = parse_options(argc, argv, options, info->usage, 0); 160 161 if (use_cycles) { 162 i = init_cycles(); 163 if (i < 0) { 164 fprintf(stderr, "Failed to open cycles counter\n"); 165 return i; 166 } 167 } 168 169 size = (size_t)perf_atoll((char *)size_str); 170 size_total = (double)size * nr_loops; 171 172 if ((s64)size <= 0) { 173 fprintf(stderr, "Invalid size:%s\n", size_str); 174 return 1; 175 } 176 177 if (!strncmp(function_str, "all", 3)) { 178 for (i = 0; info->functions[i].name; i++) 179 __bench_mem_function(info, i, size, size_total); 180 return 0; 181 } 182 183 for (i = 0; info->functions[i].name; i++) { 184 if (!strcmp(info->functions[i].name, function_str)) 185 break; 186 } 187 if (!info->functions[i].name) { 188 if (strcmp(function_str, "help") && strcmp(function_str, "h")) 189 printf("Unknown function: %s\n", function_str); 190 printf("Available functions:\n"); 191 for (i = 0; info->functions[i].name; i++) { 192 printf("\t%s ... %s\n", 193 info->functions[i].name, info->functions[i].desc); 194 } 195 return 1; 196 } 197 198 __bench_mem_function(info, i, size, size_total); 199 200 return 0; 201 } 202 203 static void memcpy_alloc_mem(void **dst, void **src, size_t size) 204 { 205 *dst = zalloc(size); 206 if (!*dst) 207 die("memory allocation failed - maybe size is too large?\n"); 208 209 *src = zalloc(size); 210 if (!*src) 211 die("memory allocation failed - maybe size is too large?\n"); 212 213 /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 214 memset(*src, 0, size); 215 } 216 217 static u64 do_memcpy_cycles(const struct function *r, size_t size) 218 { 219 u64 cycle_start = 0ULL, cycle_end = 0ULL; 220 void *src = NULL, *dst = NULL; 221 memcpy_t fn = r->fn.memcpy; 222 int i; 223 224 memcpy_alloc_mem(&dst, &src, size); 225 226 /* 227 * We prefault the freshly allocated memory range here, 228 * to not measure page fault overhead: 229 */ 230 fn(dst, src, size); 231 232 cycle_start = get_cycles(); 233 for (i = 0; i < nr_loops; ++i) 234 fn(dst, src, size); 235 cycle_end = get_cycles(); 236 237 free(src); 238 free(dst); 239 return cycle_end - cycle_start; 240 } 241 242 static double do_memcpy_gettimeofday(const struct function *r, size_t size) 243 { 244 struct timeval tv_start, tv_end, tv_diff; 245 memcpy_t fn = r->fn.memcpy; 246 void *src = NULL, *dst = NULL; 247 int i; 248 249 memcpy_alloc_mem(&dst, &src, size); 250 251 /* 252 * We prefault the freshly allocated memory range here, 253 * to not measure page fault overhead: 254 */ 255 fn(dst, src, size); 256 257 BUG_ON(gettimeofday(&tv_start, NULL)); 258 for (i = 0; i < nr_loops; ++i) 259 fn(dst, src, size); 260 BUG_ON(gettimeofday(&tv_end, NULL)); 261 262 timersub(&tv_end, &tv_start, &tv_diff); 263 264 free(src); 265 free(dst); 266 267 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 268 } 269 270 struct function memcpy_functions[] = { 271 { .name = "default", 272 .desc = "Default memcpy() provided by glibc", 273 .fn.memcpy = memcpy }, 274 275 #ifdef HAVE_ARCH_X86_64_SUPPORT 276 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 277 # include "mem-memcpy-x86-64-asm-def.h" 278 # undef MEMCPY_FN 279 #endif 280 281 { .name = NULL, } 282 }; 283 284 static const char * const bench_mem_memcpy_usage[] = { 285 "perf bench mem memcpy <options>", 286 NULL 287 }; 288 289 int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) 290 { 291 struct bench_mem_info info = { 292 .functions = memcpy_functions, 293 .do_cycles = do_memcpy_cycles, 294 .do_gettimeofday = do_memcpy_gettimeofday, 295 .usage = bench_mem_memcpy_usage, 296 }; 297 298 return bench_mem_common(argc, argv, &info); 299 } 300 301 static void memset_alloc_mem(void **dst, size_t size) 302 { 303 *dst = zalloc(size); 304 if (!*dst) 305 die("memory allocation failed - maybe size is too large?\n"); 306 } 307 308 static u64 do_memset_cycles(const struct function *r, size_t size) 309 { 310 u64 cycle_start = 0ULL, cycle_end = 0ULL; 311 memset_t fn = r->fn.memset; 312 void *dst = NULL; 313 int i; 314 315 memset_alloc_mem(&dst, size); 316 317 /* 318 * We prefault the freshly allocated memory range here, 319 * to not measure page fault overhead: 320 */ 321 fn(dst, -1, size); 322 323 cycle_start = get_cycles(); 324 for (i = 0; i < nr_loops; ++i) 325 fn(dst, i, size); 326 cycle_end = get_cycles(); 327 328 free(dst); 329 return cycle_end - cycle_start; 330 } 331 332 static double do_memset_gettimeofday(const struct function *r, size_t size) 333 { 334 struct timeval tv_start, tv_end, tv_diff; 335 memset_t fn = r->fn.memset; 336 void *dst = NULL; 337 int i; 338 339 memset_alloc_mem(&dst, size); 340 341 /* 342 * We prefault the freshly allocated memory range here, 343 * to not measure page fault overhead: 344 */ 345 fn(dst, -1, size); 346 347 BUG_ON(gettimeofday(&tv_start, NULL)); 348 for (i = 0; i < nr_loops; ++i) 349 fn(dst, i, size); 350 BUG_ON(gettimeofday(&tv_end, NULL)); 351 352 timersub(&tv_end, &tv_start, &tv_diff); 353 354 free(dst); 355 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 356 } 357 358 static const char * const bench_mem_memset_usage[] = { 359 "perf bench mem memset <options>", 360 NULL 361 }; 362 363 static const struct function memset_functions[] = { 364 { .name = "default", 365 .desc = "Default memset() provided by glibc", 366 .fn.memset = memset }, 367 368 #ifdef HAVE_ARCH_X86_64_SUPPORT 369 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 370 # include "mem-memset-x86-64-asm-def.h" 371 # undef MEMSET_FN 372 #endif 373 374 { .name = NULL, } 375 }; 376 377 int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) 378 { 379 struct bench_mem_info info = { 380 .functions = memset_functions, 381 .do_cycles = do_memset_cycles, 382 .do_gettimeofday = do_memset_gettimeofday, 383 .usage = bench_mem_memset_usage, 384 }; 385 386 return bench_mem_common(argc, argv, &info); 387 } 388