1 /* 2 * mem-memcpy.c 3 * 4 * Simple memcpy() and memset() benchmarks 5 * 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 7 */ 8 9 #include "debug.h" 10 #include "../perf.h" 11 #include "../util/util.h" 12 #include <subcmd/parse-options.h> 13 #include "../util/header.h" 14 #include "../util/cloexec.h" 15 #include "../util/string2.h" 16 #include "bench.h" 17 #include "mem-memcpy-arch.h" 18 #include "mem-memset-arch.h" 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <sys/time.h> 24 #include <errno.h> 25 #include <linux/time64.h> 26 27 #define K 1024 28 29 static const char *size_str = "1MB"; 30 static const char *function_str = "all"; 31 static int nr_loops = 1; 32 static bool use_cycles; 33 static int cycles_fd; 34 35 static const struct option options[] = { 36 OPT_STRING('s', "size", &size_str, "1MB", 37 "Specify the size of the memory buffers. " 38 "Available units: B, KB, MB, GB and TB (case insensitive)"), 39 40 OPT_STRING('f', "function", &function_str, "all", 41 "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), 42 43 OPT_INTEGER('l', "nr_loops", &nr_loops, 44 "Specify the number of loops to run. (default: 1)"), 45 46 OPT_BOOLEAN('c', "cycles", &use_cycles, 47 "Use a cycles event instead of gettimeofday() to measure performance"), 48 49 OPT_END() 50 }; 51 52 typedef void *(*memcpy_t)(void *, const void *, size_t); 53 typedef void *(*memset_t)(void *, int, size_t); 54 55 struct function { 56 const char *name; 57 const char *desc; 58 union { 59 memcpy_t memcpy; 60 memset_t memset; 61 } fn; 62 }; 63 64 static struct perf_event_attr cycle_attr = { 65 .type = PERF_TYPE_HARDWARE, 66 .config = PERF_COUNT_HW_CPU_CYCLES 67 }; 68 69 static int init_cycles(void) 70 { 71 cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); 72 73 if (cycles_fd < 0 && errno == ENOSYS) { 74 pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 75 return -1; 76 } 77 78 return cycles_fd; 79 } 80 81 static u64 get_cycles(void) 82 { 83 int ret; 84 u64 clk; 85 86 ret = read(cycles_fd, &clk, sizeof(u64)); 87 BUG_ON(ret != sizeof(u64)); 88 89 return clk; 90 } 91 92 static double timeval2double(struct timeval *ts) 93 { 94 return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; 95 } 96 97 #define print_bps(x) do { \ 98 if (x < K) \ 99 printf(" %14lf bytes/sec\n", x); \ 100 else if (x < K * K) \ 101 printf(" %14lfd KB/sec\n", x / K); \ 102 else if (x < K * K * K) \ 103 printf(" %14lf MB/sec\n", x / K / K); \ 104 else \ 105 printf(" %14lf GB/sec\n", x / K / K / K); \ 106 } while (0) 107 108 struct bench_mem_info { 109 const struct function *functions; 110 u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); 111 double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); 112 const char *const *usage; 113 bool alloc_src; 114 }; 115 116 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) 117 { 118 const struct function *r = &info->functions[r_idx]; 119 double result_bps = 0.0; 120 u64 result_cycles = 0; 121 void *src = NULL, *dst = zalloc(size); 122 123 printf("# function '%s' (%s)\n", r->name, r->desc); 124 125 if (dst == NULL) 126 goto out_alloc_failed; 127 128 if (info->alloc_src) { 129 src = zalloc(size); 130 if (src == NULL) 131 goto out_alloc_failed; 132 } 133 134 if (bench_format == BENCH_FORMAT_DEFAULT) 135 printf("# Copying %s bytes ...\n\n", size_str); 136 137 if (use_cycles) { 138 result_cycles = info->do_cycles(r, size, src, dst); 139 } else { 140 result_bps = info->do_gettimeofday(r, size, src, dst); 141 } 142 143 switch (bench_format) { 144 case BENCH_FORMAT_DEFAULT: 145 if (use_cycles) { 146 printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); 147 } else { 148 print_bps(result_bps); 149 } 150 break; 151 152 case BENCH_FORMAT_SIMPLE: 153 if (use_cycles) { 154 printf("%lf\n", (double)result_cycles/size_total); 155 } else { 156 printf("%lf\n", result_bps); 157 } 158 break; 159 160 default: 161 BUG_ON(1); 162 break; 163 } 164 165 out_free: 166 free(src); 167 free(dst); 168 return; 169 out_alloc_failed: 170 printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str); 171 goto out_free; 172 } 173 174 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) 175 { 176 int i; 177 size_t size; 178 double size_total; 179 180 argc = parse_options(argc, argv, options, info->usage, 0); 181 182 if (use_cycles) { 183 i = init_cycles(); 184 if (i < 0) { 185 fprintf(stderr, "Failed to open cycles counter\n"); 186 return i; 187 } 188 } 189 190 size = (size_t)perf_atoll((char *)size_str); 191 size_total = (double)size * nr_loops; 192 193 if ((s64)size <= 0) { 194 fprintf(stderr, "Invalid size:%s\n", size_str); 195 return 1; 196 } 197 198 if (!strncmp(function_str, "all", 3)) { 199 for (i = 0; info->functions[i].name; i++) 200 __bench_mem_function(info, i, size, size_total); 201 return 0; 202 } 203 204 for (i = 0; info->functions[i].name; i++) { 205 if (!strcmp(info->functions[i].name, function_str)) 206 break; 207 } 208 if (!info->functions[i].name) { 209 if (strcmp(function_str, "help") && strcmp(function_str, "h")) 210 printf("Unknown function: %s\n", function_str); 211 printf("Available functions:\n"); 212 for (i = 0; info->functions[i].name; i++) { 213 printf("\t%s ... %s\n", 214 info->functions[i].name, info->functions[i].desc); 215 } 216 return 1; 217 } 218 219 __bench_mem_function(info, i, size, size_total); 220 221 return 0; 222 } 223 224 static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) 225 { 226 u64 cycle_start = 0ULL, cycle_end = 0ULL; 227 memcpy_t fn = r->fn.memcpy; 228 int i; 229 230 /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 231 memset(src, 0, size); 232 233 /* 234 * We prefault the freshly allocated memory range here, 235 * to not measure page fault overhead: 236 */ 237 fn(dst, src, size); 238 239 cycle_start = get_cycles(); 240 for (i = 0; i < nr_loops; ++i) 241 fn(dst, src, size); 242 cycle_end = get_cycles(); 243 244 return cycle_end - cycle_start; 245 } 246 247 static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) 248 { 249 struct timeval tv_start, tv_end, tv_diff; 250 memcpy_t fn = r->fn.memcpy; 251 int i; 252 253 /* 254 * We prefault the freshly allocated memory range here, 255 * to not measure page fault overhead: 256 */ 257 fn(dst, src, size); 258 259 BUG_ON(gettimeofday(&tv_start, NULL)); 260 for (i = 0; i < nr_loops; ++i) 261 fn(dst, src, size); 262 BUG_ON(gettimeofday(&tv_end, NULL)); 263 264 timersub(&tv_end, &tv_start, &tv_diff); 265 266 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 267 } 268 269 struct function memcpy_functions[] = { 270 { .name = "default", 271 .desc = "Default memcpy() provided by glibc", 272 .fn.memcpy = memcpy }, 273 274 #ifdef HAVE_ARCH_X86_64_SUPPORT 275 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 276 # include "mem-memcpy-x86-64-asm-def.h" 277 # undef MEMCPY_FN 278 #endif 279 280 { .name = NULL, } 281 }; 282 283 static const char * const bench_mem_memcpy_usage[] = { 284 "perf bench mem memcpy <options>", 285 NULL 286 }; 287 288 int bench_mem_memcpy(int argc, const char **argv) 289 { 290 struct bench_mem_info info = { 291 .functions = memcpy_functions, 292 .do_cycles = do_memcpy_cycles, 293 .do_gettimeofday = do_memcpy_gettimeofday, 294 .usage = bench_mem_memcpy_usage, 295 .alloc_src = true, 296 }; 297 298 return bench_mem_common(argc, argv, &info); 299 } 300 301 static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 302 { 303 u64 cycle_start = 0ULL, cycle_end = 0ULL; 304 memset_t fn = r->fn.memset; 305 int i; 306 307 /* 308 * We prefault the freshly allocated memory range here, 309 * to not measure page fault overhead: 310 */ 311 fn(dst, -1, size); 312 313 cycle_start = get_cycles(); 314 for (i = 0; i < nr_loops; ++i) 315 fn(dst, i, size); 316 cycle_end = get_cycles(); 317 318 return cycle_end - cycle_start; 319 } 320 321 static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 322 { 323 struct timeval tv_start, tv_end, tv_diff; 324 memset_t fn = r->fn.memset; 325 int i; 326 327 /* 328 * We prefault the freshly allocated memory range here, 329 * to not measure page fault overhead: 330 */ 331 fn(dst, -1, size); 332 333 BUG_ON(gettimeofday(&tv_start, NULL)); 334 for (i = 0; i < nr_loops; ++i) 335 fn(dst, i, size); 336 BUG_ON(gettimeofday(&tv_end, NULL)); 337 338 timersub(&tv_end, &tv_start, &tv_diff); 339 340 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 341 } 342 343 static const char * const bench_mem_memset_usage[] = { 344 "perf bench mem memset <options>", 345 NULL 346 }; 347 348 static const struct function memset_functions[] = { 349 { .name = "default", 350 .desc = "Default memset() provided by glibc", 351 .fn.memset = memset }, 352 353 #ifdef HAVE_ARCH_X86_64_SUPPORT 354 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 355 # include "mem-memset-x86-64-asm-def.h" 356 # undef MEMSET_FN 357 #endif 358 359 { .name = NULL, } 360 }; 361 362 int bench_mem_memset(int argc, const char **argv) 363 { 364 struct bench_mem_info info = { 365 .functions = memset_functions, 366 .do_cycles = do_memset_cycles, 367 .do_gettimeofday = do_memset_gettimeofday, 368 .usage = bench_mem_memset_usage, 369 }; 370 371 return bench_mem_common(argc, argv, &info); 372 } 373