1 /* 2 * mem-memcpy.c 3 * 4 * Simple memcpy() and memset() benchmarks 5 * 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 7 */ 8 9 #include "../perf.h" 10 #include "../util/util.h" 11 #include <subcmd/parse-options.h> 12 #include "../util/header.h" 13 #include "../util/cloexec.h" 14 #include "bench.h" 15 #include "mem-memcpy-arch.h" 16 #include "mem-memset-arch.h" 17 18 #include <stdio.h> 19 #include <stdlib.h> 20 #include <string.h> 21 #include <sys/time.h> 22 #include <errno.h> 23 24 #define K 1024 25 26 static const char *size_str = "1MB"; 27 static const char *function_str = "all"; 28 static int nr_loops = 1; 29 static bool use_cycles; 30 static int cycles_fd; 31 32 static const struct option options[] = { 33 OPT_STRING('s', "size", &size_str, "1MB", 34 "Specify the size of the memory buffers. " 35 "Available units: B, KB, MB, GB and TB (case insensitive)"), 36 37 OPT_STRING('f', "function", &function_str, "all", 38 "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), 39 40 OPT_INTEGER('l', "nr_loops", &nr_loops, 41 "Specify the number of loops to run. (default: 1)"), 42 43 OPT_BOOLEAN('c', "cycles", &use_cycles, 44 "Use a cycles event instead of gettimeofday() to measure performance"), 45 46 OPT_END() 47 }; 48 49 typedef void *(*memcpy_t)(void *, const void *, size_t); 50 typedef void *(*memset_t)(void *, int, size_t); 51 52 struct function { 53 const char *name; 54 const char *desc; 55 union { 56 memcpy_t memcpy; 57 memset_t memset; 58 } fn; 59 }; 60 61 static struct perf_event_attr cycle_attr = { 62 .type = PERF_TYPE_HARDWARE, 63 .config = PERF_COUNT_HW_CPU_CYCLES 64 }; 65 66 static void init_cycles(void) 67 { 68 cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); 69 70 if (cycles_fd < 0 && errno == ENOSYS) 71 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 72 else 73 BUG_ON(cycles_fd < 0); 74 } 75 76 static u64 get_cycles(void) 77 { 78 int ret; 79 u64 clk; 80 81 ret = read(cycles_fd, &clk, sizeof(u64)); 82 BUG_ON(ret != sizeof(u64)); 83 84 return clk; 85 } 86 87 static double timeval2double(struct timeval *ts) 88 { 89 return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; 90 } 91 92 #define print_bps(x) do { \ 93 if (x < K) \ 94 printf(" %14lf bytes/sec\n", x); \ 95 else if (x < K * K) \ 96 printf(" %14lfd KB/sec\n", x / K); \ 97 else if (x < K * K * K) \ 98 printf(" %14lf MB/sec\n", x / K / K); \ 99 else \ 100 printf(" %14lf GB/sec\n", x / K / K / K); \ 101 } while (0) 102 103 struct bench_mem_info { 104 const struct function *functions; 105 u64 (*do_cycles)(const struct function *r, size_t size); 106 double (*do_gettimeofday)(const struct function *r, size_t size); 107 const char *const *usage; 108 }; 109 110 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) 111 { 112 const struct function *r = &info->functions[r_idx]; 113 double result_bps = 0.0; 114 u64 result_cycles = 0; 115 116 printf("# function '%s' (%s)\n", r->name, r->desc); 117 118 if (bench_format == BENCH_FORMAT_DEFAULT) 119 printf("# Copying %s bytes ...\n\n", size_str); 120 121 if (use_cycles) { 122 result_cycles = info->do_cycles(r, size); 123 } else { 124 result_bps = info->do_gettimeofday(r, size); 125 } 126 127 switch (bench_format) { 128 case BENCH_FORMAT_DEFAULT: 129 if (use_cycles) { 130 printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); 131 } else { 132 print_bps(result_bps); 133 } 134 break; 135 136 case BENCH_FORMAT_SIMPLE: 137 if (use_cycles) { 138 printf("%lf\n", (double)result_cycles/size_total); 139 } else { 140 printf("%lf\n", result_bps); 141 } 142 break; 143 144 default: 145 BUG_ON(1); 146 break; 147 } 148 } 149 150 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) 151 { 152 int i; 153 size_t size; 154 double size_total; 155 156 argc = parse_options(argc, argv, options, info->usage, 0); 157 158 if (use_cycles) 159 init_cycles(); 160 161 size = (size_t)perf_atoll((char *)size_str); 162 size_total = (double)size * nr_loops; 163 164 if ((s64)size <= 0) { 165 fprintf(stderr, "Invalid size:%s\n", size_str); 166 return 1; 167 } 168 169 if (!strncmp(function_str, "all", 3)) { 170 for (i = 0; info->functions[i].name; i++) 171 __bench_mem_function(info, i, size, size_total); 172 return 0; 173 } 174 175 for (i = 0; info->functions[i].name; i++) { 176 if (!strcmp(info->functions[i].name, function_str)) 177 break; 178 } 179 if (!info->functions[i].name) { 180 if (strcmp(function_str, "help") && strcmp(function_str, "h")) 181 printf("Unknown function: %s\n", function_str); 182 printf("Available functions:\n"); 183 for (i = 0; info->functions[i].name; i++) { 184 printf("\t%s ... %s\n", 185 info->functions[i].name, info->functions[i].desc); 186 } 187 return 1; 188 } 189 190 __bench_mem_function(info, i, size, size_total); 191 192 return 0; 193 } 194 195 static void memcpy_alloc_mem(void **dst, void **src, size_t size) 196 { 197 *dst = zalloc(size); 198 if (!*dst) 199 die("memory allocation failed - maybe size is too large?\n"); 200 201 *src = zalloc(size); 202 if (!*src) 203 die("memory allocation failed - maybe size is too large?\n"); 204 205 /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 206 memset(*src, 0, size); 207 } 208 209 static u64 do_memcpy_cycles(const struct function *r, size_t size) 210 { 211 u64 cycle_start = 0ULL, cycle_end = 0ULL; 212 void *src = NULL, *dst = NULL; 213 memcpy_t fn = r->fn.memcpy; 214 int i; 215 216 memcpy_alloc_mem(&dst, &src, size); 217 218 /* 219 * We prefault the freshly allocated memory range here, 220 * to not measure page fault overhead: 221 */ 222 fn(dst, src, size); 223 224 cycle_start = get_cycles(); 225 for (i = 0; i < nr_loops; ++i) 226 fn(dst, src, size); 227 cycle_end = get_cycles(); 228 229 free(src); 230 free(dst); 231 return cycle_end - cycle_start; 232 } 233 234 static double do_memcpy_gettimeofday(const struct function *r, size_t size) 235 { 236 struct timeval tv_start, tv_end, tv_diff; 237 memcpy_t fn = r->fn.memcpy; 238 void *src = NULL, *dst = NULL; 239 int i; 240 241 memcpy_alloc_mem(&dst, &src, size); 242 243 /* 244 * We prefault the freshly allocated memory range here, 245 * to not measure page fault overhead: 246 */ 247 fn(dst, src, size); 248 249 BUG_ON(gettimeofday(&tv_start, NULL)); 250 for (i = 0; i < nr_loops; ++i) 251 fn(dst, src, size); 252 BUG_ON(gettimeofday(&tv_end, NULL)); 253 254 timersub(&tv_end, &tv_start, &tv_diff); 255 256 free(src); 257 free(dst); 258 259 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 260 } 261 262 struct function memcpy_functions[] = { 263 { .name = "default", 264 .desc = "Default memcpy() provided by glibc", 265 .fn.memcpy = memcpy }, 266 267 #ifdef HAVE_ARCH_X86_64_SUPPORT 268 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 269 # include "mem-memcpy-x86-64-asm-def.h" 270 # undef MEMCPY_FN 271 #endif 272 273 { .name = NULL, } 274 }; 275 276 static const char * const bench_mem_memcpy_usage[] = { 277 "perf bench mem memcpy <options>", 278 NULL 279 }; 280 281 int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) 282 { 283 struct bench_mem_info info = { 284 .functions = memcpy_functions, 285 .do_cycles = do_memcpy_cycles, 286 .do_gettimeofday = do_memcpy_gettimeofday, 287 .usage = bench_mem_memcpy_usage, 288 }; 289 290 return bench_mem_common(argc, argv, &info); 291 } 292 293 static void memset_alloc_mem(void **dst, size_t size) 294 { 295 *dst = zalloc(size); 296 if (!*dst) 297 die("memory allocation failed - maybe size is too large?\n"); 298 } 299 300 static u64 do_memset_cycles(const struct function *r, size_t size) 301 { 302 u64 cycle_start = 0ULL, cycle_end = 0ULL; 303 memset_t fn = r->fn.memset; 304 void *dst = NULL; 305 int i; 306 307 memset_alloc_mem(&dst, size); 308 309 /* 310 * We prefault the freshly allocated memory range here, 311 * to not measure page fault overhead: 312 */ 313 fn(dst, -1, size); 314 315 cycle_start = get_cycles(); 316 for (i = 0; i < nr_loops; ++i) 317 fn(dst, i, size); 318 cycle_end = get_cycles(); 319 320 free(dst); 321 return cycle_end - cycle_start; 322 } 323 324 static double do_memset_gettimeofday(const struct function *r, size_t size) 325 { 326 struct timeval tv_start, tv_end, tv_diff; 327 memset_t fn = r->fn.memset; 328 void *dst = NULL; 329 int i; 330 331 memset_alloc_mem(&dst, size); 332 333 /* 334 * We prefault the freshly allocated memory range here, 335 * to not measure page fault overhead: 336 */ 337 fn(dst, -1, size); 338 339 BUG_ON(gettimeofday(&tv_start, NULL)); 340 for (i = 0; i < nr_loops; ++i) 341 fn(dst, i, size); 342 BUG_ON(gettimeofday(&tv_end, NULL)); 343 344 timersub(&tv_end, &tv_start, &tv_diff); 345 346 free(dst); 347 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 348 } 349 350 static const char * const bench_mem_memset_usage[] = { 351 "perf bench mem memset <options>", 352 NULL 353 }; 354 355 static const struct function memset_functions[] = { 356 { .name = "default", 357 .desc = "Default memset() provided by glibc", 358 .fn.memset = memset }, 359 360 #ifdef HAVE_ARCH_X86_64_SUPPORT 361 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 362 # include "mem-memset-x86-64-asm-def.h" 363 # undef MEMSET_FN 364 #endif 365 366 { .name = NULL, } 367 }; 368 369 int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) 370 { 371 struct bench_mem_info info = { 372 .functions = memset_functions, 373 .do_cycles = do_memset_cycles, 374 .do_gettimeofday = do_memset_gettimeofday, 375 .usage = bench_mem_memset_usage, 376 }; 377 378 return bench_mem_common(argc, argv, &info); 379 } 380