// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Davidlohr Bueso.
 *
 * Benchmark the various operations allowed for epoll_ctl(2).
 * The idea is to concurrently stress a single epoll instance
 */
#ifdef HAVE_EVENTFD_SUPPORT
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>

#include <errno.h>
#include <inttypes.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <perf/cpumap.h>

#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"

#include <err.h>

/* printf() that only produces output when -v/--verbose was given. */
#define printinfo(fmt, arg...) \
	do { if (__verbose) printf(fmt, ## arg); } while (0)

/* Number of worker threads; 0 means "one per CPU in the cpu map". */
static unsigned int nthreads = 0;
/* How long the main thread sleeps before stopping the workers. */
static unsigned int nsecs    = 8;
static bool done, __verbose, randomize;

/*
 * epoll related shared variables.
 */

/* Maximum number of nesting allowed inside epoll sets */
#define EPOLL_MAXNESTS 4

/* Indices into the per-worker and global operation counters. */
enum {
	OP_EPOLL_ADD,
	OP_EPOLL_MOD,
	OP_EPOLL_DEL,
	EPOLL_NR_OPS,
};

/* The single epoll instance every worker operates on. */
static int epollfd;
/* Array of 'nested' extra epoll instances chained below epollfd. */
static int *epollfdp;
static bool noaffinity;
static unsigned int nested = 0;

/* amount of fds to monitor, per thread */
static unsigned int nfds = 64;

/* Start-up handshake between the main thread and the workers. */
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats all_stats[EPOLL_NR_OPS];
static pthread_cond_t thread_parent, thread_worker;

struct worker {
	int tid;				/* index of this worker, set in do_threads() */
	pthread_t thread;
	unsigned long ops[EPOLL_NR_OPS];	/* successful operations, per kind */
	int *fdmap;				/* nfds eventfd descriptors owned by this worker */
};

static const struct option options[] = {
	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
	OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
	OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
	OPT_BOOLEAN( 'n', "noaffinity", &noaffinity, "Disables CPU affinity"),
	OPT_UINTEGER( 'N', "nested", &nested, "Nesting level epoll hierarchy (default is 0, no nesting)"),
	OPT_BOOLEAN( 'R', "randomize", &randomize, "Perform random operations on random fds"),
	OPT_BOOLEAN( 'v', "verbose", &__verbose, "Verbose mode"),
	OPT_END()
};

static const char * const bench_epoll_ctl_usage[] = {
	"perf bench epoll ctl <options>",
	NULL
};

/*
 * Stop the benchmark: raise the 'done' flag polled by the workers and record
 * the elapsed time in bench__runtime.
 *
 * Installed as the SIGINT handler and also called directly by the main thread
 * once the requested runtime has elapsed; none of the arguments are used.
 */
static void toggle_done(int sig __maybe_unused,
			siginfo_t *info __maybe_unused,
			void *uc __maybe_unused)
{
	/* inform all threads that we're done for the day */
	done = true;
	gettimeofday(&bench__end, NULL);
	timersub(&bench__end, &bench__start, &bench__runtime);
}

/*
 * Build a chain of 'nested' epoll instances (capped at EPOLL_MAXNESTS) and
 * hang it off the main epollfd: epollfd watches epollfdp[0], which watches
 * epollfdp[1], and so on.
 *
 * Must only be called with nested != 0. Any failure terminates the process.
 */
static void nest_epollfd(void)
{
	unsigned int i;
	struct epoll_event ev;

	if (nested > EPOLL_MAXNESTS)
		nested = EPOLL_MAXNESTS;
	printinfo("Nesting level(s): %u\n", nested);

	epollfdp = calloc(nested, sizeof(*epollfdp));
	if (!epollfdp)
		err(EXIT_FAILURE, "calloc");

	for (i = 0; i < nested; i++) {
		epollfdp[i] = epoll_create(1);
		/* Check the descriptor just created, not the main epollfd. */
		if (epollfdp[i] < 0)
			err(EXIT_FAILURE, "epoll_create");
	}

	ev.events = EPOLLHUP; /* anything */
	ev.data.u64 = i; /* any number */

	/* Link the chain from the innermost instance outwards. */
	for (i = nested - 1; i; i--) {
		if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
			      epollfdp[i], &ev) < 0)
			err(EXIT_FAILURE, "epoll_ctl");
	}

	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
		err(EXIT_FAILURE, "epoll_ctl");
}

/*
 * Issue a single epoll_ctl() of kind 'op' (one of the OP_EPOLL_* values) for
 * 'fd' against the main epoll instance. Only calls that return 0 are counted
 * in w->ops[]; failures are silently ignored.
 */
static inline void do_epoll_op(struct worker *w, int op, int fd)
{
	int error;
	struct epoll_event ev;

	ev.events = EPOLLIN;
	ev.data.u64 = fd;

	switch (op) {
	case OP_EPOLL_ADD:
		error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
		break;
	case OP_EPOLL_MOD:
		ev.events = EPOLLOUT;
		error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev);
		break;
	case OP_EPOLL_DEL:
		error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
		break;
	default:
		/* Unknown op: treat as a failure so nothing is counted. */
		error = 1;
		break;
	}

	if (!error)
		w->ops[op]++;
}

/*
 * Perform one randomly chosen operation on one randomly chosen descriptor of
 * this worker. Relies on nfds being non-zero.
 */
static inline void do_random_epoll_op(struct worker *w)
{
	unsigned long rnd1 = random(), rnd2 = random();
	int op, fd;

	fd = w->fdmap[rnd1 % nfds];
	op = rnd2 % EPOLL_NR_OPS;

	do_epoll_op(w, op, fd);
}

/*
 * Thread body. Checks in with the main thread, blocks until it is released,
 * then issues epoll_ctl() operations until 'done' is raised: either one random
 * operation per iteration (randomize) or an ADD/MOD/DEL sequence over every
 * descriptor in the worker's fdmap.
 *
 * NOTE(review): the wait on thread_worker is not guarded by a predicate, so a
 * spurious wakeup would let a worker start before the broadcast.
 */
static void *workerfn(void *arg)
{
	unsigned int i;
	struct worker *w = (struct worker *) arg;
	struct timespec ts = { .tv_sec = 0,
			       .tv_nsec = 250 };

	pthread_mutex_lock(&thread_lock);
	threads_starting--;
	/* The last worker to check in wakes up the main thread. */
	if (!threads_starting)
		pthread_cond_signal(&thread_parent);
	pthread_cond_wait(&thread_worker, &thread_lock);
	pthread_mutex_unlock(&thread_lock);

	/* Let 'em loose */
	do {
		/* random */
		if (randomize) {
			do_random_epoll_op(w);
		} else {
			for (i = 0; i < nfds; i++) {
				do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]);
				do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]);
				do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]);
			}
		}

		nanosleep(&ts, NULL);
	} while (!done);

	return NULL;
}

/*
 * Pre-register roughly 'pct' percent of the worker's descriptors with the
 * main epoll instance by adding every (100 / pct)-th entry of the fdmap.
 * A non-positive pct registers nothing. Failure to add terminates the process.
 */
static void init_fdmaps(struct worker *w, int pct)
{
	unsigned int i;
	int inc;
	struct epoll_event ev;

	if (pct <= 0)
		return;

	/* Never step by zero (pct > 100), which would loop forever. */
	inc = 100 / pct;
	if (inc < 1)
		inc = 1;

	/* ev.data is a union; clear it so no stale stack bytes reach the kernel. */
	memset(&ev, 0, sizeof(ev));

	for (i = 0; i < nfds; i += inc) {
		ev.data.fd = w->fdmap[i];
		ev.events = EPOLLIN;

		if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0)
			err(EXIT_FAILURE, "epoll_ctl");
	}
}

/*
 * Set up and start all nthreads workers.
 *
 * For each worker this allocates its fdmap, fills it with nfds non-blocking
 * eventfds, optionally pre-registers half of them (randomize mode) and, unless
 * affinity is disabled, pins the new thread to a CPU from 'cpu' chosen
 * round-robin.
 *
 * Every failure terminates the process, so the function only returns (with 0)
 * once all threads have been created.
 */
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
{
	pthread_attr_t thread_attr, *attrp = NULL;
	cpu_set_t *cpuset;
	unsigned int i, j;
	int ret = 0;
	int nrcpus;
	size_t size;

	if (!noaffinity)
		pthread_attr_init(&thread_attr);

	nrcpus = perf_cpu_map__nr(cpu);
	cpuset = CPU_ALLOC(nrcpus);
	BUG_ON(!cpuset);
	size = CPU_ALLOC_SIZE(nrcpus);

	for (i = 0; i < nthreads; i++) {
		struct worker *w = &worker[i];

		w->tid = i;
		w->fdmap = calloc(nfds, sizeof(*w->fdmap));
		if (!w->fdmap) {
			/*
			 * Returning here would leave the caller waiting forever
			 * for threads that were never created, so treat it as
			 * fatal like every other failure in this function.
			 */
			err(EXIT_FAILURE, "calloc");
		}

		for (j = 0; j < nfds; j++) {
			w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
			if (w->fdmap[j] < 0)
				err(EXIT_FAILURE, "eventfd");
		}

		/*
		 * Let's add 50% of the fdmap to the epoll instance, and
		 * do it before any threads are started; otherwise there is
		 * an initial bias of the call failing (mod and del ops).
		 */
		if (randomize)
			init_fdmaps(w, 50);

		if (!noaffinity) {
			CPU_ZERO_S(size, cpuset);
			CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
				  size, cpuset);

			ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
			if (ret) {
				CPU_FREE(cpuset);
				/* pthread calls return the error; err() reads errno. */
				errno = ret;
				err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
			}

			attrp = &thread_attr;
		}

		ret = pthread_create(&w->thread, attrp, workerfn, w);
		if (ret) {
			CPU_FREE(cpuset);
			/* pthread calls return the error; err() reads errno. */
			errno = ret;
			err(EXIT_FAILURE, "pthread_create");
		}
	}

	CPU_FREE(cpuset);
	if (!noaffinity)
		pthread_attr_destroy(&thread_attr);

	return ret;
}

/*
 * Print the average number of ADD, MOD and DEL operations across workers,
 * together with the relative standard deviation, from all_stats[].
 */
static void print_summary(void)
{
	int i;
	unsigned long avg[EPOLL_NR_OPS];
	double stddev[EPOLL_NR_OPS];

	for (i = 0; i < EPOLL_NR_OPS; i++) {
		avg[i] = avg_stats(&all_stats[i]);
		stddev[i] = stddev_stats(&all_stats[i]);
	}

	printf("\nAveraged %lu ADD operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD],
						   avg[OP_EPOLL_ADD]));
	printf("Averaged %lu MOD operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD],
						   avg[OP_EPOLL_MOD]));
	printf("Averaged %lu DEL operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL],
						   avg[OP_EPOLL_DEL]));
}

/*
 * Entry point of "perf bench epoll ctl".
 *
 * Parses the options, creates the main (and any nested) epoll instance,
 * raises RLIMIT_NOFILE to fit nfds descriptors per thread, starts the
 * workers, lets them run for nsecs seconds (or until SIGINT) and finally
 * prints per-thread and averaged operation counts.
 *
 * Returns 0 on success; every failure terminates the process.
 */
int bench_epoll_ctl(int argc, const char **argv)
{
	int j, ret = 0;
	struct sigaction act;
	struct worker *worker = NULL;
	struct perf_cpu_map *cpu;
	struct rlimit rl, prevrl;
	unsigned int i;

	argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0);
	if (argc) {
		usage_with_options(bench_epoll_ctl_usage, options);
		exit(EXIT_FAILURE);
	}

	/* nfds is used as a modulus and as an array bound further down. */
	if (!nfds)
		errx(EXIT_FAILURE, "nfds must be greater than zero");

	memset(&act, 0, sizeof(act));
	sigfillset(&act.sa_mask);
	/* sa_sigaction is only the designated handler when SA_SIGINFO is set. */
	act.sa_flags = SA_SIGINFO;
	act.sa_sigaction = toggle_done;
	sigaction(SIGINT, &act, NULL);

	cpu = perf_cpu_map__new(NULL);
	if (!cpu)
		goto errmem;

	/* a single, main epoll instance */
	epollfd = epoll_create(1);
	if (epollfd < 0)
		err(EXIT_FAILURE, "epoll_create");

	/*
	 * Deal with nested epolls, if any.
	 */
	if (nested)
		nest_epollfd();

	/* default to the number of CPUs */
	if (!nthreads)
		nthreads = perf_cpu_map__nr(cpu);

	worker = calloc(nthreads, sizeof(*worker));
	if (!worker)
		goto errmem;

	if (getrlimit(RLIMIT_NOFILE, &prevrl))
		err(EXIT_FAILURE, "getrlimit");
	/* Compute in rlim_t so large nfds * nthreads cannot wrap. */
	rl.rlim_cur = rl.rlim_max = (rlim_t)nfds * nthreads * 2 + 50;
	printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
		  (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
	if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
		err(EXIT_FAILURE, "setrlimit");

	printf("Run summary [PID %d]: %u threads doing epoll_ctl ops "
	       "%u file-descriptors for %u secs.\n\n",
	       getpid(), nthreads, nfds, nsecs);

	for (i = 0; i < EPOLL_NR_OPS; i++)
		init_stats(&all_stats[i]);

	pthread_mutex_init(&thread_lock, NULL);
	pthread_cond_init(&thread_parent, NULL);
	pthread_cond_init(&thread_worker, NULL);

	threads_starting = nthreads;

	gettimeofday(&bench__start, NULL);

	do_threads(worker, cpu);

	/* Wait until every worker has checked in, then release them all. */
	pthread_mutex_lock(&thread_lock);
	while (threads_starting)
		pthread_cond_wait(&thread_parent, &thread_lock);
	pthread_cond_broadcast(&thread_worker);
	pthread_mutex_unlock(&thread_lock);

	sleep(nsecs);
	toggle_done(0, NULL, NULL);
	printinfo("main thread: toggling done\n");

	for (i = 0; i < nthreads; i++) {
		ret = pthread_join(worker[i].thread, NULL);
		if (ret) {
			/* pthread calls return the error; err() reads errno. */
			errno = ret;
			err(EXIT_FAILURE, "pthread_join");
		}
	}

	/* cleanup & report results */
	pthread_cond_destroy(&thread_parent);
	pthread_cond_destroy(&thread_worker);
	pthread_mutex_destroy(&thread_lock);

	for (i = 0; i < nthreads; i++) {
		unsigned long t[EPOLL_NR_OPS];

		for (j = 0; j < EPOLL_NR_OPS; j++) {
			t[j] = worker[i].ops[j];
			update_stats(&all_stats[j], t[j]);
		}

		if (nfds == 1)
			printf("[thread %2d] fdmap: %p [ add: %04lu; mod: %04lu; del: %04lus ops ]\n",
			       worker[i].tid, (void *)&worker[i].fdmap[0],
			       t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
		else
			printf("[thread %2d] fdmap: %p ... %p [ add: %04lu ops; mod: %04lu ops; del: %04lu ops ]\n",
			       worker[i].tid, (void *)&worker[i].fdmap[0],
			       (void *)&worker[i].fdmap[nfds-1],
			       t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
	}

	print_summary();

	close(epollfd);
	/* Release what this function acquired. */
	perf_cpu_map__put(cpu);
	for (i = 0; i < nthreads; i++)
		free(worker[i].fdmap);
	free(worker);
	return ret;
errmem:
	err(EXIT_FAILURE, "calloc");
}
#endif // HAVE_EVENTFD_SUPPORT