1 /* 2 * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com> 3 * 4 * futex-requeue: Block a bunch of threads on futex1 and requeue them 5 * on futex2, N at a time. 6 * 7 * This program is particularly useful to measure the latency of nthread 8 * requeues without waking up any tasks -- thus mimicking a regular futex_wait. 9 */ 10 11 #include "../perf.h" 12 #include "../util/util.h" 13 #include "../util/stat.h" 14 #include "../util/parse-options.h" 15 #include "../util/header.h" 16 #include "bench.h" 17 #include "futex.h" 18 19 #include <err.h> 20 #include <stdlib.h> 21 #include <sys/time.h> 22 #include <pthread.h> 23 24 static u_int32_t futex1 = 0, futex2 = 0; 25 26 /* 27 * How many tasks to requeue at a time. 28 * Default to 1 in order to make the kernel work more. 29 */ 30 static unsigned int nrequeue = 1; 31 32 static pthread_t *worker; 33 static bool done = 0, silent = 0; 34 static pthread_mutex_t thread_lock; 35 static pthread_cond_t thread_parent, thread_worker; 36 static struct stats requeuetime_stats, requeued_stats; 37 static unsigned int ncpus, threads_starting, nthreads = 0; 38 39 static const struct option options[] = { 40 OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), 41 OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"), 42 OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), 43 OPT_END() 44 }; 45 46 static const char * const bench_futex_requeue_usage[] = { 47 "perf bench futex requeue <options>", 48 NULL 49 }; 50 51 static void print_summary(void) 52 { 53 double requeuetime_avg = avg_stats(&requeuetime_stats); 54 double requeuetime_stddev = stddev_stats(&requeuetime_stats); 55 unsigned int requeued_avg = avg_stats(&requeued_stats); 56 57 printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n", 58 requeued_avg, 59 nthreads, 60 requeuetime_avg/1e3, 61 rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); 62 } 63 64 static void *workerfn(void *arg __maybe_unused) 65 { 66 pthread_mutex_lock(&thread_lock); 67 threads_starting--; 68 if (!threads_starting) 69 pthread_cond_signal(&thread_parent); 70 pthread_cond_wait(&thread_worker, &thread_lock); 71 pthread_mutex_unlock(&thread_lock); 72 73 futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG); 74 return NULL; 75 } 76 77 static void block_threads(pthread_t *w, 78 pthread_attr_t thread_attr) 79 { 80 cpu_set_t cpu; 81 unsigned int i; 82 83 threads_starting = nthreads; 84 85 /* create and block all threads */ 86 for (i = 0; i < nthreads; i++) { 87 CPU_ZERO(&cpu); 88 CPU_SET(i % ncpus, &cpu); 89 90 if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) 91 err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); 92 93 if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) 94 err(EXIT_FAILURE, "pthread_create"); 95 } 96 } 97 98 static void toggle_done(int sig __maybe_unused, 99 siginfo_t *info __maybe_unused, 100 void *uc __maybe_unused) 101 { 102 done = true; 103 } 104 105 int bench_futex_requeue(int argc, const char **argv, 106 const char *prefix __maybe_unused) 107 { 108 int ret = 0; 109 unsigned int i, j; 110 struct sigaction act; 111 pthread_attr_t thread_attr; 112 113 argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0); 114 if (argc) 115 goto err; 116 117 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 118 119 sigfillset(&act.sa_mask); 120 act.sa_sigaction = toggle_done; 121 sigaction(SIGINT, &act, NULL); 122 123 if (!nthreads) 124 nthreads = ncpus; 125 126 worker = calloc(nthreads, sizeof(*worker)); 127 if (!worker) 128 err(EXIT_FAILURE, "calloc"); 129 130 printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), " 131 "%d at a time.\n\n", 132 getpid(), nthreads, &futex1, &futex2, nrequeue); 133 134 init_stats(&requeued_stats); 135 init_stats(&requeuetime_stats); 136 pthread_attr_init(&thread_attr); 137 pthread_mutex_init(&thread_lock, NULL); 138 pthread_cond_init(&thread_parent, NULL); 139 pthread_cond_init(&thread_worker, NULL); 140 141 for (j = 0; j < bench_repeat && !done; j++) { 142 unsigned int nrequeued = 0; 143 struct timeval start, end, runtime; 144 145 /* create, launch & block all threads */ 146 block_threads(worker, thread_attr); 147 148 /* make sure all threads are already blocked */ 149 pthread_mutex_lock(&thread_lock); 150 while (threads_starting) 151 pthread_cond_wait(&thread_parent, &thread_lock); 152 pthread_cond_broadcast(&thread_worker); 153 pthread_mutex_unlock(&thread_lock); 154 155 usleep(100000); 156 157 /* Ok, all threads are patiently blocked, start requeueing */ 158 gettimeofday(&start, NULL); 159 for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) 160 /* 161 * Do not wakeup any tasks blocked on futex1, allowing 162 * us to really measure futex_wait functionality. 163 */ 164 futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue, 165 FUTEX_PRIVATE_FLAG); 166 gettimeofday(&end, NULL); 167 timersub(&end, &start, &runtime); 168 169 update_stats(&requeued_stats, nrequeued); 170 update_stats(&requeuetime_stats, runtime.tv_usec); 171 172 if (!silent) { 173 printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", 174 j + 1, nrequeued, nthreads, runtime.tv_usec/1e3); 175 } 176 177 /* everybody should be blocked on futex2, wake'em up */ 178 nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG); 179 if (nthreads != nrequeued) 180 warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); 181 182 for (i = 0; i < nthreads; i++) { 183 ret = pthread_join(worker[i], NULL); 184 if (ret) 185 err(EXIT_FAILURE, "pthread_join"); 186 } 187 188 } 189 190 /* cleanup & report results */ 191 pthread_cond_destroy(&thread_parent); 192 pthread_cond_destroy(&thread_worker); 193 pthread_mutex_destroy(&thread_lock); 194 pthread_attr_destroy(&thread_attr); 195 196 print_summary(); 197 198 free(worker); 199 return ret; 200 err: 201 usage_with_options(bench_futex_requeue_usage, options); 202 exit(EXIT_FAILURE); 203 } 204