1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <subcmd/parse-options.h> 4 #include <linux/hw_breakpoint.h> 5 #include <linux/perf_event.h> 6 #include <linux/time64.h> 7 #include <sys/syscall.h> 8 #include <sys/ioctl.h> 9 #include <sys/time.h> 10 #include <pthread.h> 11 #include <stddef.h> 12 #include <stdlib.h> 13 #include <unistd.h> 14 #include <stdio.h> 15 #include <errno.h> 16 #include "bench.h" 17 #include "futex.h" 18 19 struct { 20 unsigned int nbreakpoints; 21 unsigned int nparallel; 22 unsigned int nthreads; 23 } thread_params = { 24 .nbreakpoints = 1, 25 .nparallel = 1, 26 .nthreads = 1, 27 }; 28 29 static const struct option thread_options[] = { 30 OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints, 31 "Specify amount of breakpoints"), 32 OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify amount of parallelism"), 33 OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify amount of threads"), 34 OPT_END() 35 }; 36 37 static const char * const thread_usage[] = { 38 "perf bench breakpoint thread <options>", 39 NULL 40 }; 41 42 struct breakpoint { 43 int fd; 44 char watched; 45 }; 46 47 static int breakpoint_setup(void *addr) 48 { 49 struct perf_event_attr attr = { .size = 0, }; 50 51 attr.type = PERF_TYPE_BREAKPOINT; 52 attr.size = sizeof(attr); 53 attr.inherit = 1; 54 attr.exclude_kernel = 1; 55 attr.exclude_hv = 1; 56 attr.bp_addr = (unsigned long)addr; 57 attr.bp_type = HW_BREAKPOINT_RW; 58 attr.bp_len = HW_BREAKPOINT_LEN_1; 59 return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); 60 } 61 62 static void *passive_thread(void *arg) 63 { 64 unsigned int *done = (unsigned int *)arg; 65 66 while (!__atomic_load_n(done, __ATOMIC_RELAXED)) 67 futex_wait(done, 0, NULL, 0); 68 return NULL; 69 } 70 71 static void *active_thread(void *arg) 72 { 73 unsigned int *done = (unsigned int *)arg; 74 75 while (!__atomic_load_n(done, __ATOMIC_RELAXED)); 76 return NULL; 77 } 78 79 static void *breakpoint_thread(void *arg) 80 { 81 unsigned int i, done; 82 int *repeat = (int *)arg; 83 pthread_t *threads; 84 85 threads = calloc(thread_params.nthreads, sizeof(threads[0])); 86 if (!threads) 87 exit((perror("calloc"), EXIT_FAILURE)); 88 89 while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) { 90 done = 0; 91 for (i = 0; i < thread_params.nthreads; i++) { 92 if (pthread_create(&threads[i], NULL, passive_thread, &done)) 93 exit((perror("pthread_create"), EXIT_FAILURE)); 94 } 95 __atomic_store_n(&done, 1, __ATOMIC_RELAXED); 96 futex_wake(&done, thread_params.nthreads, 0); 97 for (i = 0; i < thread_params.nthreads; i++) 98 pthread_join(threads[i], NULL); 99 } 100 free(threads); 101 return NULL; 102 } 103 104 // The benchmark creates nbreakpoints inheritable breakpoints, 105 // then starts nparallel threads which create and join bench_repeat batches of nthreads threads. 106 int bench_breakpoint_thread(int argc, const char **argv) 107 { 108 unsigned int i, result_usec; 109 int repeat = bench_repeat; 110 struct breakpoint *breakpoints; 111 pthread_t *parallel; 112 struct timeval start, stop, diff; 113 114 if (parse_options(argc, argv, thread_options, thread_usage, 0)) { 115 usage_with_options(thread_usage, thread_options); 116 exit(EXIT_FAILURE); 117 } 118 breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0])); 119 parallel = calloc(thread_params.nparallel, sizeof(parallel[0])); 120 if (!breakpoints || !parallel) 121 exit((perror("calloc"), EXIT_FAILURE)); 122 123 for (i = 0; i < thread_params.nbreakpoints; i++) { 124 breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched); 125 if (breakpoints[i].fd == -1) 126 exit((perror("perf_event_open"), EXIT_FAILURE)); 127 } 128 gettimeofday(&start, NULL); 129 for (i = 0; i < thread_params.nparallel; i++) { 130 if (pthread_create(¶llel[i], NULL, breakpoint_thread, &repeat)) 131 exit((perror("pthread_create"), EXIT_FAILURE)); 132 } 133 for (i = 0; i < thread_params.nparallel; i++) 134 pthread_join(parallel[i], NULL); 135 gettimeofday(&stop, NULL); 136 timersub(&stop, &start, &diff); 137 for (i = 0; i < thread_params.nbreakpoints; i++) 138 close(breakpoints[i].fd); 139 free(parallel); 140 free(breakpoints); 141 switch (bench_format) { 142 case BENCH_FORMAT_DEFAULT: 143 printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n", 144 bench_repeat, thread_params.nbreakpoints, thread_params.nparallel); 145 printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", 146 (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 147 result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 148 printf(" %14lf usecs/op\n", 149 (double)result_usec / bench_repeat / thread_params.nthreads); 150 printf(" %14lf usecs/op/cpu\n", 151 (double)result_usec / bench_repeat / 152 thread_params.nthreads * thread_params.nparallel); 153 break; 154 case BENCH_FORMAT_SIMPLE: 155 printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 156 break; 157 default: 158 fprintf(stderr, "Unknown format: %d\n", bench_format); 159 exit(EXIT_FAILURE); 160 } 161 return 0; 162 } 163 164 struct { 165 unsigned int npassive; 166 unsigned int nactive; 167 } enable_params = { 168 .nactive = 0, 169 .npassive = 0, 170 }; 171 172 static const struct option enable_options[] = { 173 OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify amount of passive threads"), 174 OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify amount of active threads"), 175 OPT_END() 176 }; 177 178 static const char * const enable_usage[] = { 179 "perf bench breakpoint enable <options>", 180 NULL 181 }; 182 183 // The benchmark creates an inheritable breakpoint, 184 // then starts npassive threads that block and nactive threads that actively spin 185 // and then disables and enables the breakpoint bench_repeat times. 186 int bench_breakpoint_enable(int argc, const char **argv) 187 { 188 unsigned int i, nthreads, result_usec, done = 0; 189 char watched; 190 int fd; 191 pthread_t *threads; 192 struct timeval start, stop, diff; 193 194 if (parse_options(argc, argv, enable_options, enable_usage, 0)) { 195 usage_with_options(enable_usage, enable_options); 196 exit(EXIT_FAILURE); 197 } 198 fd = breakpoint_setup(&watched); 199 if (fd == -1) 200 exit((perror("perf_event_open"), EXIT_FAILURE)); 201 nthreads = enable_params.npassive + enable_params.nactive; 202 threads = calloc(nthreads, sizeof(threads[0])); 203 if (!threads) 204 exit((perror("calloc"), EXIT_FAILURE)); 205 206 for (i = 0; i < nthreads; i++) { 207 if (pthread_create(&threads[i], NULL, 208 i < enable_params.npassive ? passive_thread : active_thread, &done)) 209 exit((perror("pthread_create"), EXIT_FAILURE)); 210 } 211 usleep(10000); // let the threads block 212 gettimeofday(&start, NULL); 213 for (i = 0; i < bench_repeat; i++) { 214 if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0)) 215 exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE)); 216 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) 217 exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE)); 218 } 219 gettimeofday(&stop, NULL); 220 timersub(&stop, &start, &diff); 221 __atomic_store_n(&done, 1, __ATOMIC_RELAXED); 222 futex_wake(&done, enable_params.npassive, 0); 223 for (i = 0; i < nthreads; i++) 224 pthread_join(threads[i], NULL); 225 free(threads); 226 close(fd); 227 switch (bench_format) { 228 case BENCH_FORMAT_DEFAULT: 229 printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n", 230 bench_repeat, enable_params.npassive, enable_params.nactive); 231 printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", 232 (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 233 result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 234 printf(" %14lf usecs/op\n", (double)result_usec / bench_repeat); 235 break; 236 case BENCH_FORMAT_SIMPLE: 237 printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 238 break; 239 default: 240 fprintf(stderr, "Unknown format: %d\n", bench_format); 241 exit(EXIT_FAILURE); 242 } 243 return 0; 244 } 245