1c960e990SMathieu Desnoyers // SPDX-License-Identifier: LGPL-2.1 2c960e990SMathieu Desnoyers #define _GNU_SOURCE 3c960e990SMathieu Desnoyers #include <assert.h> 4f166b111SPeter Oskolkov #include <linux/membarrier.h> 5c960e990SMathieu Desnoyers #include <pthread.h> 6c960e990SMathieu Desnoyers #include <sched.h> 7f166b111SPeter Oskolkov #include <stdatomic.h> 8c960e990SMathieu Desnoyers #include <stdint.h> 9c960e990SMathieu Desnoyers #include <stdio.h> 10c960e990SMathieu Desnoyers #include <stdlib.h> 11c960e990SMathieu Desnoyers #include <string.h> 12c960e990SMathieu Desnoyers #include <syscall.h> 13c960e990SMathieu Desnoyers #include <unistd.h> 14c960e990SMathieu Desnoyers #include <poll.h> 15c960e990SMathieu Desnoyers #include <sys/types.h> 16c960e990SMathieu Desnoyers #include <signal.h> 17c960e990SMathieu Desnoyers #include <errno.h> 18c960e990SMathieu Desnoyers #include <stddef.h> 19c960e990SMathieu Desnoyers 208df34c56SMathieu Desnoyers static inline pid_t rseq_gettid(void) 21c960e990SMathieu Desnoyers { 22c960e990SMathieu Desnoyers return syscall(__NR_gettid); 23c960e990SMathieu Desnoyers } 24c960e990SMathieu Desnoyers 25c960e990SMathieu Desnoyers #define NR_INJECT 9 26c960e990SMathieu Desnoyers static int loop_cnt[NR_INJECT + 1]; 27c960e990SMathieu Desnoyers 28c960e990SMathieu Desnoyers static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used)); 29c960e990SMathieu Desnoyers static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used)); 30c960e990SMathieu Desnoyers static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used)); 31c960e990SMathieu Desnoyers static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used)); 32c960e990SMathieu Desnoyers static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used)); 33c960e990SMathieu Desnoyers static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used)); 34c960e990SMathieu Desnoyers 35c960e990SMathieu Desnoyers static int opt_modulo, verbose; 36c960e990SMathieu Desnoyers 37c960e990SMathieu 
Desnoyers static int opt_yield, opt_signal, opt_sleep, 38c960e990SMathieu Desnoyers opt_disable_rseq, opt_threads = 200, 39c960e990SMathieu Desnoyers opt_disable_mod = 0, opt_test = 's', opt_mb = 0; 40c960e990SMathieu Desnoyers 41c960e990SMathieu Desnoyers #ifndef RSEQ_SKIP_FASTPATH 42c960e990SMathieu Desnoyers static long long opt_reps = 5000; 43c960e990SMathieu Desnoyers #else 44c960e990SMathieu Desnoyers static long long opt_reps = 100; 45c960e990SMathieu Desnoyers #endif 46c960e990SMathieu Desnoyers 47c960e990SMathieu Desnoyers static __thread __attribute__((tls_model("initial-exec"))) 48c960e990SMathieu Desnoyers unsigned int signals_delivered; 49c960e990SMathieu Desnoyers 50c960e990SMathieu Desnoyers #ifndef BENCHMARK 51c960e990SMathieu Desnoyers 52c960e990SMathieu Desnoyers static __thread __attribute__((tls_model("initial-exec"), unused)) 53c960e990SMathieu Desnoyers unsigned int yield_mod_cnt, nr_abort; 54c960e990SMathieu Desnoyers 55c960e990SMathieu Desnoyers #define printf_verbose(fmt, ...) 
\ 56c960e990SMathieu Desnoyers do { \ 57c960e990SMathieu Desnoyers if (verbose) \ 58c960e990SMathieu Desnoyers printf(fmt, ## __VA_ARGS__); \ 59c960e990SMathieu Desnoyers } while (0) 60c960e990SMathieu Desnoyers 61ce01a157SMathieu Desnoyers #ifdef __i386__ 62c960e990SMathieu Desnoyers 63c960e990SMathieu Desnoyers #define INJECT_ASM_REG "eax" 64c960e990SMathieu Desnoyers 65c960e990SMathieu Desnoyers #define RSEQ_INJECT_CLOBBER \ 66c960e990SMathieu Desnoyers , INJECT_ASM_REG 67c960e990SMathieu Desnoyers 68c960e990SMathieu Desnoyers #define RSEQ_INJECT_ASM(n) \ 69c960e990SMathieu Desnoyers "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ 70c960e990SMathieu Desnoyers "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 71c960e990SMathieu Desnoyers "jz 333f\n\t" \ 72c960e990SMathieu Desnoyers "222:\n\t" \ 73c960e990SMathieu Desnoyers "dec %%" INJECT_ASM_REG "\n\t" \ 74c960e990SMathieu Desnoyers "jnz 222b\n\t" \ 75c960e990SMathieu Desnoyers "333:\n\t" 76c960e990SMathieu Desnoyers 77c960e990SMathieu Desnoyers #elif defined(__x86_64__) 78c960e990SMathieu Desnoyers 79ce01a157SMathieu Desnoyers #define INJECT_ASM_REG_P "rax" 80ce01a157SMathieu Desnoyers #define INJECT_ASM_REG "eax" 81ce01a157SMathieu Desnoyers 82ce01a157SMathieu Desnoyers #define RSEQ_INJECT_CLOBBER \ 83ce01a157SMathieu Desnoyers , INJECT_ASM_REG_P \ 84ce01a157SMathieu Desnoyers , INJECT_ASM_REG 85ce01a157SMathieu Desnoyers 86c960e990SMathieu Desnoyers #define RSEQ_INJECT_ASM(n) \ 87ce01a157SMathieu Desnoyers "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \ 88ce01a157SMathieu Desnoyers "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \ 89c960e990SMathieu Desnoyers "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 90c960e990SMathieu Desnoyers "jz 333f\n\t" \ 91c960e990SMathieu Desnoyers "222:\n\t" \ 92c960e990SMathieu Desnoyers "dec %%" INJECT_ASM_REG "\n\t" \ 93c960e990SMathieu Desnoyers "jnz 222b\n\t" \ 94c960e990SMathieu Desnoyers "333:\n\t" 95c960e990SMathieu Desnoyers 
964c14d1ceSVasily Gorbik #elif defined(__s390__) 974c14d1ceSVasily Gorbik 984c14d1ceSVasily Gorbik #define RSEQ_INJECT_INPUT \ 994c14d1ceSVasily Gorbik , [loop_cnt_1]"m"(loop_cnt[1]) \ 1004c14d1ceSVasily Gorbik , [loop_cnt_2]"m"(loop_cnt[2]) \ 1014c14d1ceSVasily Gorbik , [loop_cnt_3]"m"(loop_cnt[3]) \ 1024c14d1ceSVasily Gorbik , [loop_cnt_4]"m"(loop_cnt[4]) \ 1034c14d1ceSVasily Gorbik , [loop_cnt_5]"m"(loop_cnt[5]) \ 1044c14d1ceSVasily Gorbik , [loop_cnt_6]"m"(loop_cnt[6]) 1054c14d1ceSVasily Gorbik 1064c14d1ceSVasily Gorbik #define INJECT_ASM_REG "r12" 1074c14d1ceSVasily Gorbik 1084c14d1ceSVasily Gorbik #define RSEQ_INJECT_CLOBBER \ 1094c14d1ceSVasily Gorbik , INJECT_ASM_REG 1104c14d1ceSVasily Gorbik 1114c14d1ceSVasily Gorbik #define RSEQ_INJECT_ASM(n) \ 1124c14d1ceSVasily Gorbik "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 1134c14d1ceSVasily Gorbik "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \ 1144c14d1ceSVasily Gorbik "je 333f\n\t" \ 1154c14d1ceSVasily Gorbik "222:\n\t" \ 1164c14d1ceSVasily Gorbik "ahi %%" INJECT_ASM_REG ", -1\n\t" \ 1174c14d1ceSVasily Gorbik "jnz 222b\n\t" \ 1184c14d1ceSVasily Gorbik "333:\n\t" 1194c14d1ceSVasily Gorbik 120c960e990SMathieu Desnoyers #elif defined(__ARMEL__) 121c960e990SMathieu Desnoyers 122c960e990SMathieu Desnoyers #define RSEQ_INJECT_INPUT \ 123c960e990SMathieu Desnoyers , [loop_cnt_1]"m"(loop_cnt[1]) \ 124c960e990SMathieu Desnoyers , [loop_cnt_2]"m"(loop_cnt[2]) \ 125c960e990SMathieu Desnoyers , [loop_cnt_3]"m"(loop_cnt[3]) \ 126c960e990SMathieu Desnoyers , [loop_cnt_4]"m"(loop_cnt[4]) \ 127c960e990SMathieu Desnoyers , [loop_cnt_5]"m"(loop_cnt[5]) \ 128c960e990SMathieu Desnoyers , [loop_cnt_6]"m"(loop_cnt[6]) 129c960e990SMathieu Desnoyers 130c960e990SMathieu Desnoyers #define INJECT_ASM_REG "r4" 131c960e990SMathieu Desnoyers 132c960e990SMathieu Desnoyers #define RSEQ_INJECT_CLOBBER \ 133c960e990SMathieu Desnoyers , INJECT_ASM_REG 134c960e990SMathieu Desnoyers 135c960e990SMathieu Desnoyers #define 
RSEQ_INJECT_ASM(n) \ 136c960e990SMathieu Desnoyers "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 137c960e990SMathieu Desnoyers "cmp " INJECT_ASM_REG ", #0\n\t" \ 138c960e990SMathieu Desnoyers "beq 333f\n\t" \ 139c960e990SMathieu Desnoyers "222:\n\t" \ 140c960e990SMathieu Desnoyers "subs " INJECT_ASM_REG ", #1\n\t" \ 141c960e990SMathieu Desnoyers "bne 222b\n\t" \ 142c960e990SMathieu Desnoyers "333:\n\t" 143c960e990SMathieu Desnoyers 144b9657463SWill Deacon #elif defined(__AARCH64EL__) 145b9657463SWill Deacon 146b9657463SWill Deacon #define RSEQ_INJECT_INPUT \ 147b9657463SWill Deacon , [loop_cnt_1] "Qo" (loop_cnt[1]) \ 148b9657463SWill Deacon , [loop_cnt_2] "Qo" (loop_cnt[2]) \ 149b9657463SWill Deacon , [loop_cnt_3] "Qo" (loop_cnt[3]) \ 150b9657463SWill Deacon , [loop_cnt_4] "Qo" (loop_cnt[4]) \ 151b9657463SWill Deacon , [loop_cnt_5] "Qo" (loop_cnt[5]) \ 152b9657463SWill Deacon , [loop_cnt_6] "Qo" (loop_cnt[6]) 153b9657463SWill Deacon 154b9657463SWill Deacon #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32 155b9657463SWill Deacon 156b9657463SWill Deacon #define RSEQ_INJECT_ASM(n) \ 157b9657463SWill Deacon " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \ 158b9657463SWill Deacon " cbz " INJECT_ASM_REG ", 333f\n" \ 159b9657463SWill Deacon "222:\n" \ 160b9657463SWill Deacon " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \ 161b9657463SWill Deacon " cbnz " INJECT_ASM_REG ", 222b\n" \ 162b9657463SWill Deacon "333:\n" 163b9657463SWill Deacon 164c960e990SMathieu Desnoyers #elif __PPC__ 165c960e990SMathieu Desnoyers 166c960e990SMathieu Desnoyers #define RSEQ_INJECT_INPUT \ 167c960e990SMathieu Desnoyers , [loop_cnt_1]"m"(loop_cnt[1]) \ 168c960e990SMathieu Desnoyers , [loop_cnt_2]"m"(loop_cnt[2]) \ 169c960e990SMathieu Desnoyers , [loop_cnt_3]"m"(loop_cnt[3]) \ 170c960e990SMathieu Desnoyers , [loop_cnt_4]"m"(loop_cnt[4]) \ 171c960e990SMathieu Desnoyers , [loop_cnt_5]"m"(loop_cnt[5]) \ 172c960e990SMathieu Desnoyers , [loop_cnt_6]"m"(loop_cnt[6]) 173c960e990SMathieu Desnoyers 
174c960e990SMathieu Desnoyers #define INJECT_ASM_REG "r18" 175c960e990SMathieu Desnoyers 176c960e990SMathieu Desnoyers #define RSEQ_INJECT_CLOBBER \ 177c960e990SMathieu Desnoyers , INJECT_ASM_REG 178c960e990SMathieu Desnoyers 179c960e990SMathieu Desnoyers #define RSEQ_INJECT_ASM(n) \ 180c960e990SMathieu Desnoyers "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 181c960e990SMathieu Desnoyers "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \ 182c960e990SMathieu Desnoyers "beq 333f\n\t" \ 183c960e990SMathieu Desnoyers "222:\n\t" \ 184c960e990SMathieu Desnoyers "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ 185c960e990SMathieu Desnoyers "bne 222b\n\t" \ 186c960e990SMathieu Desnoyers "333:\n\t" 187744f4be5SPaul Burton 188744f4be5SPaul Burton #elif defined(__mips__) 189744f4be5SPaul Burton 190744f4be5SPaul Burton #define RSEQ_INJECT_INPUT \ 191744f4be5SPaul Burton , [loop_cnt_1]"m"(loop_cnt[1]) \ 192744f4be5SPaul Burton , [loop_cnt_2]"m"(loop_cnt[2]) \ 193744f4be5SPaul Burton , [loop_cnt_3]"m"(loop_cnt[3]) \ 194744f4be5SPaul Burton , [loop_cnt_4]"m"(loop_cnt[4]) \ 195744f4be5SPaul Burton , [loop_cnt_5]"m"(loop_cnt[5]) \ 196744f4be5SPaul Burton , [loop_cnt_6]"m"(loop_cnt[6]) 197744f4be5SPaul Burton 198744f4be5SPaul Burton #define INJECT_ASM_REG "$5" 199744f4be5SPaul Burton 200744f4be5SPaul Burton #define RSEQ_INJECT_CLOBBER \ 201744f4be5SPaul Burton , INJECT_ASM_REG 202744f4be5SPaul Burton 203744f4be5SPaul Burton #define RSEQ_INJECT_ASM(n) \ 204744f4be5SPaul Burton "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 205744f4be5SPaul Burton "beqz " INJECT_ASM_REG ", 333f\n\t" \ 206744f4be5SPaul Burton "222:\n\t" \ 207744f4be5SPaul Burton "addiu " INJECT_ASM_REG ", -1\n\t" \ 208744f4be5SPaul Burton "bnez " INJECT_ASM_REG ", 222b\n\t" \ 209744f4be5SPaul Burton "333:\n\t" 210744f4be5SPaul Burton 211c960e990SMathieu Desnoyers #else 212c960e990SMathieu Desnoyers #error unsupported target 213c960e990SMathieu Desnoyers #endif 214c960e990SMathieu Desnoyers 215c960e990SMathieu 
Desnoyers #define RSEQ_INJECT_FAILED \ 216c960e990SMathieu Desnoyers nr_abort++; 217c960e990SMathieu Desnoyers 218c960e990SMathieu Desnoyers #define RSEQ_INJECT_C(n) \ 219c960e990SMathieu Desnoyers { \ 220c960e990SMathieu Desnoyers int loc_i, loc_nr_loops = loop_cnt[n]; \ 221c960e990SMathieu Desnoyers \ 222c960e990SMathieu Desnoyers for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \ 223c960e990SMathieu Desnoyers rseq_barrier(); \ 224c960e990SMathieu Desnoyers } \ 225c960e990SMathieu Desnoyers if (loc_nr_loops == -1 && opt_modulo) { \ 226c960e990SMathieu Desnoyers if (yield_mod_cnt == opt_modulo - 1) { \ 227c960e990SMathieu Desnoyers if (opt_sleep > 0) \ 228c960e990SMathieu Desnoyers poll(NULL, 0, opt_sleep); \ 229c960e990SMathieu Desnoyers if (opt_yield) \ 230c960e990SMathieu Desnoyers sched_yield(); \ 231c960e990SMathieu Desnoyers if (opt_signal) \ 232c960e990SMathieu Desnoyers raise(SIGUSR1); \ 233c960e990SMathieu Desnoyers yield_mod_cnt = 0; \ 234c960e990SMathieu Desnoyers } else { \ 235c960e990SMathieu Desnoyers yield_mod_cnt++; \ 236c960e990SMathieu Desnoyers } \ 237c960e990SMathieu Desnoyers } \ 238c960e990SMathieu Desnoyers } 239c960e990SMathieu Desnoyers 240c960e990SMathieu Desnoyers #else 241c960e990SMathieu Desnoyers 242c960e990SMathieu Desnoyers #define printf_verbose(fmt, ...) 
243c960e990SMathieu Desnoyers 244c960e990SMathieu Desnoyers #endif /* BENCHMARK */ 245c960e990SMathieu Desnoyers 246c960e990SMathieu Desnoyers #include "rseq.h" 247c960e990SMathieu Desnoyers 248c960e990SMathieu Desnoyers struct percpu_lock_entry { 249c960e990SMathieu Desnoyers intptr_t v; 250c960e990SMathieu Desnoyers } __attribute__((aligned(128))); 251c960e990SMathieu Desnoyers 252c960e990SMathieu Desnoyers struct percpu_lock { 253c960e990SMathieu Desnoyers struct percpu_lock_entry c[CPU_SETSIZE]; 254c960e990SMathieu Desnoyers }; 255c960e990SMathieu Desnoyers 256c960e990SMathieu Desnoyers struct test_data_entry { 257c960e990SMathieu Desnoyers intptr_t count; 258c960e990SMathieu Desnoyers } __attribute__((aligned(128))); 259c960e990SMathieu Desnoyers 260c960e990SMathieu Desnoyers struct spinlock_test_data { 261c960e990SMathieu Desnoyers struct percpu_lock lock; 262c960e990SMathieu Desnoyers struct test_data_entry c[CPU_SETSIZE]; 263c960e990SMathieu Desnoyers }; 264c960e990SMathieu Desnoyers 265c960e990SMathieu Desnoyers struct spinlock_thread_test_data { 266c960e990SMathieu Desnoyers struct spinlock_test_data *data; 267c960e990SMathieu Desnoyers long long reps; 268c960e990SMathieu Desnoyers int reg; 269c960e990SMathieu Desnoyers }; 270c960e990SMathieu Desnoyers 271c960e990SMathieu Desnoyers struct inc_test_data { 272c960e990SMathieu Desnoyers struct test_data_entry c[CPU_SETSIZE]; 273c960e990SMathieu Desnoyers }; 274c960e990SMathieu Desnoyers 275c960e990SMathieu Desnoyers struct inc_thread_test_data { 276c960e990SMathieu Desnoyers struct inc_test_data *data; 277c960e990SMathieu Desnoyers long long reps; 278c960e990SMathieu Desnoyers int reg; 279c960e990SMathieu Desnoyers }; 280c960e990SMathieu Desnoyers 281c960e990SMathieu Desnoyers struct percpu_list_node { 282c960e990SMathieu Desnoyers intptr_t data; 283c960e990SMathieu Desnoyers struct percpu_list_node *next; 284c960e990SMathieu Desnoyers }; 285c960e990SMathieu Desnoyers 286c960e990SMathieu Desnoyers struct 
percpu_list_entry { 287c960e990SMathieu Desnoyers struct percpu_list_node *head; 288c960e990SMathieu Desnoyers } __attribute__((aligned(128))); 289c960e990SMathieu Desnoyers 290c960e990SMathieu Desnoyers struct percpu_list { 291c960e990SMathieu Desnoyers struct percpu_list_entry c[CPU_SETSIZE]; 292c960e990SMathieu Desnoyers }; 293c960e990SMathieu Desnoyers 294c960e990SMathieu Desnoyers #define BUFFER_ITEM_PER_CPU 100 295c960e990SMathieu Desnoyers 296c960e990SMathieu Desnoyers struct percpu_buffer_node { 297c960e990SMathieu Desnoyers intptr_t data; 298c960e990SMathieu Desnoyers }; 299c960e990SMathieu Desnoyers 300c960e990SMathieu Desnoyers struct percpu_buffer_entry { 301c960e990SMathieu Desnoyers intptr_t offset; 302c960e990SMathieu Desnoyers intptr_t buflen; 303c960e990SMathieu Desnoyers struct percpu_buffer_node **array; 304c960e990SMathieu Desnoyers } __attribute__((aligned(128))); 305c960e990SMathieu Desnoyers 306c960e990SMathieu Desnoyers struct percpu_buffer { 307c960e990SMathieu Desnoyers struct percpu_buffer_entry c[CPU_SETSIZE]; 308c960e990SMathieu Desnoyers }; 309c960e990SMathieu Desnoyers 310c960e990SMathieu Desnoyers #define MEMCPY_BUFFER_ITEM_PER_CPU 100 311c960e990SMathieu Desnoyers 312c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node { 313c960e990SMathieu Desnoyers intptr_t data1; 314c960e990SMathieu Desnoyers uint64_t data2; 315c960e990SMathieu Desnoyers }; 316c960e990SMathieu Desnoyers 317c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_entry { 318c960e990SMathieu Desnoyers intptr_t offset; 319c960e990SMathieu Desnoyers intptr_t buflen; 320c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node *array; 321c960e990SMathieu Desnoyers } __attribute__((aligned(128))); 322c960e990SMathieu Desnoyers 323c960e990SMathieu Desnoyers struct percpu_memcpy_buffer { 324c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; 325c960e990SMathieu Desnoyers }; 326c960e990SMathieu Desnoyers 327c960e990SMathieu Desnoyers /* A 
simple percpu spinlock. Grabs lock on current cpu. */ 328c960e990SMathieu Desnoyers static int rseq_this_cpu_lock(struct percpu_lock *lock) 329c960e990SMathieu Desnoyers { 330c960e990SMathieu Desnoyers int cpu; 331c960e990SMathieu Desnoyers 332c960e990SMathieu Desnoyers for (;;) { 333c960e990SMathieu Desnoyers int ret; 334c960e990SMathieu Desnoyers 335c960e990SMathieu Desnoyers cpu = rseq_cpu_start(); 336c960e990SMathieu Desnoyers ret = rseq_cmpeqv_storev(&lock->c[cpu].v, 337c960e990SMathieu Desnoyers 0, 1, cpu); 338c960e990SMathieu Desnoyers if (rseq_likely(!ret)) 339c960e990SMathieu Desnoyers break; 340c960e990SMathieu Desnoyers /* Retry if comparison fails or rseq aborts. */ 341c960e990SMathieu Desnoyers } 342c960e990SMathieu Desnoyers /* 343c960e990SMathieu Desnoyers * Acquire semantic when taking lock after control dependency. 344c960e990SMathieu Desnoyers * Matches rseq_smp_store_release(). 345c960e990SMathieu Desnoyers */ 346c960e990SMathieu Desnoyers rseq_smp_acquire__after_ctrl_dep(); 347c960e990SMathieu Desnoyers return cpu; 348c960e990SMathieu Desnoyers } 349c960e990SMathieu Desnoyers 350c960e990SMathieu Desnoyers static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) 351c960e990SMathieu Desnoyers { 352c960e990SMathieu Desnoyers assert(lock->c[cpu].v == 1); 353c960e990SMathieu Desnoyers /* 354c960e990SMathieu Desnoyers * Release lock, with release semantic. Matches 355c960e990SMathieu Desnoyers * rseq_smp_acquire__after_ctrl_dep(). 
356c960e990SMathieu Desnoyers */ 357c960e990SMathieu Desnoyers rseq_smp_store_release(&lock->c[cpu].v, 0); 358c960e990SMathieu Desnoyers } 359c960e990SMathieu Desnoyers 360c960e990SMathieu Desnoyers void *test_percpu_spinlock_thread(void *arg) 361c960e990SMathieu Desnoyers { 362c960e990SMathieu Desnoyers struct spinlock_thread_test_data *thread_data = arg; 363c960e990SMathieu Desnoyers struct spinlock_test_data *data = thread_data->data; 364c960e990SMathieu Desnoyers long long i, reps; 365c960e990SMathieu Desnoyers 366c960e990SMathieu Desnoyers if (!opt_disable_rseq && thread_data->reg && 367c960e990SMathieu Desnoyers rseq_register_current_thread()) 368c960e990SMathieu Desnoyers abort(); 369c960e990SMathieu Desnoyers reps = thread_data->reps; 370c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) { 371*930378d0SMathieu Desnoyers int cpu = rseq_this_cpu_lock(&data->lock); 372c960e990SMathieu Desnoyers data->c[cpu].count++; 373c960e990SMathieu Desnoyers rseq_percpu_unlock(&data->lock, cpu); 374c960e990SMathieu Desnoyers #ifndef BENCHMARK 375c960e990SMathieu Desnoyers if (i != 0 && !(i % (reps / 10))) 3768df34c56SMathieu Desnoyers printf_verbose("tid %d: count %lld\n", 3778df34c56SMathieu Desnoyers (int) rseq_gettid(), i); 378c960e990SMathieu Desnoyers #endif 379c960e990SMathieu Desnoyers } 380c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 3818df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered); 382c960e990SMathieu Desnoyers if (!opt_disable_rseq && thread_data->reg && 383c960e990SMathieu Desnoyers rseq_unregister_current_thread()) 384c960e990SMathieu Desnoyers abort(); 385c960e990SMathieu Desnoyers return NULL; 386c960e990SMathieu Desnoyers } 387c960e990SMathieu Desnoyers 388c960e990SMathieu Desnoyers /* 389c960e990SMathieu Desnoyers * A simple test which implements a sharded counter using a per-cpu 390c960e990SMathieu Desnoyers * lock. 
Obviously real applications might prefer to simply use a 391c960e990SMathieu Desnoyers * per-cpu increment; however, this is reasonable for a test and the 392c960e990SMathieu Desnoyers * lock can be extended to synchronize more complicated operations. 393c960e990SMathieu Desnoyers */ 394c960e990SMathieu Desnoyers void test_percpu_spinlock(void) 395c960e990SMathieu Desnoyers { 396c960e990SMathieu Desnoyers const int num_threads = opt_threads; 397c960e990SMathieu Desnoyers int i, ret; 398c960e990SMathieu Desnoyers uint64_t sum; 399c960e990SMathieu Desnoyers pthread_t test_threads[num_threads]; 400c960e990SMathieu Desnoyers struct spinlock_test_data data; 401c960e990SMathieu Desnoyers struct spinlock_thread_test_data thread_data[num_threads]; 402c960e990SMathieu Desnoyers 403c960e990SMathieu Desnoyers memset(&data, 0, sizeof(data)); 404c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 405c960e990SMathieu Desnoyers thread_data[i].reps = opt_reps; 406c960e990SMathieu Desnoyers if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 407c960e990SMathieu Desnoyers thread_data[i].reg = 1; 408c960e990SMathieu Desnoyers else 409c960e990SMathieu Desnoyers thread_data[i].reg = 0; 410c960e990SMathieu Desnoyers thread_data[i].data = &data; 411c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL, 412c960e990SMathieu Desnoyers test_percpu_spinlock_thread, 413c960e990SMathieu Desnoyers &thread_data[i]); 414c960e990SMathieu Desnoyers if (ret) { 415c960e990SMathieu Desnoyers errno = ret; 416c960e990SMathieu Desnoyers perror("pthread_create"); 417c960e990SMathieu Desnoyers abort(); 418c960e990SMathieu Desnoyers } 419c960e990SMathieu Desnoyers } 420c960e990SMathieu Desnoyers 421c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 422c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL); 423c960e990SMathieu Desnoyers if (ret) { 424c960e990SMathieu Desnoyers errno = ret; 425c960e990SMathieu Desnoyers perror("pthread_join"); 
426c960e990SMathieu Desnoyers abort(); 427c960e990SMathieu Desnoyers } 428c960e990SMathieu Desnoyers } 429c960e990SMathieu Desnoyers 430c960e990SMathieu Desnoyers sum = 0; 431c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) 432c960e990SMathieu Desnoyers sum += data.c[i].count; 433c960e990SMathieu Desnoyers 434c960e990SMathieu Desnoyers assert(sum == (uint64_t)opt_reps * num_threads); 435c960e990SMathieu Desnoyers } 436c960e990SMathieu Desnoyers 437c960e990SMathieu Desnoyers void *test_percpu_inc_thread(void *arg) 438c960e990SMathieu Desnoyers { 439c960e990SMathieu Desnoyers struct inc_thread_test_data *thread_data = arg; 440c960e990SMathieu Desnoyers struct inc_test_data *data = thread_data->data; 441c960e990SMathieu Desnoyers long long i, reps; 442c960e990SMathieu Desnoyers 443c960e990SMathieu Desnoyers if (!opt_disable_rseq && thread_data->reg && 444c960e990SMathieu Desnoyers rseq_register_current_thread()) 445c960e990SMathieu Desnoyers abort(); 446c960e990SMathieu Desnoyers reps = thread_data->reps; 447c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) { 448c960e990SMathieu Desnoyers int ret; 449c960e990SMathieu Desnoyers 450c960e990SMathieu Desnoyers do { 451c960e990SMathieu Desnoyers int cpu; 452c960e990SMathieu Desnoyers 453c960e990SMathieu Desnoyers cpu = rseq_cpu_start(); 454c960e990SMathieu Desnoyers ret = rseq_addv(&data->c[cpu].count, 1, cpu); 455c960e990SMathieu Desnoyers } while (rseq_unlikely(ret)); 456c960e990SMathieu Desnoyers #ifndef BENCHMARK 457c960e990SMathieu Desnoyers if (i != 0 && !(i % (reps / 10))) 4588df34c56SMathieu Desnoyers printf_verbose("tid %d: count %lld\n", 4598df34c56SMathieu Desnoyers (int) rseq_gettid(), i); 460c960e990SMathieu Desnoyers #endif 461c960e990SMathieu Desnoyers } 462c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 4638df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered); 464c960e990SMathieu Desnoyers if (!opt_disable_rseq && 
thread_data->reg && 465c960e990SMathieu Desnoyers rseq_unregister_current_thread()) 466c960e990SMathieu Desnoyers abort(); 467c960e990SMathieu Desnoyers return NULL; 468c960e990SMathieu Desnoyers } 469c960e990SMathieu Desnoyers 470c960e990SMathieu Desnoyers void test_percpu_inc(void) 471c960e990SMathieu Desnoyers { 472c960e990SMathieu Desnoyers const int num_threads = opt_threads; 473c960e990SMathieu Desnoyers int i, ret; 474c960e990SMathieu Desnoyers uint64_t sum; 475c960e990SMathieu Desnoyers pthread_t test_threads[num_threads]; 476c960e990SMathieu Desnoyers struct inc_test_data data; 477c960e990SMathieu Desnoyers struct inc_thread_test_data thread_data[num_threads]; 478c960e990SMathieu Desnoyers 479c960e990SMathieu Desnoyers memset(&data, 0, sizeof(data)); 480c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 481c960e990SMathieu Desnoyers thread_data[i].reps = opt_reps; 482c960e990SMathieu Desnoyers if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 483c960e990SMathieu Desnoyers thread_data[i].reg = 1; 484c960e990SMathieu Desnoyers else 485c960e990SMathieu Desnoyers thread_data[i].reg = 0; 486c960e990SMathieu Desnoyers thread_data[i].data = &data; 487c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL, 488c960e990SMathieu Desnoyers test_percpu_inc_thread, 489c960e990SMathieu Desnoyers &thread_data[i]); 490c960e990SMathieu Desnoyers if (ret) { 491c960e990SMathieu Desnoyers errno = ret; 492c960e990SMathieu Desnoyers perror("pthread_create"); 493c960e990SMathieu Desnoyers abort(); 494c960e990SMathieu Desnoyers } 495c960e990SMathieu Desnoyers } 496c960e990SMathieu Desnoyers 497c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 498c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL); 499c960e990SMathieu Desnoyers if (ret) { 500c960e990SMathieu Desnoyers errno = ret; 501c960e990SMathieu Desnoyers perror("pthread_join"); 502c960e990SMathieu Desnoyers abort(); 503c960e990SMathieu Desnoyers } 504c960e990SMathieu 
Desnoyers } 505c960e990SMathieu Desnoyers 506c960e990SMathieu Desnoyers sum = 0; 507c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) 508c960e990SMathieu Desnoyers sum += data.c[i].count; 509c960e990SMathieu Desnoyers 510c960e990SMathieu Desnoyers assert(sum == (uint64_t)opt_reps * num_threads); 511c960e990SMathieu Desnoyers } 512c960e990SMathieu Desnoyers 513c960e990SMathieu Desnoyers void this_cpu_list_push(struct percpu_list *list, 514c960e990SMathieu Desnoyers struct percpu_list_node *node, 515c960e990SMathieu Desnoyers int *_cpu) 516c960e990SMathieu Desnoyers { 517c960e990SMathieu Desnoyers int cpu; 518c960e990SMathieu Desnoyers 519c960e990SMathieu Desnoyers for (;;) { 520c960e990SMathieu Desnoyers intptr_t *targetptr, newval, expect; 521c960e990SMathieu Desnoyers int ret; 522c960e990SMathieu Desnoyers 523c960e990SMathieu Desnoyers cpu = rseq_cpu_start(); 524c960e990SMathieu Desnoyers /* Load list->c[cpu].head with single-copy atomicity. */ 525c960e990SMathieu Desnoyers expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); 526c960e990SMathieu Desnoyers newval = (intptr_t)node; 527c960e990SMathieu Desnoyers targetptr = (intptr_t *)&list->c[cpu].head; 528c960e990SMathieu Desnoyers node->next = (struct percpu_list_node *)expect; 529c960e990SMathieu Desnoyers ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); 530c960e990SMathieu Desnoyers if (rseq_likely(!ret)) 531c960e990SMathieu Desnoyers break; 532c960e990SMathieu Desnoyers /* Retry if comparison fails or rseq aborts. */ 533c960e990SMathieu Desnoyers } 534c960e990SMathieu Desnoyers if (_cpu) 535c960e990SMathieu Desnoyers *_cpu = cpu; 536c960e990SMathieu Desnoyers } 537c960e990SMathieu Desnoyers 538c960e990SMathieu Desnoyers /* 539c960e990SMathieu Desnoyers * Unlike a traditional lock-less linked list; the availability of a 540c960e990SMathieu Desnoyers * rseq primitive allows us to implement pop without concerns over 541c960e990SMathieu Desnoyers * ABA-type races. 
542c960e990SMathieu Desnoyers */ 543c960e990SMathieu Desnoyers struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, 544c960e990SMathieu Desnoyers int *_cpu) 545c960e990SMathieu Desnoyers { 546c960e990SMathieu Desnoyers struct percpu_list_node *node = NULL; 547c960e990SMathieu Desnoyers int cpu; 548c960e990SMathieu Desnoyers 549c960e990SMathieu Desnoyers for (;;) { 550c960e990SMathieu Desnoyers struct percpu_list_node *head; 551c960e990SMathieu Desnoyers intptr_t *targetptr, expectnot, *load; 552c960e990SMathieu Desnoyers off_t offset; 553c960e990SMathieu Desnoyers int ret; 554c960e990SMathieu Desnoyers 555c960e990SMathieu Desnoyers cpu = rseq_cpu_start(); 556c960e990SMathieu Desnoyers targetptr = (intptr_t *)&list->c[cpu].head; 557c960e990SMathieu Desnoyers expectnot = (intptr_t)NULL; 558c960e990SMathieu Desnoyers offset = offsetof(struct percpu_list_node, next); 559c960e990SMathieu Desnoyers load = (intptr_t *)&head; 560c960e990SMathieu Desnoyers ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, 561c960e990SMathieu Desnoyers offset, load, cpu); 562c960e990SMathieu Desnoyers if (rseq_likely(!ret)) { 563c960e990SMathieu Desnoyers node = head; 564c960e990SMathieu Desnoyers break; 565c960e990SMathieu Desnoyers } 566c960e990SMathieu Desnoyers if (ret > 0) 567c960e990SMathieu Desnoyers break; 568c960e990SMathieu Desnoyers /* Retry if rseq aborts. */ 569c960e990SMathieu Desnoyers } 570c960e990SMathieu Desnoyers if (_cpu) 571c960e990SMathieu Desnoyers *_cpu = cpu; 572c960e990SMathieu Desnoyers return node; 573c960e990SMathieu Desnoyers } 574c960e990SMathieu Desnoyers 575c960e990SMathieu Desnoyers /* 576c960e990SMathieu Desnoyers * __percpu_list_pop is not safe against concurrent accesses. Should 577c960e990SMathieu Desnoyers * only be used on lists that are not concurrently modified. 
578c960e990SMathieu Desnoyers */ 579c960e990SMathieu Desnoyers struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) 580c960e990SMathieu Desnoyers { 581c960e990SMathieu Desnoyers struct percpu_list_node *node; 582c960e990SMathieu Desnoyers 583c960e990SMathieu Desnoyers node = list->c[cpu].head; 584c960e990SMathieu Desnoyers if (!node) 585c960e990SMathieu Desnoyers return NULL; 586c960e990SMathieu Desnoyers list->c[cpu].head = node->next; 587c960e990SMathieu Desnoyers return node; 588c960e990SMathieu Desnoyers } 589c960e990SMathieu Desnoyers 590c960e990SMathieu Desnoyers void *test_percpu_list_thread(void *arg) 591c960e990SMathieu Desnoyers { 592c960e990SMathieu Desnoyers long long i, reps; 593c960e990SMathieu Desnoyers struct percpu_list *list = (struct percpu_list *)arg; 594c960e990SMathieu Desnoyers 595c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_register_current_thread()) 596c960e990SMathieu Desnoyers abort(); 597c960e990SMathieu Desnoyers 598c960e990SMathieu Desnoyers reps = opt_reps; 599c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) { 600c960e990SMathieu Desnoyers struct percpu_list_node *node; 601c960e990SMathieu Desnoyers 602c960e990SMathieu Desnoyers node = this_cpu_list_pop(list, NULL); 603c960e990SMathieu Desnoyers if (opt_yield) 604c960e990SMathieu Desnoyers sched_yield(); /* encourage shuffling */ 605c960e990SMathieu Desnoyers if (node) 606c960e990SMathieu Desnoyers this_cpu_list_push(list, node, NULL); 607c960e990SMathieu Desnoyers } 608c960e990SMathieu Desnoyers 609c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 6108df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered); 611c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_unregister_current_thread()) 612c960e990SMathieu Desnoyers abort(); 613c960e990SMathieu Desnoyers 614c960e990SMathieu Desnoyers return NULL; 615c960e990SMathieu Desnoyers } 616c960e990SMathieu Desnoyers 
617c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu linked list from many threads. */ 618c960e990SMathieu Desnoyers void test_percpu_list(void) 619c960e990SMathieu Desnoyers { 620c960e990SMathieu Desnoyers const int num_threads = opt_threads; 621c960e990SMathieu Desnoyers int i, j, ret; 622c960e990SMathieu Desnoyers uint64_t sum = 0, expected_sum = 0; 623c960e990SMathieu Desnoyers struct percpu_list list; 624c960e990SMathieu Desnoyers pthread_t test_threads[num_threads]; 625c960e990SMathieu Desnoyers cpu_set_t allowed_cpus; 626c960e990SMathieu Desnoyers 627c960e990SMathieu Desnoyers memset(&list, 0, sizeof(list)); 628c960e990SMathieu Desnoyers 629c960e990SMathieu Desnoyers /* Generate list entries for every usable cpu. */ 630c960e990SMathieu Desnoyers sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 631c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) { 632c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus)) 633c960e990SMathieu Desnoyers continue; 634c960e990SMathieu Desnoyers for (j = 1; j <= 100; j++) { 635c960e990SMathieu Desnoyers struct percpu_list_node *node; 636c960e990SMathieu Desnoyers 637c960e990SMathieu Desnoyers expected_sum += j; 638c960e990SMathieu Desnoyers 639c960e990SMathieu Desnoyers node = malloc(sizeof(*node)); 640c960e990SMathieu Desnoyers assert(node); 641c960e990SMathieu Desnoyers node->data = j; 642c960e990SMathieu Desnoyers node->next = list.c[i].head; 643c960e990SMathieu Desnoyers list.c[i].head = node; 644c960e990SMathieu Desnoyers } 645c960e990SMathieu Desnoyers } 646c960e990SMathieu Desnoyers 647c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 648c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL, 649c960e990SMathieu Desnoyers test_percpu_list_thread, &list); 650c960e990SMathieu Desnoyers if (ret) { 651c960e990SMathieu Desnoyers errno = ret; 652c960e990SMathieu Desnoyers perror("pthread_create"); 653c960e990SMathieu Desnoyers abort(); 654c960e990SMathieu 
Desnoyers } 655c960e990SMathieu Desnoyers } 656c960e990SMathieu Desnoyers 657c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 658c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL); 659c960e990SMathieu Desnoyers if (ret) { 660c960e990SMathieu Desnoyers errno = ret; 661c960e990SMathieu Desnoyers perror("pthread_join"); 662c960e990SMathieu Desnoyers abort(); 663c960e990SMathieu Desnoyers } 664c960e990SMathieu Desnoyers } 665c960e990SMathieu Desnoyers 666c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) { 667c960e990SMathieu Desnoyers struct percpu_list_node *node; 668c960e990SMathieu Desnoyers 669c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus)) 670c960e990SMathieu Desnoyers continue; 671c960e990SMathieu Desnoyers 672c960e990SMathieu Desnoyers while ((node = __percpu_list_pop(&list, i))) { 673c960e990SMathieu Desnoyers sum += node->data; 674c960e990SMathieu Desnoyers free(node); 675c960e990SMathieu Desnoyers } 676c960e990SMathieu Desnoyers } 677c960e990SMathieu Desnoyers 678c960e990SMathieu Desnoyers /* 679c960e990SMathieu Desnoyers * All entries should now be accounted for (unless some external 680c960e990SMathieu Desnoyers * actor is interfering with our allowed affinity while this 681c960e990SMathieu Desnoyers * test is running). 
682c960e990SMathieu Desnoyers */ 683c960e990SMathieu Desnoyers assert(sum == expected_sum); 684c960e990SMathieu Desnoyers } 685c960e990SMathieu Desnoyers 686c960e990SMathieu Desnoyers bool this_cpu_buffer_push(struct percpu_buffer *buffer, 687c960e990SMathieu Desnoyers struct percpu_buffer_node *node, 688c960e990SMathieu Desnoyers int *_cpu) 689c960e990SMathieu Desnoyers { 690c960e990SMathieu Desnoyers bool result = false; 691c960e990SMathieu Desnoyers int cpu; 692c960e990SMathieu Desnoyers 693c960e990SMathieu Desnoyers for (;;) { 694c960e990SMathieu Desnoyers intptr_t *targetptr_spec, newval_spec; 695c960e990SMathieu Desnoyers intptr_t *targetptr_final, newval_final; 696c960e990SMathieu Desnoyers intptr_t offset; 697c960e990SMathieu Desnoyers int ret; 698c960e990SMathieu Desnoyers 699c960e990SMathieu Desnoyers cpu = rseq_cpu_start(); 700c960e990SMathieu Desnoyers offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 701c960e990SMathieu Desnoyers if (offset == buffer->c[cpu].buflen) 702c960e990SMathieu Desnoyers break; 703c960e990SMathieu Desnoyers newval_spec = (intptr_t)node; 704c960e990SMathieu Desnoyers targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; 705c960e990SMathieu Desnoyers newval_final = offset + 1; 706c960e990SMathieu Desnoyers targetptr_final = &buffer->c[cpu].offset; 707c960e990SMathieu Desnoyers if (opt_mb) 708c960e990SMathieu Desnoyers ret = rseq_cmpeqv_trystorev_storev_release( 709c960e990SMathieu Desnoyers targetptr_final, offset, targetptr_spec, 710c960e990SMathieu Desnoyers newval_spec, newval_final, cpu); 711c960e990SMathieu Desnoyers else 712c960e990SMathieu Desnoyers ret = rseq_cmpeqv_trystorev_storev(targetptr_final, 713c960e990SMathieu Desnoyers offset, targetptr_spec, newval_spec, 714c960e990SMathieu Desnoyers newval_final, cpu); 715c960e990SMathieu Desnoyers if (rseq_likely(!ret)) { 716c960e990SMathieu Desnoyers result = true; 717c960e990SMathieu Desnoyers break; 718c960e990SMathieu Desnoyers } 719c960e990SMathieu Desnoyers /* Retry 
if comparison fails or rseq aborts. */ 720c960e990SMathieu Desnoyers } 721c960e990SMathieu Desnoyers if (_cpu) 722c960e990SMathieu Desnoyers *_cpu = cpu; 723c960e990SMathieu Desnoyers return result; 724c960e990SMathieu Desnoyers } 725c960e990SMathieu Desnoyers 726c960e990SMathieu Desnoyers struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, 727c960e990SMathieu Desnoyers int *_cpu) 728c960e990SMathieu Desnoyers { 729c960e990SMathieu Desnoyers struct percpu_buffer_node *head; 730c960e990SMathieu Desnoyers int cpu; 731c960e990SMathieu Desnoyers 732c960e990SMathieu Desnoyers for (;;) { 733c960e990SMathieu Desnoyers intptr_t *targetptr, newval; 734c960e990SMathieu Desnoyers intptr_t offset; 735c960e990SMathieu Desnoyers int ret; 736c960e990SMathieu Desnoyers 737c960e990SMathieu Desnoyers cpu = rseq_cpu_start(); 738c960e990SMathieu Desnoyers /* Load offset with single-copy atomicity. */ 739c960e990SMathieu Desnoyers offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 740c960e990SMathieu Desnoyers if (offset == 0) { 741c960e990SMathieu Desnoyers head = NULL; 742c960e990SMathieu Desnoyers break; 743c960e990SMathieu Desnoyers } 744c960e990SMathieu Desnoyers head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); 745c960e990SMathieu Desnoyers newval = offset - 1; 746c960e990SMathieu Desnoyers targetptr = (intptr_t *)&buffer->c[cpu].offset; 747c960e990SMathieu Desnoyers ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, 748c960e990SMathieu Desnoyers (intptr_t *)&buffer->c[cpu].array[offset - 1], 749c960e990SMathieu Desnoyers (intptr_t)head, newval, cpu); 750c960e990SMathieu Desnoyers if (rseq_likely(!ret)) 751c960e990SMathieu Desnoyers break; 752c960e990SMathieu Desnoyers /* Retry if comparison fails or rseq aborts. 
*/ 753c960e990SMathieu Desnoyers } 754c960e990SMathieu Desnoyers if (_cpu) 755c960e990SMathieu Desnoyers *_cpu = cpu; 756c960e990SMathieu Desnoyers return head; 757c960e990SMathieu Desnoyers } 758c960e990SMathieu Desnoyers 759c960e990SMathieu Desnoyers /* 760c960e990SMathieu Desnoyers * __percpu_buffer_pop is not safe against concurrent accesses. Should 761c960e990SMathieu Desnoyers * only be used on buffers that are not concurrently modified. 762c960e990SMathieu Desnoyers */ 763c960e990SMathieu Desnoyers struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, 764c960e990SMathieu Desnoyers int cpu) 765c960e990SMathieu Desnoyers { 766c960e990SMathieu Desnoyers struct percpu_buffer_node *head; 767c960e990SMathieu Desnoyers intptr_t offset; 768c960e990SMathieu Desnoyers 769c960e990SMathieu Desnoyers offset = buffer->c[cpu].offset; 770c960e990SMathieu Desnoyers if (offset == 0) 771c960e990SMathieu Desnoyers return NULL; 772c960e990SMathieu Desnoyers head = buffer->c[cpu].array[offset - 1]; 773c960e990SMathieu Desnoyers buffer->c[cpu].offset = offset - 1; 774c960e990SMathieu Desnoyers return head; 775c960e990SMathieu Desnoyers } 776c960e990SMathieu Desnoyers 777c960e990SMathieu Desnoyers void *test_percpu_buffer_thread(void *arg) 778c960e990SMathieu Desnoyers { 779c960e990SMathieu Desnoyers long long i, reps; 780c960e990SMathieu Desnoyers struct percpu_buffer *buffer = (struct percpu_buffer *)arg; 781c960e990SMathieu Desnoyers 782c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_register_current_thread()) 783c960e990SMathieu Desnoyers abort(); 784c960e990SMathieu Desnoyers 785c960e990SMathieu Desnoyers reps = opt_reps; 786c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) { 787c960e990SMathieu Desnoyers struct percpu_buffer_node *node; 788c960e990SMathieu Desnoyers 789c960e990SMathieu Desnoyers node = this_cpu_buffer_pop(buffer, NULL); 790c960e990SMathieu Desnoyers if (opt_yield) 791c960e990SMathieu Desnoyers sched_yield(); /* encourage 
shuffling */ 792c960e990SMathieu Desnoyers if (node) { 793c960e990SMathieu Desnoyers if (!this_cpu_buffer_push(buffer, node, NULL)) { 794c960e990SMathieu Desnoyers /* Should increase buffer size. */ 795c960e990SMathieu Desnoyers abort(); 796c960e990SMathieu Desnoyers } 797c960e990SMathieu Desnoyers } 798c960e990SMathieu Desnoyers } 799c960e990SMathieu Desnoyers 800c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 8018df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered); 802c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_unregister_current_thread()) 803c960e990SMathieu Desnoyers abort(); 804c960e990SMathieu Desnoyers 805c960e990SMathieu Desnoyers return NULL; 806c960e990SMathieu Desnoyers } 807c960e990SMathieu Desnoyers 808c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu buffer from many threads. */ 809c960e990SMathieu Desnoyers void test_percpu_buffer(void) 810c960e990SMathieu Desnoyers { 811c960e990SMathieu Desnoyers const int num_threads = opt_threads; 812c960e990SMathieu Desnoyers int i, j, ret; 813c960e990SMathieu Desnoyers uint64_t sum = 0, expected_sum = 0; 814c960e990SMathieu Desnoyers struct percpu_buffer buffer; 815c960e990SMathieu Desnoyers pthread_t test_threads[num_threads]; 816c960e990SMathieu Desnoyers cpu_set_t allowed_cpus; 817c960e990SMathieu Desnoyers 818c960e990SMathieu Desnoyers memset(&buffer, 0, sizeof(buffer)); 819c960e990SMathieu Desnoyers 820c960e990SMathieu Desnoyers /* Generate list entries for every usable cpu. */ 821c960e990SMathieu Desnoyers sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 822c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) { 823c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus)) 824c960e990SMathieu Desnoyers continue; 825c960e990SMathieu Desnoyers /* Worse-case is every item in same CPU. 
*/ 826c960e990SMathieu Desnoyers buffer.c[i].array = 827c960e990SMathieu Desnoyers malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 828c960e990SMathieu Desnoyers BUFFER_ITEM_PER_CPU); 829c960e990SMathieu Desnoyers assert(buffer.c[i].array); 830c960e990SMathieu Desnoyers buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; 831c960e990SMathieu Desnoyers for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { 832c960e990SMathieu Desnoyers struct percpu_buffer_node *node; 833c960e990SMathieu Desnoyers 834c960e990SMathieu Desnoyers expected_sum += j; 835c960e990SMathieu Desnoyers 836c960e990SMathieu Desnoyers /* 837c960e990SMathieu Desnoyers * We could theoretically put the word-sized 838c960e990SMathieu Desnoyers * "data" directly in the buffer. However, we 839c960e990SMathieu Desnoyers * want to model objects that would not fit 840c960e990SMathieu Desnoyers * within a single word, so allocate an object 841c960e990SMathieu Desnoyers * for each node. 842c960e990SMathieu Desnoyers */ 843c960e990SMathieu Desnoyers node = malloc(sizeof(*node)); 844c960e990SMathieu Desnoyers assert(node); 845c960e990SMathieu Desnoyers node->data = j; 846c960e990SMathieu Desnoyers buffer.c[i].array[j - 1] = node; 847c960e990SMathieu Desnoyers buffer.c[i].offset++; 848c960e990SMathieu Desnoyers } 849c960e990SMathieu Desnoyers } 850c960e990SMathieu Desnoyers 851c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 852c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL, 853c960e990SMathieu Desnoyers test_percpu_buffer_thread, &buffer); 854c960e990SMathieu Desnoyers if (ret) { 855c960e990SMathieu Desnoyers errno = ret; 856c960e990SMathieu Desnoyers perror("pthread_create"); 857c960e990SMathieu Desnoyers abort(); 858c960e990SMathieu Desnoyers } 859c960e990SMathieu Desnoyers } 860c960e990SMathieu Desnoyers 861c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 862c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL); 863c960e990SMathieu Desnoyers if 
(ret) { 864c960e990SMathieu Desnoyers errno = ret; 865c960e990SMathieu Desnoyers perror("pthread_join"); 866c960e990SMathieu Desnoyers abort(); 867c960e990SMathieu Desnoyers } 868c960e990SMathieu Desnoyers } 869c960e990SMathieu Desnoyers 870c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) { 871c960e990SMathieu Desnoyers struct percpu_buffer_node *node; 872c960e990SMathieu Desnoyers 873c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus)) 874c960e990SMathieu Desnoyers continue; 875c960e990SMathieu Desnoyers 876c960e990SMathieu Desnoyers while ((node = __percpu_buffer_pop(&buffer, i))) { 877c960e990SMathieu Desnoyers sum += node->data; 878c960e990SMathieu Desnoyers free(node); 879c960e990SMathieu Desnoyers } 880c960e990SMathieu Desnoyers free(buffer.c[i].array); 881c960e990SMathieu Desnoyers } 882c960e990SMathieu Desnoyers 883c960e990SMathieu Desnoyers /* 884c960e990SMathieu Desnoyers * All entries should now be accounted for (unless some external 885c960e990SMathieu Desnoyers * actor is interfering with our allowed affinity while this 886c960e990SMathieu Desnoyers * test is running). 
887c960e990SMathieu Desnoyers */ 888c960e990SMathieu Desnoyers assert(sum == expected_sum); 889c960e990SMathieu Desnoyers } 890c960e990SMathieu Desnoyers 891c960e990SMathieu Desnoyers bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, 892c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node item, 893c960e990SMathieu Desnoyers int *_cpu) 894c960e990SMathieu Desnoyers { 895c960e990SMathieu Desnoyers bool result = false; 896c960e990SMathieu Desnoyers int cpu; 897c960e990SMathieu Desnoyers 898c960e990SMathieu Desnoyers for (;;) { 899c960e990SMathieu Desnoyers intptr_t *targetptr_final, newval_final, offset; 900c960e990SMathieu Desnoyers char *destptr, *srcptr; 901c960e990SMathieu Desnoyers size_t copylen; 902c960e990SMathieu Desnoyers int ret; 903c960e990SMathieu Desnoyers 904c960e990SMathieu Desnoyers cpu = rseq_cpu_start(); 905c960e990SMathieu Desnoyers /* Load offset with single-copy atomicity. */ 906c960e990SMathieu Desnoyers offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 907c960e990SMathieu Desnoyers if (offset == buffer->c[cpu].buflen) 908c960e990SMathieu Desnoyers break; 909c960e990SMathieu Desnoyers destptr = (char *)&buffer->c[cpu].array[offset]; 910c960e990SMathieu Desnoyers srcptr = (char *)&item; 911c960e990SMathieu Desnoyers /* copylen must be <= 4kB. 
*/ 912c960e990SMathieu Desnoyers copylen = sizeof(item); 913c960e990SMathieu Desnoyers newval_final = offset + 1; 914c960e990SMathieu Desnoyers targetptr_final = &buffer->c[cpu].offset; 915c960e990SMathieu Desnoyers if (opt_mb) 916c960e990SMathieu Desnoyers ret = rseq_cmpeqv_trymemcpy_storev_release( 917c960e990SMathieu Desnoyers targetptr_final, offset, 918c960e990SMathieu Desnoyers destptr, srcptr, copylen, 919c960e990SMathieu Desnoyers newval_final, cpu); 920c960e990SMathieu Desnoyers else 921c960e990SMathieu Desnoyers ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 922c960e990SMathieu Desnoyers offset, destptr, srcptr, copylen, 923c960e990SMathieu Desnoyers newval_final, cpu); 924c960e990SMathieu Desnoyers if (rseq_likely(!ret)) { 925c960e990SMathieu Desnoyers result = true; 926c960e990SMathieu Desnoyers break; 927c960e990SMathieu Desnoyers } 928c960e990SMathieu Desnoyers /* Retry if comparison fails or rseq aborts. */ 929c960e990SMathieu Desnoyers } 930c960e990SMathieu Desnoyers if (_cpu) 931c960e990SMathieu Desnoyers *_cpu = cpu; 932c960e990SMathieu Desnoyers return result; 933c960e990SMathieu Desnoyers } 934c960e990SMathieu Desnoyers 935c960e990SMathieu Desnoyers bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 936c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node *item, 937c960e990SMathieu Desnoyers int *_cpu) 938c960e990SMathieu Desnoyers { 939c960e990SMathieu Desnoyers bool result = false; 940c960e990SMathieu Desnoyers int cpu; 941c960e990SMathieu Desnoyers 942c960e990SMathieu Desnoyers for (;;) { 943c960e990SMathieu Desnoyers intptr_t *targetptr_final, newval_final, offset; 944c960e990SMathieu Desnoyers char *destptr, *srcptr; 945c960e990SMathieu Desnoyers size_t copylen; 946c960e990SMathieu Desnoyers int ret; 947c960e990SMathieu Desnoyers 948c960e990SMathieu Desnoyers cpu = rseq_cpu_start(); 949c960e990SMathieu Desnoyers /* Load offset with single-copy atomicity. 
*/ 950c960e990SMathieu Desnoyers offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 951c960e990SMathieu Desnoyers if (offset == 0) 952c960e990SMathieu Desnoyers break; 953c960e990SMathieu Desnoyers destptr = (char *)item; 954c960e990SMathieu Desnoyers srcptr = (char *)&buffer->c[cpu].array[offset - 1]; 955c960e990SMathieu Desnoyers /* copylen must be <= 4kB. */ 956c960e990SMathieu Desnoyers copylen = sizeof(*item); 957c960e990SMathieu Desnoyers newval_final = offset - 1; 958c960e990SMathieu Desnoyers targetptr_final = &buffer->c[cpu].offset; 959c960e990SMathieu Desnoyers ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 960c960e990SMathieu Desnoyers offset, destptr, srcptr, copylen, 961c960e990SMathieu Desnoyers newval_final, cpu); 962c960e990SMathieu Desnoyers if (rseq_likely(!ret)) { 963c960e990SMathieu Desnoyers result = true; 964c960e990SMathieu Desnoyers break; 965c960e990SMathieu Desnoyers } 966c960e990SMathieu Desnoyers /* Retry if comparison fails or rseq aborts. */ 967c960e990SMathieu Desnoyers } 968c960e990SMathieu Desnoyers if (_cpu) 969c960e990SMathieu Desnoyers *_cpu = cpu; 970c960e990SMathieu Desnoyers return result; 971c960e990SMathieu Desnoyers } 972c960e990SMathieu Desnoyers 973c960e990SMathieu Desnoyers /* 974c960e990SMathieu Desnoyers * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should 975c960e990SMathieu Desnoyers * only be used on buffers that are not concurrently modified. 
976c960e990SMathieu Desnoyers */ 977c960e990SMathieu Desnoyers bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 978c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node *item, 979c960e990SMathieu Desnoyers int cpu) 980c960e990SMathieu Desnoyers { 981c960e990SMathieu Desnoyers intptr_t offset; 982c960e990SMathieu Desnoyers 983c960e990SMathieu Desnoyers offset = buffer->c[cpu].offset; 984c960e990SMathieu Desnoyers if (offset == 0) 985c960e990SMathieu Desnoyers return false; 986c960e990SMathieu Desnoyers memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); 987c960e990SMathieu Desnoyers buffer->c[cpu].offset = offset - 1; 988c960e990SMathieu Desnoyers return true; 989c960e990SMathieu Desnoyers } 990c960e990SMathieu Desnoyers 991c960e990SMathieu Desnoyers void *test_percpu_memcpy_buffer_thread(void *arg) 992c960e990SMathieu Desnoyers { 993c960e990SMathieu Desnoyers long long i, reps; 994c960e990SMathieu Desnoyers struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; 995c960e990SMathieu Desnoyers 996c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_register_current_thread()) 997c960e990SMathieu Desnoyers abort(); 998c960e990SMathieu Desnoyers 999c960e990SMathieu Desnoyers reps = opt_reps; 1000c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) { 1001c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node item; 1002c960e990SMathieu Desnoyers bool result; 1003c960e990SMathieu Desnoyers 1004c960e990SMathieu Desnoyers result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); 1005c960e990SMathieu Desnoyers if (opt_yield) 1006c960e990SMathieu Desnoyers sched_yield(); /* encourage shuffling */ 1007c960e990SMathieu Desnoyers if (result) { 1008c960e990SMathieu Desnoyers if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { 1009c960e990SMathieu Desnoyers /* Should increase buffer size. 
*/ 1010c960e990SMathieu Desnoyers abort(); 1011c960e990SMathieu Desnoyers } 1012c960e990SMathieu Desnoyers } 1013c960e990SMathieu Desnoyers } 1014c960e990SMathieu Desnoyers 1015c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 10168df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered); 1017c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_unregister_current_thread()) 1018c960e990SMathieu Desnoyers abort(); 1019c960e990SMathieu Desnoyers 1020c960e990SMathieu Desnoyers return NULL; 1021c960e990SMathieu Desnoyers } 1022c960e990SMathieu Desnoyers 1023c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu buffer from many threads. */ 1024c960e990SMathieu Desnoyers void test_percpu_memcpy_buffer(void) 1025c960e990SMathieu Desnoyers { 1026c960e990SMathieu Desnoyers const int num_threads = opt_threads; 1027c960e990SMathieu Desnoyers int i, j, ret; 1028c960e990SMathieu Desnoyers uint64_t sum = 0, expected_sum = 0; 1029c960e990SMathieu Desnoyers struct percpu_memcpy_buffer buffer; 1030c960e990SMathieu Desnoyers pthread_t test_threads[num_threads]; 1031c960e990SMathieu Desnoyers cpu_set_t allowed_cpus; 1032c960e990SMathieu Desnoyers 1033c960e990SMathieu Desnoyers memset(&buffer, 0, sizeof(buffer)); 1034c960e990SMathieu Desnoyers 1035c960e990SMathieu Desnoyers /* Generate list entries for every usable cpu. */ 1036c960e990SMathieu Desnoyers sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 1037c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) { 1038c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus)) 1039c960e990SMathieu Desnoyers continue; 1040c960e990SMathieu Desnoyers /* Worse-case is every item in same CPU. 
*/ 1041c960e990SMathieu Desnoyers buffer.c[i].array = 1042c960e990SMathieu Desnoyers malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 1043c960e990SMathieu Desnoyers MEMCPY_BUFFER_ITEM_PER_CPU); 1044c960e990SMathieu Desnoyers assert(buffer.c[i].array); 1045c960e990SMathieu Desnoyers buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; 1046c960e990SMathieu Desnoyers for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { 1047c960e990SMathieu Desnoyers expected_sum += 2 * j + 1; 1048c960e990SMathieu Desnoyers 1049c960e990SMathieu Desnoyers /* 1050c960e990SMathieu Desnoyers * We could theoretically put the word-sized 1051c960e990SMathieu Desnoyers * "data" directly in the buffer. However, we 1052c960e990SMathieu Desnoyers * want to model objects that would not fit 1053c960e990SMathieu Desnoyers * within a single word, so allocate an object 1054c960e990SMathieu Desnoyers * for each node. 1055c960e990SMathieu Desnoyers */ 1056c960e990SMathieu Desnoyers buffer.c[i].array[j - 1].data1 = j; 1057c960e990SMathieu Desnoyers buffer.c[i].array[j - 1].data2 = j + 1; 1058c960e990SMathieu Desnoyers buffer.c[i].offset++; 1059c960e990SMathieu Desnoyers } 1060c960e990SMathieu Desnoyers } 1061c960e990SMathieu Desnoyers 1062c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 1063c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL, 1064c960e990SMathieu Desnoyers test_percpu_memcpy_buffer_thread, 1065c960e990SMathieu Desnoyers &buffer); 1066c960e990SMathieu Desnoyers if (ret) { 1067c960e990SMathieu Desnoyers errno = ret; 1068c960e990SMathieu Desnoyers perror("pthread_create"); 1069c960e990SMathieu Desnoyers abort(); 1070c960e990SMathieu Desnoyers } 1071c960e990SMathieu Desnoyers } 1072c960e990SMathieu Desnoyers 1073c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) { 1074c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL); 1075c960e990SMathieu Desnoyers if (ret) { 1076c960e990SMathieu Desnoyers errno = ret; 
1077c960e990SMathieu Desnoyers perror("pthread_join"); 1078c960e990SMathieu Desnoyers abort(); 1079c960e990SMathieu Desnoyers } 1080c960e990SMathieu Desnoyers } 1081c960e990SMathieu Desnoyers 1082c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) { 1083c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node item; 1084c960e990SMathieu Desnoyers 1085c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus)) 1086c960e990SMathieu Desnoyers continue; 1087c960e990SMathieu Desnoyers 1088c960e990SMathieu Desnoyers while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { 1089c960e990SMathieu Desnoyers sum += item.data1; 1090c960e990SMathieu Desnoyers sum += item.data2; 1091c960e990SMathieu Desnoyers } 1092c960e990SMathieu Desnoyers free(buffer.c[i].array); 1093c960e990SMathieu Desnoyers } 1094c960e990SMathieu Desnoyers 1095c960e990SMathieu Desnoyers /* 1096c960e990SMathieu Desnoyers * All entries should now be accounted for (unless some external 1097c960e990SMathieu Desnoyers * actor is interfering with our allowed affinity while this 1098c960e990SMathieu Desnoyers * test is running). 
1099c960e990SMathieu Desnoyers */ 1100c960e990SMathieu Desnoyers assert(sum == expected_sum); 1101c960e990SMathieu Desnoyers } 1102c960e990SMathieu Desnoyers 1103c960e990SMathieu Desnoyers static void test_signal_interrupt_handler(int signo) 1104c960e990SMathieu Desnoyers { 1105c960e990SMathieu Desnoyers signals_delivered++; 1106c960e990SMathieu Desnoyers } 1107c960e990SMathieu Desnoyers 1108c960e990SMathieu Desnoyers static int set_signal_handler(void) 1109c960e990SMathieu Desnoyers { 1110c960e990SMathieu Desnoyers int ret = 0; 1111c960e990SMathieu Desnoyers struct sigaction sa; 1112c960e990SMathieu Desnoyers sigset_t sigset; 1113c960e990SMathieu Desnoyers 1114c960e990SMathieu Desnoyers ret = sigemptyset(&sigset); 1115c960e990SMathieu Desnoyers if (ret < 0) { 1116c960e990SMathieu Desnoyers perror("sigemptyset"); 1117c960e990SMathieu Desnoyers return ret; 1118c960e990SMathieu Desnoyers } 1119c960e990SMathieu Desnoyers 1120c960e990SMathieu Desnoyers sa.sa_handler = test_signal_interrupt_handler; 1121c960e990SMathieu Desnoyers sa.sa_mask = sigset; 1122c960e990SMathieu Desnoyers sa.sa_flags = 0; 1123c960e990SMathieu Desnoyers ret = sigaction(SIGUSR1, &sa, NULL); 1124c960e990SMathieu Desnoyers if (ret < 0) { 1125c960e990SMathieu Desnoyers perror("sigaction"); 1126c960e990SMathieu Desnoyers return ret; 1127c960e990SMathieu Desnoyers } 1128c960e990SMathieu Desnoyers 1129c960e990SMathieu Desnoyers printf_verbose("Signal handler set for SIGUSR1\n"); 1130c960e990SMathieu Desnoyers 1131c960e990SMathieu Desnoyers return ret; 1132c960e990SMathieu Desnoyers } 1133c960e990SMathieu Desnoyers 11346f39cecdSXingxing Su /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. 
*/ 11356f39cecdSXingxing Su #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV 1136f166b111SPeter Oskolkov struct test_membarrier_thread_args { 1137f166b111SPeter Oskolkov int stop; 1138f166b111SPeter Oskolkov intptr_t percpu_list_ptr; 1139f166b111SPeter Oskolkov }; 1140f166b111SPeter Oskolkov 1141f166b111SPeter Oskolkov /* Worker threads modify data in their "active" percpu lists. */ 1142f166b111SPeter Oskolkov void *test_membarrier_worker_thread(void *arg) 1143f166b111SPeter Oskolkov { 1144f166b111SPeter Oskolkov struct test_membarrier_thread_args *args = 1145f166b111SPeter Oskolkov (struct test_membarrier_thread_args *)arg; 1146f166b111SPeter Oskolkov const int iters = opt_reps; 1147f166b111SPeter Oskolkov int i; 1148f166b111SPeter Oskolkov 1149f166b111SPeter Oskolkov if (rseq_register_current_thread()) { 1150f166b111SPeter Oskolkov fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1151f166b111SPeter Oskolkov errno, strerror(errno)); 1152f166b111SPeter Oskolkov abort(); 1153f166b111SPeter Oskolkov } 1154f166b111SPeter Oskolkov 1155f166b111SPeter Oskolkov /* Wait for initialization. */ 1156f166b111SPeter Oskolkov while (!atomic_load(&args->percpu_list_ptr)) {} 1157f166b111SPeter Oskolkov 1158f166b111SPeter Oskolkov for (i = 0; i < iters; ++i) { 1159f166b111SPeter Oskolkov int ret; 1160f166b111SPeter Oskolkov 1161f166b111SPeter Oskolkov do { 1162f166b111SPeter Oskolkov int cpu = rseq_cpu_start(); 1163f166b111SPeter Oskolkov 1164f166b111SPeter Oskolkov ret = rseq_offset_deref_addv(&args->percpu_list_ptr, 1165f166b111SPeter Oskolkov sizeof(struct percpu_list_entry) * cpu, 1, cpu); 1166f166b111SPeter Oskolkov } while (rseq_unlikely(ret)); 1167f166b111SPeter Oskolkov } 1168f166b111SPeter Oskolkov 1169f166b111SPeter Oskolkov if (rseq_unregister_current_thread()) { 1170f166b111SPeter Oskolkov fprintf(stderr, "Error: rseq_unregister_current_thread(...) 
failed(%d): %s\n", 1171f166b111SPeter Oskolkov errno, strerror(errno)); 1172f166b111SPeter Oskolkov abort(); 1173f166b111SPeter Oskolkov } 1174f166b111SPeter Oskolkov return NULL; 1175f166b111SPeter Oskolkov } 1176f166b111SPeter Oskolkov 1177f166b111SPeter Oskolkov void test_membarrier_init_percpu_list(struct percpu_list *list) 1178f166b111SPeter Oskolkov { 1179f166b111SPeter Oskolkov int i; 1180f166b111SPeter Oskolkov 1181f166b111SPeter Oskolkov memset(list, 0, sizeof(*list)); 1182f166b111SPeter Oskolkov for (i = 0; i < CPU_SETSIZE; i++) { 1183f166b111SPeter Oskolkov struct percpu_list_node *node; 1184f166b111SPeter Oskolkov 1185f166b111SPeter Oskolkov node = malloc(sizeof(*node)); 1186f166b111SPeter Oskolkov assert(node); 1187f166b111SPeter Oskolkov node->data = 0; 1188f166b111SPeter Oskolkov node->next = NULL; 1189f166b111SPeter Oskolkov list->c[i].head = node; 1190f166b111SPeter Oskolkov } 1191f166b111SPeter Oskolkov } 1192f166b111SPeter Oskolkov 1193f166b111SPeter Oskolkov void test_membarrier_free_percpu_list(struct percpu_list *list) 1194f166b111SPeter Oskolkov { 1195f166b111SPeter Oskolkov int i; 1196f166b111SPeter Oskolkov 1197f166b111SPeter Oskolkov for (i = 0; i < CPU_SETSIZE; i++) 1198f166b111SPeter Oskolkov free(list->c[i].head); 1199f166b111SPeter Oskolkov } 1200f166b111SPeter Oskolkov 1201f166b111SPeter Oskolkov static int sys_membarrier(int cmd, int flags, int cpu_id) 1202f166b111SPeter Oskolkov { 1203f166b111SPeter Oskolkov return syscall(__NR_membarrier, cmd, flags, cpu_id); 1204f166b111SPeter Oskolkov } 1205f166b111SPeter Oskolkov 1206f166b111SPeter Oskolkov /* 1207f166b111SPeter Oskolkov * The manager thread swaps per-cpu lists that worker threads see, 1208f166b111SPeter Oskolkov * and validates that there are no unexpected modifications. 
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	/* Publish list_a as the initially "active" list for the workers. */
	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

	/*
	 * Main loop: repeatedly flip the "active" list between list_a and
	 * list_b.  After each flip, a membarrier RSEQ fence on the chosen CPU
	 * guarantees no worker is still mid-rseq-critical-section using the
	 * old pointer, so the inactive list's data must stay stable.
	 */
	while (!atomic_load(&args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
		/*
		 * Fence cpu_a: restart any rseq critical section on that CPU
		 * so it re-reads the (now updated) list pointer.  ENXIO means
		 * the CPU is not present/online, which is tolerated.
		 */
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = atomic_load(&list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
	}

	/*
	 * Workers are joined by test_membarrier() before stop is set, so it
	 * is safe to free both lists here.
	 */
	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

/*
 * Entry point for the membarrier test: registers the process for
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, spawns opt_threads worker threads
 * plus one manager thread, joins the workers, then stops and joins the
 * manager.
 */
void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	/*
	 * Plain (non-atomic) init is safe: pthread_create() provides the
	 * happens-before edge to the threads that read these fields.
	 */
	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;	/* pthread_create returns the error, not errno. */
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}


	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	/* All workers are done; tell the manager to clean up and exit. */
	atomic_store(&thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
/* Stub for architectures lacking the rseq_offset_deref_addv operation. */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

/* Print command-line usage help to stdout. */
static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf(" [-1 loops] Number of loops for delay injection 1\n");
	printf(" [-2 loops] Number of loops for delay injection 2\n");
	printf(" [-3 loops] Number of loops for delay injection 3\n");
	printf(" [-4 loops] Number of loops for delay injection 4\n");
	printf(" [-5 loops] Number of loops for delay injection 5\n");
	printf(" [-6 loops] Number of loops for delay injection 6\n");
	printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf(" [-y] Yield\n");
	printf(" [-k] Kill thread with signal\n");
	printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf(" [-t N] Number of threads (default 200)\n");
	printf(" [-r N] Number of repetitions per thread (default 5000)\n");
	printf(" [-d] Disable rseq system call (no initialization)\n");
	printf(" [-D M] Disable rseq for each M threads\n");
	printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf(" [-v] Verbose output.\n");
	printf(" [-h] Show this help.\n");
	printf("\n");
}

/*
 * Parse command-line options, install the test signal handler, register
 * the main thread with rseq (unless disabled), run the selected per-cpu
 * test, and unregister.  Returns 0 on success, -1 on usage/setup error.
 */
int main(int argc, char **argv)
{
	int i;

	/* Hand-rolled option parsing; each value-taking flag consumes argv[i+1]. */
	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			/* Digit flag '1'..'9' indexes directly into loop_cnt[]. */
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			/* Validate the test selector character. */
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	/* Mirror loop counts into the asm-visible globals used for delay injection. */
	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	/* Dispatch on the selected test; opt_test was validated above. */
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}