// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT 9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)	\
	do {				\
		if (verbose)		\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

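/*
 * Delay-injection machinery: the asm_loop_cnt_N aliases above expose
 * loop_cnt[] to inline assembly by symbol name (used on x86), while the
 * other architectures pass the counters through the RSEQ_INJECT_INPUT
 * operand list. Each architecture defines RSEQ_INJECT_ASM(n) to spin for
 * loop_cnt[n] iterations at injection point n inside the rseq critical
 * sections, widening the race windows the tests try to provoke.
 * INJECT_ASM_REG names the scratch register used for the countdown and is
 * added to the clobber list via RSEQ_INJECT_CLOBBER.
 */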
#ifdef __i386__

#define INJECT_ASM_REG "eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P "rax"
#define INJECT_ASM_REG "eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

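/*
 * RSEQ_INJECT_FAILED and RSEQ_INJECT_C(n) are the C-level injection hooks.
 * They are defined before the #include "rseq.h" below so the rseq
 * critical-section helpers can pick them up: RSEQ_INJECT_FAILED bumps the
 * per-thread abort counter reported by the tests, and RSEQ_INJECT_C(n)
 * spins for loop_cnt[n] iterations and, when loop_cnt[n] == -1 and -m is
 * used, periodically sleeps, yields and/or raises SIGUSR1 to perturb the
 * critical sections.
 */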
#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif

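/*
 * Per-CPU data layout shared by the tests below: each per-CPU slot is
 * aligned to 128 bytes so slots belonging to different CPUs do not share
 * cache lines (avoiding false sharing between concurrent updaters), and
 * CPU_SETSIZE slots are allocated so any possible CPU index fits.
 */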
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU 100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU 100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
				getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

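/*
 * Typical lock usage pattern, as used by test_percpu_spinlock_thread()
 * below: the data access is nested between rseq_this_cpu_lock() and
 * rseq_percpu_unlock() on the CPU index returned by the lock operation:
 *
 *	int cpu = rseq_this_cpu_lock(&data->lock);
 *	data->c[cpu].count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 */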
void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

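/*
 * Push a node onto the current CPU's list: read the current head, link the
 * new node in front of it, then use rseq_cmpeqv_storev() to publish the new
 * head only if the head is still the value we read and the thread has not
 * been migrated away from that CPU; otherwise retry on the (possibly new)
 * current CPU.
 */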
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an rseq
 * primitive allows us to implement pop without concerns over ABA-type
 * races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

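/*
 * Per-CPU pointer ring ("buffer") operations. Push speculatively stores the
 * node pointer into array[offset] and then commits by storing offset + 1,
 * both inside a single rseq critical section (rseq_cmpeqv_trystorev_storev).
 * Pop re-checks both the offset and the pointer at the top slot before
 * committing offset - 1, so a concurrently reused slot cannot be popped by
 * mistake.
 */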
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU's buffer. */
		buffer.c[i].array = malloc(sizeof(*buffer.c[i].array) *
					   CPU_SETSIZE * BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

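/*
 * Variant of the per-CPU ring that stores node contents rather than
 * pointers: push and pop copy whole struct percpu_memcpy_buffer_node
 * elements in and out of the per-CPU array with
 * rseq_cmpeqv_trymemcpy_storev(), which performs the copy and the final
 * offset update within one critical section (hence the "copylen must be
 * <= 4kB" notes below).
 */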
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr_final, offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU's buffer. */
		buffer.c[i].array = malloc(sizeof(*buffer.c[i].array) *
					   CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

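/*
 * SIGUSR1 support for the -k option: RSEQ_INJECT_C() raises SIGUSR1 from
 * the injection hook, and delivering a signal to a thread that is inside
 * an rseq critical section forces that critical section to abort and
 * restart. The handler itself only counts deliveries for the per-thread
 * statistics.
 */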
static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

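/*
 * Test design: worker threads dereference percpu_list_ptr and increment
 * the counter it points to for their current CPU within one rseq critical
 * section (rseq_offset_deref_addv). The manager thread flips
 * percpu_list_ptr between two lists and then issues an expedited rseq
 * membarrier (see rseq_membarrier_expedited() above), which restarts any
 * critical section still using the stale pointer. The manager records the
 * inactive list's counter for the CPU it sampled and verifies it has not
 * moved before the next swap.
 */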
/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!atomic_load(&args->percpu_list_ptr)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = get_current_cpu_id();

			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

	while (!atomic_load(&args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
		if (rseq_membarrier_expedited(cpu_a) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = atomic_load(&list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
		if (rseq_membarrier_expedited(cpu_b) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

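/*
 * Note that a process must register its intent with
 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ before it may issue
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, which is why test_membarrier()
 * performs the registration below before creating any threads.
 */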
void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	atomic_store(&thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* TEST_MEMBARRIER */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
	       argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mo = RSEQ_MO_RELEASE;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		fprintf(stderr, "Error: cpu id getter unavailable\n");
		goto error;
	}
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}