// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

/* Returns the calling thread's id as reported by the gettid system call. */
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

/*
 * Delay-injection loop counts. RSEQ_INJECT_C(n) reads loop_cnt[n]; on
 * the architectures that define RSEQ_INJECT_INPUT, RSEQ_INJECT_ASM(n)
 * reads loop_cnt[1] .. loop_cnt[6] as memory operands.
 */
#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

/*
 * Counters given fixed assembler names so the x86 flavors of
 * RSEQ_INJECT_ASM(n) can reference them by symbol. "used" keeps them
 * from being discarded even though no C code in view reads them.
 */
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

/*
 * Test knobs. NOTE(review): presumably filled in by option parsing
 * further down the file -- not visible from this part of the source.
 */
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

/* Default repetition count per thread; smaller when RSEQ_SKIP_FASTPATH is set. */
#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

/* Per-thread count of SIGUSR1 deliveries (bumped by the signal handler). */
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

/*
 * Per-thread state for the injection macros below: yield_mod_cnt is the
 * modulo counter of RSEQ_INJECT_C(), nr_abort is bumped by
 * RSEQ_INJECT_FAILED.
 */
static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

/* printf() that only prints when the "verbose" flag is non-zero. */
#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

/*
 * Per-architecture RSEQ_INJECT_ASM(n): an assembly fragment that loads
 * the n-th loop count into a scratch register, skips ahead (label 333)
 * when it is zero, and otherwise decrements it in a tight loop (label
 * 222) until it reaches zero. The scratch register is listed in
 * RSEQ_INJECT_CLOBBER where one is defined.
 * NOTE(review): these macros are presumably consumed by "rseq.h",
 * which is included after this section -- confirm there.
 */
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

/* The counter is addressed rip-relative through INJECT_ASM_REG_P. */
#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

/* No RSEQ_INJECT_CLOBBER here: the scratch register comes from RSEQ_ASM_TMP_REG32. */
#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	" ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	" cbz " INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	" sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	" cbnz " INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif __PPC__

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

/* Counts one more abort for the calling thread. */
#define RSEQ_INJECT_FAILED \
	nr_abort++;

/*
 * C-level injection point n: calls rseq_barrier() loop_cnt[n] times.
 * When loop_cnt[n] is -1 and opt_modulo is non-zero, every
 * opt_modulo-th execution by a given thread additionally sleeps
 * (opt_sleep ms via poll), yields (opt_yield) and/or raises SIGUSR1
 * (opt_signal), then restarts the per-thread modulo counter.
 */
#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

/* BENCHMARK builds: verbose output compiles away to nothing. */
#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

/*
 * One lock word per cpu: 0 when free, 1 when held (see
 * rseq_this_cpu_lock() / rseq_percpu_unlock()). Each entry is aligned
 * on 128 bytes -- presumably to keep cpus off each other's cache lines.
 */
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

/* Array of lock words indexed by cpu number. */
struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

/* One counter per cpu, 128-byte aligned like the lock entries. */
struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

/* Shared state of the spinlock test: per-cpu locks and the counters they guard. */
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

/*
 * Per-thread argument of test_percpu_spinlock_thread():
 * data - shared test state,
 * reps - number of lock/increment/unlock rounds to run,
 * reg  - non-zero if the thread should register with rseq.
 */
struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

/* Shared state of the increment test: one counter per cpu. */
struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

/* Per-thread argument of test_percpu_inc_thread(); fields as above. */
struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

/* Singly linked list node carrying one word of payload. */
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

/* Head pointer of one cpu's list, 128-byte aligned. */
struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

/* Array of list heads indexed by cpu number. */
struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* Number of nodes test_percpu_buffer() initially stores per allowed cpu. */
#define BUFFER_ITEM_PER_CPU	100

/* Payload object referenced from the pointer buffer. */
struct percpu_buffer_node {
	intptr_t data;
};

/*
 * One cpu's pointer stack: "array" holds "buflen" slots, of which the
 * first "offset" are in use (push stores at array[offset], pop reads
 * array[offset - 1]).
 */
struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

/* Array of pointer stacks indexed by cpu number. */
struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

/* Number of items test_percpu_memcpy_buffer() initially stores per allowed cpu. */
#define MEMCPY_BUFFER_ITEM_PER_CPU	100

/* Two-field item stored by value in the memcpy buffer. */
struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

/*
 * One cpu's by-value stack: same offset/buflen convention as
 * struct percpu_buffer_entry, but "array" holds the items themselves.
 */
struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

/* Array of by-value stacks indexed by cpu number. */
struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock.  Grabs lock on current cpu.
*/ 328 static int rseq_this_cpu_lock(struct percpu_lock *lock) 329 { 330 int cpu; 331 332 for (;;) { 333 int ret; 334 335 cpu = rseq_cpu_start(); 336 ret = rseq_cmpeqv_storev(&lock->c[cpu].v, 337 0, 1, cpu); 338 if (rseq_likely(!ret)) 339 break; 340 /* Retry if comparison fails or rseq aborts. */ 341 } 342 /* 343 * Acquire semantic when taking lock after control dependency. 344 * Matches rseq_smp_store_release(). 345 */ 346 rseq_smp_acquire__after_ctrl_dep(); 347 return cpu; 348 } 349 350 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) 351 { 352 assert(lock->c[cpu].v == 1); 353 /* 354 * Release lock, with release semantic. Matches 355 * rseq_smp_acquire__after_ctrl_dep(). 356 */ 357 rseq_smp_store_release(&lock->c[cpu].v, 0); 358 } 359 360 void *test_percpu_spinlock_thread(void *arg) 361 { 362 struct spinlock_thread_test_data *thread_data = arg; 363 struct spinlock_test_data *data = thread_data->data; 364 long long i, reps; 365 366 if (!opt_disable_rseq && thread_data->reg && 367 rseq_register_current_thread()) 368 abort(); 369 reps = thread_data->reps; 370 for (i = 0; i < reps; i++) { 371 int cpu = rseq_cpu_start(); 372 373 cpu = rseq_this_cpu_lock(&data->lock); 374 data->c[cpu].count++; 375 rseq_percpu_unlock(&data->lock, cpu); 376 #ifndef BENCHMARK 377 if (i != 0 && !(i % (reps / 10))) 378 printf_verbose("tid %d: count %lld\n", 379 (int) rseq_gettid(), i); 380 #endif 381 } 382 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 383 (int) rseq_gettid(), nr_abort, signals_delivered); 384 if (!opt_disable_rseq && thread_data->reg && 385 rseq_unregister_current_thread()) 386 abort(); 387 return NULL; 388 } 389 390 /* 391 * A simple test which implements a sharded counter using a per-cpu 392 * lock. Obviously real applications might prefer to simply use a 393 * per-cpu increment; however, this is reasonable for a test and the 394 * lock can be extended to synchronize more complicated operations. 
395 */ 396 void test_percpu_spinlock(void) 397 { 398 const int num_threads = opt_threads; 399 int i, ret; 400 uint64_t sum; 401 pthread_t test_threads[num_threads]; 402 struct spinlock_test_data data; 403 struct spinlock_thread_test_data thread_data[num_threads]; 404 405 memset(&data, 0, sizeof(data)); 406 for (i = 0; i < num_threads; i++) { 407 thread_data[i].reps = opt_reps; 408 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 409 thread_data[i].reg = 1; 410 else 411 thread_data[i].reg = 0; 412 thread_data[i].data = &data; 413 ret = pthread_create(&test_threads[i], NULL, 414 test_percpu_spinlock_thread, 415 &thread_data[i]); 416 if (ret) { 417 errno = ret; 418 perror("pthread_create"); 419 abort(); 420 } 421 } 422 423 for (i = 0; i < num_threads; i++) { 424 ret = pthread_join(test_threads[i], NULL); 425 if (ret) { 426 errno = ret; 427 perror("pthread_join"); 428 abort(); 429 } 430 } 431 432 sum = 0; 433 for (i = 0; i < CPU_SETSIZE; i++) 434 sum += data.c[i].count; 435 436 assert(sum == (uint64_t)opt_reps * num_threads); 437 } 438 439 void *test_percpu_inc_thread(void *arg) 440 { 441 struct inc_thread_test_data *thread_data = arg; 442 struct inc_test_data *data = thread_data->data; 443 long long i, reps; 444 445 if (!opt_disable_rseq && thread_data->reg && 446 rseq_register_current_thread()) 447 abort(); 448 reps = thread_data->reps; 449 for (i = 0; i < reps; i++) { 450 int ret; 451 452 do { 453 int cpu; 454 455 cpu = rseq_cpu_start(); 456 ret = rseq_addv(&data->c[cpu].count, 1, cpu); 457 } while (rseq_unlikely(ret)); 458 #ifndef BENCHMARK 459 if (i != 0 && !(i % (reps / 10))) 460 printf_verbose("tid %d: count %lld\n", 461 (int) rseq_gettid(), i); 462 #endif 463 } 464 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 465 (int) rseq_gettid(), nr_abort, signals_delivered); 466 if (!opt_disable_rseq && thread_data->reg && 467 rseq_unregister_current_thread()) 468 abort(); 469 return NULL; 470 } 471 472 void test_percpu_inc(void) 473 { 474 
const int num_threads = opt_threads; 475 int i, ret; 476 uint64_t sum; 477 pthread_t test_threads[num_threads]; 478 struct inc_test_data data; 479 struct inc_thread_test_data thread_data[num_threads]; 480 481 memset(&data, 0, sizeof(data)); 482 for (i = 0; i < num_threads; i++) { 483 thread_data[i].reps = opt_reps; 484 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 485 thread_data[i].reg = 1; 486 else 487 thread_data[i].reg = 0; 488 thread_data[i].data = &data; 489 ret = pthread_create(&test_threads[i], NULL, 490 test_percpu_inc_thread, 491 &thread_data[i]); 492 if (ret) { 493 errno = ret; 494 perror("pthread_create"); 495 abort(); 496 } 497 } 498 499 for (i = 0; i < num_threads; i++) { 500 ret = pthread_join(test_threads[i], NULL); 501 if (ret) { 502 errno = ret; 503 perror("pthread_join"); 504 abort(); 505 } 506 } 507 508 sum = 0; 509 for (i = 0; i < CPU_SETSIZE; i++) 510 sum += data.c[i].count; 511 512 assert(sum == (uint64_t)opt_reps * num_threads); 513 } 514 515 void this_cpu_list_push(struct percpu_list *list, 516 struct percpu_list_node *node, 517 int *_cpu) 518 { 519 int cpu; 520 521 for (;;) { 522 intptr_t *targetptr, newval, expect; 523 int ret; 524 525 cpu = rseq_cpu_start(); 526 /* Load list->c[cpu].head with single-copy atomicity. */ 527 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); 528 newval = (intptr_t)node; 529 targetptr = (intptr_t *)&list->c[cpu].head; 530 node->next = (struct percpu_list_node *)expect; 531 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); 532 if (rseq_likely(!ret)) 533 break; 534 /* Retry if comparison fails or rseq aborts. */ 535 } 536 if (_cpu) 537 *_cpu = cpu; 538 } 539 540 /* 541 * Unlike a traditional lock-less linked list; the availability of a 542 * rseq primitive allows us to implement pop without concerns over 543 * ABA-type races. 
544 */ 545 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, 546 int *_cpu) 547 { 548 struct percpu_list_node *node = NULL; 549 int cpu; 550 551 for (;;) { 552 struct percpu_list_node *head; 553 intptr_t *targetptr, expectnot, *load; 554 off_t offset; 555 int ret; 556 557 cpu = rseq_cpu_start(); 558 targetptr = (intptr_t *)&list->c[cpu].head; 559 expectnot = (intptr_t)NULL; 560 offset = offsetof(struct percpu_list_node, next); 561 load = (intptr_t *)&head; 562 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, 563 offset, load, cpu); 564 if (rseq_likely(!ret)) { 565 node = head; 566 break; 567 } 568 if (ret > 0) 569 break; 570 /* Retry if rseq aborts. */ 571 } 572 if (_cpu) 573 *_cpu = cpu; 574 return node; 575 } 576 577 /* 578 * __percpu_list_pop is not safe against concurrent accesses. Should 579 * only be used on lists that are not concurrently modified. 580 */ 581 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) 582 { 583 struct percpu_list_node *node; 584 585 node = list->c[cpu].head; 586 if (!node) 587 return NULL; 588 list->c[cpu].head = node->next; 589 return node; 590 } 591 592 void *test_percpu_list_thread(void *arg) 593 { 594 long long i, reps; 595 struct percpu_list *list = (struct percpu_list *)arg; 596 597 if (!opt_disable_rseq && rseq_register_current_thread()) 598 abort(); 599 600 reps = opt_reps; 601 for (i = 0; i < reps; i++) { 602 struct percpu_list_node *node; 603 604 node = this_cpu_list_pop(list, NULL); 605 if (opt_yield) 606 sched_yield(); /* encourage shuffling */ 607 if (node) 608 this_cpu_list_push(list, node, NULL); 609 } 610 611 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 612 (int) rseq_gettid(), nr_abort, signals_delivered); 613 if (!opt_disable_rseq && rseq_unregister_current_thread()) 614 abort(); 615 616 return NULL; 617 } 618 619 /* Simultaneous modification to a per-cpu linked list from many threads. 
*/ 620 void test_percpu_list(void) 621 { 622 const int num_threads = opt_threads; 623 int i, j, ret; 624 uint64_t sum = 0, expected_sum = 0; 625 struct percpu_list list; 626 pthread_t test_threads[num_threads]; 627 cpu_set_t allowed_cpus; 628 629 memset(&list, 0, sizeof(list)); 630 631 /* Generate list entries for every usable cpu. */ 632 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 633 for (i = 0; i < CPU_SETSIZE; i++) { 634 if (!CPU_ISSET(i, &allowed_cpus)) 635 continue; 636 for (j = 1; j <= 100; j++) { 637 struct percpu_list_node *node; 638 639 expected_sum += j; 640 641 node = malloc(sizeof(*node)); 642 assert(node); 643 node->data = j; 644 node->next = list.c[i].head; 645 list.c[i].head = node; 646 } 647 } 648 649 for (i = 0; i < num_threads; i++) { 650 ret = pthread_create(&test_threads[i], NULL, 651 test_percpu_list_thread, &list); 652 if (ret) { 653 errno = ret; 654 perror("pthread_create"); 655 abort(); 656 } 657 } 658 659 for (i = 0; i < num_threads; i++) { 660 ret = pthread_join(test_threads[i], NULL); 661 if (ret) { 662 errno = ret; 663 perror("pthread_join"); 664 abort(); 665 } 666 } 667 668 for (i = 0; i < CPU_SETSIZE; i++) { 669 struct percpu_list_node *node; 670 671 if (!CPU_ISSET(i, &allowed_cpus)) 672 continue; 673 674 while ((node = __percpu_list_pop(&list, i))) { 675 sum += node->data; 676 free(node); 677 } 678 } 679 680 /* 681 * All entries should now be accounted for (unless some external 682 * actor is interfering with our allowed affinity while this 683 * test is running). 
684 */ 685 assert(sum == expected_sum); 686 } 687 688 bool this_cpu_buffer_push(struct percpu_buffer *buffer, 689 struct percpu_buffer_node *node, 690 int *_cpu) 691 { 692 bool result = false; 693 int cpu; 694 695 for (;;) { 696 intptr_t *targetptr_spec, newval_spec; 697 intptr_t *targetptr_final, newval_final; 698 intptr_t offset; 699 int ret; 700 701 cpu = rseq_cpu_start(); 702 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 703 if (offset == buffer->c[cpu].buflen) 704 break; 705 newval_spec = (intptr_t)node; 706 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; 707 newval_final = offset + 1; 708 targetptr_final = &buffer->c[cpu].offset; 709 if (opt_mb) 710 ret = rseq_cmpeqv_trystorev_storev_release( 711 targetptr_final, offset, targetptr_spec, 712 newval_spec, newval_final, cpu); 713 else 714 ret = rseq_cmpeqv_trystorev_storev(targetptr_final, 715 offset, targetptr_spec, newval_spec, 716 newval_final, cpu); 717 if (rseq_likely(!ret)) { 718 result = true; 719 break; 720 } 721 /* Retry if comparison fails or rseq aborts. */ 722 } 723 if (_cpu) 724 *_cpu = cpu; 725 return result; 726 } 727 728 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, 729 int *_cpu) 730 { 731 struct percpu_buffer_node *head; 732 int cpu; 733 734 for (;;) { 735 intptr_t *targetptr, newval; 736 intptr_t offset; 737 int ret; 738 739 cpu = rseq_cpu_start(); 740 /* Load offset with single-copy atomicity. */ 741 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 742 if (offset == 0) { 743 head = NULL; 744 break; 745 } 746 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); 747 newval = offset - 1; 748 targetptr = (intptr_t *)&buffer->c[cpu].offset; 749 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, 750 (intptr_t *)&buffer->c[cpu].array[offset - 1], 751 (intptr_t)head, newval, cpu); 752 if (rseq_likely(!ret)) 753 break; 754 /* Retry if comparison fails or rseq aborts. 
*/ 755 } 756 if (_cpu) 757 *_cpu = cpu; 758 return head; 759 } 760 761 /* 762 * __percpu_buffer_pop is not safe against concurrent accesses. Should 763 * only be used on buffers that are not concurrently modified. 764 */ 765 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, 766 int cpu) 767 { 768 struct percpu_buffer_node *head; 769 intptr_t offset; 770 771 offset = buffer->c[cpu].offset; 772 if (offset == 0) 773 return NULL; 774 head = buffer->c[cpu].array[offset - 1]; 775 buffer->c[cpu].offset = offset - 1; 776 return head; 777 } 778 779 void *test_percpu_buffer_thread(void *arg) 780 { 781 long long i, reps; 782 struct percpu_buffer *buffer = (struct percpu_buffer *)arg; 783 784 if (!opt_disable_rseq && rseq_register_current_thread()) 785 abort(); 786 787 reps = opt_reps; 788 for (i = 0; i < reps; i++) { 789 struct percpu_buffer_node *node; 790 791 node = this_cpu_buffer_pop(buffer, NULL); 792 if (opt_yield) 793 sched_yield(); /* encourage shuffling */ 794 if (node) { 795 if (!this_cpu_buffer_push(buffer, node, NULL)) { 796 /* Should increase buffer size. */ 797 abort(); 798 } 799 } 800 } 801 802 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 803 (int) rseq_gettid(), nr_abort, signals_delivered); 804 if (!opt_disable_rseq && rseq_unregister_current_thread()) 805 abort(); 806 807 return NULL; 808 } 809 810 /* Simultaneous modification to a per-cpu buffer from many threads. */ 811 void test_percpu_buffer(void) 812 { 813 const int num_threads = opt_threads; 814 int i, j, ret; 815 uint64_t sum = 0, expected_sum = 0; 816 struct percpu_buffer buffer; 817 pthread_t test_threads[num_threads]; 818 cpu_set_t allowed_cpus; 819 820 memset(&buffer, 0, sizeof(buffer)); 821 822 /* Generate list entries for every usable cpu. */ 823 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 824 for (i = 0; i < CPU_SETSIZE; i++) { 825 if (!CPU_ISSET(i, &allowed_cpus)) 826 continue; 827 /* Worse-case is every item in same CPU. 
*/ 828 buffer.c[i].array = 829 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 830 BUFFER_ITEM_PER_CPU); 831 assert(buffer.c[i].array); 832 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; 833 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { 834 struct percpu_buffer_node *node; 835 836 expected_sum += j; 837 838 /* 839 * We could theoretically put the word-sized 840 * "data" directly in the buffer. However, we 841 * want to model objects that would not fit 842 * within a single word, so allocate an object 843 * for each node. 844 */ 845 node = malloc(sizeof(*node)); 846 assert(node); 847 node->data = j; 848 buffer.c[i].array[j - 1] = node; 849 buffer.c[i].offset++; 850 } 851 } 852 853 for (i = 0; i < num_threads; i++) { 854 ret = pthread_create(&test_threads[i], NULL, 855 test_percpu_buffer_thread, &buffer); 856 if (ret) { 857 errno = ret; 858 perror("pthread_create"); 859 abort(); 860 } 861 } 862 863 for (i = 0; i < num_threads; i++) { 864 ret = pthread_join(test_threads[i], NULL); 865 if (ret) { 866 errno = ret; 867 perror("pthread_join"); 868 abort(); 869 } 870 } 871 872 for (i = 0; i < CPU_SETSIZE; i++) { 873 struct percpu_buffer_node *node; 874 875 if (!CPU_ISSET(i, &allowed_cpus)) 876 continue; 877 878 while ((node = __percpu_buffer_pop(&buffer, i))) { 879 sum += node->data; 880 free(node); 881 } 882 free(buffer.c[i].array); 883 } 884 885 /* 886 * All entries should now be accounted for (unless some external 887 * actor is interfering with our allowed affinity while this 888 * test is running). 889 */ 890 assert(sum == expected_sum); 891 } 892 893 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, 894 struct percpu_memcpy_buffer_node item, 895 int *_cpu) 896 { 897 bool result = false; 898 int cpu; 899 900 for (;;) { 901 intptr_t *targetptr_final, newval_final, offset; 902 char *destptr, *srcptr; 903 size_t copylen; 904 int ret; 905 906 cpu = rseq_cpu_start(); 907 /* Load offset with single-copy atomicity. 
*/ 908 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 909 if (offset == buffer->c[cpu].buflen) 910 break; 911 destptr = (char *)&buffer->c[cpu].array[offset]; 912 srcptr = (char *)&item; 913 /* copylen must be <= 4kB. */ 914 copylen = sizeof(item); 915 newval_final = offset + 1; 916 targetptr_final = &buffer->c[cpu].offset; 917 if (opt_mb) 918 ret = rseq_cmpeqv_trymemcpy_storev_release( 919 targetptr_final, offset, 920 destptr, srcptr, copylen, 921 newval_final, cpu); 922 else 923 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 924 offset, destptr, srcptr, copylen, 925 newval_final, cpu); 926 if (rseq_likely(!ret)) { 927 result = true; 928 break; 929 } 930 /* Retry if comparison fails or rseq aborts. */ 931 } 932 if (_cpu) 933 *_cpu = cpu; 934 return result; 935 } 936 937 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 938 struct percpu_memcpy_buffer_node *item, 939 int *_cpu) 940 { 941 bool result = false; 942 int cpu; 943 944 for (;;) { 945 intptr_t *targetptr_final, newval_final, offset; 946 char *destptr, *srcptr; 947 size_t copylen; 948 int ret; 949 950 cpu = rseq_cpu_start(); 951 /* Load offset with single-copy atomicity. */ 952 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 953 if (offset == 0) 954 break; 955 destptr = (char *)item; 956 srcptr = (char *)&buffer->c[cpu].array[offset - 1]; 957 /* copylen must be <= 4kB. */ 958 copylen = sizeof(*item); 959 newval_final = offset - 1; 960 targetptr_final = &buffer->c[cpu].offset; 961 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 962 offset, destptr, srcptr, copylen, 963 newval_final, cpu); 964 if (rseq_likely(!ret)) { 965 result = true; 966 break; 967 } 968 /* Retry if comparison fails or rseq aborts. */ 969 } 970 if (_cpu) 971 *_cpu = cpu; 972 return result; 973 } 974 975 /* 976 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should 977 * only be used on buffers that are not concurrently modified. 
978 */ 979 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 980 struct percpu_memcpy_buffer_node *item, 981 int cpu) 982 { 983 intptr_t offset; 984 985 offset = buffer->c[cpu].offset; 986 if (offset == 0) 987 return false; 988 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); 989 buffer->c[cpu].offset = offset - 1; 990 return true; 991 } 992 993 void *test_percpu_memcpy_buffer_thread(void *arg) 994 { 995 long long i, reps; 996 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; 997 998 if (!opt_disable_rseq && rseq_register_current_thread()) 999 abort(); 1000 1001 reps = opt_reps; 1002 for (i = 0; i < reps; i++) { 1003 struct percpu_memcpy_buffer_node item; 1004 bool result; 1005 1006 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); 1007 if (opt_yield) 1008 sched_yield(); /* encourage shuffling */ 1009 if (result) { 1010 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { 1011 /* Should increase buffer size. */ 1012 abort(); 1013 } 1014 } 1015 } 1016 1017 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 1018 (int) rseq_gettid(), nr_abort, signals_delivered); 1019 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1020 abort(); 1021 1022 return NULL; 1023 } 1024 1025 /* Simultaneous modification to a per-cpu buffer from many threads. */ 1026 void test_percpu_memcpy_buffer(void) 1027 { 1028 const int num_threads = opt_threads; 1029 int i, j, ret; 1030 uint64_t sum = 0, expected_sum = 0; 1031 struct percpu_memcpy_buffer buffer; 1032 pthread_t test_threads[num_threads]; 1033 cpu_set_t allowed_cpus; 1034 1035 memset(&buffer, 0, sizeof(buffer)); 1036 1037 /* Generate list entries for every usable cpu. */ 1038 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 1039 for (i = 0; i < CPU_SETSIZE; i++) { 1040 if (!CPU_ISSET(i, &allowed_cpus)) 1041 continue; 1042 /* Worse-case is every item in same CPU. 
*/ 1043 buffer.c[i].array = 1044 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 1045 MEMCPY_BUFFER_ITEM_PER_CPU); 1046 assert(buffer.c[i].array); 1047 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; 1048 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { 1049 expected_sum += 2 * j + 1; 1050 1051 /* 1052 * We could theoretically put the word-sized 1053 * "data" directly in the buffer. However, we 1054 * want to model objects that would not fit 1055 * within a single word, so allocate an object 1056 * for each node. 1057 */ 1058 buffer.c[i].array[j - 1].data1 = j; 1059 buffer.c[i].array[j - 1].data2 = j + 1; 1060 buffer.c[i].offset++; 1061 } 1062 } 1063 1064 for (i = 0; i < num_threads; i++) { 1065 ret = pthread_create(&test_threads[i], NULL, 1066 test_percpu_memcpy_buffer_thread, 1067 &buffer); 1068 if (ret) { 1069 errno = ret; 1070 perror("pthread_create"); 1071 abort(); 1072 } 1073 } 1074 1075 for (i = 0; i < num_threads; i++) { 1076 ret = pthread_join(test_threads[i], NULL); 1077 if (ret) { 1078 errno = ret; 1079 perror("pthread_join"); 1080 abort(); 1081 } 1082 } 1083 1084 for (i = 0; i < CPU_SETSIZE; i++) { 1085 struct percpu_memcpy_buffer_node item; 1086 1087 if (!CPU_ISSET(i, &allowed_cpus)) 1088 continue; 1089 1090 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { 1091 sum += item.data1; 1092 sum += item.data2; 1093 } 1094 free(buffer.c[i].array); 1095 } 1096 1097 /* 1098 * All entries should now be accounted for (unless some external 1099 * actor is interfering with our allowed affinity while this 1100 * test is running). 
1101 */ 1102 assert(sum == expected_sum); 1103 } 1104 1105 static void test_signal_interrupt_handler(int signo) 1106 { 1107 signals_delivered++; 1108 } 1109 1110 static int set_signal_handler(void) 1111 { 1112 int ret = 0; 1113 struct sigaction sa; 1114 sigset_t sigset; 1115 1116 ret = sigemptyset(&sigset); 1117 if (ret < 0) { 1118 perror("sigemptyset"); 1119 return ret; 1120 } 1121 1122 sa.sa_handler = test_signal_interrupt_handler; 1123 sa.sa_mask = sigset; 1124 sa.sa_flags = 0; 1125 ret = sigaction(SIGUSR1, &sa, NULL); 1126 if (ret < 0) { 1127 perror("sigaction"); 1128 return ret; 1129 } 1130 1131 printf_verbose("Signal handler set for SIGUSR1\n"); 1132 1133 return ret; 1134 } 1135 1136 struct test_membarrier_thread_args { 1137 int stop; 1138 intptr_t percpu_list_ptr; 1139 }; 1140 1141 /* Worker threads modify data in their "active" percpu lists. */ 1142 void *test_membarrier_worker_thread(void *arg) 1143 { 1144 struct test_membarrier_thread_args *args = 1145 (struct test_membarrier_thread_args *)arg; 1146 const int iters = opt_reps; 1147 int i; 1148 1149 if (rseq_register_current_thread()) { 1150 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1151 errno, strerror(errno)); 1152 abort(); 1153 } 1154 1155 /* Wait for initialization. */ 1156 while (!atomic_load(&args->percpu_list_ptr)) {} 1157 1158 for (i = 0; i < iters; ++i) { 1159 int ret; 1160 1161 do { 1162 int cpu = rseq_cpu_start(); 1163 1164 ret = rseq_offset_deref_addv(&args->percpu_list_ptr, 1165 sizeof(struct percpu_list_entry) * cpu, 1, cpu); 1166 } while (rseq_unlikely(ret)); 1167 } 1168 1169 if (rseq_unregister_current_thread()) { 1170 fprintf(stderr, "Error: rseq_unregister_current_thread(...) 
failed(%d): %s\n", 1171 errno, strerror(errno)); 1172 abort(); 1173 } 1174 return NULL; 1175 } 1176 1177 void test_membarrier_init_percpu_list(struct percpu_list *list) 1178 { 1179 int i; 1180 1181 memset(list, 0, sizeof(*list)); 1182 for (i = 0; i < CPU_SETSIZE; i++) { 1183 struct percpu_list_node *node; 1184 1185 node = malloc(sizeof(*node)); 1186 assert(node); 1187 node->data = 0; 1188 node->next = NULL; 1189 list->c[i].head = node; 1190 } 1191 } 1192 1193 void test_membarrier_free_percpu_list(struct percpu_list *list) 1194 { 1195 int i; 1196 1197 for (i = 0; i < CPU_SETSIZE; i++) 1198 free(list->c[i].head); 1199 } 1200 1201 static int sys_membarrier(int cmd, int flags, int cpu_id) 1202 { 1203 return syscall(__NR_membarrier, cmd, flags, cpu_id); 1204 } 1205 1206 /* 1207 * The manager thread swaps per-cpu lists that worker threads see, 1208 * and validates that there are no unexpected modifications. 1209 */ 1210 void *test_membarrier_manager_thread(void *arg) 1211 { 1212 struct test_membarrier_thread_args *args = 1213 (struct test_membarrier_thread_args *)arg; 1214 struct percpu_list list_a, list_b; 1215 intptr_t expect_a = 0, expect_b = 0; 1216 int cpu_a = 0, cpu_b = 0; 1217 1218 if (rseq_register_current_thread()) { 1219 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1220 errno, strerror(errno)); 1221 abort(); 1222 } 1223 1224 /* Init lists. */ 1225 test_membarrier_init_percpu_list(&list_a); 1226 test_membarrier_init_percpu_list(&list_b); 1227 1228 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); 1229 1230 while (!atomic_load(&args->stop)) { 1231 /* list_a is "active". */ 1232 cpu_a = rand() % CPU_SETSIZE; 1233 /* 1234 * As list_b is "inactive", we should never see changes 1235 * to list_b. 1236 */ 1237 if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) { 1238 fprintf(stderr, "Membarrier test failed\n"); 1239 abort(); 1240 } 1241 1242 /* Make list_b "active". 
*/ 1243 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); 1244 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 1245 MEMBARRIER_CMD_FLAG_CPU, cpu_a) && 1246 errno != ENXIO /* missing CPU */) { 1247 perror("sys_membarrier"); 1248 abort(); 1249 } 1250 /* 1251 * Cpu A should now only modify list_b, so the values 1252 * in list_a should be stable. 1253 */ 1254 expect_a = atomic_load(&list_a.c[cpu_a].head->data); 1255 1256 cpu_b = rand() % CPU_SETSIZE; 1257 /* 1258 * As list_a is "inactive", we should never see changes 1259 * to list_a. 1260 */ 1261 if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) { 1262 fprintf(stderr, "Membarrier test failed\n"); 1263 abort(); 1264 } 1265 1266 /* Make list_a "active". */ 1267 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); 1268 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 1269 MEMBARRIER_CMD_FLAG_CPU, cpu_b) && 1270 errno != ENXIO /* missing CPU*/) { 1271 perror("sys_membarrier"); 1272 abort(); 1273 } 1274 /* Remember a value from list_b. */ 1275 expect_b = atomic_load(&list_b.c[cpu_b].head->data); 1276 } 1277 1278 test_membarrier_free_percpu_list(&list_a); 1279 test_membarrier_free_percpu_list(&list_b); 1280 1281 if (rseq_unregister_current_thread()) { 1282 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", 1283 errno, strerror(errno)); 1284 abort(); 1285 } 1286 return NULL; 1287 } 1288 1289 /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. 
*/ 1290 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV 1291 void test_membarrier(void) 1292 { 1293 const int num_threads = opt_threads; 1294 struct test_membarrier_thread_args thread_args; 1295 pthread_t worker_threads[num_threads]; 1296 pthread_t manager_thread; 1297 int i, ret; 1298 1299 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) { 1300 perror("sys_membarrier"); 1301 abort(); 1302 } 1303 1304 thread_args.stop = 0; 1305 thread_args.percpu_list_ptr = 0; 1306 ret = pthread_create(&manager_thread, NULL, 1307 test_membarrier_manager_thread, &thread_args); 1308 if (ret) { 1309 errno = ret; 1310 perror("pthread_create"); 1311 abort(); 1312 } 1313 1314 for (i = 0; i < num_threads; i++) { 1315 ret = pthread_create(&worker_threads[i], NULL, 1316 test_membarrier_worker_thread, &thread_args); 1317 if (ret) { 1318 errno = ret; 1319 perror("pthread_create"); 1320 abort(); 1321 } 1322 } 1323 1324 1325 for (i = 0; i < num_threads; i++) { 1326 ret = pthread_join(worker_threads[i], NULL); 1327 if (ret) { 1328 errno = ret; 1329 perror("pthread_join"); 1330 abort(); 1331 } 1332 } 1333 1334 atomic_store(&thread_args.stop, 1); 1335 ret = pthread_join(manager_thread, NULL); 1336 if (ret) { 1337 errno = ret; 1338 perror("pthread_join"); 1339 abort(); 1340 } 1341 } 1342 #else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ 1343 void test_membarrier(void) 1344 { 1345 fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. 
" 1346 "Skipping membarrier test.\n"); 1347 } 1348 #endif 1349 1350 static void show_usage(int argc, char **argv) 1351 { 1352 printf("Usage : %s <OPTIONS>\n", 1353 argv[0]); 1354 printf("OPTIONS:\n"); 1355 printf(" [-1 loops] Number of loops for delay injection 1\n"); 1356 printf(" [-2 loops] Number of loops for delay injection 2\n"); 1357 printf(" [-3 loops] Number of loops for delay injection 3\n"); 1358 printf(" [-4 loops] Number of loops for delay injection 4\n"); 1359 printf(" [-5 loops] Number of loops for delay injection 5\n"); 1360 printf(" [-6 loops] Number of loops for delay injection 6\n"); 1361 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"); 1362 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"); 1363 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"); 1364 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"); 1365 printf(" [-y] Yield\n"); 1366 printf(" [-k] Kill thread with signal\n"); 1367 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"); 1368 printf(" [-t N] Number of threads (default 200)\n"); 1369 printf(" [-r N] Number of repetitions per thread (default 5000)\n"); 1370 printf(" [-d] Disable rseq system call (no initialization)\n"); 1371 printf(" [-D M] Disable rseq for each M threads\n"); 1372 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); 1373 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); 1374 printf(" [-v] Verbose output.\n"); 1375 printf(" [-h] Show this help.\n"); 1376 printf("\n"); 1377 } 1378 1379 int main(int argc, char **argv) 1380 { 1381 int i; 1382 1383 for (i = 1; i < argc; i++) { 1384 if (argv[i][0] != '-') 1385 continue; 1386 switch (argv[i][1]) { 1387 case '1': 1388 case '2': 1389 case '3': 1390 case '4': 1391 case '5': 1392 case '6': 1393 case '7': 1394 case '8': 1395 case '9': 1396 if (argc < i + 2) { 1397 
show_usage(argc, argv); 1398 goto error; 1399 } 1400 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]); 1401 i++; 1402 break; 1403 case 'm': 1404 if (argc < i + 2) { 1405 show_usage(argc, argv); 1406 goto error; 1407 } 1408 opt_modulo = atol(argv[i + 1]); 1409 if (opt_modulo < 0) { 1410 show_usage(argc, argv); 1411 goto error; 1412 } 1413 i++; 1414 break; 1415 case 's': 1416 if (argc < i + 2) { 1417 show_usage(argc, argv); 1418 goto error; 1419 } 1420 opt_sleep = atol(argv[i + 1]); 1421 if (opt_sleep < 0) { 1422 show_usage(argc, argv); 1423 goto error; 1424 } 1425 i++; 1426 break; 1427 case 'y': 1428 opt_yield = 1; 1429 break; 1430 case 'k': 1431 opt_signal = 1; 1432 break; 1433 case 'd': 1434 opt_disable_rseq = 1; 1435 break; 1436 case 'D': 1437 if (argc < i + 2) { 1438 show_usage(argc, argv); 1439 goto error; 1440 } 1441 opt_disable_mod = atol(argv[i + 1]); 1442 if (opt_disable_mod < 0) { 1443 show_usage(argc, argv); 1444 goto error; 1445 } 1446 i++; 1447 break; 1448 case 't': 1449 if (argc < i + 2) { 1450 show_usage(argc, argv); 1451 goto error; 1452 } 1453 opt_threads = atol(argv[i + 1]); 1454 if (opt_threads < 0) { 1455 show_usage(argc, argv); 1456 goto error; 1457 } 1458 i++; 1459 break; 1460 case 'r': 1461 if (argc < i + 2) { 1462 show_usage(argc, argv); 1463 goto error; 1464 } 1465 opt_reps = atoll(argv[i + 1]); 1466 if (opt_reps < 0) { 1467 show_usage(argc, argv); 1468 goto error; 1469 } 1470 i++; 1471 break; 1472 case 'h': 1473 show_usage(argc, argv); 1474 goto end; 1475 case 'T': 1476 if (argc < i + 2) { 1477 show_usage(argc, argv); 1478 goto error; 1479 } 1480 opt_test = *argv[i + 1]; 1481 switch (opt_test) { 1482 case 's': 1483 case 'l': 1484 case 'i': 1485 case 'b': 1486 case 'm': 1487 case 'r': 1488 break; 1489 default: 1490 show_usage(argc, argv); 1491 goto error; 1492 } 1493 i++; 1494 break; 1495 case 'v': 1496 verbose = 1; 1497 break; 1498 case 'M': 1499 opt_mb = 1; 1500 break; 1501 default: 1502 show_usage(argc, argv); 1503 goto error; 1504 } 1505 } 
1506 1507 loop_cnt_1 = loop_cnt[1]; 1508 loop_cnt_2 = loop_cnt[2]; 1509 loop_cnt_3 = loop_cnt[3]; 1510 loop_cnt_4 = loop_cnt[4]; 1511 loop_cnt_5 = loop_cnt[5]; 1512 loop_cnt_6 = loop_cnt[6]; 1513 1514 if (set_signal_handler()) 1515 goto error; 1516 1517 if (!opt_disable_rseq && rseq_register_current_thread()) 1518 goto error; 1519 switch (opt_test) { 1520 case 's': 1521 printf_verbose("spinlock\n"); 1522 test_percpu_spinlock(); 1523 break; 1524 case 'l': 1525 printf_verbose("linked list\n"); 1526 test_percpu_list(); 1527 break; 1528 case 'b': 1529 printf_verbose("buffer\n"); 1530 test_percpu_buffer(); 1531 break; 1532 case 'm': 1533 printf_verbose("memcpy buffer\n"); 1534 test_percpu_memcpy_buffer(); 1535 break; 1536 case 'i': 1537 printf_verbose("counter increment\n"); 1538 test_percpu_inc(); 1539 break; 1540 case 'r': 1541 printf_verbose("membarrier\n"); 1542 test_membarrier(); 1543 break; 1544 } 1545 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1546 abort(); 1547 end: 1548 return 0; 1549 1550 error: 1551 return -1; 1552 } 1553