// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}
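
/*
 * The RSEQ_INJECT_* hooks above are consumed by the rseq critical-section
 * macros pulled in through rseq.h below: RSEQ_INJECT_ASM(n) spins for
 * loop_cnt[n] iterations inside the assembly fast path, RSEQ_INJECT_C(n)
 * does the same from C and, when loop_cnt[n] == -1, yields, sleeps or
 * raises SIGUSR1 every opt_modulo iterations, and RSEQ_INJECT_FAILED
 * counts aborts. This is what turns the benchmark into a stress test for
 * the abort/restart paths.
 */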

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

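/*
 * Per-CPU state is modelled as fixed CPU_SETSIZE-sized arrays indexed by
 * CPU number. Each per-CPU entry below is aligned to 128 bytes so that
 * entries belonging to different CPUs do not share cache lines (avoiding
 * false sharing between CPUs).
 */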
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
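/*
 * The lock fast path relies on rseq_cmpeqv_storev(): within a restartable
 * sequence tied to the current CPU, it compares the per-CPU lock word
 * against 0 and stores 1 if it still matches. A non-zero return means the
 * comparison failed (lock already held) or the sequence was aborted by
 * preemption, migration or signal delivery, so the caller simply retries.
 */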
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

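/*
 * Per-cpu increment test: the simplest rseq user. rseq_addv() adds 1 to
 * the counter of the CPU the thread is currently running on, and the
 * operation is retried whenever the restartable sequence aborts.
 */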
void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

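/*
 * Push links the new node in front of the current CPU's list head and
 * publishes it with rseq_cmpeqv_storev(), which only commits if the head
 * is still the one that was read and the thread has not been migrated in
 * the meantime.
 */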
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

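/*
 * Buffer push uses a two-store primitive: the element pointer is stored
 * speculatively into array[offset], and the store of the incremented
 * offset commits the operation, provided the offset still matches and
 * the sequence was not aborted. With -M (opt_mb), the _release variant
 * is used so the commit store has store-release semantics.
 */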
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

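/*
 * Same idea as the pointer-based buffer above, but elements are copied by
 * value: rseq_cmpeqv_trymemcpy_storev() performs the (size-bounded) copy
 * speculatively and commits by storing the updated offset.
 */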
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

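/*
 * Membarrier test overview: worker threads increment counters through
 * args->percpu_list_ptr from inside an rseq critical section, while the
 * manager thread flips that pointer between two lists. After each flip
 * the manager issues a CPU-targeted MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
 * which restarts any rseq critical section in flight on that CPU, so no
 * worker can keep incrementing through the stale pointer. The manager
 * then verifies that the now-inactive list no longer changes.
 */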
/* Test MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!atomic_load(&args->percpu_list_ptr)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = rseq_cpu_start();

			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

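/*
 * Raw membarrier(2) wrapper (libc may not provide one). Note that
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ can only be used after the
 * process has registered with MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ,
 * which test_membarrier() does before starting any threads.
 */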
static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

	while (!atomic_load(&args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * CPU A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = atomic_load(&list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	atomic_store(&thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}