1 // SPDX-License-Identifier: LGPL-2.1 2 #define _GNU_SOURCE 3 #include <assert.h> 4 #include <linux/membarrier.h> 5 #include <pthread.h> 6 #include <sched.h> 7 #include <stdatomic.h> 8 #include <stdint.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <syscall.h> 13 #include <unistd.h> 14 #include <poll.h> 15 #include <sys/types.h> 16 #include <signal.h> 17 #include <errno.h> 18 #include <stddef.h> 19 20 static inline pid_t rseq_gettid(void) 21 { 22 return syscall(__NR_gettid); 23 } 24 25 #define NR_INJECT 9 26 static int loop_cnt[NR_INJECT + 1]; 27 28 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used)); 29 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used)); 30 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used)); 31 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used)); 32 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used)); 33 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used)); 34 35 static int opt_modulo, verbose; 36 37 static int opt_yield, opt_signal, opt_sleep, 38 opt_disable_rseq, opt_threads = 200, 39 opt_disable_mod = 0, opt_test = 's', opt_mb = 0; 40 41 #ifndef RSEQ_SKIP_FASTPATH 42 static long long opt_reps = 5000; 43 #else 44 static long long opt_reps = 100; 45 #endif 46 47 static __thread __attribute__((tls_model("initial-exec"))) 48 unsigned int signals_delivered; 49 50 #ifndef BENCHMARK 51 52 static __thread __attribute__((tls_model("initial-exec"), unused)) 53 unsigned int yield_mod_cnt, nr_abort; 54 55 #define printf_verbose(fmt, ...) \ 56 do { \ 57 if (verbose) \ 58 printf(fmt, ## __VA_ARGS__); \ 59 } while (0) 60 61 #ifdef __i386__ 62 63 #define INJECT_ASM_REG "eax" 64 65 #define RSEQ_INJECT_CLOBBER \ 66 , INJECT_ASM_REG 67 68 #define RSEQ_INJECT_ASM(n) \ 69 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ 70 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 71 "jz 333f\n\t" \ 72 "222:\n\t" \ 73 "dec %%" INJECT_ASM_REG "\n\t" \ 74 "jnz 222b\n\t" \ 75 "333:\n\t" 76 77 #elif defined(__x86_64__) 78 79 #define INJECT_ASM_REG_P "rax" 80 #define INJECT_ASM_REG "eax" 81 82 #define RSEQ_INJECT_CLOBBER \ 83 , INJECT_ASM_REG_P \ 84 , INJECT_ASM_REG 85 86 #define RSEQ_INJECT_ASM(n) \ 87 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \ 88 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \ 89 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 90 "jz 333f\n\t" \ 91 "222:\n\t" \ 92 "dec %%" INJECT_ASM_REG "\n\t" \ 93 "jnz 222b\n\t" \ 94 "333:\n\t" 95 96 #elif defined(__s390__) 97 98 #define RSEQ_INJECT_INPUT \ 99 , [loop_cnt_1]"m"(loop_cnt[1]) \ 100 , [loop_cnt_2]"m"(loop_cnt[2]) \ 101 , [loop_cnt_3]"m"(loop_cnt[3]) \ 102 , [loop_cnt_4]"m"(loop_cnt[4]) \ 103 , [loop_cnt_5]"m"(loop_cnt[5]) \ 104 , [loop_cnt_6]"m"(loop_cnt[6]) 105 106 #define INJECT_ASM_REG "r12" 107 108 #define RSEQ_INJECT_CLOBBER \ 109 , INJECT_ASM_REG 110 111 #define RSEQ_INJECT_ASM(n) \ 112 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 113 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \ 114 "je 333f\n\t" \ 115 "222:\n\t" \ 116 "ahi %%" INJECT_ASM_REG ", -1\n\t" \ 117 "jnz 222b\n\t" \ 118 "333:\n\t" 119 120 #elif defined(__ARMEL__) 121 122 #define RSEQ_INJECT_INPUT \ 123 , [loop_cnt_1]"m"(loop_cnt[1]) \ 124 , [loop_cnt_2]"m"(loop_cnt[2]) \ 125 , [loop_cnt_3]"m"(loop_cnt[3]) \ 126 , [loop_cnt_4]"m"(loop_cnt[4]) \ 127 , [loop_cnt_5]"m"(loop_cnt[5]) \ 128 , [loop_cnt_6]"m"(loop_cnt[6]) 129 130 #define INJECT_ASM_REG "r4" 131 132 #define RSEQ_INJECT_CLOBBER \ 133 , INJECT_ASM_REG 134 135 #define RSEQ_INJECT_ASM(n) \ 136 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 137 "cmp " INJECT_ASM_REG ", #0\n\t" \ 138 "beq 333f\n\t" \ 139 "222:\n\t" \ 140 "subs " INJECT_ASM_REG ", #1\n\t" \ 141 "bne 222b\n\t" \ 142 "333:\n\t" 143 144 #elif defined(__AARCH64EL__) 145 146 #define RSEQ_INJECT_INPUT \ 147 , [loop_cnt_1] "Qo" (loop_cnt[1]) \ 148 , [loop_cnt_2] "Qo" (loop_cnt[2]) \ 149 , [loop_cnt_3] "Qo" (loop_cnt[3]) \ 150 , [loop_cnt_4] "Qo" (loop_cnt[4]) \ 151 , [loop_cnt_5] "Qo" (loop_cnt[5]) \ 152 , [loop_cnt_6] "Qo" (loop_cnt[6]) 153 154 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32 155 156 #define RSEQ_INJECT_ASM(n) \ 157 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \ 158 " cbz " INJECT_ASM_REG ", 333f\n" \ 159 "222:\n" \ 160 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \ 161 " cbnz " INJECT_ASM_REG ", 222b\n" \ 162 "333:\n" 163 164 #elif defined(__PPC__) 165 166 #define RSEQ_INJECT_INPUT \ 167 , [loop_cnt_1]"m"(loop_cnt[1]) \ 168 , [loop_cnt_2]"m"(loop_cnt[2]) \ 169 , [loop_cnt_3]"m"(loop_cnt[3]) \ 170 , [loop_cnt_4]"m"(loop_cnt[4]) \ 171 , [loop_cnt_5]"m"(loop_cnt[5]) \ 172 , [loop_cnt_6]"m"(loop_cnt[6]) 173 174 #define INJECT_ASM_REG "r18" 175 176 #define RSEQ_INJECT_CLOBBER \ 177 , INJECT_ASM_REG 178 179 #define RSEQ_INJECT_ASM(n) \ 180 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 181 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \ 182 "beq 333f\n\t" \ 183 "222:\n\t" \ 184 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ 185 "bne 222b\n\t" \ 186 "333:\n\t" 187 188 #elif defined(__mips__) 189 190 #define RSEQ_INJECT_INPUT \ 191 , [loop_cnt_1]"m"(loop_cnt[1]) \ 192 , [loop_cnt_2]"m"(loop_cnt[2]) \ 193 , [loop_cnt_3]"m"(loop_cnt[3]) \ 194 , [loop_cnt_4]"m"(loop_cnt[4]) \ 195 , [loop_cnt_5]"m"(loop_cnt[5]) \ 196 , [loop_cnt_6]"m"(loop_cnt[6]) 197 198 #define INJECT_ASM_REG "$5" 199 200 #define RSEQ_INJECT_CLOBBER \ 201 , INJECT_ASM_REG 202 203 #define RSEQ_INJECT_ASM(n) \ 204 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 205 "beqz " INJECT_ASM_REG ", 333f\n\t" \ 206 "222:\n\t" \ 207 "addiu " INJECT_ASM_REG ", -1\n\t" \ 208 "bnez " INJECT_ASM_REG ", 222b\n\t" \ 209 "333:\n\t" 210 #elif defined(__riscv) 211 212 #define RSEQ_INJECT_INPUT \ 213 , [loop_cnt_1]"m"(loop_cnt[1]) \ 214 , [loop_cnt_2]"m"(loop_cnt[2]) \ 215 , [loop_cnt_3]"m"(loop_cnt[3]) \ 216 , [loop_cnt_4]"m"(loop_cnt[4]) \ 217 , [loop_cnt_5]"m"(loop_cnt[5]) \ 218 , [loop_cnt_6]"m"(loop_cnt[6]) 219 220 #define INJECT_ASM_REG "t1" 221 222 #define RSEQ_INJECT_CLOBBER \ 223 , INJECT_ASM_REG 224 225 #define RSEQ_INJECT_ASM(n) \ 226 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 227 "beqz " INJECT_ASM_REG ", 333f\n\t" \ 228 "222:\n\t" \ 229 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \ 230 "bnez " INJECT_ASM_REG ", 222b\n\t" \ 231 "333:\n\t" 232 233 234 #else 235 #error unsupported target 236 #endif 237 238 #define RSEQ_INJECT_FAILED \ 239 nr_abort++; 240 241 #define RSEQ_INJECT_C(n) \ 242 { \ 243 int loc_i, loc_nr_loops = loop_cnt[n]; \ 244 \ 245 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \ 246 rseq_barrier(); \ 247 } \ 248 if (loc_nr_loops == -1 && opt_modulo) { \ 249 if (yield_mod_cnt == opt_modulo - 1) { \ 250 if (opt_sleep > 0) \ 251 poll(NULL, 0, opt_sleep); \ 252 if (opt_yield) \ 253 sched_yield(); \ 254 if (opt_signal) \ 255 raise(SIGUSR1); \ 256 yield_mod_cnt = 0; \ 257 } else { \ 258 yield_mod_cnt++; \ 259 } \ 260 } \ 261 } 262 263 #else 264 265 #define printf_verbose(fmt, ...) 266 267 #endif /* BENCHMARK */ 268 269 #include "rseq.h" 270 271 struct percpu_lock_entry { 272 intptr_t v; 273 } __attribute__((aligned(128))); 274 275 struct percpu_lock { 276 struct percpu_lock_entry c[CPU_SETSIZE]; 277 }; 278 279 struct test_data_entry { 280 intptr_t count; 281 } __attribute__((aligned(128))); 282 283 struct spinlock_test_data { 284 struct percpu_lock lock; 285 struct test_data_entry c[CPU_SETSIZE]; 286 }; 287 288 struct spinlock_thread_test_data { 289 struct spinlock_test_data *data; 290 long long reps; 291 int reg; 292 }; 293 294 struct inc_test_data { 295 struct test_data_entry c[CPU_SETSIZE]; 296 }; 297 298 struct inc_thread_test_data { 299 struct inc_test_data *data; 300 long long reps; 301 int reg; 302 }; 303 304 struct percpu_list_node { 305 intptr_t data; 306 struct percpu_list_node *next; 307 }; 308 309 struct percpu_list_entry { 310 struct percpu_list_node *head; 311 } __attribute__((aligned(128))); 312 313 struct percpu_list { 314 struct percpu_list_entry c[CPU_SETSIZE]; 315 }; 316 317 #define BUFFER_ITEM_PER_CPU 100 318 319 struct percpu_buffer_node { 320 intptr_t data; 321 }; 322 323 struct percpu_buffer_entry { 324 intptr_t offset; 325 intptr_t buflen; 326 struct percpu_buffer_node **array; 327 } __attribute__((aligned(128))); 328 329 struct percpu_buffer { 330 struct percpu_buffer_entry c[CPU_SETSIZE]; 331 }; 332 333 #define MEMCPY_BUFFER_ITEM_PER_CPU 100 334 335 struct percpu_memcpy_buffer_node { 336 intptr_t data1; 337 uint64_t data2; 338 }; 339 340 struct percpu_memcpy_buffer_entry { 341 intptr_t offset; 342 intptr_t buflen; 343 struct percpu_memcpy_buffer_node *array; 344 } __attribute__((aligned(128))); 345 346 struct percpu_memcpy_buffer { 347 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; 348 }; 349 350 /* A simple percpu spinlock. Grabs lock on current cpu. */ 351 static int rseq_this_cpu_lock(struct percpu_lock *lock) 352 { 353 int cpu; 354 355 for (;;) { 356 int ret; 357 358 cpu = rseq_cpu_start(); 359 ret = rseq_cmpeqv_storev(&lock->c[cpu].v, 360 0, 1, cpu); 361 if (rseq_likely(!ret)) 362 break; 363 /* Retry if comparison fails or rseq aborts. */ 364 } 365 /* 366 * Acquire semantic when taking lock after control dependency. 367 * Matches rseq_smp_store_release(). 368 */ 369 rseq_smp_acquire__after_ctrl_dep(); 370 return cpu; 371 } 372 373 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) 374 { 375 assert(lock->c[cpu].v == 1); 376 /* 377 * Release lock, with release semantic. Matches 378 * rseq_smp_acquire__after_ctrl_dep(). 379 */ 380 rseq_smp_store_release(&lock->c[cpu].v, 0); 381 } 382 383 void *test_percpu_spinlock_thread(void *arg) 384 { 385 struct spinlock_thread_test_data *thread_data = arg; 386 struct spinlock_test_data *data = thread_data->data; 387 long long i, reps; 388 389 if (!opt_disable_rseq && thread_data->reg && 390 rseq_register_current_thread()) 391 abort(); 392 reps = thread_data->reps; 393 for (i = 0; i < reps; i++) { 394 int cpu = rseq_this_cpu_lock(&data->lock); 395 data->c[cpu].count++; 396 rseq_percpu_unlock(&data->lock, cpu); 397 #ifndef BENCHMARK 398 if (i != 0 && !(i % (reps / 10))) 399 printf_verbose("tid %d: count %lld\n", 400 (int) rseq_gettid(), i); 401 #endif 402 } 403 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 404 (int) rseq_gettid(), nr_abort, signals_delivered); 405 if (!opt_disable_rseq && thread_data->reg && 406 rseq_unregister_current_thread()) 407 abort(); 408 return NULL; 409 } 410 411 /* 412 * A simple test which implements a sharded counter using a per-cpu 413 * lock. Obviously real applications might prefer to simply use a 414 * per-cpu increment; however, this is reasonable for a test and the 415 * lock can be extended to synchronize more complicated operations. 416 */ 417 void test_percpu_spinlock(void) 418 { 419 const int num_threads = opt_threads; 420 int i, ret; 421 uint64_t sum; 422 pthread_t test_threads[num_threads]; 423 struct spinlock_test_data data; 424 struct spinlock_thread_test_data thread_data[num_threads]; 425 426 memset(&data, 0, sizeof(data)); 427 for (i = 0; i < num_threads; i++) { 428 thread_data[i].reps = opt_reps; 429 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 430 thread_data[i].reg = 1; 431 else 432 thread_data[i].reg = 0; 433 thread_data[i].data = &data; 434 ret = pthread_create(&test_threads[i], NULL, 435 test_percpu_spinlock_thread, 436 &thread_data[i]); 437 if (ret) { 438 errno = ret; 439 perror("pthread_create"); 440 abort(); 441 } 442 } 443 444 for (i = 0; i < num_threads; i++) { 445 ret = pthread_join(test_threads[i], NULL); 446 if (ret) { 447 errno = ret; 448 perror("pthread_join"); 449 abort(); 450 } 451 } 452 453 sum = 0; 454 for (i = 0; i < CPU_SETSIZE; i++) 455 sum += data.c[i].count; 456 457 assert(sum == (uint64_t)opt_reps * num_threads); 458 } 459 460 void *test_percpu_inc_thread(void *arg) 461 { 462 struct inc_thread_test_data *thread_data = arg; 463 struct inc_test_data *data = thread_data->data; 464 long long i, reps; 465 466 if (!opt_disable_rseq && thread_data->reg && 467 rseq_register_current_thread()) 468 abort(); 469 reps = thread_data->reps; 470 for (i = 0; i < reps; i++) { 471 int ret; 472 473 do { 474 int cpu; 475 476 cpu = rseq_cpu_start(); 477 ret = rseq_addv(&data->c[cpu].count, 1, cpu); 478 } while (rseq_unlikely(ret)); 479 #ifndef BENCHMARK 480 if (i != 0 && !(i % (reps / 10))) 481 printf_verbose("tid %d: count %lld\n", 482 (int) rseq_gettid(), i); 483 #endif 484 } 485 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 486 (int) rseq_gettid(), nr_abort, signals_delivered); 487 if (!opt_disable_rseq && thread_data->reg && 488 rseq_unregister_current_thread()) 489 abort(); 490 return NULL; 491 } 492 493 void test_percpu_inc(void) 494 { 495 const int num_threads = opt_threads; 496 int i, ret; 497 uint64_t sum; 498 pthread_t test_threads[num_threads]; 499 struct inc_test_data data; 500 struct inc_thread_test_data thread_data[num_threads]; 501 502 memset(&data, 0, sizeof(data)); 503 for (i = 0; i < num_threads; i++) { 504 thread_data[i].reps = opt_reps; 505 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 506 thread_data[i].reg = 1; 507 else 508 thread_data[i].reg = 0; 509 thread_data[i].data = &data; 510 ret = pthread_create(&test_threads[i], NULL, 511 test_percpu_inc_thread, 512 &thread_data[i]); 513 if (ret) { 514 errno = ret; 515 perror("pthread_create"); 516 abort(); 517 } 518 } 519 520 for (i = 0; i < num_threads; i++) { 521 ret = pthread_join(test_threads[i], NULL); 522 if (ret) { 523 errno = ret; 524 perror("pthread_join"); 525 abort(); 526 } 527 } 528 529 sum = 0; 530 for (i = 0; i < CPU_SETSIZE; i++) 531 sum += data.c[i].count; 532 533 assert(sum == (uint64_t)opt_reps * num_threads); 534 } 535 536 void this_cpu_list_push(struct percpu_list *list, 537 struct percpu_list_node *node, 538 int *_cpu) 539 { 540 int cpu; 541 542 for (;;) { 543 intptr_t *targetptr, newval, expect; 544 int ret; 545 546 cpu = rseq_cpu_start(); 547 /* Load list->c[cpu].head with single-copy atomicity. */ 548 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); 549 newval = (intptr_t)node; 550 targetptr = (intptr_t *)&list->c[cpu].head; 551 node->next = (struct percpu_list_node *)expect; 552 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); 553 if (rseq_likely(!ret)) 554 break; 555 /* Retry if comparison fails or rseq aborts. */ 556 } 557 if (_cpu) 558 *_cpu = cpu; 559 } 560 561 /* 562 * Unlike a traditional lock-less linked list; the availability of a 563 * rseq primitive allows us to implement pop without concerns over 564 * ABA-type races. 565 */ 566 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, 567 int *_cpu) 568 { 569 struct percpu_list_node *node = NULL; 570 int cpu; 571 572 for (;;) { 573 struct percpu_list_node *head; 574 intptr_t *targetptr, expectnot, *load; 575 long offset; 576 int ret; 577 578 cpu = rseq_cpu_start(); 579 targetptr = (intptr_t *)&list->c[cpu].head; 580 expectnot = (intptr_t)NULL; 581 offset = offsetof(struct percpu_list_node, next); 582 load = (intptr_t *)&head; 583 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, 584 offset, load, cpu); 585 if (rseq_likely(!ret)) { 586 node = head; 587 break; 588 } 589 if (ret > 0) 590 break; 591 /* Retry if rseq aborts. */ 592 } 593 if (_cpu) 594 *_cpu = cpu; 595 return node; 596 } 597 598 /* 599 * __percpu_list_pop is not safe against concurrent accesses. Should 600 * only be used on lists that are not concurrently modified. 601 */ 602 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) 603 { 604 struct percpu_list_node *node; 605 606 node = list->c[cpu].head; 607 if (!node) 608 return NULL; 609 list->c[cpu].head = node->next; 610 return node; 611 } 612 613 void *test_percpu_list_thread(void *arg) 614 { 615 long long i, reps; 616 struct percpu_list *list = (struct percpu_list *)arg; 617 618 if (!opt_disable_rseq && rseq_register_current_thread()) 619 abort(); 620 621 reps = opt_reps; 622 for (i = 0; i < reps; i++) { 623 struct percpu_list_node *node; 624 625 node = this_cpu_list_pop(list, NULL); 626 if (opt_yield) 627 sched_yield(); /* encourage shuffling */ 628 if (node) 629 this_cpu_list_push(list, node, NULL); 630 } 631 632 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 633 (int) rseq_gettid(), nr_abort, signals_delivered); 634 if (!opt_disable_rseq && rseq_unregister_current_thread()) 635 abort(); 636 637 return NULL; 638 } 639 640 /* Simultaneous modification to a per-cpu linked list from many threads. */ 641 void test_percpu_list(void) 642 { 643 const int num_threads = opt_threads; 644 int i, j, ret; 645 uint64_t sum = 0, expected_sum = 0; 646 struct percpu_list list; 647 pthread_t test_threads[num_threads]; 648 cpu_set_t allowed_cpus; 649 650 memset(&list, 0, sizeof(list)); 651 652 /* Generate list entries for every usable cpu. */ 653 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 654 for (i = 0; i < CPU_SETSIZE; i++) { 655 if (!CPU_ISSET(i, &allowed_cpus)) 656 continue; 657 for (j = 1; j <= 100; j++) { 658 struct percpu_list_node *node; 659 660 expected_sum += j; 661 662 node = malloc(sizeof(*node)); 663 assert(node); 664 node->data = j; 665 node->next = list.c[i].head; 666 list.c[i].head = node; 667 } 668 } 669 670 for (i = 0; i < num_threads; i++) { 671 ret = pthread_create(&test_threads[i], NULL, 672 test_percpu_list_thread, &list); 673 if (ret) { 674 errno = ret; 675 perror("pthread_create"); 676 abort(); 677 } 678 } 679 680 for (i = 0; i < num_threads; i++) { 681 ret = pthread_join(test_threads[i], NULL); 682 if (ret) { 683 errno = ret; 684 perror("pthread_join"); 685 abort(); 686 } 687 } 688 689 for (i = 0; i < CPU_SETSIZE; i++) { 690 struct percpu_list_node *node; 691 692 if (!CPU_ISSET(i, &allowed_cpus)) 693 continue; 694 695 while ((node = __percpu_list_pop(&list, i))) { 696 sum += node->data; 697 free(node); 698 } 699 } 700 701 /* 702 * All entries should now be accounted for (unless some external 703 * actor is interfering with our allowed affinity while this 704 * test is running). 705 */ 706 assert(sum == expected_sum); 707 } 708 709 bool this_cpu_buffer_push(struct percpu_buffer *buffer, 710 struct percpu_buffer_node *node, 711 int *_cpu) 712 { 713 bool result = false; 714 int cpu; 715 716 for (;;) { 717 intptr_t *targetptr_spec, newval_spec; 718 intptr_t *targetptr_final, newval_final; 719 intptr_t offset; 720 int ret; 721 722 cpu = rseq_cpu_start(); 723 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 724 if (offset == buffer->c[cpu].buflen) 725 break; 726 newval_spec = (intptr_t)node; 727 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; 728 newval_final = offset + 1; 729 targetptr_final = &buffer->c[cpu].offset; 730 if (opt_mb) 731 ret = rseq_cmpeqv_trystorev_storev_release( 732 targetptr_final, offset, targetptr_spec, 733 newval_spec, newval_final, cpu); 734 else 735 ret = rseq_cmpeqv_trystorev_storev(targetptr_final, 736 offset, targetptr_spec, newval_spec, 737 newval_final, cpu); 738 if (rseq_likely(!ret)) { 739 result = true; 740 break; 741 } 742 /* Retry if comparison fails or rseq aborts. */ 743 } 744 if (_cpu) 745 *_cpu = cpu; 746 return result; 747 } 748 749 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, 750 int *_cpu) 751 { 752 struct percpu_buffer_node *head; 753 int cpu; 754 755 for (;;) { 756 intptr_t *targetptr, newval; 757 intptr_t offset; 758 int ret; 759 760 cpu = rseq_cpu_start(); 761 /* Load offset with single-copy atomicity. */ 762 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 763 if (offset == 0) { 764 head = NULL; 765 break; 766 } 767 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); 768 newval = offset - 1; 769 targetptr = (intptr_t *)&buffer->c[cpu].offset; 770 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, 771 (intptr_t *)&buffer->c[cpu].array[offset - 1], 772 (intptr_t)head, newval, cpu); 773 if (rseq_likely(!ret)) 774 break; 775 /* Retry if comparison fails or rseq aborts. */ 776 } 777 if (_cpu) 778 *_cpu = cpu; 779 return head; 780 } 781 782 /* 783 * __percpu_buffer_pop is not safe against concurrent accesses. Should 784 * only be used on buffers that are not concurrently modified. 785 */ 786 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, 787 int cpu) 788 { 789 struct percpu_buffer_node *head; 790 intptr_t offset; 791 792 offset = buffer->c[cpu].offset; 793 if (offset == 0) 794 return NULL; 795 head = buffer->c[cpu].array[offset - 1]; 796 buffer->c[cpu].offset = offset - 1; 797 return head; 798 } 799 800 void *test_percpu_buffer_thread(void *arg) 801 { 802 long long i, reps; 803 struct percpu_buffer *buffer = (struct percpu_buffer *)arg; 804 805 if (!opt_disable_rseq && rseq_register_current_thread()) 806 abort(); 807 808 reps = opt_reps; 809 for (i = 0; i < reps; i++) { 810 struct percpu_buffer_node *node; 811 812 node = this_cpu_buffer_pop(buffer, NULL); 813 if (opt_yield) 814 sched_yield(); /* encourage shuffling */ 815 if (node) { 816 if (!this_cpu_buffer_push(buffer, node, NULL)) { 817 /* Should increase buffer size. */ 818 abort(); 819 } 820 } 821 } 822 823 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 824 (int) rseq_gettid(), nr_abort, signals_delivered); 825 if (!opt_disable_rseq && rseq_unregister_current_thread()) 826 abort(); 827 828 return NULL; 829 } 830 831 /* Simultaneous modification to a per-cpu buffer from many threads. */ 832 void test_percpu_buffer(void) 833 { 834 const int num_threads = opt_threads; 835 int i, j, ret; 836 uint64_t sum = 0, expected_sum = 0; 837 struct percpu_buffer buffer; 838 pthread_t test_threads[num_threads]; 839 cpu_set_t allowed_cpus; 840 841 memset(&buffer, 0, sizeof(buffer)); 842 843 /* Generate list entries for every usable cpu. */ 844 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 845 for (i = 0; i < CPU_SETSIZE; i++) { 846 if (!CPU_ISSET(i, &allowed_cpus)) 847 continue; 848 /* Worse-case is every item in same CPU. */ 849 buffer.c[i].array = 850 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 851 BUFFER_ITEM_PER_CPU); 852 assert(buffer.c[i].array); 853 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; 854 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { 855 struct percpu_buffer_node *node; 856 857 expected_sum += j; 858 859 /* 860 * We could theoretically put the word-sized 861 * "data" directly in the buffer. However, we 862 * want to model objects that would not fit 863 * within a single word, so allocate an object 864 * for each node. 865 */ 866 node = malloc(sizeof(*node)); 867 assert(node); 868 node->data = j; 869 buffer.c[i].array[j - 1] = node; 870 buffer.c[i].offset++; 871 } 872 } 873 874 for (i = 0; i < num_threads; i++) { 875 ret = pthread_create(&test_threads[i], NULL, 876 test_percpu_buffer_thread, &buffer); 877 if (ret) { 878 errno = ret; 879 perror("pthread_create"); 880 abort(); 881 } 882 } 883 884 for (i = 0; i < num_threads; i++) { 885 ret = pthread_join(test_threads[i], NULL); 886 if (ret) { 887 errno = ret; 888 perror("pthread_join"); 889 abort(); 890 } 891 } 892 893 for (i = 0; i < CPU_SETSIZE; i++) { 894 struct percpu_buffer_node *node; 895 896 if (!CPU_ISSET(i, &allowed_cpus)) 897 continue; 898 899 while ((node = __percpu_buffer_pop(&buffer, i))) { 900 sum += node->data; 901 free(node); 902 } 903 free(buffer.c[i].array); 904 } 905 906 /* 907 * All entries should now be accounted for (unless some external 908 * actor is interfering with our allowed affinity while this 909 * test is running). 910 */ 911 assert(sum == expected_sum); 912 } 913 914 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, 915 struct percpu_memcpy_buffer_node item, 916 int *_cpu) 917 { 918 bool result = false; 919 int cpu; 920 921 for (;;) { 922 intptr_t *targetptr_final, newval_final, offset; 923 char *destptr, *srcptr; 924 size_t copylen; 925 int ret; 926 927 cpu = rseq_cpu_start(); 928 /* Load offset with single-copy atomicity. */ 929 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 930 if (offset == buffer->c[cpu].buflen) 931 break; 932 destptr = (char *)&buffer->c[cpu].array[offset]; 933 srcptr = (char *)&item; 934 /* copylen must be <= 4kB. */ 935 copylen = sizeof(item); 936 newval_final = offset + 1; 937 targetptr_final = &buffer->c[cpu].offset; 938 if (opt_mb) 939 ret = rseq_cmpeqv_trymemcpy_storev_release( 940 targetptr_final, offset, 941 destptr, srcptr, copylen, 942 newval_final, cpu); 943 else 944 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 945 offset, destptr, srcptr, copylen, 946 newval_final, cpu); 947 if (rseq_likely(!ret)) { 948 result = true; 949 break; 950 } 951 /* Retry if comparison fails or rseq aborts. */ 952 } 953 if (_cpu) 954 *_cpu = cpu; 955 return result; 956 } 957 958 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 959 struct percpu_memcpy_buffer_node *item, 960 int *_cpu) 961 { 962 bool result = false; 963 int cpu; 964 965 for (;;) { 966 intptr_t *targetptr_final, newval_final, offset; 967 char *destptr, *srcptr; 968 size_t copylen; 969 int ret; 970 971 cpu = rseq_cpu_start(); 972 /* Load offset with single-copy atomicity. */ 973 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 974 if (offset == 0) 975 break; 976 destptr = (char *)item; 977 srcptr = (char *)&buffer->c[cpu].array[offset - 1]; 978 /* copylen must be <= 4kB. */ 979 copylen = sizeof(*item); 980 newval_final = offset - 1; 981 targetptr_final = &buffer->c[cpu].offset; 982 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 983 offset, destptr, srcptr, copylen, 984 newval_final, cpu); 985 if (rseq_likely(!ret)) { 986 result = true; 987 break; 988 } 989 /* Retry if comparison fails or rseq aborts. */ 990 } 991 if (_cpu) 992 *_cpu = cpu; 993 return result; 994 } 995 996 /* 997 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should 998 * only be used on buffers that are not concurrently modified. 999 */ 1000 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 1001 struct percpu_memcpy_buffer_node *item, 1002 int cpu) 1003 { 1004 intptr_t offset; 1005 1006 offset = buffer->c[cpu].offset; 1007 if (offset == 0) 1008 return false; 1009 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); 1010 buffer->c[cpu].offset = offset - 1; 1011 return true; 1012 } 1013 1014 void *test_percpu_memcpy_buffer_thread(void *arg) 1015 { 1016 long long i, reps; 1017 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; 1018 1019 if (!opt_disable_rseq && rseq_register_current_thread()) 1020 abort(); 1021 1022 reps = opt_reps; 1023 for (i = 0; i < reps; i++) { 1024 struct percpu_memcpy_buffer_node item; 1025 bool result; 1026 1027 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); 1028 if (opt_yield) 1029 sched_yield(); /* encourage shuffling */ 1030 if (result) { 1031 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { 1032 /* Should increase buffer size. */ 1033 abort(); 1034 } 1035 } 1036 } 1037 1038 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 1039 (int) rseq_gettid(), nr_abort, signals_delivered); 1040 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1041 abort(); 1042 1043 return NULL; 1044 } 1045 1046 /* Simultaneous modification to a per-cpu buffer from many threads. */ 1047 void test_percpu_memcpy_buffer(void) 1048 { 1049 const int num_threads = opt_threads; 1050 int i, j, ret; 1051 uint64_t sum = 0, expected_sum = 0; 1052 struct percpu_memcpy_buffer buffer; 1053 pthread_t test_threads[num_threads]; 1054 cpu_set_t allowed_cpus; 1055 1056 memset(&buffer, 0, sizeof(buffer)); 1057 1058 /* Generate list entries for every usable cpu. */ 1059 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 1060 for (i = 0; i < CPU_SETSIZE; i++) { 1061 if (!CPU_ISSET(i, &allowed_cpus)) 1062 continue; 1063 /* Worse-case is every item in same CPU. */ 1064 buffer.c[i].array = 1065 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 1066 MEMCPY_BUFFER_ITEM_PER_CPU); 1067 assert(buffer.c[i].array); 1068 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; 1069 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { 1070 expected_sum += 2 * j + 1; 1071 1072 /* 1073 * We could theoretically put the word-sized 1074 * "data" directly in the buffer. However, we 1075 * want to model objects that would not fit 1076 * within a single word, so allocate an object 1077 * for each node. 1078 */ 1079 buffer.c[i].array[j - 1].data1 = j; 1080 buffer.c[i].array[j - 1].data2 = j + 1; 1081 buffer.c[i].offset++; 1082 } 1083 } 1084 1085 for (i = 0; i < num_threads; i++) { 1086 ret = pthread_create(&test_threads[i], NULL, 1087 test_percpu_memcpy_buffer_thread, 1088 &buffer); 1089 if (ret) { 1090 errno = ret; 1091 perror("pthread_create"); 1092 abort(); 1093 } 1094 } 1095 1096 for (i = 0; i < num_threads; i++) { 1097 ret = pthread_join(test_threads[i], NULL); 1098 if (ret) { 1099 errno = ret; 1100 perror("pthread_join"); 1101 abort(); 1102 } 1103 } 1104 1105 for (i = 0; i < CPU_SETSIZE; i++) { 1106 struct percpu_memcpy_buffer_node item; 1107 1108 if (!CPU_ISSET(i, &allowed_cpus)) 1109 continue; 1110 1111 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { 1112 sum += item.data1; 1113 sum += item.data2; 1114 } 1115 free(buffer.c[i].array); 1116 } 1117 1118 /* 1119 * All entries should now be accounted for (unless some external 1120 * actor is interfering with our allowed affinity while this 1121 * test is running). 1122 */ 1123 assert(sum == expected_sum); 1124 } 1125 1126 static void test_signal_interrupt_handler(int signo) 1127 { 1128 signals_delivered++; 1129 } 1130 1131 static int set_signal_handler(void) 1132 { 1133 int ret = 0; 1134 struct sigaction sa; 1135 sigset_t sigset; 1136 1137 ret = sigemptyset(&sigset); 1138 if (ret < 0) { 1139 perror("sigemptyset"); 1140 return ret; 1141 } 1142 1143 sa.sa_handler = test_signal_interrupt_handler; 1144 sa.sa_mask = sigset; 1145 sa.sa_flags = 0; 1146 ret = sigaction(SIGUSR1, &sa, NULL); 1147 if (ret < 0) { 1148 perror("sigaction"); 1149 return ret; 1150 } 1151 1152 printf_verbose("Signal handler set for SIGUSR1\n"); 1153 1154 return ret; 1155 } 1156 1157 /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ 1158 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV 1159 struct test_membarrier_thread_args { 1160 int stop; 1161 intptr_t percpu_list_ptr; 1162 }; 1163 1164 /* Worker threads modify data in their "active" percpu lists. */ 1165 void *test_membarrier_worker_thread(void *arg) 1166 { 1167 struct test_membarrier_thread_args *args = 1168 (struct test_membarrier_thread_args *)arg; 1169 const int iters = opt_reps; 1170 int i; 1171 1172 if (rseq_register_current_thread()) { 1173 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1174 errno, strerror(errno)); 1175 abort(); 1176 } 1177 1178 /* Wait for initialization. */ 1179 while (!atomic_load(&args->percpu_list_ptr)) {} 1180 1181 for (i = 0; i < iters; ++i) { 1182 int ret; 1183 1184 do { 1185 int cpu = rseq_cpu_start(); 1186 1187 ret = rseq_offset_deref_addv(&args->percpu_list_ptr, 1188 sizeof(struct percpu_list_entry) * cpu, 1, cpu); 1189 } while (rseq_unlikely(ret)); 1190 } 1191 1192 if (rseq_unregister_current_thread()) { 1193 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", 1194 errno, strerror(errno)); 1195 abort(); 1196 } 1197 return NULL; 1198 } 1199 1200 void test_membarrier_init_percpu_list(struct percpu_list *list) 1201 { 1202 int i; 1203 1204 memset(list, 0, sizeof(*list)); 1205 for (i = 0; i < CPU_SETSIZE; i++) { 1206 struct percpu_list_node *node; 1207 1208 node = malloc(sizeof(*node)); 1209 assert(node); 1210 node->data = 0; 1211 node->next = NULL; 1212 list->c[i].head = node; 1213 } 1214 } 1215 1216 void test_membarrier_free_percpu_list(struct percpu_list *list) 1217 { 1218 int i; 1219 1220 for (i = 0; i < CPU_SETSIZE; i++) 1221 free(list->c[i].head); 1222 } 1223 1224 static int sys_membarrier(int cmd, int flags, int cpu_id) 1225 { 1226 return syscall(__NR_membarrier, cmd, flags, cpu_id); 1227 } 1228 1229 /* 1230 * The manager thread swaps per-cpu lists that worker threads see, 1231 * and validates that there are no unexpected modifications. 1232 */ 1233 void *test_membarrier_manager_thread(void *arg) 1234 { 1235 struct test_membarrier_thread_args *args = 1236 (struct test_membarrier_thread_args *)arg; 1237 struct percpu_list list_a, list_b; 1238 intptr_t expect_a = 0, expect_b = 0; 1239 int cpu_a = 0, cpu_b = 0; 1240 1241 if (rseq_register_current_thread()) { 1242 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1243 errno, strerror(errno)); 1244 abort(); 1245 } 1246 1247 /* Init lists. */ 1248 test_membarrier_init_percpu_list(&list_a); 1249 test_membarrier_init_percpu_list(&list_b); 1250 1251 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); 1252 1253 while (!atomic_load(&args->stop)) { 1254 /* list_a is "active". */ 1255 cpu_a = rand() % CPU_SETSIZE; 1256 /* 1257 * As list_b is "inactive", we should never see changes 1258 * to list_b. 1259 */ 1260 if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) { 1261 fprintf(stderr, "Membarrier test failed\n"); 1262 abort(); 1263 } 1264 1265 /* Make list_b "active". */ 1266 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); 1267 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 1268 MEMBARRIER_CMD_FLAG_CPU, cpu_a) && 1269 errno != ENXIO /* missing CPU */) { 1270 perror("sys_membarrier"); 1271 abort(); 1272 } 1273 /* 1274 * Cpu A should now only modify list_b, so the values 1275 * in list_a should be stable. 1276 */ 1277 expect_a = atomic_load(&list_a.c[cpu_a].head->data); 1278 1279 cpu_b = rand() % CPU_SETSIZE; 1280 /* 1281 * As list_a is "inactive", we should never see changes 1282 * to list_a. 1283 */ 1284 if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) { 1285 fprintf(stderr, "Membarrier test failed\n"); 1286 abort(); 1287 } 1288 1289 /* Make list_a "active". */ 1290 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); 1291 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 1292 MEMBARRIER_CMD_FLAG_CPU, cpu_b) && 1293 errno != ENXIO /* missing CPU*/) { 1294 perror("sys_membarrier"); 1295 abort(); 1296 } 1297 /* Remember a value from list_b. */ 1298 expect_b = atomic_load(&list_b.c[cpu_b].head->data); 1299 } 1300 1301 test_membarrier_free_percpu_list(&list_a); 1302 test_membarrier_free_percpu_list(&list_b); 1303 1304 if (rseq_unregister_current_thread()) { 1305 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", 1306 errno, strerror(errno)); 1307 abort(); 1308 } 1309 return NULL; 1310 } 1311 1312 void test_membarrier(void) 1313 { 1314 const int num_threads = opt_threads; 1315 struct test_membarrier_thread_args thread_args; 1316 pthread_t worker_threads[num_threads]; 1317 pthread_t manager_thread; 1318 int i, ret; 1319 1320 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) { 1321 perror("sys_membarrier"); 1322 abort(); 1323 } 1324 1325 thread_args.stop = 0; 1326 thread_args.percpu_list_ptr = 0; 1327 ret = pthread_create(&manager_thread, NULL, 1328 test_membarrier_manager_thread, &thread_args); 1329 if (ret) { 1330 errno = ret; 1331 perror("pthread_create"); 1332 abort(); 1333 } 1334 1335 for (i = 0; i < num_threads; i++) { 1336 ret = pthread_create(&worker_threads[i], NULL, 1337 test_membarrier_worker_thread, &thread_args); 1338 if (ret) { 1339 errno = ret; 1340 perror("pthread_create"); 1341 abort(); 1342 } 1343 } 1344 1345 1346 for (i = 0; i < num_threads; i++) { 1347 ret = pthread_join(worker_threads[i], NULL); 1348 if (ret) { 1349 errno = ret; 1350 perror("pthread_join"); 1351 abort(); 1352 } 1353 } 1354 1355 atomic_store(&thread_args.stop, 1); 1356 ret = pthread_join(manager_thread, NULL); 1357 if (ret) { 1358 errno = ret; 1359 perror("pthread_join"); 1360 abort(); 1361 } 1362 } 1363 #else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ 1364 void test_membarrier(void) 1365 { 1366 fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " 1367 "Skipping membarrier test.\n"); 1368 } 1369 #endif 1370 1371 static void show_usage(int argc, char **argv) 1372 { 1373 printf("Usage : %s <OPTIONS>\n", 1374 argv[0]); 1375 printf("OPTIONS:\n"); 1376 printf(" [-1 loops] Number of loops for delay injection 1\n"); 1377 printf(" [-2 loops] Number of loops for delay injection 2\n"); 1378 printf(" [-3 loops] Number of loops for delay injection 3\n"); 1379 printf(" [-4 loops] Number of loops for delay injection 4\n"); 1380 printf(" [-5 loops] Number of loops for delay injection 5\n"); 1381 printf(" [-6 loops] Number of loops for delay injection 6\n"); 1382 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"); 1383 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"); 1384 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"); 1385 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"); 1386 printf(" [-y] Yield\n"); 1387 printf(" [-k] Kill thread with signal\n"); 1388 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"); 1389 printf(" [-t N] Number of threads (default 200)\n"); 1390 printf(" [-r N] Number of repetitions per thread (default 5000)\n"); 1391 printf(" [-d] Disable rseq system call (no initialization)\n"); 1392 printf(" [-D M] Disable rseq for each M threads\n"); 1393 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); 1394 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); 1395 printf(" [-v] Verbose output.\n"); 1396 printf(" [-h] Show this help.\n"); 1397 printf("\n"); 1398 } 1399 1400 int main(int argc, char **argv) 1401 { 1402 int i; 1403 1404 for (i = 1; i < argc; i++) { 1405 if (argv[i][0] != '-') 1406 continue; 1407 switch (argv[i][1]) { 1408 case '1': 1409 case '2': 1410 case '3': 1411 case '4': 1412 case '5': 1413 case '6': 1414 case '7': 1415 case '8': 1416 case '9': 1417 if (argc < i + 2) { 1418 show_usage(argc, argv); 1419 goto error; 1420 } 1421 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]); 1422 i++; 1423 break; 1424 case 'm': 1425 if (argc < i + 2) { 1426 show_usage(argc, argv); 1427 goto error; 1428 } 1429 opt_modulo = atol(argv[i + 1]); 1430 if (opt_modulo < 0) { 1431 show_usage(argc, argv); 1432 goto error; 1433 } 1434 i++; 1435 break; 1436 case 's': 1437 if (argc < i + 2) { 1438 show_usage(argc, argv); 1439 goto error; 1440 } 1441 opt_sleep = atol(argv[i + 1]); 1442 if (opt_sleep < 0) { 1443 show_usage(argc, argv); 1444 goto error; 1445 } 1446 i++; 1447 break; 1448 case 'y': 1449 opt_yield = 1; 1450 break; 1451 case 'k': 1452 opt_signal = 1; 1453 break; 1454 case 'd': 1455 opt_disable_rseq = 1; 1456 break; 1457 case 'D': 1458 if (argc < i + 2) { 1459 show_usage(argc, argv); 1460 goto error; 1461 } 1462 opt_disable_mod = atol(argv[i + 1]); 1463 if (opt_disable_mod < 0) { 1464 show_usage(argc, argv); 1465 goto error; 1466 } 1467 i++; 1468 break; 1469 case 't': 1470 if (argc < i + 2) { 1471 show_usage(argc, argv); 1472 goto error; 1473 } 1474 opt_threads = atol(argv[i + 1]); 1475 if (opt_threads < 0) { 1476 show_usage(argc, argv); 1477 goto error; 1478 } 1479 i++; 1480 break; 1481 case 'r': 1482 if (argc < i + 2) { 1483 show_usage(argc, argv); 1484 goto error; 1485 } 1486 opt_reps = atoll(argv[i + 1]); 1487 if (opt_reps < 0) { 1488 show_usage(argc, argv); 1489 goto error; 1490 } 1491 i++; 1492 break; 1493 case 'h': 1494 show_usage(argc, argv); 1495 goto end; 1496 case 'T': 1497 if (argc < i + 2) { 1498 show_usage(argc, argv); 1499 goto error; 1500 } 1501 opt_test = *argv[i + 1]; 1502 switch (opt_test) { 1503 case 's': 1504 case 'l': 1505 case 'i': 1506 case 'b': 1507 case 'm': 1508 case 'r': 1509 break; 1510 default: 1511 show_usage(argc, argv); 1512 goto error; 1513 } 1514 i++; 1515 break; 1516 case 'v': 1517 verbose = 1; 1518 break; 1519 case 'M': 1520 opt_mb = 1; 1521 break; 1522 default: 1523 show_usage(argc, argv); 1524 goto error; 1525 } 1526 } 1527 1528 loop_cnt_1 = loop_cnt[1]; 1529 loop_cnt_2 = loop_cnt[2]; 1530 loop_cnt_3 = loop_cnt[3]; 1531 loop_cnt_4 = loop_cnt[4]; 1532 loop_cnt_5 = loop_cnt[5]; 1533 loop_cnt_6 = loop_cnt[6]; 1534 1535 if (set_signal_handler()) 1536 goto error; 1537 1538 if (!opt_disable_rseq && rseq_register_current_thread()) 1539 goto error; 1540 switch (opt_test) { 1541 case 's': 1542 printf_verbose("spinlock\n"); 1543 test_percpu_spinlock(); 1544 break; 1545 case 'l': 1546 printf_verbose("linked list\n"); 1547 test_percpu_list(); 1548 break; 1549 case 'b': 1550 printf_verbose("buffer\n"); 1551 test_percpu_buffer(); 1552 break; 1553 case 'm': 1554 printf_verbose("memcpy buffer\n"); 1555 test_percpu_memcpy_buffer(); 1556 break; 1557 case 'i': 1558 printf_verbose("counter increment\n"); 1559 test_percpu_inc(); 1560 break; 1561 case 'r': 1562 printf_verbose("membarrier\n"); 1563 test_membarrier(); 1564 break; 1565 } 1566 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1567 abort(); 1568 end: 1569 return 0; 1570 1571 error: 1572 return -1; 1573 } 1574