1 // SPDX-License-Identifier: LGPL-2.1 2 #define _GNU_SOURCE 3 #include <assert.h> 4 #include <pthread.h> 5 #include <sched.h> 6 #include <stdint.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <syscall.h> 11 #include <unistd.h> 12 #include <poll.h> 13 #include <sys/types.h> 14 #include <signal.h> 15 #include <errno.h> 16 #include <stddef.h> 17 18 static inline pid_t gettid(void) 19 { 20 return syscall(__NR_gettid); 21 } 22 23 #define NR_INJECT 9 24 static int loop_cnt[NR_INJECT + 1]; 25 26 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used)); 27 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used)); 28 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used)); 29 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used)); 30 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used)); 31 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used)); 32 33 static int opt_modulo, verbose; 34 35 static int opt_yield, opt_signal, opt_sleep, 36 opt_disable_rseq, opt_threads = 200, 37 opt_disable_mod = 0, opt_test = 's', opt_mb = 0; 38 39 #ifndef RSEQ_SKIP_FASTPATH 40 static long long opt_reps = 5000; 41 #else 42 static long long opt_reps = 100; 43 #endif 44 45 static __thread __attribute__((tls_model("initial-exec"))) 46 unsigned int signals_delivered; 47 48 #ifndef BENCHMARK 49 50 static __thread __attribute__((tls_model("initial-exec"), unused)) 51 unsigned int yield_mod_cnt, nr_abort; 52 53 #define printf_verbose(fmt, ...) \ 54 do { \ 55 if (verbose) \ 56 printf(fmt, ## __VA_ARGS__); \ 57 } while (0) 58 59 #ifdef __i386__ 60 61 #define INJECT_ASM_REG "eax" 62 63 #define RSEQ_INJECT_CLOBBER \ 64 , INJECT_ASM_REG 65 66 #define RSEQ_INJECT_ASM(n) \ 67 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ 68 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 69 "jz 333f\n\t" \ 70 "222:\n\t" \ 71 "dec %%" INJECT_ASM_REG "\n\t" \ 72 "jnz 222b\n\t" \ 73 "333:\n\t" 74 75 #elif defined(__x86_64__) 76 77 #define INJECT_ASM_REG_P "rax" 78 #define INJECT_ASM_REG "eax" 79 80 #define RSEQ_INJECT_CLOBBER \ 81 , INJECT_ASM_REG_P \ 82 , INJECT_ASM_REG 83 84 #define RSEQ_INJECT_ASM(n) \ 85 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \ 86 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \ 87 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 88 "jz 333f\n\t" \ 89 "222:\n\t" \ 90 "dec %%" INJECT_ASM_REG "\n\t" \ 91 "jnz 222b\n\t" \ 92 "333:\n\t" 93 94 #elif defined(__s390__) 95 96 #define RSEQ_INJECT_INPUT \ 97 , [loop_cnt_1]"m"(loop_cnt[1]) \ 98 , [loop_cnt_2]"m"(loop_cnt[2]) \ 99 , [loop_cnt_3]"m"(loop_cnt[3]) \ 100 , [loop_cnt_4]"m"(loop_cnt[4]) \ 101 , [loop_cnt_5]"m"(loop_cnt[5]) \ 102 , [loop_cnt_6]"m"(loop_cnt[6]) 103 104 #define INJECT_ASM_REG "r12" 105 106 #define RSEQ_INJECT_CLOBBER \ 107 , INJECT_ASM_REG 108 109 #define RSEQ_INJECT_ASM(n) \ 110 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 111 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \ 112 "je 333f\n\t" \ 113 "222:\n\t" \ 114 "ahi %%" INJECT_ASM_REG ", -1\n\t" \ 115 "jnz 222b\n\t" \ 116 "333:\n\t" 117 118 #elif defined(__ARMEL__) 119 120 #define RSEQ_INJECT_INPUT \ 121 , [loop_cnt_1]"m"(loop_cnt[1]) \ 122 , [loop_cnt_2]"m"(loop_cnt[2]) \ 123 , [loop_cnt_3]"m"(loop_cnt[3]) \ 124 , [loop_cnt_4]"m"(loop_cnt[4]) \ 125 , [loop_cnt_5]"m"(loop_cnt[5]) \ 126 , [loop_cnt_6]"m"(loop_cnt[6]) 127 128 #define INJECT_ASM_REG "r4" 129 130 #define RSEQ_INJECT_CLOBBER \ 131 , INJECT_ASM_REG 132 133 #define RSEQ_INJECT_ASM(n) \ 134 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 135 "cmp " INJECT_ASM_REG ", #0\n\t" \ 136 "beq 333f\n\t" \ 137 "222:\n\t" \ 138 "subs " INJECT_ASM_REG ", #1\n\t" \ 139 "bne 222b\n\t" \ 140 "333:\n\t" 141 142 #elif defined(__AARCH64EL__) 143 144 #define RSEQ_INJECT_INPUT \ 145 , [loop_cnt_1] "Qo" (loop_cnt[1]) \ 146 , [loop_cnt_2] "Qo" (loop_cnt[2]) \ 147 , [loop_cnt_3] "Qo" (loop_cnt[3]) \ 148 , [loop_cnt_4] "Qo" (loop_cnt[4]) \ 149 , [loop_cnt_5] "Qo" (loop_cnt[5]) \ 150 , [loop_cnt_6] "Qo" (loop_cnt[6]) 151 152 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32 153 154 #define RSEQ_INJECT_ASM(n) \ 155 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \ 156 " cbz " INJECT_ASM_REG ", 333f\n" \ 157 "222:\n" \ 158 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \ 159 " cbnz " INJECT_ASM_REG ", 222b\n" \ 160 "333:\n" 161 162 #elif __PPC__ 163 164 #define RSEQ_INJECT_INPUT \ 165 , [loop_cnt_1]"m"(loop_cnt[1]) \ 166 , [loop_cnt_2]"m"(loop_cnt[2]) \ 167 , [loop_cnt_3]"m"(loop_cnt[3]) \ 168 , [loop_cnt_4]"m"(loop_cnt[4]) \ 169 , [loop_cnt_5]"m"(loop_cnt[5]) \ 170 , [loop_cnt_6]"m"(loop_cnt[6]) 171 172 #define INJECT_ASM_REG "r18" 173 174 #define RSEQ_INJECT_CLOBBER \ 175 , INJECT_ASM_REG 176 177 #define RSEQ_INJECT_ASM(n) \ 178 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 179 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \ 180 "beq 333f\n\t" \ 181 "222:\n\t" \ 182 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ 183 "bne 222b\n\t" \ 184 "333:\n\t" 185 186 #elif defined(__mips__) 187 188 #define RSEQ_INJECT_INPUT \ 189 , [loop_cnt_1]"m"(loop_cnt[1]) \ 190 , [loop_cnt_2]"m"(loop_cnt[2]) \ 191 , [loop_cnt_3]"m"(loop_cnt[3]) \ 192 , [loop_cnt_4]"m"(loop_cnt[4]) \ 193 , [loop_cnt_5]"m"(loop_cnt[5]) \ 194 , [loop_cnt_6]"m"(loop_cnt[6]) 195 196 #define INJECT_ASM_REG "$5" 197 198 #define RSEQ_INJECT_CLOBBER \ 199 , INJECT_ASM_REG 200 201 #define RSEQ_INJECT_ASM(n) \ 202 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 203 "beqz " INJECT_ASM_REG ", 333f\n\t" \ 204 "222:\n\t" \ 205 "addiu " INJECT_ASM_REG ", -1\n\t" \ 206 "bnez " INJECT_ASM_REG ", 222b\n\t" \ 207 "333:\n\t" 208 209 #else 210 #error unsupported target 211 #endif 212 213 #define RSEQ_INJECT_FAILED \ 214 nr_abort++; 215 216 #define RSEQ_INJECT_C(n) \ 217 { \ 218 int loc_i, loc_nr_loops = loop_cnt[n]; \ 219 \ 220 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \ 221 rseq_barrier(); \ 222 } \ 223 if (loc_nr_loops == -1 && opt_modulo) { \ 224 if (yield_mod_cnt == opt_modulo - 1) { \ 225 if (opt_sleep > 0) \ 226 poll(NULL, 0, opt_sleep); \ 227 if (opt_yield) \ 228 sched_yield(); \ 229 if (opt_signal) \ 230 raise(SIGUSR1); \ 231 yield_mod_cnt = 0; \ 232 } else { \ 233 yield_mod_cnt++; \ 234 } \ 235 } \ 236 } 237 238 #else 239 240 #define printf_verbose(fmt, ...) 241 242 #endif /* BENCHMARK */ 243 244 #include "rseq.h" 245 246 struct percpu_lock_entry { 247 intptr_t v; 248 } __attribute__((aligned(128))); 249 250 struct percpu_lock { 251 struct percpu_lock_entry c[CPU_SETSIZE]; 252 }; 253 254 struct test_data_entry { 255 intptr_t count; 256 } __attribute__((aligned(128))); 257 258 struct spinlock_test_data { 259 struct percpu_lock lock; 260 struct test_data_entry c[CPU_SETSIZE]; 261 }; 262 263 struct spinlock_thread_test_data { 264 struct spinlock_test_data *data; 265 long long reps; 266 int reg; 267 }; 268 269 struct inc_test_data { 270 struct test_data_entry c[CPU_SETSIZE]; 271 }; 272 273 struct inc_thread_test_data { 274 struct inc_test_data *data; 275 long long reps; 276 int reg; 277 }; 278 279 struct percpu_list_node { 280 intptr_t data; 281 struct percpu_list_node *next; 282 }; 283 284 struct percpu_list_entry { 285 struct percpu_list_node *head; 286 } __attribute__((aligned(128))); 287 288 struct percpu_list { 289 struct percpu_list_entry c[CPU_SETSIZE]; 290 }; 291 292 #define BUFFER_ITEM_PER_CPU 100 293 294 struct percpu_buffer_node { 295 intptr_t data; 296 }; 297 298 struct percpu_buffer_entry { 299 intptr_t offset; 300 intptr_t buflen; 301 struct percpu_buffer_node **array; 302 } __attribute__((aligned(128))); 303 304 struct percpu_buffer { 305 struct percpu_buffer_entry c[CPU_SETSIZE]; 306 }; 307 308 #define MEMCPY_BUFFER_ITEM_PER_CPU 100 309 310 struct percpu_memcpy_buffer_node { 311 intptr_t data1; 312 uint64_t data2; 313 }; 314 315 struct percpu_memcpy_buffer_entry { 316 intptr_t offset; 317 intptr_t buflen; 318 struct percpu_memcpy_buffer_node *array; 319 } __attribute__((aligned(128))); 320 321 struct percpu_memcpy_buffer { 322 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; 323 }; 324 325 /* A simple percpu spinlock. Grabs lock on current cpu. */ 326 static int rseq_this_cpu_lock(struct percpu_lock *lock) 327 { 328 int cpu; 329 330 for (;;) { 331 int ret; 332 333 cpu = rseq_cpu_start(); 334 ret = rseq_cmpeqv_storev(&lock->c[cpu].v, 335 0, 1, cpu); 336 if (rseq_likely(!ret)) 337 break; 338 /* Retry if comparison fails or rseq aborts. */ 339 } 340 /* 341 * Acquire semantic when taking lock after control dependency. 342 * Matches rseq_smp_store_release(). 343 */ 344 rseq_smp_acquire__after_ctrl_dep(); 345 return cpu; 346 } 347 348 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) 349 { 350 assert(lock->c[cpu].v == 1); 351 /* 352 * Release lock, with release semantic. Matches 353 * rseq_smp_acquire__after_ctrl_dep(). 354 */ 355 rseq_smp_store_release(&lock->c[cpu].v, 0); 356 } 357 358 void *test_percpu_spinlock_thread(void *arg) 359 { 360 struct spinlock_thread_test_data *thread_data = arg; 361 struct spinlock_test_data *data = thread_data->data; 362 long long i, reps; 363 364 if (!opt_disable_rseq && thread_data->reg && 365 rseq_register_current_thread()) 366 abort(); 367 reps = thread_data->reps; 368 for (i = 0; i < reps; i++) { 369 int cpu = rseq_cpu_start(); 370 371 cpu = rseq_this_cpu_lock(&data->lock); 372 data->c[cpu].count++; 373 rseq_percpu_unlock(&data->lock, cpu); 374 #ifndef BENCHMARK 375 if (i != 0 && !(i % (reps / 10))) 376 printf_verbose("tid %d: count %lld\n", (int) gettid(), i); 377 #endif 378 } 379 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 380 (int) gettid(), nr_abort, signals_delivered); 381 if (!opt_disable_rseq && thread_data->reg && 382 rseq_unregister_current_thread()) 383 abort(); 384 return NULL; 385 } 386 387 /* 388 * A simple test which implements a sharded counter using a per-cpu 389 * lock. Obviously real applications might prefer to simply use a 390 * per-cpu increment; however, this is reasonable for a test and the 391 * lock can be extended to synchronize more complicated operations. 392 */ 393 void test_percpu_spinlock(void) 394 { 395 const int num_threads = opt_threads; 396 int i, ret; 397 uint64_t sum; 398 pthread_t test_threads[num_threads]; 399 struct spinlock_test_data data; 400 struct spinlock_thread_test_data thread_data[num_threads]; 401 402 memset(&data, 0, sizeof(data)); 403 for (i = 0; i < num_threads; i++) { 404 thread_data[i].reps = opt_reps; 405 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 406 thread_data[i].reg = 1; 407 else 408 thread_data[i].reg = 0; 409 thread_data[i].data = &data; 410 ret = pthread_create(&test_threads[i], NULL, 411 test_percpu_spinlock_thread, 412 &thread_data[i]); 413 if (ret) { 414 errno = ret; 415 perror("pthread_create"); 416 abort(); 417 } 418 } 419 420 for (i = 0; i < num_threads; i++) { 421 ret = pthread_join(test_threads[i], NULL); 422 if (ret) { 423 errno = ret; 424 perror("pthread_join"); 425 abort(); 426 } 427 } 428 429 sum = 0; 430 for (i = 0; i < CPU_SETSIZE; i++) 431 sum += data.c[i].count; 432 433 assert(sum == (uint64_t)opt_reps * num_threads); 434 } 435 436 void *test_percpu_inc_thread(void *arg) 437 { 438 struct inc_thread_test_data *thread_data = arg; 439 struct inc_test_data *data = thread_data->data; 440 long long i, reps; 441 442 if (!opt_disable_rseq && thread_data->reg && 443 rseq_register_current_thread()) 444 abort(); 445 reps = thread_data->reps; 446 for (i = 0; i < reps; i++) { 447 int ret; 448 449 do { 450 int cpu; 451 452 cpu = rseq_cpu_start(); 453 ret = rseq_addv(&data->c[cpu].count, 1, cpu); 454 } while (rseq_unlikely(ret)); 455 #ifndef BENCHMARK 456 if (i != 0 && !(i % (reps / 10))) 457 printf_verbose("tid %d: count %lld\n", (int) gettid(), i); 458 #endif 459 } 460 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 461 (int) gettid(), nr_abort, signals_delivered); 462 if (!opt_disable_rseq && thread_data->reg && 463 rseq_unregister_current_thread()) 464 abort(); 465 return NULL; 466 } 467 468 void test_percpu_inc(void) 469 { 470 const int num_threads = opt_threads; 471 int i, ret; 472 uint64_t sum; 473 pthread_t test_threads[num_threads]; 474 struct inc_test_data data; 475 struct inc_thread_test_data thread_data[num_threads]; 476 477 memset(&data, 0, sizeof(data)); 478 for (i = 0; i < num_threads; i++) { 479 thread_data[i].reps = opt_reps; 480 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 481 thread_data[i].reg = 1; 482 else 483 thread_data[i].reg = 0; 484 thread_data[i].data = &data; 485 ret = pthread_create(&test_threads[i], NULL, 486 test_percpu_inc_thread, 487 &thread_data[i]); 488 if (ret) { 489 errno = ret; 490 perror("pthread_create"); 491 abort(); 492 } 493 } 494 495 for (i = 0; i < num_threads; i++) { 496 ret = pthread_join(test_threads[i], NULL); 497 if (ret) { 498 errno = ret; 499 perror("pthread_join"); 500 abort(); 501 } 502 } 503 504 sum = 0; 505 for (i = 0; i < CPU_SETSIZE; i++) 506 sum += data.c[i].count; 507 508 assert(sum == (uint64_t)opt_reps * num_threads); 509 } 510 511 void this_cpu_list_push(struct percpu_list *list, 512 struct percpu_list_node *node, 513 int *_cpu) 514 { 515 int cpu; 516 517 for (;;) { 518 intptr_t *targetptr, newval, expect; 519 int ret; 520 521 cpu = rseq_cpu_start(); 522 /* Load list->c[cpu].head with single-copy atomicity. */ 523 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); 524 newval = (intptr_t)node; 525 targetptr = (intptr_t *)&list->c[cpu].head; 526 node->next = (struct percpu_list_node *)expect; 527 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); 528 if (rseq_likely(!ret)) 529 break; 530 /* Retry if comparison fails or rseq aborts. */ 531 } 532 if (_cpu) 533 *_cpu = cpu; 534 } 535 536 /* 537 * Unlike a traditional lock-less linked list; the availability of a 538 * rseq primitive allows us to implement pop without concerns over 539 * ABA-type races. 540 */ 541 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, 542 int *_cpu) 543 { 544 struct percpu_list_node *node = NULL; 545 int cpu; 546 547 for (;;) { 548 struct percpu_list_node *head; 549 intptr_t *targetptr, expectnot, *load; 550 off_t offset; 551 int ret; 552 553 cpu = rseq_cpu_start(); 554 targetptr = (intptr_t *)&list->c[cpu].head; 555 expectnot = (intptr_t)NULL; 556 offset = offsetof(struct percpu_list_node, next); 557 load = (intptr_t *)&head; 558 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, 559 offset, load, cpu); 560 if (rseq_likely(!ret)) { 561 node = head; 562 break; 563 } 564 if (ret > 0) 565 break; 566 /* Retry if rseq aborts. */ 567 } 568 if (_cpu) 569 *_cpu = cpu; 570 return node; 571 } 572 573 /* 574 * __percpu_list_pop is not safe against concurrent accesses. Should 575 * only be used on lists that are not concurrently modified. 576 */ 577 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) 578 { 579 struct percpu_list_node *node; 580 581 node = list->c[cpu].head; 582 if (!node) 583 return NULL; 584 list->c[cpu].head = node->next; 585 return node; 586 } 587 588 void *test_percpu_list_thread(void *arg) 589 { 590 long long i, reps; 591 struct percpu_list *list = (struct percpu_list *)arg; 592 593 if (!opt_disable_rseq && rseq_register_current_thread()) 594 abort(); 595 596 reps = opt_reps; 597 for (i = 0; i < reps; i++) { 598 struct percpu_list_node *node; 599 600 node = this_cpu_list_pop(list, NULL); 601 if (opt_yield) 602 sched_yield(); /* encourage shuffling */ 603 if (node) 604 this_cpu_list_push(list, node, NULL); 605 } 606 607 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 608 (int) gettid(), nr_abort, signals_delivered); 609 if (!opt_disable_rseq && rseq_unregister_current_thread()) 610 abort(); 611 612 return NULL; 613 } 614 615 /* Simultaneous modification to a per-cpu linked list from many threads. */ 616 void test_percpu_list(void) 617 { 618 const int num_threads = opt_threads; 619 int i, j, ret; 620 uint64_t sum = 0, expected_sum = 0; 621 struct percpu_list list; 622 pthread_t test_threads[num_threads]; 623 cpu_set_t allowed_cpus; 624 625 memset(&list, 0, sizeof(list)); 626 627 /* Generate list entries for every usable cpu. */ 628 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 629 for (i = 0; i < CPU_SETSIZE; i++) { 630 if (!CPU_ISSET(i, &allowed_cpus)) 631 continue; 632 for (j = 1; j <= 100; j++) { 633 struct percpu_list_node *node; 634 635 expected_sum += j; 636 637 node = malloc(sizeof(*node)); 638 assert(node); 639 node->data = j; 640 node->next = list.c[i].head; 641 list.c[i].head = node; 642 } 643 } 644 645 for (i = 0; i < num_threads; i++) { 646 ret = pthread_create(&test_threads[i], NULL, 647 test_percpu_list_thread, &list); 648 if (ret) { 649 errno = ret; 650 perror("pthread_create"); 651 abort(); 652 } 653 } 654 655 for (i = 0; i < num_threads; i++) { 656 ret = pthread_join(test_threads[i], NULL); 657 if (ret) { 658 errno = ret; 659 perror("pthread_join"); 660 abort(); 661 } 662 } 663 664 for (i = 0; i < CPU_SETSIZE; i++) { 665 struct percpu_list_node *node; 666 667 if (!CPU_ISSET(i, &allowed_cpus)) 668 continue; 669 670 while ((node = __percpu_list_pop(&list, i))) { 671 sum += node->data; 672 free(node); 673 } 674 } 675 676 /* 677 * All entries should now be accounted for (unless some external 678 * actor is interfering with our allowed affinity while this 679 * test is running). 680 */ 681 assert(sum == expected_sum); 682 } 683 684 bool this_cpu_buffer_push(struct percpu_buffer *buffer, 685 struct percpu_buffer_node *node, 686 int *_cpu) 687 { 688 bool result = false; 689 int cpu; 690 691 for (;;) { 692 intptr_t *targetptr_spec, newval_spec; 693 intptr_t *targetptr_final, newval_final; 694 intptr_t offset; 695 int ret; 696 697 cpu = rseq_cpu_start(); 698 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 699 if (offset == buffer->c[cpu].buflen) 700 break; 701 newval_spec = (intptr_t)node; 702 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; 703 newval_final = offset + 1; 704 targetptr_final = &buffer->c[cpu].offset; 705 if (opt_mb) 706 ret = rseq_cmpeqv_trystorev_storev_release( 707 targetptr_final, offset, targetptr_spec, 708 newval_spec, newval_final, cpu); 709 else 710 ret = rseq_cmpeqv_trystorev_storev(targetptr_final, 711 offset, targetptr_spec, newval_spec, 712 newval_final, cpu); 713 if (rseq_likely(!ret)) { 714 result = true; 715 break; 716 } 717 /* Retry if comparison fails or rseq aborts. */ 718 } 719 if (_cpu) 720 *_cpu = cpu; 721 return result; 722 } 723 724 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, 725 int *_cpu) 726 { 727 struct percpu_buffer_node *head; 728 int cpu; 729 730 for (;;) { 731 intptr_t *targetptr, newval; 732 intptr_t offset; 733 int ret; 734 735 cpu = rseq_cpu_start(); 736 /* Load offset with single-copy atomicity. */ 737 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 738 if (offset == 0) { 739 head = NULL; 740 break; 741 } 742 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); 743 newval = offset - 1; 744 targetptr = (intptr_t *)&buffer->c[cpu].offset; 745 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, 746 (intptr_t *)&buffer->c[cpu].array[offset - 1], 747 (intptr_t)head, newval, cpu); 748 if (rseq_likely(!ret)) 749 break; 750 /* Retry if comparison fails or rseq aborts. */ 751 } 752 if (_cpu) 753 *_cpu = cpu; 754 return head; 755 } 756 757 /* 758 * __percpu_buffer_pop is not safe against concurrent accesses. Should 759 * only be used on buffers that are not concurrently modified. 760 */ 761 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, 762 int cpu) 763 { 764 struct percpu_buffer_node *head; 765 intptr_t offset; 766 767 offset = buffer->c[cpu].offset; 768 if (offset == 0) 769 return NULL; 770 head = buffer->c[cpu].array[offset - 1]; 771 buffer->c[cpu].offset = offset - 1; 772 return head; 773 } 774 775 void *test_percpu_buffer_thread(void *arg) 776 { 777 long long i, reps; 778 struct percpu_buffer *buffer = (struct percpu_buffer *)arg; 779 780 if (!opt_disable_rseq && rseq_register_current_thread()) 781 abort(); 782 783 reps = opt_reps; 784 for (i = 0; i < reps; i++) { 785 struct percpu_buffer_node *node; 786 787 node = this_cpu_buffer_pop(buffer, NULL); 788 if (opt_yield) 789 sched_yield(); /* encourage shuffling */ 790 if (node) { 791 if (!this_cpu_buffer_push(buffer, node, NULL)) { 792 /* Should increase buffer size. */ 793 abort(); 794 } 795 } 796 } 797 798 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 799 (int) gettid(), nr_abort, signals_delivered); 800 if (!opt_disable_rseq && rseq_unregister_current_thread()) 801 abort(); 802 803 return NULL; 804 } 805 806 /* Simultaneous modification to a per-cpu buffer from many threads. */ 807 void test_percpu_buffer(void) 808 { 809 const int num_threads = opt_threads; 810 int i, j, ret; 811 uint64_t sum = 0, expected_sum = 0; 812 struct percpu_buffer buffer; 813 pthread_t test_threads[num_threads]; 814 cpu_set_t allowed_cpus; 815 816 memset(&buffer, 0, sizeof(buffer)); 817 818 /* Generate list entries for every usable cpu. */ 819 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 820 for (i = 0; i < CPU_SETSIZE; i++) { 821 if (!CPU_ISSET(i, &allowed_cpus)) 822 continue; 823 /* Worse-case is every item in same CPU. */ 824 buffer.c[i].array = 825 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 826 BUFFER_ITEM_PER_CPU); 827 assert(buffer.c[i].array); 828 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; 829 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { 830 struct percpu_buffer_node *node; 831 832 expected_sum += j; 833 834 /* 835 * We could theoretically put the word-sized 836 * "data" directly in the buffer. However, we 837 * want to model objects that would not fit 838 * within a single word, so allocate an object 839 * for each node. 840 */ 841 node = malloc(sizeof(*node)); 842 assert(node); 843 node->data = j; 844 buffer.c[i].array[j - 1] = node; 845 buffer.c[i].offset++; 846 } 847 } 848 849 for (i = 0; i < num_threads; i++) { 850 ret = pthread_create(&test_threads[i], NULL, 851 test_percpu_buffer_thread, &buffer); 852 if (ret) { 853 errno = ret; 854 perror("pthread_create"); 855 abort(); 856 } 857 } 858 859 for (i = 0; i < num_threads; i++) { 860 ret = pthread_join(test_threads[i], NULL); 861 if (ret) { 862 errno = ret; 863 perror("pthread_join"); 864 abort(); 865 } 866 } 867 868 for (i = 0; i < CPU_SETSIZE; i++) { 869 struct percpu_buffer_node *node; 870 871 if (!CPU_ISSET(i, &allowed_cpus)) 872 continue; 873 874 while ((node = __percpu_buffer_pop(&buffer, i))) { 875 sum += node->data; 876 free(node); 877 } 878 free(buffer.c[i].array); 879 } 880 881 /* 882 * All entries should now be accounted for (unless some external 883 * actor is interfering with our allowed affinity while this 884 * test is running). 885 */ 886 assert(sum == expected_sum); 887 } 888 889 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, 890 struct percpu_memcpy_buffer_node item, 891 int *_cpu) 892 { 893 bool result = false; 894 int cpu; 895 896 for (;;) { 897 intptr_t *targetptr_final, newval_final, offset; 898 char *destptr, *srcptr; 899 size_t copylen; 900 int ret; 901 902 cpu = rseq_cpu_start(); 903 /* Load offset with single-copy atomicity. */ 904 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 905 if (offset == buffer->c[cpu].buflen) 906 break; 907 destptr = (char *)&buffer->c[cpu].array[offset]; 908 srcptr = (char *)&item; 909 /* copylen must be <= 4kB. */ 910 copylen = sizeof(item); 911 newval_final = offset + 1; 912 targetptr_final = &buffer->c[cpu].offset; 913 if (opt_mb) 914 ret = rseq_cmpeqv_trymemcpy_storev_release( 915 targetptr_final, offset, 916 destptr, srcptr, copylen, 917 newval_final, cpu); 918 else 919 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 920 offset, destptr, srcptr, copylen, 921 newval_final, cpu); 922 if (rseq_likely(!ret)) { 923 result = true; 924 break; 925 } 926 /* Retry if comparison fails or rseq aborts. */ 927 } 928 if (_cpu) 929 *_cpu = cpu; 930 return result; 931 } 932 933 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 934 struct percpu_memcpy_buffer_node *item, 935 int *_cpu) 936 { 937 bool result = false; 938 int cpu; 939 940 for (;;) { 941 intptr_t *targetptr_final, newval_final, offset; 942 char *destptr, *srcptr; 943 size_t copylen; 944 int ret; 945 946 cpu = rseq_cpu_start(); 947 /* Load offset with single-copy atomicity. */ 948 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 949 if (offset == 0) 950 break; 951 destptr = (char *)item; 952 srcptr = (char *)&buffer->c[cpu].array[offset - 1]; 953 /* copylen must be <= 4kB. */ 954 copylen = sizeof(*item); 955 newval_final = offset - 1; 956 targetptr_final = &buffer->c[cpu].offset; 957 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 958 offset, destptr, srcptr, copylen, 959 newval_final, cpu); 960 if (rseq_likely(!ret)) { 961 result = true; 962 break; 963 } 964 /* Retry if comparison fails or rseq aborts. */ 965 } 966 if (_cpu) 967 *_cpu = cpu; 968 return result; 969 } 970 971 /* 972 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should 973 * only be used on buffers that are not concurrently modified. 974 */ 975 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 976 struct percpu_memcpy_buffer_node *item, 977 int cpu) 978 { 979 intptr_t offset; 980 981 offset = buffer->c[cpu].offset; 982 if (offset == 0) 983 return false; 984 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); 985 buffer->c[cpu].offset = offset - 1; 986 return true; 987 } 988 989 void *test_percpu_memcpy_buffer_thread(void *arg) 990 { 991 long long i, reps; 992 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; 993 994 if (!opt_disable_rseq && rseq_register_current_thread()) 995 abort(); 996 997 reps = opt_reps; 998 for (i = 0; i < reps; i++) { 999 struct percpu_memcpy_buffer_node item; 1000 bool result; 1001 1002 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); 1003 if (opt_yield) 1004 sched_yield(); /* encourage shuffling */ 1005 if (result) { 1006 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { 1007 /* Should increase buffer size. */ 1008 abort(); 1009 } 1010 } 1011 } 1012 1013 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 1014 (int) gettid(), nr_abort, signals_delivered); 1015 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1016 abort(); 1017 1018 return NULL; 1019 } 1020 1021 /* Simultaneous modification to a per-cpu buffer from many threads. */ 1022 void test_percpu_memcpy_buffer(void) 1023 { 1024 const int num_threads = opt_threads; 1025 int i, j, ret; 1026 uint64_t sum = 0, expected_sum = 0; 1027 struct percpu_memcpy_buffer buffer; 1028 pthread_t test_threads[num_threads]; 1029 cpu_set_t allowed_cpus; 1030 1031 memset(&buffer, 0, sizeof(buffer)); 1032 1033 /* Generate list entries for every usable cpu. */ 1034 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 1035 for (i = 0; i < CPU_SETSIZE; i++) { 1036 if (!CPU_ISSET(i, &allowed_cpus)) 1037 continue; 1038 /* Worse-case is every item in same CPU. */ 1039 buffer.c[i].array = 1040 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 1041 MEMCPY_BUFFER_ITEM_PER_CPU); 1042 assert(buffer.c[i].array); 1043 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; 1044 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { 1045 expected_sum += 2 * j + 1; 1046 1047 /* 1048 * We could theoretically put the word-sized 1049 * "data" directly in the buffer. However, we 1050 * want to model objects that would not fit 1051 * within a single word, so allocate an object 1052 * for each node. 1053 */ 1054 buffer.c[i].array[j - 1].data1 = j; 1055 buffer.c[i].array[j - 1].data2 = j + 1; 1056 buffer.c[i].offset++; 1057 } 1058 } 1059 1060 for (i = 0; i < num_threads; i++) { 1061 ret = pthread_create(&test_threads[i], NULL, 1062 test_percpu_memcpy_buffer_thread, 1063 &buffer); 1064 if (ret) { 1065 errno = ret; 1066 perror("pthread_create"); 1067 abort(); 1068 } 1069 } 1070 1071 for (i = 0; i < num_threads; i++) { 1072 ret = pthread_join(test_threads[i], NULL); 1073 if (ret) { 1074 errno = ret; 1075 perror("pthread_join"); 1076 abort(); 1077 } 1078 } 1079 1080 for (i = 0; i < CPU_SETSIZE; i++) { 1081 struct percpu_memcpy_buffer_node item; 1082 1083 if (!CPU_ISSET(i, &allowed_cpus)) 1084 continue; 1085 1086 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { 1087 sum += item.data1; 1088 sum += item.data2; 1089 } 1090 free(buffer.c[i].array); 1091 } 1092 1093 /* 1094 * All entries should now be accounted for (unless some external 1095 * actor is interfering with our allowed affinity while this 1096 * test is running). 1097 */ 1098 assert(sum == expected_sum); 1099 } 1100 1101 static void test_signal_interrupt_handler(int signo) 1102 { 1103 signals_delivered++; 1104 } 1105 1106 static int set_signal_handler(void) 1107 { 1108 int ret = 0; 1109 struct sigaction sa; 1110 sigset_t sigset; 1111 1112 ret = sigemptyset(&sigset); 1113 if (ret < 0) { 1114 perror("sigemptyset"); 1115 return ret; 1116 } 1117 1118 sa.sa_handler = test_signal_interrupt_handler; 1119 sa.sa_mask = sigset; 1120 sa.sa_flags = 0; 1121 ret = sigaction(SIGUSR1, &sa, NULL); 1122 if (ret < 0) { 1123 perror("sigaction"); 1124 return ret; 1125 } 1126 1127 printf_verbose("Signal handler set for SIGUSR1\n"); 1128 1129 return ret; 1130 } 1131 1132 static void show_usage(int argc, char **argv) 1133 { 1134 printf("Usage : %s <OPTIONS>\n", 1135 argv[0]); 1136 printf("OPTIONS:\n"); 1137 printf(" [-1 loops] Number of loops for delay injection 1\n"); 1138 printf(" [-2 loops] Number of loops for delay injection 2\n"); 1139 printf(" [-3 loops] Number of loops for delay injection 3\n"); 1140 printf(" [-4 loops] Number of loops for delay injection 4\n"); 1141 printf(" [-5 loops] Number of loops for delay injection 5\n"); 1142 printf(" [-6 loops] Number of loops for delay injection 6\n"); 1143 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"); 1144 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"); 1145 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"); 1146 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"); 1147 printf(" [-y] Yield\n"); 1148 printf(" [-k] Kill thread with signal\n"); 1149 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"); 1150 printf(" [-t N] Number of threads (default 200)\n"); 1151 printf(" [-r N] Number of repetitions per thread (default 5000)\n"); 1152 printf(" [-d] Disable rseq system call (no initialization)\n"); 1153 printf(" [-D M] Disable rseq for each M threads\n"); 1154 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n"); 1155 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); 1156 printf(" [-v] Verbose output.\n"); 1157 printf(" [-h] Show this help.\n"); 1158 printf("\n"); 1159 } 1160 1161 int main(int argc, char **argv) 1162 { 1163 int i; 1164 1165 for (i = 1; i < argc; i++) { 1166 if (argv[i][0] != '-') 1167 continue; 1168 switch (argv[i][1]) { 1169 case '1': 1170 case '2': 1171 case '3': 1172 case '4': 1173 case '5': 1174 case '6': 1175 case '7': 1176 case '8': 1177 case '9': 1178 if (argc < i + 2) { 1179 show_usage(argc, argv); 1180 goto error; 1181 } 1182 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]); 1183 i++; 1184 break; 1185 case 'm': 1186 if (argc < i + 2) { 1187 show_usage(argc, argv); 1188 goto error; 1189 } 1190 opt_modulo = atol(argv[i + 1]); 1191 if (opt_modulo < 0) { 1192 show_usage(argc, argv); 1193 goto error; 1194 } 1195 i++; 1196 break; 1197 case 's': 1198 if (argc < i + 2) { 1199 show_usage(argc, argv); 1200 goto error; 1201 } 1202 opt_sleep = atol(argv[i + 1]); 1203 if (opt_sleep < 0) { 1204 show_usage(argc, argv); 1205 goto error; 1206 } 1207 i++; 1208 break; 1209 case 'y': 1210 opt_yield = 1; 1211 break; 1212 case 'k': 1213 opt_signal = 1; 1214 break; 1215 case 'd': 1216 opt_disable_rseq = 1; 1217 break; 1218 case 'D': 1219 if (argc < i + 2) { 1220 show_usage(argc, argv); 1221 goto error; 1222 } 1223 opt_disable_mod = atol(argv[i + 1]); 1224 if (opt_disable_mod < 0) { 1225 show_usage(argc, argv); 1226 goto error; 1227 } 1228 i++; 1229 break; 1230 case 't': 1231 if (argc < i + 2) { 1232 show_usage(argc, argv); 1233 goto error; 1234 } 1235 opt_threads = atol(argv[i + 1]); 1236 if (opt_threads < 0) { 1237 show_usage(argc, argv); 1238 goto error; 1239 } 1240 i++; 1241 break; 1242 case 'r': 1243 if (argc < i + 2) { 1244 show_usage(argc, argv); 1245 goto error; 1246 } 1247 opt_reps = atoll(argv[i + 1]); 1248 if (opt_reps < 0) { 1249 show_usage(argc, argv); 1250 goto error; 1251 } 1252 i++; 1253 break; 1254 case 'h': 1255 show_usage(argc, argv); 1256 goto end; 1257 case 'T': 1258 if (argc < i + 2) { 1259 show_usage(argc, argv); 1260 goto error; 1261 } 1262 opt_test = *argv[i + 1]; 1263 switch (opt_test) { 1264 case 's': 1265 case 'l': 1266 case 'i': 1267 case 'b': 1268 case 'm': 1269 break; 1270 default: 1271 show_usage(argc, argv); 1272 goto error; 1273 } 1274 i++; 1275 break; 1276 case 'v': 1277 verbose = 1; 1278 break; 1279 case 'M': 1280 opt_mb = 1; 1281 break; 1282 default: 1283 show_usage(argc, argv); 1284 goto error; 1285 } 1286 } 1287 1288 loop_cnt_1 = loop_cnt[1]; 1289 loop_cnt_2 = loop_cnt[2]; 1290 loop_cnt_3 = loop_cnt[3]; 1291 loop_cnt_4 = loop_cnt[4]; 1292 loop_cnt_5 = loop_cnt[5]; 1293 loop_cnt_6 = loop_cnt[6]; 1294 1295 if (set_signal_handler()) 1296 goto error; 1297 1298 if (!opt_disable_rseq && rseq_register_current_thread()) 1299 goto error; 1300 switch (opt_test) { 1301 case 's': 1302 printf_verbose("spinlock\n"); 1303 test_percpu_spinlock(); 1304 break; 1305 case 'l': 1306 printf_verbose("linked list\n"); 1307 test_percpu_list(); 1308 break; 1309 case 'b': 1310 printf_verbose("buffer\n"); 1311 test_percpu_buffer(); 1312 break; 1313 case 'm': 1314 printf_verbose("memcpy buffer\n"); 1315 test_percpu_memcpy_buffer(); 1316 break; 1317 case 'i': 1318 printf_verbose("counter increment\n"); 1319 test_percpu_inc(); 1320 break; 1321 } 1322 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1323 abort(); 1324 end: 1325 return 0; 1326 1327 error: 1328 return -1; 1329 } 1330