// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

static inline pid_t gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)	\
	do {				\
		if (verbose)		\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

#if defined(__x86_64__) || defined(__i386__)

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#ifdef __i386__

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \
	"mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#else
#error "Unsupported architecture"
#endif

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	" ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	" cbz " INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	" sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	" cbnz " INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif __PPC__

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */
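
/*
 * Note: the RSEQ_INJECT_* hooks above are defined before rseq.h is
 * included below; the rseq fast paths use them to inject configurable
 * delay loops (driven by loop_cnt[1..9]) and to account aborted
 * critical sections in nr_abort.
 */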

#include "rseq.h"

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
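
/*
 * The cpu number returned by rseq_this_cpu_lock() selects which per-cpu
 * slot the caller may access while holding the lock, and must be passed
 * back to rseq_percpu_unlock(); see test_percpu_spinlock_thread() below.
 */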

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_cpu_start();

		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}
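
/*
 * Lock-free counterpart of the spinlock test: each iteration above is a
 * single rseq_addv() on the current CPU's counter slot, retried whenever
 * the rseq critical section aborts (e.g. on migration or signal delivery).
 */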

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
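
/*
 * Both list helpers above (and the buffer variants below) report, through
 * the optional _cpu out-parameter, the CPU on which the operation ran, so
 * callers can keep working on the matching per-cpu slot.
 */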

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
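
/*
 * Per-cpu pointer buffer. Push speculatively stores the node pointer at
 * array[offset] and then publishes offset + 1 as the final, committing
 * store of the rseq sequence; pop re-checks both offset and the slot
 * content before moving offset back. The -M option selects the
 * release-ordered push variant.
 */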

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
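
/*
 * The test below pre-fills BUFFER_ITEM_PER_CPU items per allowed CPU, but
 * sizes every per-cpu array for CPU_SETSIZE * BUFFER_ITEM_PER_CPU entries
 * so that, in the worst case, all items can end up pushed onto one CPU.
 */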

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
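
/*
 * Pop for the memcpy buffer copies the element out by value and updates
 * the offset; rseq_cmpeqv_trymemcpy_storev() performs the copy and the
 * final offset store within one rseq critical section, so an interrupted
 * attempt is aborted and retried as a whole.
 */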

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
	       argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}
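
/*
 * Example invocation (illustrative values; assuming the built binary is
 * named param_test): run the per-cpu linked-list test with 16 threads and
 * 10000 repetitions per thread, with verbose output:
 *
 *	./param_test -T l -t 16 -r 10000 -v
 */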

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}