1 // SPDX-License-Identifier: LGPL-2.1 2 #define _GNU_SOURCE 3 #include <assert.h> 4 #include <pthread.h> 5 #include <sched.h> 6 #include <stdint.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <syscall.h> 11 #include <unistd.h> 12 #include <poll.h> 13 #include <sys/types.h> 14 #include <signal.h> 15 #include <errno.h> 16 #include <stddef.h> 17 18 static inline pid_t gettid(void) 19 { 20 return syscall(__NR_gettid); 21 } 22 23 #define NR_INJECT 9 24 static int loop_cnt[NR_INJECT + 1]; 25 26 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used)); 27 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used)); 28 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used)); 29 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used)); 30 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used)); 31 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used)); 32 33 static int opt_modulo, verbose; 34 35 static int opt_yield, opt_signal, opt_sleep, 36 opt_disable_rseq, opt_threads = 200, 37 opt_disable_mod = 0, opt_test = 's', opt_mb = 0; 38 39 #ifndef RSEQ_SKIP_FASTPATH 40 static long long opt_reps = 5000; 41 #else 42 static long long opt_reps = 100; 43 #endif 44 45 static __thread __attribute__((tls_model("initial-exec"))) 46 unsigned int signals_delivered; 47 48 #ifndef BENCHMARK 49 50 static __thread __attribute__((tls_model("initial-exec"), unused)) 51 unsigned int yield_mod_cnt, nr_abort; 52 53 #define printf_verbose(fmt, ...) \ 54 do { \ 55 if (verbose) \ 56 printf(fmt, ## __VA_ARGS__); \ 57 } while (0) 58 59 #if defined(__x86_64__) || defined(__i386__) 60 61 #define INJECT_ASM_REG "eax" 62 63 #define RSEQ_INJECT_CLOBBER \ 64 , INJECT_ASM_REG 65 66 #ifdef __i386__ 67 68 #define RSEQ_INJECT_ASM(n) \ 69 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ 70 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 71 "jz 333f\n\t" \ 72 "222:\n\t" \ 73 "dec %%" INJECT_ASM_REG "\n\t" \ 74 "jnz 222b\n\t" \ 75 "333:\n\t" 76 77 #elif defined(__x86_64__) 78 79 #define RSEQ_INJECT_ASM(n) \ 80 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \ 81 "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \ 82 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 83 "jz 333f\n\t" \ 84 "222:\n\t" \ 85 "dec %%" INJECT_ASM_REG "\n\t" \ 86 "jnz 222b\n\t" \ 87 "333:\n\t" 88 89 #else 90 #error "Unsupported architecture" 91 #endif 92 93 #elif defined(__ARMEL__) 94 95 #define RSEQ_INJECT_INPUT \ 96 , [loop_cnt_1]"m"(loop_cnt[1]) \ 97 , [loop_cnt_2]"m"(loop_cnt[2]) \ 98 , [loop_cnt_3]"m"(loop_cnt[3]) \ 99 , [loop_cnt_4]"m"(loop_cnt[4]) \ 100 , [loop_cnt_5]"m"(loop_cnt[5]) \ 101 , [loop_cnt_6]"m"(loop_cnt[6]) 102 103 #define INJECT_ASM_REG "r4" 104 105 #define RSEQ_INJECT_CLOBBER \ 106 , INJECT_ASM_REG 107 108 #define RSEQ_INJECT_ASM(n) \ 109 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 110 "cmp " INJECT_ASM_REG ", #0\n\t" \ 111 "beq 333f\n\t" \ 112 "222:\n\t" \ 113 "subs " INJECT_ASM_REG ", #1\n\t" \ 114 "bne 222b\n\t" \ 115 "333:\n\t" 116 117 #elif __PPC__ 118 119 #define RSEQ_INJECT_INPUT \ 120 , [loop_cnt_1]"m"(loop_cnt[1]) \ 121 , [loop_cnt_2]"m"(loop_cnt[2]) \ 122 , [loop_cnt_3]"m"(loop_cnt[3]) \ 123 , [loop_cnt_4]"m"(loop_cnt[4]) \ 124 , [loop_cnt_5]"m"(loop_cnt[5]) \ 125 , [loop_cnt_6]"m"(loop_cnt[6]) 126 127 #define INJECT_ASM_REG "r18" 128 129 #define RSEQ_INJECT_CLOBBER \ 130 , INJECT_ASM_REG 131 132 #define RSEQ_INJECT_ASM(n) \ 133 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 134 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \ 135 "beq 333f\n\t" \ 136 "222:\n\t" \ 137 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ 138 "bne 222b\n\t" \ 139 "333:\n\t" 140 #else 141 #error unsupported target 142 #endif 143 144 #define RSEQ_INJECT_FAILED \ 145 nr_abort++; 146 147 #define RSEQ_INJECT_C(n) \ 148 { \ 149 int loc_i, loc_nr_loops = loop_cnt[n]; \ 150 \ 151 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \ 152 rseq_barrier(); \ 153 } \ 154 if (loc_nr_loops == -1 && opt_modulo) { \ 155 if (yield_mod_cnt == opt_modulo - 1) { \ 156 if (opt_sleep > 0) \ 157 poll(NULL, 0, opt_sleep); \ 158 if (opt_yield) \ 159 sched_yield(); \ 160 if (opt_signal) \ 161 raise(SIGUSR1); \ 162 yield_mod_cnt = 0; \ 163 } else { \ 164 yield_mod_cnt++; \ 165 } \ 166 } \ 167 } 168 169 #else 170 171 #define printf_verbose(fmt, ...) 172 173 #endif /* BENCHMARK */ 174 175 #include "rseq.h" 176 177 struct percpu_lock_entry { 178 intptr_t v; 179 } __attribute__((aligned(128))); 180 181 struct percpu_lock { 182 struct percpu_lock_entry c[CPU_SETSIZE]; 183 }; 184 185 struct test_data_entry { 186 intptr_t count; 187 } __attribute__((aligned(128))); 188 189 struct spinlock_test_data { 190 struct percpu_lock lock; 191 struct test_data_entry c[CPU_SETSIZE]; 192 }; 193 194 struct spinlock_thread_test_data { 195 struct spinlock_test_data *data; 196 long long reps; 197 int reg; 198 }; 199 200 struct inc_test_data { 201 struct test_data_entry c[CPU_SETSIZE]; 202 }; 203 204 struct inc_thread_test_data { 205 struct inc_test_data *data; 206 long long reps; 207 int reg; 208 }; 209 210 struct percpu_list_node { 211 intptr_t data; 212 struct percpu_list_node *next; 213 }; 214 215 struct percpu_list_entry { 216 struct percpu_list_node *head; 217 } __attribute__((aligned(128))); 218 219 struct percpu_list { 220 struct percpu_list_entry c[CPU_SETSIZE]; 221 }; 222 223 #define BUFFER_ITEM_PER_CPU 100 224 225 struct percpu_buffer_node { 226 intptr_t data; 227 }; 228 229 struct percpu_buffer_entry { 230 intptr_t offset; 231 intptr_t buflen; 232 struct percpu_buffer_node **array; 233 } __attribute__((aligned(128))); 234 235 struct percpu_buffer { 236 struct percpu_buffer_entry c[CPU_SETSIZE]; 237 }; 238 239 #define MEMCPY_BUFFER_ITEM_PER_CPU 100 240 241 struct percpu_memcpy_buffer_node { 242 intptr_t data1; 243 uint64_t data2; 244 }; 245 246 struct percpu_memcpy_buffer_entry { 247 intptr_t offset; 248 intptr_t buflen; 249 struct percpu_memcpy_buffer_node *array; 250 } __attribute__((aligned(128))); 251 252 struct percpu_memcpy_buffer { 253 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; 254 }; 255 256 /* A simple percpu spinlock. Grabs lock on current cpu. */ 257 static int rseq_this_cpu_lock(struct percpu_lock *lock) 258 { 259 int cpu; 260 261 for (;;) { 262 int ret; 263 264 cpu = rseq_cpu_start(); 265 ret = rseq_cmpeqv_storev(&lock->c[cpu].v, 266 0, 1, cpu); 267 if (rseq_likely(!ret)) 268 break; 269 /* Retry if comparison fails or rseq aborts. */ 270 } 271 /* 272 * Acquire semantic when taking lock after control dependency. 273 * Matches rseq_smp_store_release(). 274 */ 275 rseq_smp_acquire__after_ctrl_dep(); 276 return cpu; 277 } 278 279 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) 280 { 281 assert(lock->c[cpu].v == 1); 282 /* 283 * Release lock, with release semantic. Matches 284 * rseq_smp_acquire__after_ctrl_dep(). 285 */ 286 rseq_smp_store_release(&lock->c[cpu].v, 0); 287 } 288 289 void *test_percpu_spinlock_thread(void *arg) 290 { 291 struct spinlock_thread_test_data *thread_data = arg; 292 struct spinlock_test_data *data = thread_data->data; 293 long long i, reps; 294 295 if (!opt_disable_rseq && thread_data->reg && 296 rseq_register_current_thread()) 297 abort(); 298 reps = thread_data->reps; 299 for (i = 0; i < reps; i++) { 300 int cpu = rseq_cpu_start(); 301 302 cpu = rseq_this_cpu_lock(&data->lock); 303 data->c[cpu].count++; 304 rseq_percpu_unlock(&data->lock, cpu); 305 #ifndef BENCHMARK 306 if (i != 0 && !(i % (reps / 10))) 307 printf_verbose("tid %d: count %lld\n", (int) gettid(), i); 308 #endif 309 } 310 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 311 (int) gettid(), nr_abort, signals_delivered); 312 if (!opt_disable_rseq && thread_data->reg && 313 rseq_unregister_current_thread()) 314 abort(); 315 return NULL; 316 } 317 318 /* 319 * A simple test which implements a sharded counter using a per-cpu 320 * lock. Obviously real applications might prefer to simply use a 321 * per-cpu increment; however, this is reasonable for a test and the 322 * lock can be extended to synchronize more complicated operations. 323 */ 324 void test_percpu_spinlock(void) 325 { 326 const int num_threads = opt_threads; 327 int i, ret; 328 uint64_t sum; 329 pthread_t test_threads[num_threads]; 330 struct spinlock_test_data data; 331 struct spinlock_thread_test_data thread_data[num_threads]; 332 333 memset(&data, 0, sizeof(data)); 334 for (i = 0; i < num_threads; i++) { 335 thread_data[i].reps = opt_reps; 336 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 337 thread_data[i].reg = 1; 338 else 339 thread_data[i].reg = 0; 340 thread_data[i].data = &data; 341 ret = pthread_create(&test_threads[i], NULL, 342 test_percpu_spinlock_thread, 343 &thread_data[i]); 344 if (ret) { 345 errno = ret; 346 perror("pthread_create"); 347 abort(); 348 } 349 } 350 351 for (i = 0; i < num_threads; i++) { 352 ret = pthread_join(test_threads[i], NULL); 353 if (ret) { 354 errno = ret; 355 perror("pthread_join"); 356 abort(); 357 } 358 } 359 360 sum = 0; 361 for (i = 0; i < CPU_SETSIZE; i++) 362 sum += data.c[i].count; 363 364 assert(sum == (uint64_t)opt_reps * num_threads); 365 } 366 367 void *test_percpu_inc_thread(void *arg) 368 { 369 struct inc_thread_test_data *thread_data = arg; 370 struct inc_test_data *data = thread_data->data; 371 long long i, reps; 372 373 if (!opt_disable_rseq && thread_data->reg && 374 rseq_register_current_thread()) 375 abort(); 376 reps = thread_data->reps; 377 for (i = 0; i < reps; i++) { 378 int ret; 379 380 do { 381 int cpu; 382 383 cpu = rseq_cpu_start(); 384 ret = rseq_addv(&data->c[cpu].count, 1, cpu); 385 } while (rseq_unlikely(ret)); 386 #ifndef BENCHMARK 387 if (i != 0 && !(i % (reps / 10))) 388 printf_verbose("tid %d: count %lld\n", (int) gettid(), i); 389 #endif 390 } 391 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 392 (int) gettid(), nr_abort, signals_delivered); 393 if (!opt_disable_rseq && thread_data->reg && 394 rseq_unregister_current_thread()) 395 abort(); 396 return NULL; 397 } 398 399 void test_percpu_inc(void) 400 { 401 const int num_threads = opt_threads; 402 int i, ret; 403 uint64_t sum; 404 pthread_t test_threads[num_threads]; 405 struct inc_test_data data; 406 struct inc_thread_test_data thread_data[num_threads]; 407 408 memset(&data, 0, sizeof(data)); 409 for (i = 0; i < num_threads; i++) { 410 thread_data[i].reps = opt_reps; 411 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 412 thread_data[i].reg = 1; 413 else 414 thread_data[i].reg = 0; 415 thread_data[i].data = &data; 416 ret = pthread_create(&test_threads[i], NULL, 417 test_percpu_inc_thread, 418 &thread_data[i]); 419 if (ret) { 420 errno = ret; 421 perror("pthread_create"); 422 abort(); 423 } 424 } 425 426 for (i = 0; i < num_threads; i++) { 427 ret = pthread_join(test_threads[i], NULL); 428 if (ret) { 429 errno = ret; 430 perror("pthread_join"); 431 abort(); 432 } 433 } 434 435 sum = 0; 436 for (i = 0; i < CPU_SETSIZE; i++) 437 sum += data.c[i].count; 438 439 assert(sum == (uint64_t)opt_reps * num_threads); 440 } 441 442 void this_cpu_list_push(struct percpu_list *list, 443 struct percpu_list_node *node, 444 int *_cpu) 445 { 446 int cpu; 447 448 for (;;) { 449 intptr_t *targetptr, newval, expect; 450 int ret; 451 452 cpu = rseq_cpu_start(); 453 /* Load list->c[cpu].head with single-copy atomicity. */ 454 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); 455 newval = (intptr_t)node; 456 targetptr = (intptr_t *)&list->c[cpu].head; 457 node->next = (struct percpu_list_node *)expect; 458 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); 459 if (rseq_likely(!ret)) 460 break; 461 /* Retry if comparison fails or rseq aborts. */ 462 } 463 if (_cpu) 464 *_cpu = cpu; 465 } 466 467 /* 468 * Unlike a traditional lock-less linked list; the availability of a 469 * rseq primitive allows us to implement pop without concerns over 470 * ABA-type races. 471 */ 472 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, 473 int *_cpu) 474 { 475 struct percpu_list_node *node = NULL; 476 int cpu; 477 478 for (;;) { 479 struct percpu_list_node *head; 480 intptr_t *targetptr, expectnot, *load; 481 off_t offset; 482 int ret; 483 484 cpu = rseq_cpu_start(); 485 targetptr = (intptr_t *)&list->c[cpu].head; 486 expectnot = (intptr_t)NULL; 487 offset = offsetof(struct percpu_list_node, next); 488 load = (intptr_t *)&head; 489 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, 490 offset, load, cpu); 491 if (rseq_likely(!ret)) { 492 node = head; 493 break; 494 } 495 if (ret > 0) 496 break; 497 /* Retry if rseq aborts. */ 498 } 499 if (_cpu) 500 *_cpu = cpu; 501 return node; 502 } 503 504 /* 505 * __percpu_list_pop is not safe against concurrent accesses. Should 506 * only be used on lists that are not concurrently modified. 507 */ 508 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) 509 { 510 struct percpu_list_node *node; 511 512 node = list->c[cpu].head; 513 if (!node) 514 return NULL; 515 list->c[cpu].head = node->next; 516 return node; 517 } 518 519 void *test_percpu_list_thread(void *arg) 520 { 521 long long i, reps; 522 struct percpu_list *list = (struct percpu_list *)arg; 523 524 if (!opt_disable_rseq && rseq_register_current_thread()) 525 abort(); 526 527 reps = opt_reps; 528 for (i = 0; i < reps; i++) { 529 struct percpu_list_node *node; 530 531 node = this_cpu_list_pop(list, NULL); 532 if (opt_yield) 533 sched_yield(); /* encourage shuffling */ 534 if (node) 535 this_cpu_list_push(list, node, NULL); 536 } 537 538 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 539 (int) gettid(), nr_abort, signals_delivered); 540 if (!opt_disable_rseq && rseq_unregister_current_thread()) 541 abort(); 542 543 return NULL; 544 } 545 546 /* Simultaneous modification to a per-cpu linked list from many threads. */ 547 void test_percpu_list(void) 548 { 549 const int num_threads = opt_threads; 550 int i, j, ret; 551 uint64_t sum = 0, expected_sum = 0; 552 struct percpu_list list; 553 pthread_t test_threads[num_threads]; 554 cpu_set_t allowed_cpus; 555 556 memset(&list, 0, sizeof(list)); 557 558 /* Generate list entries for every usable cpu. */ 559 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 560 for (i = 0; i < CPU_SETSIZE; i++) { 561 if (!CPU_ISSET(i, &allowed_cpus)) 562 continue; 563 for (j = 1; j <= 100; j++) { 564 struct percpu_list_node *node; 565 566 expected_sum += j; 567 568 node = malloc(sizeof(*node)); 569 assert(node); 570 node->data = j; 571 node->next = list.c[i].head; 572 list.c[i].head = node; 573 } 574 } 575 576 for (i = 0; i < num_threads; i++) { 577 ret = pthread_create(&test_threads[i], NULL, 578 test_percpu_list_thread, &list); 579 if (ret) { 580 errno = ret; 581 perror("pthread_create"); 582 abort(); 583 } 584 } 585 586 for (i = 0; i < num_threads; i++) { 587 ret = pthread_join(test_threads[i], NULL); 588 if (ret) { 589 errno = ret; 590 perror("pthread_join"); 591 abort(); 592 } 593 } 594 595 for (i = 0; i < CPU_SETSIZE; i++) { 596 struct percpu_list_node *node; 597 598 if (!CPU_ISSET(i, &allowed_cpus)) 599 continue; 600 601 while ((node = __percpu_list_pop(&list, i))) { 602 sum += node->data; 603 free(node); 604 } 605 } 606 607 /* 608 * All entries should now be accounted for (unless some external 609 * actor is interfering with our allowed affinity while this 610 * test is running). 611 */ 612 assert(sum == expected_sum); 613 } 614 615 bool this_cpu_buffer_push(struct percpu_buffer *buffer, 616 struct percpu_buffer_node *node, 617 int *_cpu) 618 { 619 bool result = false; 620 int cpu; 621 622 for (;;) { 623 intptr_t *targetptr_spec, newval_spec; 624 intptr_t *targetptr_final, newval_final; 625 intptr_t offset; 626 int ret; 627 628 cpu = rseq_cpu_start(); 629 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 630 if (offset == buffer->c[cpu].buflen) 631 break; 632 newval_spec = (intptr_t)node; 633 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; 634 newval_final = offset + 1; 635 targetptr_final = &buffer->c[cpu].offset; 636 if (opt_mb) 637 ret = rseq_cmpeqv_trystorev_storev_release( 638 targetptr_final, offset, targetptr_spec, 639 newval_spec, newval_final, cpu); 640 else 641 ret = rseq_cmpeqv_trystorev_storev(targetptr_final, 642 offset, targetptr_spec, newval_spec, 643 newval_final, cpu); 644 if (rseq_likely(!ret)) { 645 result = true; 646 break; 647 } 648 /* Retry if comparison fails or rseq aborts. */ 649 } 650 if (_cpu) 651 *_cpu = cpu; 652 return result; 653 } 654 655 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, 656 int *_cpu) 657 { 658 struct percpu_buffer_node *head; 659 int cpu; 660 661 for (;;) { 662 intptr_t *targetptr, newval; 663 intptr_t offset; 664 int ret; 665 666 cpu = rseq_cpu_start(); 667 /* Load offset with single-copy atomicity. */ 668 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 669 if (offset == 0) { 670 head = NULL; 671 break; 672 } 673 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); 674 newval = offset - 1; 675 targetptr = (intptr_t *)&buffer->c[cpu].offset; 676 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, 677 (intptr_t *)&buffer->c[cpu].array[offset - 1], 678 (intptr_t)head, newval, cpu); 679 if (rseq_likely(!ret)) 680 break; 681 /* Retry if comparison fails or rseq aborts. */ 682 } 683 if (_cpu) 684 *_cpu = cpu; 685 return head; 686 } 687 688 /* 689 * __percpu_buffer_pop is not safe against concurrent accesses. Should 690 * only be used on buffers that are not concurrently modified. 691 */ 692 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, 693 int cpu) 694 { 695 struct percpu_buffer_node *head; 696 intptr_t offset; 697 698 offset = buffer->c[cpu].offset; 699 if (offset == 0) 700 return NULL; 701 head = buffer->c[cpu].array[offset - 1]; 702 buffer->c[cpu].offset = offset - 1; 703 return head; 704 } 705 706 void *test_percpu_buffer_thread(void *arg) 707 { 708 long long i, reps; 709 struct percpu_buffer *buffer = (struct percpu_buffer *)arg; 710 711 if (!opt_disable_rseq && rseq_register_current_thread()) 712 abort(); 713 714 reps = opt_reps; 715 for (i = 0; i < reps; i++) { 716 struct percpu_buffer_node *node; 717 718 node = this_cpu_buffer_pop(buffer, NULL); 719 if (opt_yield) 720 sched_yield(); /* encourage shuffling */ 721 if (node) { 722 if (!this_cpu_buffer_push(buffer, node, NULL)) { 723 /* Should increase buffer size. */ 724 abort(); 725 } 726 } 727 } 728 729 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 730 (int) gettid(), nr_abort, signals_delivered); 731 if (!opt_disable_rseq && rseq_unregister_current_thread()) 732 abort(); 733 734 return NULL; 735 } 736 737 /* Simultaneous modification to a per-cpu buffer from many threads. */ 738 void test_percpu_buffer(void) 739 { 740 const int num_threads = opt_threads; 741 int i, j, ret; 742 uint64_t sum = 0, expected_sum = 0; 743 struct percpu_buffer buffer; 744 pthread_t test_threads[num_threads]; 745 cpu_set_t allowed_cpus; 746 747 memset(&buffer, 0, sizeof(buffer)); 748 749 /* Generate list entries for every usable cpu. */ 750 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 751 for (i = 0; i < CPU_SETSIZE; i++) { 752 if (!CPU_ISSET(i, &allowed_cpus)) 753 continue; 754 /* Worse-case is every item in same CPU. */ 755 buffer.c[i].array = 756 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 757 BUFFER_ITEM_PER_CPU); 758 assert(buffer.c[i].array); 759 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; 760 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { 761 struct percpu_buffer_node *node; 762 763 expected_sum += j; 764 765 /* 766 * We could theoretically put the word-sized 767 * "data" directly in the buffer. However, we 768 * want to model objects that would not fit 769 * within a single word, so allocate an object 770 * for each node. 771 */ 772 node = malloc(sizeof(*node)); 773 assert(node); 774 node->data = j; 775 buffer.c[i].array[j - 1] = node; 776 buffer.c[i].offset++; 777 } 778 } 779 780 for (i = 0; i < num_threads; i++) { 781 ret = pthread_create(&test_threads[i], NULL, 782 test_percpu_buffer_thread, &buffer); 783 if (ret) { 784 errno = ret; 785 perror("pthread_create"); 786 abort(); 787 } 788 } 789 790 for (i = 0; i < num_threads; i++) { 791 ret = pthread_join(test_threads[i], NULL); 792 if (ret) { 793 errno = ret; 794 perror("pthread_join"); 795 abort(); 796 } 797 } 798 799 for (i = 0; i < CPU_SETSIZE; i++) { 800 struct percpu_buffer_node *node; 801 802 if (!CPU_ISSET(i, &allowed_cpus)) 803 continue; 804 805 while ((node = __percpu_buffer_pop(&buffer, i))) { 806 sum += node->data; 807 free(node); 808 } 809 free(buffer.c[i].array); 810 } 811 812 /* 813 * All entries should now be accounted for (unless some external 814 * actor is interfering with our allowed affinity while this 815 * test is running). 816 */ 817 assert(sum == expected_sum); 818 } 819 820 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, 821 struct percpu_memcpy_buffer_node item, 822 int *_cpu) 823 { 824 bool result = false; 825 int cpu; 826 827 for (;;) { 828 intptr_t *targetptr_final, newval_final, offset; 829 char *destptr, *srcptr; 830 size_t copylen; 831 int ret; 832 833 cpu = rseq_cpu_start(); 834 /* Load offset with single-copy atomicity. */ 835 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 836 if (offset == buffer->c[cpu].buflen) 837 break; 838 destptr = (char *)&buffer->c[cpu].array[offset]; 839 srcptr = (char *)&item; 840 /* copylen must be <= 4kB. */ 841 copylen = sizeof(item); 842 newval_final = offset + 1; 843 targetptr_final = &buffer->c[cpu].offset; 844 if (opt_mb) 845 ret = rseq_cmpeqv_trymemcpy_storev_release( 846 targetptr_final, offset, 847 destptr, srcptr, copylen, 848 newval_final, cpu); 849 else 850 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 851 offset, destptr, srcptr, copylen, 852 newval_final, cpu); 853 if (rseq_likely(!ret)) { 854 result = true; 855 break; 856 } 857 /* Retry if comparison fails or rseq aborts. */ 858 } 859 if (_cpu) 860 *_cpu = cpu; 861 return result; 862 } 863 864 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 865 struct percpu_memcpy_buffer_node *item, 866 int *_cpu) 867 { 868 bool result = false; 869 int cpu; 870 871 for (;;) { 872 intptr_t *targetptr_final, newval_final, offset; 873 char *destptr, *srcptr; 874 size_t copylen; 875 int ret; 876 877 cpu = rseq_cpu_start(); 878 /* Load offset with single-copy atomicity. */ 879 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 880 if (offset == 0) 881 break; 882 destptr = (char *)item; 883 srcptr = (char *)&buffer->c[cpu].array[offset - 1]; 884 /* copylen must be <= 4kB. */ 885 copylen = sizeof(*item); 886 newval_final = offset - 1; 887 targetptr_final = &buffer->c[cpu].offset; 888 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 889 offset, destptr, srcptr, copylen, 890 newval_final, cpu); 891 if (rseq_likely(!ret)) { 892 result = true; 893 break; 894 } 895 /* Retry if comparison fails or rseq aborts. */ 896 } 897 if (_cpu) 898 *_cpu = cpu; 899 return result; 900 } 901 902 /* 903 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should 904 * only be used on buffers that are not concurrently modified. 905 */ 906 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 907 struct percpu_memcpy_buffer_node *item, 908 int cpu) 909 { 910 intptr_t offset; 911 912 offset = buffer->c[cpu].offset; 913 if (offset == 0) 914 return false; 915 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); 916 buffer->c[cpu].offset = offset - 1; 917 return true; 918 } 919 920 void *test_percpu_memcpy_buffer_thread(void *arg) 921 { 922 long long i, reps; 923 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; 924 925 if (!opt_disable_rseq && rseq_register_current_thread()) 926 abort(); 927 928 reps = opt_reps; 929 for (i = 0; i < reps; i++) { 930 struct percpu_memcpy_buffer_node item; 931 bool result; 932 933 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); 934 if (opt_yield) 935 sched_yield(); /* encourage shuffling */ 936 if (result) { 937 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { 938 /* Should increase buffer size. */ 939 abort(); 940 } 941 } 942 } 943 944 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 945 (int) gettid(), nr_abort, signals_delivered); 946 if (!opt_disable_rseq && rseq_unregister_current_thread()) 947 abort(); 948 949 return NULL; 950 } 951 952 /* Simultaneous modification to a per-cpu buffer from many threads. */ 953 void test_percpu_memcpy_buffer(void) 954 { 955 const int num_threads = opt_threads; 956 int i, j, ret; 957 uint64_t sum = 0, expected_sum = 0; 958 struct percpu_memcpy_buffer buffer; 959 pthread_t test_threads[num_threads]; 960 cpu_set_t allowed_cpus; 961 962 memset(&buffer, 0, sizeof(buffer)); 963 964 /* Generate list entries for every usable cpu. */ 965 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 966 for (i = 0; i < CPU_SETSIZE; i++) { 967 if (!CPU_ISSET(i, &allowed_cpus)) 968 continue; 969 /* Worse-case is every item in same CPU. */ 970 buffer.c[i].array = 971 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 972 MEMCPY_BUFFER_ITEM_PER_CPU); 973 assert(buffer.c[i].array); 974 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; 975 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { 976 expected_sum += 2 * j + 1; 977 978 /* 979 * We could theoretically put the word-sized 980 * "data" directly in the buffer. However, we 981 * want to model objects that would not fit 982 * within a single word, so allocate an object 983 * for each node. 984 */ 985 buffer.c[i].array[j - 1].data1 = j; 986 buffer.c[i].array[j - 1].data2 = j + 1; 987 buffer.c[i].offset++; 988 } 989 } 990 991 for (i = 0; i < num_threads; i++) { 992 ret = pthread_create(&test_threads[i], NULL, 993 test_percpu_memcpy_buffer_thread, 994 &buffer); 995 if (ret) { 996 errno = ret; 997 perror("pthread_create"); 998 abort(); 999 } 1000 } 1001 1002 for (i = 0; i < num_threads; i++) { 1003 ret = pthread_join(test_threads[i], NULL); 1004 if (ret) { 1005 errno = ret; 1006 perror("pthread_join"); 1007 abort(); 1008 } 1009 } 1010 1011 for (i = 0; i < CPU_SETSIZE; i++) { 1012 struct percpu_memcpy_buffer_node item; 1013 1014 if (!CPU_ISSET(i, &allowed_cpus)) 1015 continue; 1016 1017 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { 1018 sum += item.data1; 1019 sum += item.data2; 1020 } 1021 free(buffer.c[i].array); 1022 } 1023 1024 /* 1025 * All entries should now be accounted for (unless some external 1026 * actor is interfering with our allowed affinity while this 1027 * test is running). 1028 */ 1029 assert(sum == expected_sum); 1030 } 1031 1032 static void test_signal_interrupt_handler(int signo) 1033 { 1034 signals_delivered++; 1035 } 1036 1037 static int set_signal_handler(void) 1038 { 1039 int ret = 0; 1040 struct sigaction sa; 1041 sigset_t sigset; 1042 1043 ret = sigemptyset(&sigset); 1044 if (ret < 0) { 1045 perror("sigemptyset"); 1046 return ret; 1047 } 1048 1049 sa.sa_handler = test_signal_interrupt_handler; 1050 sa.sa_mask = sigset; 1051 sa.sa_flags = 0; 1052 ret = sigaction(SIGUSR1, &sa, NULL); 1053 if (ret < 0) { 1054 perror("sigaction"); 1055 return ret; 1056 } 1057 1058 printf_verbose("Signal handler set for SIGUSR1\n"); 1059 1060 return ret; 1061 } 1062 1063 static void show_usage(int argc, char **argv) 1064 { 1065 printf("Usage : %s <OPTIONS>\n", 1066 argv[0]); 1067 printf("OPTIONS:\n"); 1068 printf(" [-1 loops] Number of loops for delay injection 1\n"); 1069 printf(" [-2 loops] Number of loops for delay injection 2\n"); 1070 printf(" [-3 loops] Number of loops for delay injection 3\n"); 1071 printf(" [-4 loops] Number of loops for delay injection 4\n"); 1072 printf(" [-5 loops] Number of loops for delay injection 5\n"); 1073 printf(" [-6 loops] Number of loops for delay injection 6\n"); 1074 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"); 1075 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"); 1076 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"); 1077 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"); 1078 printf(" [-y] Yield\n"); 1079 printf(" [-k] Kill thread with signal\n"); 1080 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"); 1081 printf(" [-t N] Number of threads (default 200)\n"); 1082 printf(" [-r N] Number of repetitions per thread (default 5000)\n"); 1083 printf(" [-d] Disable rseq system call (no initialization)\n"); 1084 printf(" [-D M] Disable rseq for each M threads\n"); 1085 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n"); 1086 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); 1087 printf(" [-v] Verbose output.\n"); 1088 printf(" [-h] Show this help.\n"); 1089 printf("\n"); 1090 } 1091 1092 int main(int argc, char **argv) 1093 { 1094 int i; 1095 1096 for (i = 1; i < argc; i++) { 1097 if (argv[i][0] != '-') 1098 continue; 1099 switch (argv[i][1]) { 1100 case '1': 1101 case '2': 1102 case '3': 1103 case '4': 1104 case '5': 1105 case '6': 1106 case '7': 1107 case '8': 1108 case '9': 1109 if (argc < i + 2) { 1110 show_usage(argc, argv); 1111 goto error; 1112 } 1113 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]); 1114 i++; 1115 break; 1116 case 'm': 1117 if (argc < i + 2) { 1118 show_usage(argc, argv); 1119 goto error; 1120 } 1121 opt_modulo = atol(argv[i + 1]); 1122 if (opt_modulo < 0) { 1123 show_usage(argc, argv); 1124 goto error; 1125 } 1126 i++; 1127 break; 1128 case 's': 1129 if (argc < i + 2) { 1130 show_usage(argc, argv); 1131 goto error; 1132 } 1133 opt_sleep = atol(argv[i + 1]); 1134 if (opt_sleep < 0) { 1135 show_usage(argc, argv); 1136 goto error; 1137 } 1138 i++; 1139 break; 1140 case 'y': 1141 opt_yield = 1; 1142 break; 1143 case 'k': 1144 opt_signal = 1; 1145 break; 1146 case 'd': 1147 opt_disable_rseq = 1; 1148 break; 1149 case 'D': 1150 if (argc < i + 2) { 1151 show_usage(argc, argv); 1152 goto error; 1153 } 1154 opt_disable_mod = atol(argv[i + 1]); 1155 if (opt_disable_mod < 0) { 1156 show_usage(argc, argv); 1157 goto error; 1158 } 1159 i++; 1160 break; 1161 case 't': 1162 if (argc < i + 2) { 1163 show_usage(argc, argv); 1164 goto error; 1165 } 1166 opt_threads = atol(argv[i + 1]); 1167 if (opt_threads < 0) { 1168 show_usage(argc, argv); 1169 goto error; 1170 } 1171 i++; 1172 break; 1173 case 'r': 1174 if (argc < i + 2) { 1175 show_usage(argc, argv); 1176 goto error; 1177 } 1178 opt_reps = atoll(argv[i + 1]); 1179 if (opt_reps < 0) { 1180 show_usage(argc, argv); 1181 goto error; 1182 } 1183 i++; 1184 break; 1185 case 'h': 1186 show_usage(argc, argv); 1187 goto end; 1188 case 'T': 1189 if (argc < i + 2) { 1190 show_usage(argc, argv); 1191 goto error; 1192 } 1193 opt_test = *argv[i + 1]; 1194 switch (opt_test) { 1195 case 's': 1196 case 'l': 1197 case 'i': 1198 case 'b': 1199 case 'm': 1200 break; 1201 default: 1202 show_usage(argc, argv); 1203 goto error; 1204 } 1205 i++; 1206 break; 1207 case 'v': 1208 verbose = 1; 1209 break; 1210 case 'M': 1211 opt_mb = 1; 1212 break; 1213 default: 1214 show_usage(argc, argv); 1215 goto error; 1216 } 1217 } 1218 1219 loop_cnt_1 = loop_cnt[1]; 1220 loop_cnt_2 = loop_cnt[2]; 1221 loop_cnt_3 = loop_cnt[3]; 1222 loop_cnt_4 = loop_cnt[4]; 1223 loop_cnt_5 = loop_cnt[5]; 1224 loop_cnt_6 = loop_cnt[6]; 1225 1226 if (set_signal_handler()) 1227 goto error; 1228 1229 if (!opt_disable_rseq && rseq_register_current_thread()) 1230 goto error; 1231 switch (opt_test) { 1232 case 's': 1233 printf_verbose("spinlock\n"); 1234 test_percpu_spinlock(); 1235 break; 1236 case 'l': 1237 printf_verbose("linked list\n"); 1238 test_percpu_list(); 1239 break; 1240 case 'b': 1241 printf_verbose("buffer\n"); 1242 test_percpu_buffer(); 1243 break; 1244 case 'm': 1245 printf_verbose("memcpy buffer\n"); 1246 test_percpu_memcpy_buffer(); 1247 break; 1248 case 'i': 1249 printf_verbose("counter increment\n"); 1250 test_percpu_inc(); 1251 break; 1252 } 1253 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1254 abort(); 1255 end: 1256 return 0; 1257 1258 error: 1259 return -1; 1260 } 1261