1 // SPDX-License-Identifier: GPL-2.0+ 2 // 3 // Scalability test comparing RCU vs other mechanisms 4 // for acquiring references on objects. 5 // 6 // Copyright (C) Google, 2020. 7 // 8 // Author: Joel Fernandes <joel@joelfernandes.org> 9 10 #define pr_fmt(fmt) fmt 11 12 #include <linux/atomic.h> 13 #include <linux/bitops.h> 14 #include <linux/completion.h> 15 #include <linux/cpu.h> 16 #include <linux/delay.h> 17 #include <linux/err.h> 18 #include <linux/init.h> 19 #include <linux/interrupt.h> 20 #include <linux/kthread.h> 21 #include <linux/kernel.h> 22 #include <linux/mm.h> 23 #include <linux/module.h> 24 #include <linux/moduleparam.h> 25 #include <linux/notifier.h> 26 #include <linux/percpu.h> 27 #include <linux/rcupdate.h> 28 #include <linux/rcupdate_trace.h> 29 #include <linux/reboot.h> 30 #include <linux/sched.h> 31 #include <linux/spinlock.h> 32 #include <linux/smp.h> 33 #include <linux/stat.h> 34 #include <linux/srcu.h> 35 #include <linux/slab.h> 36 #include <linux/torture.h> 37 #include <linux/types.h> 38 39 #include "rcu.h" 40 41 #define SCALE_FLAG "-ref-scale: " 42 43 #define SCALEOUT(s, x...) \ 44 pr_alert("%s" SCALE_FLAG s, scale_type, ## x) 45 46 #define VERBOSE_SCALEOUT(s, x...) \ 47 do { if (verbose) pr_alert("%s" SCALE_FLAG s, scale_type, ## x); } while (0) 48 49 #define VERBOSE_SCALEOUT_ERRSTRING(s, x...) \ 50 do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! " s, scale_type, ## x); } while (0) 51 52 MODULE_LICENSE("GPL"); 53 MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>"); 54 55 static char *scale_type = "rcu"; 56 module_param(scale_type, charp, 0444); 57 MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock."); 58 59 torture_param(int, verbose, 0, "Enable verbose debugging printk()s"); 60 61 // Wait until there are multiple CPUs before starting test. 62 torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0, 63 "Holdoff time before test start (s)"); 64 // Number of loops per experiment, all readers execute operations concurrently. 65 torture_param(long, loops, 10000, "Number of loops per experiment."); 66 // Number of readers, with -1 defaulting to about 75% of the CPUs. 67 torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs."); 68 // Number of runs. 69 torture_param(int, nruns, 30, "Number of experiments to run."); 70 // Reader delay in nanoseconds, 0 for no delay. 71 torture_param(int, readdelay, 0, "Read-side delay in nanoseconds."); 72 73 #ifdef MODULE 74 # define REFSCALE_SHUTDOWN 0 75 #else 76 # define REFSCALE_SHUTDOWN 1 77 #endif 78 79 torture_param(bool, shutdown, REFSCALE_SHUTDOWN, 80 "Shutdown at end of scalability tests."); 81 82 struct reader_task { 83 struct task_struct *task; 84 int start_reader; 85 wait_queue_head_t wq; 86 u64 last_duration_ns; 87 }; 88 89 static struct task_struct *shutdown_task; 90 static wait_queue_head_t shutdown_wq; 91 92 static struct task_struct *main_task; 93 static wait_queue_head_t main_wq; 94 static int shutdown_start; 95 96 static struct reader_task *reader_tasks; 97 98 // Number of readers that are part of the current experiment. 99 static atomic_t nreaders_exp; 100 101 // Use to wait for all threads to start. 102 static atomic_t n_init; 103 static atomic_t n_started; 104 static atomic_t n_warmedup; 105 static atomic_t n_cooleddown; 106 107 // Track which experiment is currently running. 108 static int exp_idx; 109 110 // Operations vector for selecting different types of tests. 111 struct ref_scale_ops { 112 void (*init)(void); 113 void (*cleanup)(void); 114 void (*readsection)(const int nloops); 115 void (*delaysection)(const int nloops, const int udl, const int ndl); 116 const char *name; 117 }; 118 119 static struct ref_scale_ops *cur_ops; 120 121 static void un_delay(const int udl, const int ndl) 122 { 123 if (udl) 124 udelay(udl); 125 if (ndl) 126 ndelay(ndl); 127 } 128 129 static void ref_rcu_read_section(const int nloops) 130 { 131 int i; 132 133 for (i = nloops; i >= 0; i--) { 134 rcu_read_lock(); 135 rcu_read_unlock(); 136 } 137 } 138 139 static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl) 140 { 141 int i; 142 143 for (i = nloops; i >= 0; i--) { 144 rcu_read_lock(); 145 un_delay(udl, ndl); 146 rcu_read_unlock(); 147 } 148 } 149 150 static void rcu_sync_scale_init(void) 151 { 152 } 153 154 static struct ref_scale_ops rcu_ops = { 155 .init = rcu_sync_scale_init, 156 .readsection = ref_rcu_read_section, 157 .delaysection = ref_rcu_delay_section, 158 .name = "rcu" 159 }; 160 161 // Definitions for SRCU ref scale testing. 162 DEFINE_STATIC_SRCU(srcu_refctl_scale); 163 static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale; 164 165 static void srcu_ref_scale_read_section(const int nloops) 166 { 167 int i; 168 int idx; 169 170 for (i = nloops; i >= 0; i--) { 171 idx = srcu_read_lock(srcu_ctlp); 172 srcu_read_unlock(srcu_ctlp, idx); 173 } 174 } 175 176 static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 177 { 178 int i; 179 int idx; 180 181 for (i = nloops; i >= 0; i--) { 182 idx = srcu_read_lock(srcu_ctlp); 183 un_delay(udl, ndl); 184 srcu_read_unlock(srcu_ctlp, idx); 185 } 186 } 187 188 static struct ref_scale_ops srcu_ops = { 189 .init = rcu_sync_scale_init, 190 .readsection = srcu_ref_scale_read_section, 191 .delaysection = srcu_ref_scale_delay_section, 192 .name = "srcu" 193 }; 194 195 // Definitions for RCU Tasks ref scale testing: Empty read markers. 196 // These definitions also work for RCU Rude readers. 197 static void rcu_tasks_ref_scale_read_section(const int nloops) 198 { 199 int i; 200 201 for (i = nloops; i >= 0; i--) 202 continue; 203 } 204 205 static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 206 { 207 int i; 208 209 for (i = nloops; i >= 0; i--) 210 un_delay(udl, ndl); 211 } 212 213 static struct ref_scale_ops rcu_tasks_ops = { 214 .init = rcu_sync_scale_init, 215 .readsection = rcu_tasks_ref_scale_read_section, 216 .delaysection = rcu_tasks_ref_scale_delay_section, 217 .name = "rcu-tasks" 218 }; 219 220 // Definitions for RCU Tasks Trace ref scale testing. 221 static void rcu_trace_ref_scale_read_section(const int nloops) 222 { 223 int i; 224 225 for (i = nloops; i >= 0; i--) { 226 rcu_read_lock_trace(); 227 rcu_read_unlock_trace(); 228 } 229 } 230 231 static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 232 { 233 int i; 234 235 for (i = nloops; i >= 0; i--) { 236 rcu_read_lock_trace(); 237 un_delay(udl, ndl); 238 rcu_read_unlock_trace(); 239 } 240 } 241 242 static struct ref_scale_ops rcu_trace_ops = { 243 .init = rcu_sync_scale_init, 244 .readsection = rcu_trace_ref_scale_read_section, 245 .delaysection = rcu_trace_ref_scale_delay_section, 246 .name = "rcu-trace" 247 }; 248 249 // Definitions for reference count 250 static atomic_t refcnt; 251 252 static void ref_refcnt_section(const int nloops) 253 { 254 int i; 255 256 for (i = nloops; i >= 0; i--) { 257 atomic_inc(&refcnt); 258 atomic_dec(&refcnt); 259 } 260 } 261 262 static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl) 263 { 264 int i; 265 266 for (i = nloops; i >= 0; i--) { 267 atomic_inc(&refcnt); 268 un_delay(udl, ndl); 269 atomic_dec(&refcnt); 270 } 271 } 272 273 static struct ref_scale_ops refcnt_ops = { 274 .init = rcu_sync_scale_init, 275 .readsection = ref_refcnt_section, 276 .delaysection = ref_refcnt_delay_section, 277 .name = "refcnt" 278 }; 279 280 // Definitions for rwlock 281 static rwlock_t test_rwlock; 282 283 static void ref_rwlock_init(void) 284 { 285 rwlock_init(&test_rwlock); 286 } 287 288 static void ref_rwlock_section(const int nloops) 289 { 290 int i; 291 292 for (i = nloops; i >= 0; i--) { 293 read_lock(&test_rwlock); 294 read_unlock(&test_rwlock); 295 } 296 } 297 298 static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl) 299 { 300 int i; 301 302 for (i = nloops; i >= 0; i--) { 303 read_lock(&test_rwlock); 304 un_delay(udl, ndl); 305 read_unlock(&test_rwlock); 306 } 307 } 308 309 static struct ref_scale_ops rwlock_ops = { 310 .init = ref_rwlock_init, 311 .readsection = ref_rwlock_section, 312 .delaysection = ref_rwlock_delay_section, 313 .name = "rwlock" 314 }; 315 316 // Definitions for rwsem 317 static struct rw_semaphore test_rwsem; 318 319 static void ref_rwsem_init(void) 320 { 321 init_rwsem(&test_rwsem); 322 } 323 324 static void ref_rwsem_section(const int nloops) 325 { 326 int i; 327 328 for (i = nloops; i >= 0; i--) { 329 down_read(&test_rwsem); 330 up_read(&test_rwsem); 331 } 332 } 333 334 static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl) 335 { 336 int i; 337 338 for (i = nloops; i >= 0; i--) { 339 down_read(&test_rwsem); 340 un_delay(udl, ndl); 341 up_read(&test_rwsem); 342 } 343 } 344 345 static struct ref_scale_ops rwsem_ops = { 346 .init = ref_rwsem_init, 347 .readsection = ref_rwsem_section, 348 .delaysection = ref_rwsem_delay_section, 349 .name = "rwsem" 350 }; 351 352 static void rcu_scale_one_reader(void) 353 { 354 if (readdelay <= 0) 355 cur_ops->readsection(loops); 356 else 357 cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000); 358 } 359 360 // Reader kthread. Repeatedly does empty RCU read-side 361 // critical section, minimizing update-side interference. 362 static int 363 ref_scale_reader(void *arg) 364 { 365 unsigned long flags; 366 long me = (long)arg; 367 struct reader_task *rt = &(reader_tasks[me]); 368 u64 start; 369 s64 duration; 370 371 VERBOSE_SCALEOUT("ref_scale_reader %ld: task started", me); 372 set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); 373 set_user_nice(current, MAX_NICE); 374 atomic_inc(&n_init); 375 if (holdoff) 376 schedule_timeout_interruptible(holdoff * HZ); 377 repeat: 378 VERBOSE_SCALEOUT("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, smp_processor_id()); 379 380 // Wait for signal that this reader can start. 381 wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) || 382 torture_must_stop()); 383 384 if (torture_must_stop()) 385 goto end; 386 387 // Make sure that the CPU is affinitized appropriately during testing. 388 WARN_ON_ONCE(smp_processor_id() != me); 389 390 WRITE_ONCE(rt->start_reader, 0); 391 if (!atomic_dec_return(&n_started)) 392 while (atomic_read_acquire(&n_started)) 393 cpu_relax(); 394 395 VERBOSE_SCALEOUT("ref_scale_reader %ld: experiment %d started", me, exp_idx); 396 397 398 // To reduce noise, do an initial cache-warming invocation, check 399 // in, and then keep warming until everyone has checked in. 400 rcu_scale_one_reader(); 401 if (!atomic_dec_return(&n_warmedup)) 402 while (atomic_read_acquire(&n_warmedup)) 403 rcu_scale_one_reader(); 404 // Also keep interrupts disabled. This also has the effect 405 // of preventing entries into slow path for rcu_read_unlock(). 406 local_irq_save(flags); 407 start = ktime_get_mono_fast_ns(); 408 409 rcu_scale_one_reader(); 410 411 duration = ktime_get_mono_fast_ns() - start; 412 local_irq_restore(flags); 413 414 rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration; 415 // To reduce runtime-skew noise, do maintain-load invocations until 416 // everyone is done. 417 if (!atomic_dec_return(&n_cooleddown)) 418 while (atomic_read_acquire(&n_cooleddown)) 419 rcu_scale_one_reader(); 420 421 if (atomic_dec_and_test(&nreaders_exp)) 422 wake_up(&main_wq); 423 424 VERBOSE_SCALEOUT("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)", 425 me, exp_idx, atomic_read(&nreaders_exp)); 426 427 if (!torture_must_stop()) 428 goto repeat; 429 end: 430 torture_kthread_stopping("ref_scale_reader"); 431 return 0; 432 } 433 434 static void reset_readers(void) 435 { 436 int i; 437 struct reader_task *rt; 438 439 for (i = 0; i < nreaders; i++) { 440 rt = &(reader_tasks[i]); 441 442 rt->last_duration_ns = 0; 443 } 444 } 445 446 // Print the results of each reader and return the sum of all their durations. 447 static u64 process_durations(int n) 448 { 449 int i; 450 struct reader_task *rt; 451 char buf1[64]; 452 char *buf; 453 u64 sum = 0; 454 455 buf = kmalloc(128 + nreaders * 32, GFP_KERNEL); 456 if (!buf) 457 return 0; 458 buf[0] = 0; 459 sprintf(buf, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)", 460 exp_idx); 461 462 for (i = 0; i < n && !torture_must_stop(); i++) { 463 rt = &(reader_tasks[i]); 464 sprintf(buf1, "%d: %llu\t", i, rt->last_duration_ns); 465 466 if (i % 5 == 0) 467 strcat(buf, "\n"); 468 strcat(buf, buf1); 469 470 sum += rt->last_duration_ns; 471 } 472 strcat(buf, "\n"); 473 474 SCALEOUT("%s\n", buf); 475 476 kfree(buf); 477 return sum; 478 } 479 480 // The main_func is the main orchestrator, it performs a bunch of 481 // experiments. For every experiment, it orders all the readers 482 // involved to start and waits for them to finish the experiment. It 483 // then reads their timestamps and starts the next experiment. Each 484 // experiment progresses from 1 concurrent reader to N of them at which 485 // point all the timestamps are printed. 486 static int main_func(void *arg) 487 { 488 bool errexit = false; 489 int exp, r; 490 char buf1[64]; 491 char *buf; 492 u64 *result_avg; 493 494 set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids)); 495 set_user_nice(current, MAX_NICE); 496 497 VERBOSE_SCALEOUT("main_func task started"); 498 result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL); 499 buf = kzalloc(64 + nruns * 32, GFP_KERNEL); 500 if (!result_avg || !buf) { 501 VERBOSE_SCALEOUT_ERRSTRING("out of memory"); 502 errexit = true; 503 } 504 if (holdoff) 505 schedule_timeout_interruptible(holdoff * HZ); 506 507 // Wait for all threads to start. 508 atomic_inc(&n_init); 509 while (atomic_read(&n_init) < nreaders + 1) 510 schedule_timeout_uninterruptible(1); 511 512 // Start exp readers up per experiment 513 for (exp = 0; exp < nruns && !torture_must_stop(); exp++) { 514 if (errexit) 515 break; 516 if (torture_must_stop()) 517 goto end; 518 519 reset_readers(); 520 atomic_set(&nreaders_exp, nreaders); 521 atomic_set(&n_started, nreaders); 522 atomic_set(&n_warmedup, nreaders); 523 atomic_set(&n_cooleddown, nreaders); 524 525 exp_idx = exp; 526 527 for (r = 0; r < nreaders; r++) { 528 smp_store_release(&reader_tasks[r].start_reader, 1); 529 wake_up(&reader_tasks[r].wq); 530 } 531 532 VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers", 533 nreaders); 534 535 wait_event(main_wq, 536 !atomic_read(&nreaders_exp) || torture_must_stop()); 537 538 VERBOSE_SCALEOUT("main_func: experiment ended"); 539 540 if (torture_must_stop()) 541 goto end; 542 543 result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops); 544 } 545 546 // Print the average of all experiments 547 SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n"); 548 549 if (!errexit) { 550 buf[0] = 0; 551 strcat(buf, "\n"); 552 strcat(buf, "Runs\tTime(ns)\n"); 553 } 554 555 for (exp = 0; exp < nruns; exp++) { 556 u64 avg; 557 u32 rem; 558 559 if (errexit) 560 break; 561 avg = div_u64_rem(result_avg[exp], 1000, &rem); 562 sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem); 563 strcat(buf, buf1); 564 } 565 566 if (!errexit) 567 SCALEOUT("%s", buf); 568 569 // This will shutdown everything including us. 570 if (shutdown) { 571 shutdown_start = 1; 572 wake_up(&shutdown_wq); 573 } 574 575 // Wait for torture to stop us 576 while (!torture_must_stop()) 577 schedule_timeout_uninterruptible(1); 578 579 end: 580 torture_kthread_stopping("main_func"); 581 kfree(result_avg); 582 kfree(buf); 583 return 0; 584 } 585 586 static void 587 ref_scale_print_module_parms(struct ref_scale_ops *cur_ops, const char *tag) 588 { 589 pr_alert("%s" SCALE_FLAG 590 "--- %s: verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, 591 verbose, shutdown, holdoff, loops, nreaders, nruns, readdelay); 592 } 593 594 static void 595 ref_scale_cleanup(void) 596 { 597 int i; 598 599 if (torture_cleanup_begin()) 600 return; 601 602 if (!cur_ops) { 603 torture_cleanup_end(); 604 return; 605 } 606 607 if (reader_tasks) { 608 for (i = 0; i < nreaders; i++) 609 torture_stop_kthread("ref_scale_reader", 610 reader_tasks[i].task); 611 } 612 kfree(reader_tasks); 613 614 torture_stop_kthread("main_task", main_task); 615 kfree(main_task); 616 617 // Do scale-type-specific cleanup operations. 618 if (cur_ops->cleanup != NULL) 619 cur_ops->cleanup(); 620 621 torture_cleanup_end(); 622 } 623 624 // Shutdown kthread. Just waits to be awakened, then shuts down system. 625 static int 626 ref_scale_shutdown(void *arg) 627 { 628 wait_event(shutdown_wq, shutdown_start); 629 630 smp_mb(); // Wake before output. 631 ref_scale_cleanup(); 632 kernel_power_off(); 633 634 return -EINVAL; 635 } 636 637 static int __init 638 ref_scale_init(void) 639 { 640 long i; 641 int firsterr = 0; 642 static struct ref_scale_ops *scale_ops[] = { 643 &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, 644 &refcnt_ops, &rwlock_ops, &rwsem_ops, 645 }; 646 647 if (!torture_init_begin(scale_type, verbose)) 648 return -EBUSY; 649 650 for (i = 0; i < ARRAY_SIZE(scale_ops); i++) { 651 cur_ops = scale_ops[i]; 652 if (strcmp(scale_type, cur_ops->name) == 0) 653 break; 654 } 655 if (i == ARRAY_SIZE(scale_ops)) { 656 pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type); 657 pr_alert("rcu-scale types:"); 658 for (i = 0; i < ARRAY_SIZE(scale_ops); i++) 659 pr_cont(" %s", scale_ops[i]->name); 660 pr_cont("\n"); 661 WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST)); 662 firsterr = -EINVAL; 663 cur_ops = NULL; 664 goto unwind; 665 } 666 if (cur_ops->init) 667 cur_ops->init(); 668 669 ref_scale_print_module_parms(cur_ops, "Start of test"); 670 671 // Shutdown task 672 if (shutdown) { 673 init_waitqueue_head(&shutdown_wq); 674 firsterr = torture_create_kthread(ref_scale_shutdown, NULL, 675 shutdown_task); 676 if (firsterr) 677 goto unwind; 678 schedule_timeout_uninterruptible(1); 679 } 680 681 // Reader tasks (default to ~75% of online CPUs). 682 if (nreaders < 0) 683 nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2); 684 reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]), 685 GFP_KERNEL); 686 if (!reader_tasks) { 687 VERBOSE_SCALEOUT_ERRSTRING("out of memory"); 688 firsterr = -ENOMEM; 689 goto unwind; 690 } 691 692 VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders); 693 694 for (i = 0; i < nreaders; i++) { 695 firsterr = torture_create_kthread(ref_scale_reader, (void *)i, 696 reader_tasks[i].task); 697 if (firsterr) 698 goto unwind; 699 700 init_waitqueue_head(&(reader_tasks[i].wq)); 701 } 702 703 // Main Task 704 init_waitqueue_head(&main_wq); 705 firsterr = torture_create_kthread(main_func, NULL, main_task); 706 if (firsterr) 707 goto unwind; 708 709 torture_init_end(); 710 return 0; 711 712 unwind: 713 torture_init_end(); 714 ref_scale_cleanup(); 715 return firsterr; 716 } 717 718 module_init(ref_scale_init); 719 module_exit(ref_scale_cleanup); 720