1 // SPDX-License-Identifier: GPL-2.0+ 2 // 3 // Scalability test comparing RCU vs other mechanisms 4 // for acquiring references on objects. 5 // 6 // Copyright (C) Google, 2020. 7 // 8 // Author: Joel Fernandes <joel@joelfernandes.org> 9 10 #define pr_fmt(fmt) fmt 11 12 #include <linux/atomic.h> 13 #include <linux/bitops.h> 14 #include <linux/completion.h> 15 #include <linux/cpu.h> 16 #include <linux/delay.h> 17 #include <linux/err.h> 18 #include <linux/init.h> 19 #include <linux/interrupt.h> 20 #include <linux/kthread.h> 21 #include <linux/kernel.h> 22 #include <linux/mm.h> 23 #include <linux/module.h> 24 #include <linux/moduleparam.h> 25 #include <linux/notifier.h> 26 #include <linux/percpu.h> 27 #include <linux/rcupdate.h> 28 #include <linux/rcupdate_trace.h> 29 #include <linux/reboot.h> 30 #include <linux/sched.h> 31 #include <linux/spinlock.h> 32 #include <linux/smp.h> 33 #include <linux/stat.h> 34 #include <linux/srcu.h> 35 #include <linux/slab.h> 36 #include <linux/torture.h> 37 #include <linux/types.h> 38 39 #include "rcu.h" 40 41 #define SCALE_FLAG "-ref-scale: " 42 43 #define SCALEOUT(s, x...) \ 44 pr_alert("%s" SCALE_FLAG s, scale_type, ## x) 45 46 #define VERBOSE_SCALEOUT(s, x...) \ 47 do { if (verbose) pr_alert("%s" SCALE_FLAG s, scale_type, ## x); } while (0) 48 49 static atomic_t verbose_batch_ctr; 50 51 #define VERBOSE_SCALEOUT_BATCH(s, x...) \ 52 do { \ 53 if (verbose && \ 54 (verbose_batched <= 0 || \ 55 !(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) { \ 56 schedule_timeout_uninterruptible(1); \ 57 pr_alert("%s" SCALE_FLAG s, scale_type, ## x); \ 58 } \ 59 } while (0) 60 61 #define VERBOSE_SCALEOUT_ERRSTRING(s, x...) \ 62 do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! " s, scale_type, ## x); } while (0) 63 64 MODULE_LICENSE("GPL"); 65 MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>"); 66 67 static char *scale_type = "rcu"; 68 module_param(scale_type, charp, 0444); 69 MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock."); 70 71 torture_param(int, verbose, 0, "Enable verbose debugging printk()s"); 72 torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s"); 73 74 // Wait until there are multiple CPUs before starting test. 75 torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0, 76 "Holdoff time before test start (s)"); 77 // Number of loops per experiment, all readers execute operations concurrently. 78 torture_param(long, loops, 10000, "Number of loops per experiment."); 79 // Number of readers, with -1 defaulting to about 75% of the CPUs. 80 torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs."); 81 // Number of runs. 82 torture_param(int, nruns, 30, "Number of experiments to run."); 83 // Reader delay in nanoseconds, 0 for no delay. 84 torture_param(int, readdelay, 0, "Read-side delay in nanoseconds."); 85 86 #ifdef MODULE 87 # define REFSCALE_SHUTDOWN 0 88 #else 89 # define REFSCALE_SHUTDOWN 1 90 #endif 91 92 torture_param(bool, shutdown, REFSCALE_SHUTDOWN, 93 "Shutdown at end of scalability tests."); 94 95 struct reader_task { 96 struct task_struct *task; 97 int start_reader; 98 wait_queue_head_t wq; 99 u64 last_duration_ns; 100 }; 101 102 static struct task_struct *shutdown_task; 103 static wait_queue_head_t shutdown_wq; 104 105 static struct task_struct *main_task; 106 static wait_queue_head_t main_wq; 107 static int shutdown_start; 108 109 static struct reader_task *reader_tasks; 110 111 // Number of readers that are part of the current experiment. 112 static atomic_t nreaders_exp; 113 114 // Use to wait for all threads to start. 115 static atomic_t n_init; 116 static atomic_t n_started; 117 static atomic_t n_warmedup; 118 static atomic_t n_cooleddown; 119 120 // Track which experiment is currently running. 121 static int exp_idx; 122 123 // Operations vector for selecting different types of tests. 124 struct ref_scale_ops { 125 void (*init)(void); 126 void (*cleanup)(void); 127 void (*readsection)(const int nloops); 128 void (*delaysection)(const int nloops, const int udl, const int ndl); 129 const char *name; 130 }; 131 132 static struct ref_scale_ops *cur_ops; 133 134 static void un_delay(const int udl, const int ndl) 135 { 136 if (udl) 137 udelay(udl); 138 if (ndl) 139 ndelay(ndl); 140 } 141 142 static void ref_rcu_read_section(const int nloops) 143 { 144 int i; 145 146 for (i = nloops; i >= 0; i--) { 147 rcu_read_lock(); 148 rcu_read_unlock(); 149 } 150 } 151 152 static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl) 153 { 154 int i; 155 156 for (i = nloops; i >= 0; i--) { 157 rcu_read_lock(); 158 un_delay(udl, ndl); 159 rcu_read_unlock(); 160 } 161 } 162 163 static void rcu_sync_scale_init(void) 164 { 165 } 166 167 static struct ref_scale_ops rcu_ops = { 168 .init = rcu_sync_scale_init, 169 .readsection = ref_rcu_read_section, 170 .delaysection = ref_rcu_delay_section, 171 .name = "rcu" 172 }; 173 174 // Definitions for SRCU ref scale testing. 175 DEFINE_STATIC_SRCU(srcu_refctl_scale); 176 static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale; 177 178 static void srcu_ref_scale_read_section(const int nloops) 179 { 180 int i; 181 int idx; 182 183 for (i = nloops; i >= 0; i--) { 184 idx = srcu_read_lock(srcu_ctlp); 185 srcu_read_unlock(srcu_ctlp, idx); 186 } 187 } 188 189 static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 190 { 191 int i; 192 int idx; 193 194 for (i = nloops; i >= 0; i--) { 195 idx = srcu_read_lock(srcu_ctlp); 196 un_delay(udl, ndl); 197 srcu_read_unlock(srcu_ctlp, idx); 198 } 199 } 200 201 static struct ref_scale_ops srcu_ops = { 202 .init = rcu_sync_scale_init, 203 .readsection = srcu_ref_scale_read_section, 204 .delaysection = srcu_ref_scale_delay_section, 205 .name = "srcu" 206 }; 207 208 // Definitions for RCU Tasks ref scale testing: Empty read markers. 209 // These definitions also work for RCU Rude readers. 210 static void rcu_tasks_ref_scale_read_section(const int nloops) 211 { 212 int i; 213 214 for (i = nloops; i >= 0; i--) 215 continue; 216 } 217 218 static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 219 { 220 int i; 221 222 for (i = nloops; i >= 0; i--) 223 un_delay(udl, ndl); 224 } 225 226 static struct ref_scale_ops rcu_tasks_ops = { 227 .init = rcu_sync_scale_init, 228 .readsection = rcu_tasks_ref_scale_read_section, 229 .delaysection = rcu_tasks_ref_scale_delay_section, 230 .name = "rcu-tasks" 231 }; 232 233 // Definitions for RCU Tasks Trace ref scale testing. 234 static void rcu_trace_ref_scale_read_section(const int nloops) 235 { 236 int i; 237 238 for (i = nloops; i >= 0; i--) { 239 rcu_read_lock_trace(); 240 rcu_read_unlock_trace(); 241 } 242 } 243 244 static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 245 { 246 int i; 247 248 for (i = nloops; i >= 0; i--) { 249 rcu_read_lock_trace(); 250 un_delay(udl, ndl); 251 rcu_read_unlock_trace(); 252 } 253 } 254 255 static struct ref_scale_ops rcu_trace_ops = { 256 .init = rcu_sync_scale_init, 257 .readsection = rcu_trace_ref_scale_read_section, 258 .delaysection = rcu_trace_ref_scale_delay_section, 259 .name = "rcu-trace" 260 }; 261 262 // Definitions for reference count 263 static atomic_t refcnt; 264 265 static void ref_refcnt_section(const int nloops) 266 { 267 int i; 268 269 for (i = nloops; i >= 0; i--) { 270 atomic_inc(&refcnt); 271 atomic_dec(&refcnt); 272 } 273 } 274 275 static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl) 276 { 277 int i; 278 279 for (i = nloops; i >= 0; i--) { 280 atomic_inc(&refcnt); 281 un_delay(udl, ndl); 282 atomic_dec(&refcnt); 283 } 284 } 285 286 static struct ref_scale_ops refcnt_ops = { 287 .init = rcu_sync_scale_init, 288 .readsection = ref_refcnt_section, 289 .delaysection = ref_refcnt_delay_section, 290 .name = "refcnt" 291 }; 292 293 // Definitions for rwlock 294 static rwlock_t test_rwlock; 295 296 static void ref_rwlock_init(void) 297 { 298 rwlock_init(&test_rwlock); 299 } 300 301 static void ref_rwlock_section(const int nloops) 302 { 303 int i; 304 305 for (i = nloops; i >= 0; i--) { 306 read_lock(&test_rwlock); 307 read_unlock(&test_rwlock); 308 } 309 } 310 311 static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl) 312 { 313 int i; 314 315 for (i = nloops; i >= 0; i--) { 316 read_lock(&test_rwlock); 317 un_delay(udl, ndl); 318 read_unlock(&test_rwlock); 319 } 320 } 321 322 static struct ref_scale_ops rwlock_ops = { 323 .init = ref_rwlock_init, 324 .readsection = ref_rwlock_section, 325 .delaysection = ref_rwlock_delay_section, 326 .name = "rwlock" 327 }; 328 329 // Definitions for rwsem 330 static struct rw_semaphore test_rwsem; 331 332 static void ref_rwsem_init(void) 333 { 334 init_rwsem(&test_rwsem); 335 } 336 337 static void ref_rwsem_section(const int nloops) 338 { 339 int i; 340 341 for (i = nloops; i >= 0; i--) { 342 down_read(&test_rwsem); 343 up_read(&test_rwsem); 344 } 345 } 346 347 static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl) 348 { 349 int i; 350 351 for (i = nloops; i >= 0; i--) { 352 down_read(&test_rwsem); 353 un_delay(udl, ndl); 354 up_read(&test_rwsem); 355 } 356 } 357 358 static struct ref_scale_ops rwsem_ops = { 359 .init = ref_rwsem_init, 360 .readsection = ref_rwsem_section, 361 .delaysection = ref_rwsem_delay_section, 362 .name = "rwsem" 363 }; 364 365 // Definitions for global spinlock 366 static DEFINE_SPINLOCK(test_lock); 367 368 static void ref_lock_section(const int nloops) 369 { 370 int i; 371 372 preempt_disable(); 373 for (i = nloops; i >= 0; i--) { 374 spin_lock(&test_lock); 375 spin_unlock(&test_lock); 376 } 377 preempt_enable(); 378 } 379 380 static void ref_lock_delay_section(const int nloops, const int udl, const int ndl) 381 { 382 int i; 383 384 preempt_disable(); 385 for (i = nloops; i >= 0; i--) { 386 spin_lock(&test_lock); 387 un_delay(udl, ndl); 388 spin_unlock(&test_lock); 389 } 390 preempt_enable(); 391 } 392 393 static struct ref_scale_ops lock_ops = { 394 .readsection = ref_lock_section, 395 .delaysection = ref_lock_delay_section, 396 .name = "lock" 397 }; 398 399 // Definitions for global irq-save spinlock 400 401 static void ref_lock_irq_section(const int nloops) 402 { 403 unsigned long flags; 404 int i; 405 406 preempt_disable(); 407 for (i = nloops; i >= 0; i--) { 408 spin_lock_irqsave(&test_lock, flags); 409 spin_unlock_irqrestore(&test_lock, flags); 410 } 411 preempt_enable(); 412 } 413 414 static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl) 415 { 416 unsigned long flags; 417 int i; 418 419 preempt_disable(); 420 for (i = nloops; i >= 0; i--) { 421 spin_lock_irqsave(&test_lock, flags); 422 un_delay(udl, ndl); 423 spin_unlock_irqrestore(&test_lock, flags); 424 } 425 preempt_enable(); 426 } 427 428 static struct ref_scale_ops lock_irq_ops = { 429 .readsection = ref_lock_irq_section, 430 .delaysection = ref_lock_irq_delay_section, 431 .name = "lock-irq" 432 }; 433 434 // Definitions acquire-release. 435 static DEFINE_PER_CPU(unsigned long, test_acqrel); 436 437 static void ref_acqrel_section(const int nloops) 438 { 439 unsigned long x; 440 int i; 441 442 preempt_disable(); 443 for (i = nloops; i >= 0; i--) { 444 x = smp_load_acquire(this_cpu_ptr(&test_acqrel)); 445 smp_store_release(this_cpu_ptr(&test_acqrel), x + 1); 446 } 447 preempt_enable(); 448 } 449 450 static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl) 451 { 452 unsigned long x; 453 int i; 454 455 preempt_disable(); 456 for (i = nloops; i >= 0; i--) { 457 x = smp_load_acquire(this_cpu_ptr(&test_acqrel)); 458 un_delay(udl, ndl); 459 smp_store_release(this_cpu_ptr(&test_acqrel), x + 1); 460 } 461 preempt_enable(); 462 } 463 464 static struct ref_scale_ops acqrel_ops = { 465 .readsection = ref_acqrel_section, 466 .delaysection = ref_acqrel_delay_section, 467 .name = "acqrel" 468 }; 469 470 static volatile u64 stopopts; 471 472 static void ref_clock_section(const int nloops) 473 { 474 u64 x = 0; 475 int i; 476 477 preempt_disable(); 478 for (i = nloops; i >= 0; i--) 479 x += ktime_get_real_fast_ns(); 480 preempt_enable(); 481 stopopts = x; 482 } 483 484 static void ref_clock_delay_section(const int nloops, const int udl, const int ndl) 485 { 486 u64 x = 0; 487 int i; 488 489 preempt_disable(); 490 for (i = nloops; i >= 0; i--) { 491 x += ktime_get_real_fast_ns(); 492 un_delay(udl, ndl); 493 } 494 preempt_enable(); 495 stopopts = x; 496 } 497 498 static struct ref_scale_ops clock_ops = { 499 .readsection = ref_clock_section, 500 .delaysection = ref_clock_delay_section, 501 .name = "clock" 502 }; 503 504 static void rcu_scale_one_reader(void) 505 { 506 if (readdelay <= 0) 507 cur_ops->readsection(loops); 508 else 509 cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000); 510 } 511 512 // Reader kthread. Repeatedly does empty RCU read-side 513 // critical section, minimizing update-side interference. 514 static int 515 ref_scale_reader(void *arg) 516 { 517 unsigned long flags; 518 long me = (long)arg; 519 struct reader_task *rt = &(reader_tasks[me]); 520 u64 start; 521 s64 duration; 522 523 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me); 524 WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids))); 525 set_user_nice(current, MAX_NICE); 526 atomic_inc(&n_init); 527 if (holdoff) 528 schedule_timeout_interruptible(holdoff * HZ); 529 repeat: 530 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id()); 531 532 // Wait for signal that this reader can start. 533 wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) || 534 torture_must_stop()); 535 536 if (torture_must_stop()) 537 goto end; 538 539 // Make sure that the CPU is affinitized appropriately during testing. 540 WARN_ON_ONCE(raw_smp_processor_id() != me); 541 542 WRITE_ONCE(rt->start_reader, 0); 543 if (!atomic_dec_return(&n_started)) 544 while (atomic_read_acquire(&n_started)) 545 cpu_relax(); 546 547 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d started", me, exp_idx); 548 549 550 // To reduce noise, do an initial cache-warming invocation, check 551 // in, and then keep warming until everyone has checked in. 552 rcu_scale_one_reader(); 553 if (!atomic_dec_return(&n_warmedup)) 554 while (atomic_read_acquire(&n_warmedup)) 555 rcu_scale_one_reader(); 556 // Also keep interrupts disabled. This also has the effect 557 // of preventing entries into slow path for rcu_read_unlock(). 558 local_irq_save(flags); 559 start = ktime_get_mono_fast_ns(); 560 561 rcu_scale_one_reader(); 562 563 duration = ktime_get_mono_fast_ns() - start; 564 local_irq_restore(flags); 565 566 rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration; 567 // To reduce runtime-skew noise, do maintain-load invocations until 568 // everyone is done. 569 if (!atomic_dec_return(&n_cooleddown)) 570 while (atomic_read_acquire(&n_cooleddown)) 571 rcu_scale_one_reader(); 572 573 if (atomic_dec_and_test(&nreaders_exp)) 574 wake_up(&main_wq); 575 576 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)", 577 me, exp_idx, atomic_read(&nreaders_exp)); 578 579 if (!torture_must_stop()) 580 goto repeat; 581 end: 582 torture_kthread_stopping("ref_scale_reader"); 583 return 0; 584 } 585 586 static void reset_readers(void) 587 { 588 int i; 589 struct reader_task *rt; 590 591 for (i = 0; i < nreaders; i++) { 592 rt = &(reader_tasks[i]); 593 594 rt->last_duration_ns = 0; 595 } 596 } 597 598 // Print the results of each reader and return the sum of all their durations. 599 static u64 process_durations(int n) 600 { 601 int i; 602 struct reader_task *rt; 603 char buf1[64]; 604 char *buf; 605 u64 sum = 0; 606 607 buf = kmalloc(128 + nreaders * 32, GFP_KERNEL); 608 if (!buf) 609 return 0; 610 buf[0] = 0; 611 sprintf(buf, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)", 612 exp_idx); 613 614 for (i = 0; i < n && !torture_must_stop(); i++) { 615 rt = &(reader_tasks[i]); 616 sprintf(buf1, "%d: %llu\t", i, rt->last_duration_ns); 617 618 if (i % 5 == 0) 619 strcat(buf, "\n"); 620 strcat(buf, buf1); 621 622 sum += rt->last_duration_ns; 623 } 624 strcat(buf, "\n"); 625 626 SCALEOUT("%s\n", buf); 627 628 kfree(buf); 629 return sum; 630 } 631 632 // The main_func is the main orchestrator, it performs a bunch of 633 // experiments. For every experiment, it orders all the readers 634 // involved to start and waits for them to finish the experiment. It 635 // then reads their timestamps and starts the next experiment. Each 636 // experiment progresses from 1 concurrent reader to N of them at which 637 // point all the timestamps are printed. 638 static int main_func(void *arg) 639 { 640 bool errexit = false; 641 int exp, r; 642 char buf1[64]; 643 char *buf; 644 u64 *result_avg; 645 646 set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids)); 647 set_user_nice(current, MAX_NICE); 648 649 VERBOSE_SCALEOUT("main_func task started"); 650 result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL); 651 buf = kzalloc(64 + nruns * 32, GFP_KERNEL); 652 if (!result_avg || !buf) { 653 VERBOSE_SCALEOUT_ERRSTRING("out of memory"); 654 errexit = true; 655 } 656 if (holdoff) 657 schedule_timeout_interruptible(holdoff * HZ); 658 659 // Wait for all threads to start. 660 atomic_inc(&n_init); 661 while (atomic_read(&n_init) < nreaders + 1) 662 schedule_timeout_uninterruptible(1); 663 664 // Start exp readers up per experiment 665 for (exp = 0; exp < nruns && !torture_must_stop(); exp++) { 666 if (errexit) 667 break; 668 if (torture_must_stop()) 669 goto end; 670 671 reset_readers(); 672 atomic_set(&nreaders_exp, nreaders); 673 atomic_set(&n_started, nreaders); 674 atomic_set(&n_warmedup, nreaders); 675 atomic_set(&n_cooleddown, nreaders); 676 677 exp_idx = exp; 678 679 for (r = 0; r < nreaders; r++) { 680 smp_store_release(&reader_tasks[r].start_reader, 1); 681 wake_up(&reader_tasks[r].wq); 682 } 683 684 VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers", 685 nreaders); 686 687 wait_event(main_wq, 688 !atomic_read(&nreaders_exp) || torture_must_stop()); 689 690 VERBOSE_SCALEOUT("main_func: experiment ended"); 691 692 if (torture_must_stop()) 693 goto end; 694 695 result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops); 696 } 697 698 // Print the average of all experiments 699 SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n"); 700 701 if (!errexit) { 702 buf[0] = 0; 703 strcat(buf, "\n"); 704 strcat(buf, "Runs\tTime(ns)\n"); 705 } 706 707 for (exp = 0; exp < nruns; exp++) { 708 u64 avg; 709 u32 rem; 710 711 if (errexit) 712 break; 713 avg = div_u64_rem(result_avg[exp], 1000, &rem); 714 sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem); 715 strcat(buf, buf1); 716 } 717 718 if (!errexit) 719 SCALEOUT("%s", buf); 720 721 // This will shutdown everything including us. 722 if (shutdown) { 723 shutdown_start = 1; 724 wake_up(&shutdown_wq); 725 } 726 727 // Wait for torture to stop us 728 while (!torture_must_stop()) 729 schedule_timeout_uninterruptible(1); 730 731 end: 732 torture_kthread_stopping("main_func"); 733 kfree(result_avg); 734 kfree(buf); 735 return 0; 736 } 737 738 static void 739 ref_scale_print_module_parms(struct ref_scale_ops *cur_ops, const char *tag) 740 { 741 pr_alert("%s" SCALE_FLAG 742 "--- %s: verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, 743 verbose, shutdown, holdoff, loops, nreaders, nruns, readdelay); 744 } 745 746 static void 747 ref_scale_cleanup(void) 748 { 749 int i; 750 751 if (torture_cleanup_begin()) 752 return; 753 754 if (!cur_ops) { 755 torture_cleanup_end(); 756 return; 757 } 758 759 if (reader_tasks) { 760 for (i = 0; i < nreaders; i++) 761 torture_stop_kthread("ref_scale_reader", 762 reader_tasks[i].task); 763 } 764 kfree(reader_tasks); 765 766 torture_stop_kthread("main_task", main_task); 767 kfree(main_task); 768 769 // Do scale-type-specific cleanup operations. 770 if (cur_ops->cleanup != NULL) 771 cur_ops->cleanup(); 772 773 torture_cleanup_end(); 774 } 775 776 // Shutdown kthread. Just waits to be awakened, then shuts down system. 777 static int 778 ref_scale_shutdown(void *arg) 779 { 780 wait_event(shutdown_wq, shutdown_start); 781 782 smp_mb(); // Wake before output. 783 ref_scale_cleanup(); 784 kernel_power_off(); 785 786 return -EINVAL; 787 } 788 789 static int __init 790 ref_scale_init(void) 791 { 792 long i; 793 int firsterr = 0; 794 static struct ref_scale_ops *scale_ops[] = { 795 &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops, 796 &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops, 797 }; 798 799 if (!torture_init_begin(scale_type, verbose)) 800 return -EBUSY; 801 802 for (i = 0; i < ARRAY_SIZE(scale_ops); i++) { 803 cur_ops = scale_ops[i]; 804 if (strcmp(scale_type, cur_ops->name) == 0) 805 break; 806 } 807 if (i == ARRAY_SIZE(scale_ops)) { 808 pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type); 809 pr_alert("rcu-scale types:"); 810 for (i = 0; i < ARRAY_SIZE(scale_ops); i++) 811 pr_cont(" %s", scale_ops[i]->name); 812 pr_cont("\n"); 813 firsterr = -EINVAL; 814 cur_ops = NULL; 815 goto unwind; 816 } 817 if (cur_ops->init) 818 cur_ops->init(); 819 820 ref_scale_print_module_parms(cur_ops, "Start of test"); 821 822 // Shutdown task 823 if (shutdown) { 824 init_waitqueue_head(&shutdown_wq); 825 firsterr = torture_create_kthread(ref_scale_shutdown, NULL, 826 shutdown_task); 827 if (firsterr) 828 goto unwind; 829 schedule_timeout_uninterruptible(1); 830 } 831 832 // Reader tasks (default to ~75% of online CPUs). 833 if (nreaders < 0) 834 nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2); 835 if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops)) 836 loops = 1; 837 if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders)) 838 nreaders = 1; 839 if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns)) 840 nruns = 1; 841 reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]), 842 GFP_KERNEL); 843 if (!reader_tasks) { 844 VERBOSE_SCALEOUT_ERRSTRING("out of memory"); 845 firsterr = -ENOMEM; 846 goto unwind; 847 } 848 849 VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders); 850 851 for (i = 0; i < nreaders; i++) { 852 firsterr = torture_create_kthread(ref_scale_reader, (void *)i, 853 reader_tasks[i].task); 854 if (firsterr) 855 goto unwind; 856 857 init_waitqueue_head(&(reader_tasks[i].wq)); 858 } 859 860 // Main Task 861 init_waitqueue_head(&main_wq); 862 firsterr = torture_create_kthread(main_func, NULL, main_task); 863 if (firsterr) 864 goto unwind; 865 866 torture_init_end(); 867 return 0; 868 869 unwind: 870 torture_init_end(); 871 ref_scale_cleanup(); 872 if (shutdown) { 873 WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST)); 874 kernel_power_off(); 875 } 876 return firsterr; 877 } 878 879 module_init(ref_scale_init); 880 module_exit(ref_scale_cleanup); 881