// SPDX-License-Identifier: GPL-2.0+
/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * Copyright IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *          Manfred Spraul <manfred@colorfullife.com>
 *
 * Based on the original work by Paul McKenney <paulmck@linux.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *          http://lse.sourceforge.net/locking/rcupdate.html
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/export.h>
#include <linux/hardirq.h>
#include <linux/delay.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
#include <linux/tick.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched/isolation.h>
#include <linux/kprobes.h>
#include <linux/slab.h>
#include <linux/irq_work.h>
#include <linux/rcupdate_trace.h>

#define CREATE_TRACE_POINTS

#include "rcu.h"

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcupdate."

#ifndef CONFIG_TINY_RCU
module_param(rcu_expedited, int, 0);
module_param(rcu_normal, int, 0);
static int rcu_normal_after_boot;
module_param(rcu_normal_after_boot, int, 0);
#endif /* #ifndef CONFIG_TINY_RCU */
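/*
 * Because of the MODULE_PARAM_PREFIX above, the parameters declared in this
 * file appear under "rcupdate." on the kernel command line.  As one
 * illustrative combination (nothing below depends on it), booting with:
 *
 *        rcupdate.rcu_expedited=1 rcupdate.rcu_normal_after_boot=1
 *
 * expedites grace periods during boot and then reverts to normal
 * (non-expedited) grace periods once rcu_end_inkernel_boot() runs, because
 * rcu_normal overrides rcu_expedited.
 */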
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
 * rcu_read_lock_held_common() - might we be in RCU-sched read-side critical section?
 * @ret: Best guess answer if lockdep cannot be relied on
 *
 * Returns true if lockdep must be ignored, in which case ``*ret`` contains
 * the best guess described below.  Otherwise returns false, in which
 * case ``*ret`` tells the caller nothing and the caller should instead
 * consult lockdep.
 *
 * If CONFIG_DEBUG_LOCK_ALLOC is selected, set ``*ret`` to nonzero iff in an
 * RCU-sched read-side critical section.  In absence of
 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
 * critical section unless it can prove otherwise.  Note that disabling
 * of preemption (including disabling irqs) counts as an RCU-sched
 * read-side critical section.  This is useful for debug checks in functions
 * that require that they be called within an RCU-sched read-side
 * critical section.
 *
 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
 *
 * Note that if the CPU is in the idle loop from an RCU point of view (that
 * is, in the section between rcu_idle_enter() and rcu_idle_exit()), then
 * rcu_read_lock_held() sets ``*ret`` to false even if the CPU did an
 * rcu_read_lock().  The reason for this is that RCU ignores CPUs that are
 * in such a section, considering these as in extended quiescent state,
 * so such a CPU is effectively never in an RCU read-side critical section
 * regardless of what RCU primitives it invokes.  This state of affairs is
 * required --- we need to keep an RCU-free window in idle where the CPU may
 * possibly enter into low power mode.  This way we can notice an extended
 * quiescent state to other CPUs that started a grace period.  Otherwise
 * we would delay any grace period as long as we run in the idle task.
 *
 * Similarly, we avoid claiming an RCU read lock held if the current
 * CPU is offline.
 */
static bool rcu_read_lock_held_common(bool *ret)
{
        if (!debug_lockdep_rcu_enabled()) {
                *ret = true;
                return true;
        }
        if (!rcu_is_watching()) {
                *ret = false;
                return true;
        }
        if (!rcu_lockdep_current_cpu_online()) {
                *ret = false;
                return true;
        }
        return false;
}

int rcu_read_lock_sched_held(void)
{
        bool ret;

        if (rcu_read_lock_held_common(&ret))
                return ret;
        return lock_is_held(&rcu_sched_lock_map) || !preemptible();
}
EXPORT_SYMBOL(rcu_read_lock_sched_held);
#endif

#ifndef CONFIG_TINY_RCU

/*
 * Should expedited grace-period primitives always fall back to their
 * non-expedited counterparts?  Intended for use within RCU.  Note
 * that if the user specifies both rcu_expedited and rcu_normal, then
 * rcu_normal wins.  (Except during the time period during boot from
 * when the first task is spawned until the rcu_set_runtime_mode()
 * core_initcall() is invoked, at which point everything is expedited.)
 */
bool rcu_gp_is_normal(void)
{
        return READ_ONCE(rcu_normal) &&
               rcu_scheduler_active != RCU_SCHEDULER_INIT;
}
EXPORT_SYMBOL_GPL(rcu_gp_is_normal);

static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);

/*
 * Should normal grace-period primitives be expedited?  Intended for
 * use within RCU.  Note that this function takes the rcu_expedited
 * sysfs/boot variable and rcu_scheduler_active into account as well
 * as the rcu_expedite_gp() nesting.  So looping on rcu_unexpedite_gp()
 * until rcu_gp_is_expedited() returns false is a -really- bad idea.
 */
bool rcu_gp_is_expedited(void)
{
        return rcu_expedited || atomic_read(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_gp_is_expedited);

/**
 * rcu_expedite_gp - Expedite future RCU grace periods
 *
 * After a call to this function, future calls to synchronize_rcu() and
 * friends act as if the corresponding synchronize_rcu_expedited() function
 * had instead been called.
 */
void rcu_expedite_gp(void)
{
        atomic_inc(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_expedite_gp);

/**
 * rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation
 *
 * Undo a prior call to rcu_expedite_gp().  If all prior calls to
 * rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(),
 * and if the rcu_expedited sysfs/boot parameter is not set, then all
 * subsequent calls to synchronize_rcu() and friends will return to
 * their normal non-expedited behavior.
 */
void rcu_unexpedite_gp(void)
{
        atomic_dec(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
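/*
 * Illustrative sketch of the intended pairing (the function and work names
 * are hypothetical, not part of this file):
 *
 *        static void example_low_latency_phase(void)
 *        {
 *                rcu_expedite_gp();      // synchronize_rcu() now behaves like
 *                                        // synchronize_rcu_expedited()
 *                do_reconfiguration();   // hypothetical latency-sensitive work
 *                rcu_unexpedite_gp();    // normal behavior resumes once all
 *                                        // nested expedite requests are undone
 *        }
 *
 * Note that rcu_end_inkernel_boot() below is exactly such an
 * rcu_unexpedite_gp() call, paired with the ATOMIC_INIT(1) value of
 * rcu_expedited_nesting above.
 */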
static bool rcu_boot_ended __read_mostly;

/*
 * Inform RCU of the end of the in-kernel boot sequence.
 */
void rcu_end_inkernel_boot(void)
{
        rcu_unexpedite_gp();
        if (rcu_normal_after_boot)
                WRITE_ONCE(rcu_normal, 1);
        rcu_boot_ended = true;
}

/*
 * Let rcutorture know when it is OK to turn it up to eleven.
 */
bool rcu_inkernel_boot_has_ended(void)
{
        return rcu_boot_ended;
}
EXPORT_SYMBOL_GPL(rcu_inkernel_boot_has_ended);

#endif /* #ifndef CONFIG_TINY_RCU */

/*
 * Test each non-SRCU synchronous grace-period wait API.  This is
 * useful just after a change in mode for these primitives, and
 * during early boot.
 */
void rcu_test_sync_prims(void)
{
        if (!IS_ENABLED(CONFIG_PROVE_RCU))
                return;
        synchronize_rcu();
        synchronize_rcu_expedited();
}

#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU)

/*
 * Switch to run-time mode once RCU has fully initialized.
 */
static int __init rcu_set_runtime_mode(void)
{
        rcu_test_sync_prims();
        rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
        kfree_rcu_scheduler_running();
        rcu_test_sync_prims();
        return 0;
}
core_initcall(rcu_set_runtime_mode);

#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map = {
        .name = "rcu_read_lock",
        .key = &rcu_lock_key,
        .wait_type_outer = LD_WAIT_FREE,
        .wait_type_inner = LD_WAIT_CONFIG, /* XXX PREEMPT_RCU ? */
};
EXPORT_SYMBOL_GPL(rcu_lock_map);

static struct lock_class_key rcu_bh_lock_key;
struct lockdep_map rcu_bh_lock_map = {
        .name = "rcu_read_lock_bh",
        .key = &rcu_bh_lock_key,
        .wait_type_outer = LD_WAIT_FREE,
        .wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_LOCK also makes BH preemptible */
};
EXPORT_SYMBOL_GPL(rcu_bh_lock_map);

static struct lock_class_key rcu_sched_lock_key;
struct lockdep_map rcu_sched_lock_map = {
        .name = "rcu_read_lock_sched",
        .key = &rcu_sched_lock_key,
        .wait_type_outer = LD_WAIT_FREE,
        .wait_type_inner = LD_WAIT_SPIN,
};
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);

// Tell lockdep when RCU callbacks are being invoked.
static struct lock_class_key rcu_callback_key;
struct lockdep_map rcu_callback_map =
        STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
EXPORT_SYMBOL_GPL(rcu_callback_map);

noinstr int notrace debug_lockdep_rcu_enabled(void)
{
        return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
               current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
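/*
 * A rough sketch of how the lockdep maps above are consulted (simplified;
 * see the rcu_read_lock() and rcu_read_unlock() wrappers in
 * include/linux/rcupdate.h for the real definitions):
 *
 *        rcu_read_lock()   ->  rcu_lock_acquire(&rcu_lock_map);
 *        rcu_read_unlock() ->  rcu_lock_release(&rcu_lock_map);
 *
 * Under CONFIG_DEBUG_LOCK_ALLOC this is what lets lock_is_held(&rcu_lock_map)
 * in the *_held() functions below report whether the current task is inside
 * an RCU read-side critical section.
 */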
/**
 * rcu_read_lock_held() - might we be in RCU read-side critical section?
 *
 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
 * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
 * this assumes we are in an RCU read-side critical section unless it can
 * prove otherwise.  This is useful for debug checks in functions that
 * require that they be called within an RCU read-side critical section.
 *
 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
 *
 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
 * occur in the same context; for example, it is illegal to invoke
 * rcu_read_unlock() in process context if the matching rcu_read_lock()
 * was invoked from within an irq handler.
 *
 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
 * offline from an RCU perspective, so check for those as well.
 */
int rcu_read_lock_held(void)
{
        bool ret;

        if (rcu_read_lock_held_common(&ret))
                return ret;
        return lock_is_held(&rcu_lock_map);
}
EXPORT_SYMBOL_GPL(rcu_read_lock_held);

/**
 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
 *
 * Check for bottom half being disabled, which covers both the
 * CONFIG_PROVE_RCU and the !CONFIG_PROVE_RCU cases.  Note that if someone
 * uses rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
 * will show the situation.  This is useful for debug checks in functions
 * that require that they be called within an RCU read-side critical
 * section.
 *
 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
 *
 * Note that rcu_read_lock_bh() is disallowed if the CPU is either idle or
 * offline from an RCU perspective, so check for those as well.
 */
int rcu_read_lock_bh_held(void)
{
        bool ret;

        if (rcu_read_lock_held_common(&ret))
                return ret;
        return in_softirq() || irqs_disabled();
}
EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);

int rcu_read_lock_any_held(void)
{
        bool ret;

        if (rcu_read_lock_held_common(&ret))
                return ret;
        if (lock_is_held(&rcu_lock_map) ||
            lock_is_held(&rcu_bh_lock_map) ||
            lock_is_held(&rcu_sched_lock_map))
                return 1;
        return !preemptible();
}
EXPORT_SYMBOL_GPL(rcu_read_lock_any_held);

#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
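/*
 * Minimal sketch of the intended use of the *_held() helpers above: a debug
 * assertion in a function that must be called under rcu_read_lock().  The
 * foo_lookup() function and its types are hypothetical.
 *
 *        static struct foo *foo_lookup(struct list_head *head, int key)
 *        {
 *                struct foo *p;
 *
 *                RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
 *                                 "foo_lookup() needs rcu_read_lock()");
 *                list_for_each_entry_rcu(p, head, list)
 *                        if (p->key == key)
 *                                return p;
 *                return NULL;
 *        }
 */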
/**
 * wakeme_after_rcu() - Callback function to awaken a task after grace period
 * @head: Pointer to rcu_head member within rcu_synchronize structure
 *
 * Awaken the corresponding task now that a grace period has elapsed.
 */
void wakeme_after_rcu(struct rcu_head *head)
{
        struct rcu_synchronize *rcu;

        rcu = container_of(head, struct rcu_synchronize, head);
        complete(&rcu->completion);
}
EXPORT_SYMBOL_GPL(wakeme_after_rcu);

void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
                   struct rcu_synchronize *rs_array)
{
        int i;
        int j;

        /* Initialize and register callbacks for each crcu_array element. */
        for (i = 0; i < n; i++) {
                if (checktiny &&
                    (crcu_array[i] == call_rcu)) {
                        might_sleep();
                        continue;
                }
                for (j = 0; j < i; j++)
                        if (crcu_array[j] == crcu_array[i])
                                break;
                if (j == i) {
                        init_rcu_head_on_stack(&rs_array[i].head);
                        init_completion(&rs_array[i].completion);
                        (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
                }
        }

        /* Wait for all callbacks to be invoked. */
        for (i = 0; i < n; i++) {
                if (checktiny &&
                    (crcu_array[i] == call_rcu))
                        continue;
                for (j = 0; j < i; j++)
                        if (crcu_array[j] == crcu_array[i])
                                break;
                if (j == i) {
                        wait_for_completion(&rs_array[i].completion);
                        destroy_rcu_head_on_stack(&rs_array[i].head);
                }
        }
}
EXPORT_SYMBOL_GPL(__wait_rcu_gp);
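/*
 * __wait_rcu_gp() is normally reached through the _wait_rcu_gp() wrapper
 * used by synchronize_rcu_mult() in include/linux/rcupdate_wait.h.  As a
 * minimal sketch, a caller wanting to wait for both a normal RCU grace
 * period and (where configured) an RCU-tasks grace period might do:
 *
 *        synchronize_rcu_mult(call_rcu, call_rcu_tasks);
 *
 * which registers one on-stack rcu_synchronize callback per distinct
 * call_rcu_func_t and then waits for each completion, exactly as in the two
 * loops above.
 */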
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
void init_rcu_head(struct rcu_head *head)
{
        debug_object_init(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(init_rcu_head);

void destroy_rcu_head(struct rcu_head *head)
{
        debug_object_free(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_rcu_head);

static bool rcuhead_is_static_object(void *addr)
{
        return true;
}

/**
 * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
 * @head: pointer to rcu_head structure to be initialized
 *
 * This function informs debugobjects of a new rcu_head structure that
 * has been allocated as an auto variable on the stack.  This function
 * is not required for rcu_head structures that are statically defined or
 * that are dynamically allocated on the heap.  This function has no
 * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
 */
void init_rcu_head_on_stack(struct rcu_head *head)
{
        debug_object_init_on_stack(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);

/**
 * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
 * @head: pointer to rcu_head structure that was initialized on the stack
 *
 * This function informs debugobjects that an on-stack rcu_head structure
 * is about to go out of scope.  As with init_rcu_head_on_stack(), this
 * function is not required for rcu_head structures that are statically
 * defined or that are dynamically allocated on the heap.  Also as with
 * init_rcu_head_on_stack(), this function has no effect for
 * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
 */
void destroy_rcu_head_on_stack(struct rcu_head *head)
{
        debug_object_free(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);

const struct debug_obj_descr rcuhead_debug_descr = {
        .name = "rcu_head",
        .is_static_object = rcuhead_is_static_object,
};
EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */

#if defined(CONFIG_TREE_RCU) || defined(CONFIG_RCU_TRACE)
void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp,
                               unsigned long secs,
                               unsigned long c_old, unsigned long c)
{
        trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
}
EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
#else
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
        do { } while (0)
#endif

#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
/* Get rcutorture access to sched_setaffinity(). */
long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
{
        int ret;

        ret = sched_setaffinity(pid, in_mask);
        WARN_ONCE(ret, "%s: sched_setaffinity() returned %d\n", __func__, ret);
        return ret;
}
EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
#endif

#ifdef CONFIG_RCU_STALL_COMMON
int rcu_cpu_stall_ftrace_dump __read_mostly;
module_param(rcu_cpu_stall_ftrace_dump, int, 0644);
int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings.
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
module_param(rcu_cpu_stall_suppress, int, 0644);
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */

// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
// warnings.  Also used by rcutorture even if stall warnings are excluded.
int rcu_cpu_stall_suppress_at_boot __read_mostly; // !0 = suppress boot stalls.
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_at_boot);
module_param(rcu_cpu_stall_suppress_at_boot, int, 0444);

#ifdef CONFIG_PROVE_RCU

/*
 * Early boot self test parameters.
 */
static bool rcu_self_test;
module_param(rcu_self_test, bool, 0444);

static int rcu_self_test_counter;

static void test_callback(struct rcu_head *r)
{
        rcu_self_test_counter++;
        pr_info("RCU test callback executed %d\n", rcu_self_test_counter);
}

DEFINE_STATIC_SRCU(early_srcu);

struct early_boot_kfree_rcu {
        struct rcu_head rh;
};

static void early_boot_test_call_rcu(void)
{
        static struct rcu_head head;
        static struct rcu_head shead;
        struct early_boot_kfree_rcu *rhp;

        call_rcu(&head, test_callback);
        if (IS_ENABLED(CONFIG_SRCU))
                call_srcu(&early_srcu, &shead, test_callback);
        rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
        if (!WARN_ON_ONCE(!rhp))
                kfree_rcu(rhp, rh);
}

void rcu_early_boot_tests(void)
{
        pr_info("Running RCU self tests\n");

        if (rcu_self_test)
                early_boot_test_call_rcu();
        rcu_test_sync_prims();
}

static int rcu_verify_early_boot_tests(void)
{
        int ret = 0;
        int early_boot_test_counter = 0;

        if (rcu_self_test) {
                early_boot_test_counter++;
                rcu_barrier();
                if (IS_ENABLED(CONFIG_SRCU)) {
                        early_boot_test_counter++;
                        srcu_barrier(&early_srcu);
                }
        }
        if (rcu_self_test_counter != early_boot_test_counter) {
                WARN_ON(1);
                ret = -1;
        }

        return ret;
}
late_initcall(rcu_verify_early_boot_tests);
#else
void rcu_early_boot_tests(void) {}
#endif /* CONFIG_PROVE_RCU */

#include "tasks.h"
#ifndef CONFIG_TINY_RCU

/*
 * Print any significant non-default boot-time settings.
 */
void __init rcupdate_announce_bootup_oddness(void)
{
        if (rcu_normal)
                pr_info("\tNo expedited grace period (rcu_normal).\n");
        else if (rcu_normal_after_boot)
                pr_info("\tNo expedited grace period (rcu_normal_after_boot).\n");
        else if (rcu_expedited)
                pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
        if (rcu_cpu_stall_suppress)
                pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
        if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
                pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
        rcu_tasks_bootup_oddness();
}

#endif /* #ifndef CONFIG_TINY_RCU */