/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Task-based RCU implementations.
 *
 * Copyright (C) 2020 Paul E. McKenney
 */

#ifdef CONFIG_TASKS_RCU_GENERIC

////////////////////////////////////////////////////////////////////////
//
// Generic data structures.

struct rcu_tasks;
typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp);
typedef void (*pregp_func_t)(void);
typedef void (*pertask_func_t)(struct task_struct *t, struct list_head *hop);
typedef void (*postscan_func_t)(struct list_head *hop);
typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
typedef void (*postgp_func_t)(struct rcu_tasks *rtp);

/**
 * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
 * @cbs_head: Head of callback list.
 * @cbs_tail: Tail pointer for callback list.
 * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
 * @cbs_lock: Lock protecting callback list.
 * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
 * @gp_func: This flavor's grace-period-wait function.
 * @gp_state: Grace period's most recent state transition (debugging).
 * @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
 * @init_fract: Initial backoff sleep interval.
 * @gp_jiffies: Time of last @gp_state transition.
 * @gp_start: Most recent grace-period start in jiffies.
 * @n_gps: Number of grace periods completed since boot.
 * @n_ipis: Number of IPIs sent to encourage grace periods to end.
 * @n_ipis_fails: Number of IPI-send failures.
 * @pregp_func: This flavor's pre-grace-period function (optional).
 * @pertask_func: This flavor's per-task scan function (optional).
 * @postscan_func: This flavor's post-task scan function (optional).
 * @holdouts_func: This flavor's holdout-list scan function (optional).
 * @postgp_func: This flavor's post-grace-period function (optional).
 * @call_func: This flavor's call_rcu()-equivalent function.
 * @name: This flavor's textual name.
 * @kname: This flavor's kthread name.
 */
struct rcu_tasks {
	struct rcu_head *cbs_head;
	struct rcu_head **cbs_tail;
	struct wait_queue_head cbs_wq;
	raw_spinlock_t cbs_lock;
	int gp_state;
	int gp_sleep;
	int init_fract;
	unsigned long gp_jiffies;
	unsigned long gp_start;
	unsigned long n_gps;
	unsigned long n_ipis;
	unsigned long n_ipis_fails;
	struct task_struct *kthread_ptr;
	rcu_tasks_gp_func_t gp_func;
	pregp_func_t pregp_func;
	pertask_func_t pertask_func;
	postscan_func_t postscan_func;
	holdouts_func_t holdouts_func;
	postgp_func_t postgp_func;
	call_rcu_func_t call_func;
	char *name;
	char *kname;
};

#define DEFINE_RCU_TASKS(rt_name, gp, call, n)				\
static struct rcu_tasks rt_name =					\
{									\
	.cbs_tail = &rt_name.cbs_head,					\
	.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq),	\
	.cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_lock),	\
	.gp_func = gp,							\
	.call_func = call,						\
	.name = n,							\
	.kname = #rt_name,						\
}

/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);

/* Avoid IPIing CPUs early in the grace period. */
#define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0)
static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
module_param(rcu_task_ipi_delay, int, 0644);
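
/*
 * Illustrative sketch (not part of the build): for a hypothetical flavor
 * named "rcu_tasks_foo", DEFINE_RCU_TASKS(rcu_tasks_foo, foo_wait_gp,
 * call_rcu_tasks_foo, "RCU Tasks Foo") above expands to roughly the
 * following, with ->cbs_tail initially pointing at the empty ->cbs_head
 * so that enqueues need no empty-list special case:
 *
 *	static struct rcu_tasks rcu_tasks_foo = {
 *		.cbs_tail = &rcu_tasks_foo.cbs_head,
 *		.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_tasks_foo.cbs_wq),
 *		.cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_tasks_foo.cbs_lock),
 *		.gp_func = foo_wait_gp,
 *		.call_func = call_rcu_tasks_foo,
 *		.name = "RCU Tasks Foo",
 *		.kname = "rcu_tasks_foo",
 *	};
 */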

/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
module_param(rcu_task_stall_timeout, int, 0644);

/* RCU tasks grace-period state for debugging. */
#define RTGS_INIT		 0
#define RTGS_WAIT_WAIT_CBS	 1
#define RTGS_WAIT_GP		 2
#define RTGS_PRE_WAIT_GP	 3
#define RTGS_SCAN_TASKLIST	 4
#define RTGS_POST_SCAN_TASKLIST	 5
#define RTGS_WAIT_SCAN_HOLDOUTS	 6
#define RTGS_SCAN_HOLDOUTS	 7
#define RTGS_POST_GP		 8
#define RTGS_WAIT_READERS	 9
#define RTGS_INVOKE_CBS		10
#define RTGS_WAIT_CBS		11
#ifndef CONFIG_TINY_RCU
static const char * const rcu_tasks_gp_state_names[] = {
	"RTGS_INIT",
	"RTGS_WAIT_WAIT_CBS",
	"RTGS_WAIT_GP",
	"RTGS_PRE_WAIT_GP",
	"RTGS_SCAN_TASKLIST",
	"RTGS_POST_SCAN_TASKLIST",
	"RTGS_WAIT_SCAN_HOLDOUTS",
	"RTGS_SCAN_HOLDOUTS",
	"RTGS_POST_GP",
	"RTGS_WAIT_READERS",
	"RTGS_INVOKE_CBS",
	"RTGS_WAIT_CBS",
};
#endif /* #ifndef CONFIG_TINY_RCU */

////////////////////////////////////////////////////////////////////////
//
// Generic code.

/* Record grace-period phase and time. */
static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
{
	rtp->gp_state = newstate;
	rtp->gp_jiffies = jiffies;
}

#ifndef CONFIG_TINY_RCU
/* Return state name. */
static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
{
	int i = data_race(rtp->gp_state); // Let KCSAN detect update races
	int j = READ_ONCE(i); // Prevent the compiler from reading twice

	if (j >= ARRAY_SIZE(rcu_tasks_gp_state_names))
		return "???";
	return rcu_tasks_gp_state_names[j];
}
#endif /* #ifndef CONFIG_TINY_RCU */

// Enqueue a callback for the specified flavor of Tasks RCU.
static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
				   struct rcu_tasks *rtp)
{
	unsigned long flags;
	bool needwake;

	rhp->next = NULL;
	rhp->func = func;
	raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
	needwake = !rtp->cbs_head;
	WRITE_ONCE(*rtp->cbs_tail, rhp);
	rtp->cbs_tail = &rhp->next;
	raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
	/* We can't create the thread unless interrupts are enabled. */
	if (needwake && READ_ONCE(rtp->kthread_ptr))
		wake_up(&rtp->cbs_wq);
}

// Wait for a grace period for the specified flavor of Tasks RCU.
static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
{
	/* Complain if the scheduler has not started. */
	RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
			 "synchronize_rcu_tasks called too soon");

	/* Wait for the grace period. */
	wait_rcu_gp(rtp->call_func);
}
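
/*
 * For reference, the wait_rcu_gp(rtp->call_func) call above is roughly
 * equivalent to the following open-coded pattern, which queues a callback
 * that fires a completion after the grace period and then blocks on that
 * completion.  The "foo_" names are illustrative only:
 *
 *	struct foo_sync {
 *		struct rcu_head rh;
 *		struct completion done;
 *	};
 *
 *	static void foo_wakeme(struct rcu_head *rhp)
 *	{
 *		complete(&container_of(rhp, struct foo_sync, rh)->done);
 *	}
 *
 *	static void foo_synchronize(struct rcu_tasks *rtp)
 *	{
 *		struct foo_sync s;
 *
 *		init_completion(&s.done);
 *		rtp->call_func(&s.rh, foo_wakeme);
 *		wait_for_completion(&s.done);
 *	}
 */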

/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
	unsigned long flags;
	struct rcu_head *list;
	struct rcu_head *next;
	struct rcu_tasks *rtp = arg;

	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
	housekeeping_affine(current, HK_FLAG_RCU);
	WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!

	/*
	 * Each pass through the following loop makes one check for
	 * newly arrived callbacks, and, if there are some, waits for
	 * one RCU-tasks grace period and then invokes the callbacks.
	 * This loop is terminated by the system going down.  ;-)
	 */
	for (;;) {

		/* Pick up any new callbacks. */
		raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
		smp_mb__after_spinlock(); // Order updates vs. GP.
		list = rtp->cbs_head;
		rtp->cbs_head = NULL;
		rtp->cbs_tail = &rtp->cbs_head;
		raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);

		/* If there were none, wait a bit and start over. */
		if (!list) {
			wait_event_interruptible(rtp->cbs_wq,
						 READ_ONCE(rtp->cbs_head));
			if (!rtp->cbs_head) {
				WARN_ON(signal_pending(current));
				set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS);
				schedule_timeout_idle(HZ/10);
			}
			continue;
		}

		// Wait for one grace period.
		set_tasks_gp_state(rtp, RTGS_WAIT_GP);
		rtp->gp_start = jiffies;
		rtp->gp_func(rtp);
		rtp->n_gps++;

		/* Invoke the callbacks. */
		set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
		while (list) {
			next = list->next;
			local_bh_disable();
			list->func(list);
			local_bh_enable();
			list = next;
			cond_resched();
		}
		/* Paranoid sleep to keep this from entering a tight loop. */
		schedule_timeout_idle(rtp->gp_sleep);

		set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
	}
}

/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
{
	struct task_struct *t;

	t = kthread_run(rcu_tasks_kthread, rtp, "%s_kthread", rtp->kname);
	if (WARN_ONCE(IS_ERR(t), "%s: Could not start %s grace-period kthread, OOM is now expected behavior\n", __func__, rtp->name))
		return;
	smp_mb(); /* Ensure others see full kthread. */
}

#ifndef CONFIG_TINY_RCU

/*
 * Print any non-default Tasks RCU settings.
 */
static void __init rcu_tasks_bootup_oddness(void)
{
#if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
	if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
		pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
#endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
#ifdef CONFIG_TASKS_RCU
	pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_RUDE_RCU
	pr_info("\tRude variant of Tasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
	pr_info("\tTracing variant of Tasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}

#endif /* #ifndef CONFIG_TINY_RCU */

#ifndef CONFIG_TINY_RCU
/* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
{
	pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
		rtp->kname,
		tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
		jiffies - data_race(rtp->gp_jiffies),
		data_race(rtp->n_gps),
		data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
		".k"[!!data_race(rtp->kthread_ptr)],
		".C"[!!data_race(rtp->cbs_head)],
		s);
}
#endif /* #ifndef CONFIG_TINY_RCU */

static void exit_tasks_rcu_finish_trace(struct task_struct *t);

#if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)

////////////////////////////////////////////////////////////////////////
//
// Shared code between task-list-scanning variants of Tasks RCU.

/* Wait for one RCU-tasks grace period. */
static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
{
	struct task_struct *g, *t;
	unsigned long lastreport;
	LIST_HEAD(holdouts);
	int fract;

	set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
	rtp->pregp_func();

	/*
	 * There were callbacks, so we need to wait for an RCU-tasks
	 * grace period.  Start off by scanning the task list for tasks
	 * that are not already voluntarily blocked.  Mark these tasks
	 * and make a list of them in holdouts.
	 */
	set_tasks_gp_state(rtp, RTGS_SCAN_TASKLIST);
	rcu_read_lock();
	for_each_process_thread(g, t)
		rtp->pertask_func(t, &holdouts);
	rcu_read_unlock();

	set_tasks_gp_state(rtp, RTGS_POST_SCAN_TASKLIST);
	rtp->postscan_func(&holdouts);

	/*
	 * Each pass through the following loop scans the list of holdout
	 * tasks, removing any that are no longer holdouts.  When the list
	 * is empty, we are done.
	 */
	lastreport = jiffies;

	// Start off with initial wait and slowly back off to 1 HZ wait.
	fract = rtp->init_fract;
	if (fract > HZ)
		fract = HZ;

	for (;;) {
		bool firstreport;
		bool needreport;
		int rtst;

		if (list_empty(&holdouts))
			break;

		/* Slowly back off waiting for holdouts */
		set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
		schedule_timeout_idle(HZ/fract);

		if (fract > 1)
			fract--;

		rtst = READ_ONCE(rcu_task_stall_timeout);
		needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
		if (needreport)
			lastreport = jiffies;
		firstreport = true;
		WARN_ON(signal_pending(current));
		set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
		rtp->holdouts_func(&holdouts, needreport, &firstreport);
	}

	set_tasks_gp_state(rtp, RTGS_POST_GP);
	rtp->postgp_func(rtp);
}

#endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
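
/*
 * Worked example of the holdout-wait backoff above, assuming HZ = 1000 and
 * ->init_fract = 10 (the value used by the classic flavor below): successive
 * passes sleep HZ/10 = 100, HZ/9 = 111, HZ/8 = 125, ... jiffies, with fract
 * decrementing no further than 1, so that later passes each sleep a full HZ
 * jiffies (one second).
 */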

#ifdef CONFIG_TASKS_RCU

////////////////////////////////////////////////////////////////////////
//
// Simple variant of RCU whose quiescent states are voluntary context
// switch, cond_resched_rcu_qs(), user-space execution, and idle.
// As such, grace periods can take one good long time.  There are no
// read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
// because this implementation is intended to get the system into a safe
// state for some of the manipulations involved in tracing and the like.
// Finally, this implementation does not support high call_rcu_tasks()
// rates from multiple CPUs.  If this is required, per-CPU callback lists
// will be needed.

/* Pre-grace-period preparation. */
static void rcu_tasks_pregp_step(void)
{
	/*
	 * Wait for all pre-existing t->on_rq and t->nvcsw transitions
	 * to complete.  Invoking synchronize_rcu() suffices because all
	 * these transitions occur with interrupts disabled.  Without this
	 * synchronize_rcu(), a read-side critical section that started
	 * before the grace period might be incorrectly seen as having
	 * started after the grace period.
	 *
	 * This synchronize_rcu() also dispenses with the need for a
	 * memory barrier on the first store to t->rcu_tasks_holdout,
	 * as it forces the store to happen after the beginning of the
	 * grace period.
	 */
	synchronize_rcu();
}

/* Per-task initial processing. */
static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
{
	if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) {
		get_task_struct(t);
		t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
		WRITE_ONCE(t->rcu_tasks_holdout, true);
		list_add(&t->rcu_tasks_holdout_list, hop);
	}
}

/* Processing between scanning the tasklist and draining the holdout list. */
static void rcu_tasks_postscan(struct list_head *hop)
{
	/*
	 * Wait for tasks that are in the process of exiting.  This
	 * does only part of the job, ensuring that all tasks that were
	 * previously exiting reach the point where they have disabled
	 * preemption, allowing the later synchronize_rcu() to finish
	 * the job.
	 */
	synchronize_srcu(&tasks_rcu_exit_srcu);
}

/* See if tasks are still holding out, complain if so. */
static void check_holdout_task(struct task_struct *t,
			       bool needreport, bool *firstreport)
{
	int cpu;

	if (!READ_ONCE(t->rcu_tasks_holdout) ||
	    t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
	    !READ_ONCE(t->on_rq) ||
	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
		WRITE_ONCE(t->rcu_tasks_holdout, false);
		list_del_init(&t->rcu_tasks_holdout_list);
		put_task_struct(t);
		return;
	}
	rcu_request_urgent_qs_task(t);
	if (!needreport)
		return;
	if (*firstreport) {
		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
		*firstreport = false;
	}
	cpu = task_cpu(t);
	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
		 t, ".I"[is_idle_task(t)],
		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
		 t->rcu_tasks_idle_cpu, cpu);
	sched_show_task(t);
}

/* Scan the holdout lists for tasks no longer holding out. */
static void check_all_holdout_tasks(struct list_head *hop,
				    bool needreport, bool *firstreport)
{
	struct task_struct *t, *t1;

	list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list) {
		check_holdout_task(t, needreport, firstreport);
		cond_resched();
	}
}

/* Finish off the Tasks-RCU grace period. */
static void rcu_tasks_postgp(struct rcu_tasks *rtp)
{
	/*
	 * Because ->on_rq and ->nvcsw are not guaranteed to have full
	 * memory barriers prior to them in the schedule() path, memory
	 * reordering on other CPUs could cause their RCU-tasks read-side
	 * critical sections to extend past the end of the grace period.
	 * However, because these ->nvcsw updates are carried out with
	 * interrupts disabled, we can use synchronize_rcu() to force the
	 * needed ordering on all such CPUs.
	 *
	 * This synchronize_rcu() also confines all ->rcu_tasks_holdout
	 * accesses to be within the grace period, avoiding the need for
	 * memory barriers for ->rcu_tasks_holdout accesses.
	 *
	 * In addition, this synchronize_rcu() waits for exiting tasks
	 * to complete their final preempt_disable() region of execution,
	 * cleaning up after the synchronize_srcu() above.
	 */
	synchronize_rcu();
}

void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");

/**
 * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_tasks() assumes
 * that the read-side critical sections end at a voluntary context
 * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
 * or transition to usermode execution.  As such, there are no read-side
 * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
 * this primitive is intended to determine that all tasks have passed
 * through a safe state, not so much for data-structure synchronization.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
{
	call_rcu_tasks_generic(rhp, func, &rcu_tasks);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks);

/**
 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu-tasks
 * grace period has elapsed, in other words after all currently
 * executing rcu-tasks read-side critical sections have completed.  These
 * read-side critical sections are delimited by calls to schedule(),
 * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function
 * preambles and profiling hooks.  The synchronize_rcu_tasks() function
 * is not (yet) intended for heavy use from multiple CPUs.
 *
 * See the description of synchronize_rcu() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu_tasks(void)
{
	synchronize_rcu_tasks_generic(&rcu_tasks);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);

/**
 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to, for example, if there are no pending callbacks.
 */
void rcu_barrier_tasks(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);

static int __init rcu_spawn_tasks_kthread(void)
{
	rcu_tasks.gp_sleep = HZ / 10;
	rcu_tasks.init_fract = 10;
	rcu_tasks.pregp_func = rcu_tasks_pregp_step;
	rcu_tasks.pertask_func = rcu_tasks_pertask;
	rcu_tasks.postscan_func = rcu_tasks_postscan;
	rcu_tasks.holdouts_func = check_all_holdout_tasks;
	rcu_tasks.postgp_func = rcu_tasks_postgp;
	rcu_spawn_tasks_kthread_generic(&rcu_tasks);
	return 0;
}
core_initcall(rcu_spawn_tasks_kthread);

#ifndef CONFIG_TINY_RCU
static void show_rcu_tasks_classic_gp_kthread(void)
{
	show_rcu_tasks_generic_gp_kthread(&rcu_tasks, "");
}
#endif /* #ifndef CONFIG_TINY_RCU */
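
/*
 * Illustrative (hypothetical) use of the APIs above: an updater that
 * unpublishes a dynamically allocated trampoline and then uses
 * call_rcu_tasks() to defer freeing it until no task can still be
 * executing within it, even if such a task was preempted mid-trampoline.
 * The structure and function names are examples only:
 *
 *	struct foo_tramp {
 *		struct rcu_head rh;
 *		void *insns;
 *	};
 *
 *	static void foo_tramp_free_cb(struct rcu_head *rhp)
 *	{
 *		struct foo_tramp *tp = container_of(rhp, struct foo_tramp, rh);
 *
 *		vfree(tp->insns);
 *		kfree(tp);
 *	}
 *
 *	// After removing all references that could direct execution
 *	// into tp->insns:
 *	call_rcu_tasks(&tp->rh, foo_tramp_free_cb);
 *
 * An updater that can block may instead unpublish the trampoline, invoke
 * synchronize_rcu_tasks(), and then free it directly.
 */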

/* Do the srcu_read_lock() for the above synchronize_srcu(). */
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
{
	preempt_disable();
	current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
	preempt_enable();
}

/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
{
	struct task_struct *t = current;

	preempt_disable();
	__srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
	preempt_enable();
	exit_tasks_rcu_finish_trace(t);
}

#else /* #ifdef CONFIG_TASKS_RCU */
static inline void show_rcu_tasks_classic_gp_kthread(void) { }
void exit_tasks_rcu_start(void) { }
void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
#endif /* #else #ifdef CONFIG_TASKS_RCU */

#ifdef CONFIG_TASKS_RUDE_RCU

////////////////////////////////////////////////////////////////////////
//
// "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
// passing an empty function to schedule_on_each_cpu().  This approach
// provides an asynchronous call_rcu_tasks_rude() API and batching
// of concurrent calls to the synchronous synchronize_rcu_tasks_rude() API.
// This sends IPIs far and wide and induces otherwise unnecessary context
// switches on all online CPUs, whether idle or not.

// Empty function to allow workqueues to force a context switch.
static void rcu_tasks_be_rude(struct work_struct *work)
{
}

// Wait for one rude RCU-tasks grace period.
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
	rtp->n_ipis += cpumask_weight(cpu_online_mask);
	schedule_on_each_cpu(rcu_tasks_be_rude);
}

void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude,
		 "RCU Tasks Rude");

/**
 * call_rcu_tasks_rude() - Queue a callback for invocation after a rude task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_tasks_rude()
 * assumes that the read-side critical sections end at context switch,
 * cond_resched_rcu_qs(), or transition to usermode execution.  As such,
 * there are no read-side primitives analogous to rcu_read_lock() and
 * rcu_read_unlock() because this primitive is intended to determine
 * that all tasks have passed through a safe state, not so much for
 * data-structure synchronization.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
{
	call_rcu_tasks_generic(rhp, func, &rcu_tasks_rude);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks_rude);

/**
 * synchronize_rcu_tasks_rude - wait for a rude rcu-tasks grace period
 *
 * Control will return to the caller some time after a rude rcu-tasks
 * grace period has elapsed, in other words after all currently
 * executing rcu-tasks read-side critical sections have completed.  These
 * read-side critical sections are delimited by calls to schedule(),
 * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory,
 * anyway) cond_resched().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function preambles
 * and profiling hooks.  The synchronize_rcu_tasks_rude() function is not
 * (yet) intended for heavy use from multiple CPUs.
 *
 * See the description of synchronize_rcu() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu_tasks_rude(void)
{
	synchronize_rcu_tasks_generic(&rcu_tasks_rude);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);

/**
 * rcu_barrier_tasks_rude - Wait for in-flight call_rcu_tasks_rude() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to, for example, if there are no pending callbacks.
 */
void rcu_barrier_tasks_rude(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks_rude();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);

static int __init rcu_spawn_tasks_rude_kthread(void)
{
	rcu_tasks_rude.gp_sleep = HZ / 10;
	rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
	return 0;
}
core_initcall(rcu_spawn_tasks_rude_kthread);

#ifndef CONFIG_TINY_RCU
static void show_rcu_tasks_rude_gp_kthread(void)
{
	show_rcu_tasks_generic_gp_kthread(&rcu_tasks_rude, "");
}
#endif /* #ifndef CONFIG_TINY_RCU */

#else /* #ifdef CONFIG_TASKS_RUDE_RCU */
static void show_rcu_tasks_rude_gp_kthread(void) {}
#endif /* #else #ifdef CONFIG_TASKS_RUDE_RCU */

////////////////////////////////////////////////////////////////////////
//
// Tracing variant of Tasks RCU.  This variant is designed to be used
// to protect tracing hooks, including those of BPF.  This variant
// therefore:
//
// 1.	Has explicit read-side markers to allow finite grace periods
//	in the face of in-kernel loops for PREEMPT=n builds.
//
// 2.	Protects code in the idle loop, exception entry/exit, and
//	CPU-hotplug code paths, similar to the capabilities of SRCU.
//
// 3.	Avoids expensive read-side instructions, having overhead similar
//	to that of Preemptible RCU.
//
// There are of course downsides.  The grace-period code can send IPIs to
// CPUs, even when those CPUs are in the idle loop or in nohz_full userspace.
// It is necessary to scan the full tasklist, much as for Tasks RCU.  There
// is a single callback queue guarded by a single lock, again, much as for
// Tasks RCU.  If needed, these downsides can be at least partially remedied.
//
// Perhaps most important, this variant of RCU does not affect the vanilla
// flavors, rcu_preempt and rcu_sched.  The fact that RCU Tasks Trace
// readers can operate from idle, offline, and exception entry/exit in no
// way allows rcu_preempt and rcu_sched readers to also do so.
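
/*
 * Illustrative (hypothetical) reader for this variant: a hook invoked from
 * tracing context that dereferences an updater-published pointer under
 * rcu_read_lock_trace().  The "foo_" names are examples only; the real
 * read-side markers are declared in include/linux/rcupdate_trace.h:
 *
 *	struct foo_hook_data {
 *		void (*func)(void *arg);
 *		void *arg;
 *		struct rcu_head rh;
 *	};
 *
 *	static struct foo_hook_data __rcu *foo_hook_ptr;
 *
 *	static void foo_trace_hook(void)
 *	{
 *		struct foo_hook_data *p;
 *
 *		rcu_read_lock_trace();
 *		p = rcu_dereference_raw(foo_hook_ptr); // Protected by Tasks Trace RCU.
 *		if (p)
 *			p->func(p->arg);
 *		rcu_read_unlock_trace();
 *	}
 *
 * A matching updater sketch appears after the grace-period APIs below.
 */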

// The lockdep state must be outside of #ifdef to be useful.
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_trace_key;
struct lockdep_map rcu_trace_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_trace", &rcu_lock_trace_key);
EXPORT_SYMBOL_GPL(rcu_trace_lock_map);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

#ifdef CONFIG_TASKS_TRACE_RCU

static atomic_t trc_n_readers_need_end;		// Number of waited-for readers.
static DECLARE_WAIT_QUEUE_HEAD(trc_wait);	// List of holdout tasks.

// Record outstanding IPIs to each CPU.  No point in sending two...
static DEFINE_PER_CPU(bool, trc_ipi_to_cpu);

// The number of detections of task quiescent state relying on
// heavyweight readers executing explicit memory barriers.
static unsigned long n_heavy_reader_attempts;
static unsigned long n_heavy_reader_updates;
static unsigned long n_heavy_reader_ofl_updates;

void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks_trace, rcu_tasks_wait_gp, call_rcu_tasks_trace,
		 "RCU Tasks Trace");

/*
 * This irq_work handler allows rcu_read_unlock_trace() to be invoked
 * while the scheduler locks are held.
 */
static void rcu_read_unlock_iw(struct irq_work *iwp)
{
	wake_up(&trc_wait);
}
static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw);

/* If we are the last reader, wake up the grace-period kthread. */
void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
{
	int nq = t->trc_reader_special.b.need_qs;

	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
	    t->trc_reader_special.b.need_mb)
		smp_mb(); // Pairs with update-side barriers.
	// Update .need_qs before ->trc_reader_nesting for irq/NMI handlers.
	if (nq)
		WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
	WRITE_ONCE(t->trc_reader_nesting, nesting);
	if (nq && atomic_dec_and_test(&trc_n_readers_need_end))
		irq_work_queue(&rcu_tasks_trace_iw);
}
EXPORT_SYMBOL_GPL(rcu_read_unlock_trace_special);

/* Add a task to the holdout list, if it is not already on the list. */
static void trc_add_holdout(struct task_struct *t, struct list_head *bhp)
{
	if (list_empty(&t->trc_holdout_list)) {
		get_task_struct(t);
		list_add(&t->trc_holdout_list, bhp);
	}
}

/* Remove a task from the holdout list, if it is in fact present. */
static void trc_del_holdout(struct task_struct *t)
{
	if (!list_empty(&t->trc_holdout_list)) {
		list_del_init(&t->trc_holdout_list);
		put_task_struct(t);
	}
}

/* IPI handler to check task state. */
static void trc_read_check_handler(void *t_in)
{
	struct task_struct *t = current;
	struct task_struct *texp = t_in;

	// If the task is no longer running on this CPU, leave.
	if (unlikely(texp != t)) {
		if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
			wake_up(&trc_wait);
		goto reset_ipi; // Already on holdout list, so will check later.
	}

	// If the task is not in a read-side critical section, and
	// if this is the last reader, awaken the grace-period kthread.
	if (likely(!t->trc_reader_nesting)) {
		if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
			wake_up(&trc_wait);
		// Mark as checked after decrement to avoid false
		// positives on the above WARN_ON_ONCE().
		WRITE_ONCE(t->trc_reader_checked, true);
		goto reset_ipi;
	}
	// If we are racing with an rcu_read_unlock_trace(), try again later.
	if (unlikely(t->trc_reader_nesting < 0)) {
		if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
			wake_up(&trc_wait);
		goto reset_ipi;
	}
	WRITE_ONCE(t->trc_reader_checked, true);

	// Get here if the task is in a read-side critical section.  Set
	// its state so that it will awaken the grace-period kthread upon
	// exit from that critical section.
	WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
	WRITE_ONCE(t->trc_reader_special.b.need_qs, true);

reset_ipi:
	// Allow future IPIs to be sent on CPU and for task.
	// Also order this IPI handler against any later manipulations of
	// the intended task.
	smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
	smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^
}

/* Callback function for scheduler to check locked-down task. */
static bool trc_inspect_reader(struct task_struct *t, void *arg)
{
	int cpu = task_cpu(t);
	bool in_qs = false;
	bool ofl = cpu_is_offline(cpu);

	if (task_curr(t)) {
		WARN_ON_ONCE(ofl && !is_idle_task(t));

		// If no chance of heavyweight readers, do it the hard way.
		if (!ofl && !IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
			return false;

		// If heavyweight readers are enabled on the remote task,
		// we can inspect its state despite it currently running.
		// However, we cannot safely change its state.
		n_heavy_reader_attempts++;
		if (!ofl && // Check for "running" idle tasks on offline CPUs.
		    !rcu_dynticks_zero_in_eqs(cpu, &t->trc_reader_nesting))
			return false; // No quiescent state, do it the hard way.
		n_heavy_reader_updates++;
		if (ofl)
			n_heavy_reader_ofl_updates++;
		in_qs = true;
	} else {
		in_qs = likely(!t->trc_reader_nesting);
	}

	// Mark as checked.  Because this is called from the grace-period
	// kthread, also remove the task from the holdout list.
	t->trc_reader_checked = true;
	trc_del_holdout(t);

	if (in_qs)
		return true; // Already in quiescent state, done!!!

	// The task is in a read-side critical section, so set up its
	// state so that it will awaken the grace-period kthread upon exit
	// from that critical section.
	atomic_inc(&trc_n_readers_need_end); // One more to wait on.
	WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
	WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
	return true;
}

/* Attempt to extract the state for the specified task. */
static void trc_wait_for_one_reader(struct task_struct *t,
				    struct list_head *bhp)
{
	int cpu;

	// If a previous IPI is still in flight, let it complete.
	if (smp_load_acquire(&t->trc_ipi_to_cpu) != -1) // Order IPI
		return;

	// The current task had better be in a quiescent state.
	if (t == current) {
		t->trc_reader_checked = true;
		trc_del_holdout(t);
		WARN_ON_ONCE(t->trc_reader_nesting);
		return;
	}

	// Attempt to nail down the task for inspection.
	get_task_struct(t);
	if (try_invoke_on_locked_down_task(t, trc_inspect_reader, NULL)) {
		put_task_struct(t);
		return;
	}
	put_task_struct(t);

	// If currently running, send an IPI, either way, add to list.
	trc_add_holdout(t, bhp);
	if (task_curr(t) &&
	    time_after(jiffies + 1, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
		// The task is currently running, so try IPIing it.
		cpu = task_cpu(t);

		// If there is already an IPI outstanding, let it happen.
		if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0)
			return;

		atomic_inc(&trc_n_readers_need_end);
		per_cpu(trc_ipi_to_cpu, cpu) = true;
		t->trc_ipi_to_cpu = cpu;
		rcu_tasks_trace.n_ipis++;
		if (smp_call_function_single(cpu,
					     trc_read_check_handler, t, 0)) {
			// Just in case there is some other reason for
			// failure than the target CPU being offline.
			rcu_tasks_trace.n_ipis_fails++;
			per_cpu(trc_ipi_to_cpu, cpu) = false;
			t->trc_ipi_to_cpu = -1; // Allow a later IPI attempt.
			if (atomic_dec_and_test(&trc_n_readers_need_end)) {
				WARN_ON_ONCE(1);
				wake_up(&trc_wait);
			}
		}
	}
}

/* Initialize for a new RCU-tasks-trace grace period. */
static void rcu_tasks_trace_pregp_step(void)
{
	int cpu;

	// Allow for fast-acting IPIs.
	atomic_set(&trc_n_readers_need_end, 1);

	// There shouldn't be any old IPIs, but...
	for_each_possible_cpu(cpu)
		WARN_ON_ONCE(per_cpu(trc_ipi_to_cpu, cpu));

	// Disable CPU hotplug across the tasklist scan.
	// This also waits for all readers in CPU-hotplug code paths.
	cpus_read_lock();
}

/* Do first-round processing for the specified task. */
static void rcu_tasks_trace_pertask(struct task_struct *t,
				    struct list_head *hop)
{
	WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
	WRITE_ONCE(t->trc_reader_checked, false);
	t->trc_ipi_to_cpu = -1;
	trc_wait_for_one_reader(t, hop);
}

/*
 * Do intermediate processing between task and holdout scans and
 * pick up the idle tasks.
 */
static void rcu_tasks_trace_postscan(struct list_head *hop)
{
	int cpu;

	for_each_possible_cpu(cpu)
		rcu_tasks_trace_pertask(idle_task(cpu), hop);

	// Re-enable CPU hotplug now that the tasklist scan has completed.
	cpus_read_unlock();

	// Wait for late-stage exiting tasks to finish exiting.
	// These might have passed the call to exit_tasks_rcu_finish().
	synchronize_rcu();
	// Any tasks that exit after this point will set ->trc_reader_checked.
}

/* Show the state of a task stalling the current RCU tasks trace GP. */
static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
{
	int cpu;

	if (*firstreport) {
		pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
		*firstreport = false;
	}
	// FIXME: This should attempt to use try_invoke_on_nonrunning_task().
	cpu = task_cpu(t);
	pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
		 t->pid,
		 ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0],
		 ".i"[is_idle_task(t)],
		 ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)],
		 t->trc_reader_nesting,
		 " N"[!!t->trc_reader_special.b.need_qs],
		 cpu);
	sched_show_task(t);
}

/* List stalled IPIs for RCU tasks trace. */
static void show_stalled_ipi_trace(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		if (per_cpu(trc_ipi_to_cpu, cpu))
			pr_alert("\tIPI outstanding to CPU %d\n", cpu);
}

/* Do one scan of the holdout list. */
static void check_all_holdout_tasks_trace(struct list_head *hop,
					  bool needreport, bool *firstreport)
{
	struct task_struct *g, *t;

	// Disable CPU hotplug across the holdout list scan.
	cpus_read_lock();

	list_for_each_entry_safe(t, g, hop, trc_holdout_list) {
		// If safe and needed, try to check the current task.
		if (READ_ONCE(t->trc_ipi_to_cpu) == -1 &&
		    !READ_ONCE(t->trc_reader_checked))
			trc_wait_for_one_reader(t, hop);

		// If check succeeded, remove this task from the list.
		if (READ_ONCE(t->trc_reader_checked))
			trc_del_holdout(t);
		else if (needreport)
			show_stalled_task_trace(t, firstreport);
	}

	// Re-enable CPU hotplug now that the holdout list scan has completed.
	cpus_read_unlock();

	if (needreport) {
		if (*firstreport)
			pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n");
		show_stalled_ipi_trace();
	}
}

/* Wait for grace period to complete and provide ordering. */
static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
{
	bool firstreport;
	struct task_struct *g, *t;
	LIST_HEAD(holdouts);
	long ret;

	// Remove the safety count.
	smp_mb__before_atomic(); // Order vs. earlier atomics
	atomic_dec(&trc_n_readers_need_end);
	smp_mb__after_atomic(); // Order vs. later atomics

	// Wait for readers.
	set_tasks_gp_state(rtp, RTGS_WAIT_READERS);
	for (;;) {
		ret = wait_event_idle_exclusive_timeout(
				trc_wait,
				atomic_read(&trc_n_readers_need_end) == 0,
				READ_ONCE(rcu_task_stall_timeout));
		if (ret)
			break; // Count reached zero.
		// Stall warning time, so make a list of the offenders.
		rcu_read_lock();
		for_each_process_thread(g, t)
			if (READ_ONCE(t->trc_reader_special.b.need_qs))
				trc_add_holdout(t, &holdouts);
		rcu_read_unlock();
		firstreport = true;
		list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list) {
			if (READ_ONCE(t->trc_reader_special.b.need_qs))
				show_stalled_task_trace(t, &firstreport);
			trc_del_holdout(t); // Release task_struct reference.
		}
		if (firstreport)
			pr_err("INFO: rcu_tasks_trace detected stalls? (Counter/tasklist mismatch?)\n");
		show_stalled_ipi_trace();
		pr_err("\t%d holdouts\n", atomic_read(&trc_n_readers_need_end));
	}
	smp_mb(); // Caller's code must be ordered after wakeup.
		  // Pairs with pretty much every ordering primitive.
}

/* Report any needed quiescent state for this exiting task. */
static void exit_tasks_rcu_finish_trace(struct task_struct *t)
{
	WRITE_ONCE(t->trc_reader_checked, true);
	WARN_ON_ONCE(t->trc_reader_nesting);
	WRITE_ONCE(t->trc_reader_nesting, 0);
	if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
		rcu_read_unlock_trace_special(t, 0);
}

/**
 * call_rcu_tasks_trace() - Queue a callback for invocation after a trace task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_tasks_trace()
 * assumes that the read-side critical sections are delimited by calls
 * to rcu_read_lock_trace() and rcu_read_unlock_trace().
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func)
{
	call_rcu_tasks_generic(rhp, func, &rcu_tasks_trace);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks_trace);

/**
 * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period
 *
 * Control will return to the caller some time after a trace rcu-tasks
 * grace period has elapsed, in other words after all currently executing
 * rcu-tasks read-side critical sections have completed.  These read-side
 * critical sections are delimited by calls to rcu_read_lock_trace()
 * and rcu_read_unlock_trace().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function preambles
 * and profiling hooks.  The synchronize_rcu_tasks_trace() function is not
 * (yet) intended for heavy use from multiple CPUs.
 *
 * See the description of synchronize_rcu() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu_tasks_trace(void)
{
	RCU_LOCKDEP_WARN(lock_is_held(&rcu_trace_lock_map), "Illegal synchronize_rcu_tasks_trace() in RCU Tasks Trace read-side critical section");
	synchronize_rcu_tasks_generic(&rcu_tasks_trace);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace);

/**
 * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to, for example, if there are no pending callbacks.
 */
void rcu_barrier_tasks_trace(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks_trace();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
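
/*
 * Illustrative (hypothetical) updater matching the reader sketched near the
 * top of this section: unpublish the old hook data and use
 * call_rcu_tasks_trace() to free it only once every task has left any
 * rcu_read_lock_trace() section that might still reference it.  The "foo_"
 * names are examples only:
 *
 *	static void foo_hook_free_cb(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct foo_hook_data, rh));
 *	}
 *
 *	static void foo_hook_replace(struct foo_hook_data *newp)
 *	{
 *		struct foo_hook_data *oldp;
 *
 *		oldp = rcu_replace_pointer(foo_hook_ptr, newp, true);
 *		if (oldp)
 *			call_rcu_tasks_trace(&oldp->rh, foo_hook_free_cb);
 *	}
 *
 * An updater that must block can instead call synchronize_rcu_tasks_trace()
 * after unpublishing and then free the old structure directly.
 */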

static int __init rcu_spawn_tasks_trace_kthread(void)
{
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
		rcu_tasks_trace.gp_sleep = HZ / 10;
		rcu_tasks_trace.init_fract = 10;
	} else {
		rcu_tasks_trace.gp_sleep = HZ / 200;
		if (rcu_tasks_trace.gp_sleep <= 0)
			rcu_tasks_trace.gp_sleep = 1;
		rcu_tasks_trace.init_fract = HZ / 5;
		if (rcu_tasks_trace.init_fract <= 0)
			rcu_tasks_trace.init_fract = 1;
	}
	rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
	rcu_tasks_trace.pertask_func = rcu_tasks_trace_pertask;
	rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
	rcu_tasks_trace.holdouts_func = check_all_holdout_tasks_trace;
	rcu_tasks_trace.postgp_func = rcu_tasks_trace_postgp;
	rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace);
	return 0;
}
core_initcall(rcu_spawn_tasks_trace_kthread);

#ifndef CONFIG_TINY_RCU
static void show_rcu_tasks_trace_gp_kthread(void)
{
	char buf[64];

	sprintf(buf, "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
		data_race(n_heavy_reader_ofl_updates),
		data_race(n_heavy_reader_updates),
		data_race(n_heavy_reader_attempts));
	show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
}
#endif /* #ifndef CONFIG_TINY_RCU */

#else /* #ifdef CONFIG_TASKS_TRACE_RCU */
static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
static inline void show_rcu_tasks_trace_gp_kthread(void) {}
#endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */

#ifndef CONFIG_TINY_RCU
void show_rcu_tasks_gp_kthreads(void)
{
	show_rcu_tasks_classic_gp_kthread();
	show_rcu_tasks_rude_gp_kthread();
	show_rcu_tasks_trace_gp_kthread();
}
#endif /* #ifndef CONFIG_TINY_RCU */

#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void rcu_tasks_bootup_oddness(void) {}
void show_rcu_tasks_gp_kthreads(void) {}
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */