// SPDX-License-Identifier: GPL-2.0+
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 *
 * Copyright IBM Corporation, 2008
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *	    Paul E. McKenney <paulmck@linux.ibm.com>
 *
 * Based on the original work by Paul McKenney <paulmck@linux.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *	Documentation/RCU
 */

#define pr_fmt(fmt) "rcu: " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate_wait.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/kmemleak.h>
#include <linux/moduleparam.h>
#include <linux/panic.h>
#include <linux/panic_notifier.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/prefetch.h>
#include <linux/delay.h>
#include <linux/random.h>
#include <linux/trace_events.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/tick.h>
#include <linux/sysrq.h>
#include <linux/kprobes.h>
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/smpboot.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>
#include <linux/sched/clock.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/kasan.h>
#include <linux/context_tracking.h>
#include "../time/tick-internal.h"

#include "tree.h"
#include "rcu.h"

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcutree."
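
/*
 * Because of the MODULE_PARAM_PREFIX above, each module_param() declared
 * in this file is exposed with an "rcutree." prefix, for example on the
 * kernel boot command line:
 *
 *	rcutree.dump_tree=1 rcutree.kthread_prio=2
 *
 * The runtime-writable parameters (those declared with 0644 permissions)
 * also appear under /sys/module/rcutree/parameters/.
 */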

/* Data structures. */

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
	.gpwrap = true,
#ifdef CONFIG_RCU_NOCB_CPU
	.cblist.flags = SEGCBLIST_RCU_CORE,
#endif
};
static struct rcu_state rcu_state = {
	.level = { &rcu_state.node[0] },
	.gp_state = RCU_GP_IDLE,
	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
	.barrier_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.barrier_lock),
	.name = RCU_NAME,
	.abbr = RCU_ABBR,
	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
	.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
};

/* Dump rcu_node combining tree at boot to verify correct setup. */
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT);
#ifndef CONFIG_PREEMPT_RT
module_param(use_softirq, bool, 0444);
#endif
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
module_param(rcu_fanout_exact, bool, 0444);
/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */
static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0444);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
/* Number of rcu_nodes at specified level. */
int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
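
/*
 * Worked example, assuming the usual 64-bit defaults of CONFIG_RCU_FANOUT=64
 * and CONFIG_RCU_FANOUT_LEAF=16: a 256-CPU system gets 256/16 = 16 leaf
 * rcu_node structures plus one root, that is, a two-level combining tree
 * with rcu_num_lvls == 2 and rcu_num_nodes == 17.  Booting with
 * rcutree.rcu_fanout_leaf=64 instead yields only 4 leaves under that root.
 */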

/*
 * The rcu_scheduler_active variable is initialized to the value
 * RCU_SCHEDULER_INACTIVE and transitions RCU_SCHEDULER_INIT just before the
 * first task is spawned.  So when this variable is RCU_SCHEDULER_INACTIVE,
 * RCU can assume that there is but one task, allowing RCU to (for example)
 * optimize synchronize_rcu() to a simple barrier().  When this variable
 * is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required
 * to detect real grace periods.  This variable is also used to suppress
 * boot-time false positives from lockdep-RCU error checking.  Finally, it
 * transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU
 * is fully initialized, including all of its kthreads having been spawned.
 */
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);

/*
 * The rcu_scheduler_fully_active variable transitions from zero to one
 * during the early_initcall() processing, which is after the scheduler
 * is capable of creating new tasks.  So RCU processing (for example,
 * creating tasks for RCU priority boosting) must be delayed until after
 * rcu_scheduler_fully_active transitions from zero to one.  We also
 * currently delay invocation of any RCU callbacks until after this point.
 *
 * It might later prove better for people registering RCU callbacks during
 * early boot to take responsibility for these callbacks, but one step at
 * a time.
 */
static int rcu_scheduler_fully_active __read_mostly;

static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
			      unsigned long gps, unsigned long flags);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
static bool rcu_rdp_cpu_online(struct rcu_data *rdp);
static bool rcu_init_invoked(void);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);

/*
 * rcuc/rcub/rcuop kthread realtime priority. The "rcuop"
 * real-time priority (enabling/disabling) is controlled by
 * the extra CONFIG_RCU_NOCB_CPU_CB_BOOST configuration.
 */
static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
module_param(kthread_prio, int, 0444);

/* Delay in jiffies for grace-period initialization delays, debug only. */

static int gp_preinit_delay;
module_param(gp_preinit_delay, int, 0444);
static int gp_init_delay;
module_param(gp_init_delay, int, 0444);
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);

// Add delay to rcu_read_unlock() for strict grace periods.
static int rcu_unlock_delay;
#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
module_param(rcu_unlock_delay, int, 0444);
#endif

/*
 * This rcu parameter is runtime-read-only. It reflects
 * a minimum allowed number of objects which can be cached
 * per-CPU. Object size is equal to one page. This value
 * can be changed at boot time.
 */
static int rcu_min_cached_objs = 5;
module_param(rcu_min_cached_objs, int, 0444);

// A page shrinker can ask for pages to be freed to make them
// available for other parts of the system. This usually happens
// under low memory conditions, and in that case we should also
// defer page-cache filling for a short time period.
//
// The default value is 5 seconds, which is long enough to reduce
// interference with the shrinker while it asks other systems to
// drain their caches.
static int rcu_delay_page_cache_fill_msec = 5000;
module_param(rcu_delay_page_cache_fill_msec, int, 0444);

/* Retrieve RCU kthreads priority for rcutorture */
int rcu_get_gp_kthreads_prio(void)
{
	return kthread_prio;
}
EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);

/*
 * Number of grace periods between delays, normalized by the duration of
 * the delay.  The longer the delay, the more the grace periods between
 * each delay.  The reason for this normalization is that it means that,
 * for non-zero delays, the overall slowdown of grace periods is constant
 * regardless of the duration of the delay.  This arrangement balances
 * the need for long delays to increase some race probabilities with the
 * need for fast grace periods to increase other race probabilities.
 */
#define PER_RCU_NODE_PERIOD 3	/* Number of grace periods between delays for debugging. */

/*
 * Return true if an RCU grace period is in progress.  The READ_ONCE()s
 * permit this function to be invoked without holding the root rcu_node
 * structure's ->lock, but of course results can be subject to change.
 */
static int rcu_gp_in_progress(void)
{
	return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq));
}
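
/*
 * Sketch of the ->gp_seq encoding that the test above relies on (see
 * kernel/rcu/rcu.h): the low-order RCU_SEQ_CTR_SHIFT bits of a grace-period
 * sequence number hold the current phase and the remaining upper bits count
 * grace periods, so a nonzero rcu_seq_state() value means that a grace
 * period is currently in flight.
 */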

/*
 * Return the number of callbacks queued on the specified CPU.
 * Handles both the nocbs and normal cases.
 */
static long rcu_get_n_cbs_cpu(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

	if (rcu_segcblist_is_enabled(&rdp->cblist))
		return rcu_segcblist_n_cbs(&rdp->cblist);
	return 0;
}

void rcu_softirq_qs(void)
{
	rcu_qs();
	rcu_preempt_deferred_qs(current);
	rcu_tasks_qs(current, false);
}

/*
 * Reset the current CPU's ->dynticks counter to indicate that the
 * newly onlined CPU is no longer in an extended quiescent state.
 * This will either leave the counter unchanged, or increment it
 * to the next non-quiescent value.
 *
 * The non-atomic test/increment sequence works because the upper bits
 * of the ->dynticks counter are manipulated only by the corresponding CPU,
 * or when the corresponding CPU is offline.
 */
static void rcu_dynticks_eqs_online(void)
{
	if (ct_dynticks() & RCU_DYNTICKS_IDX)
		return;
	ct_state_inc(RCU_DYNTICKS_IDX);
}

/*
 * Snapshot the ->dynticks counter with full ordering so as to allow
 * stable comparison of this counter with past and future snapshots.
 */
static int rcu_dynticks_snap(int cpu)
{
	smp_mb();  // Fundamental RCU ordering guarantee.
	return ct_dynticks_cpu_acquire(cpu);
}

/*
 * Return true if the snapshot returned from rcu_dynticks_snap()
 * indicates that RCU is in an extended quiescent state.
 */
static bool rcu_dynticks_in_eqs(int snap)
{
	return !(snap & RCU_DYNTICKS_IDX);
}

/*
 * Return true if the CPU corresponding to the specified rcu_data
 * structure has spent some time in an extended quiescent state since
 * rcu_dynticks_snap() returned the specified snapshot.
 */
static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
{
	return snap != rcu_dynticks_snap(rdp->cpu);
}
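
/*
 * Illustrative sketch only: the force-quiescent-state code later in this
 * file pairs the above helpers roughly as follows (see
 * dyntick_save_progress_counter() and rcu_implicit_dynticks_qs()):
 *
 *	snap = rcu_dynticks_snap(cpu);			// first FQS scan
 *	if (rcu_dynticks_in_eqs(snap))
 *		// credit the CPU with a quiescent state right away
 *	...time passes...
 *	if (rcu_dynticks_in_eqs_since(rdp, snap))	// later FQS scan
 *		// the CPU spent time in an EQS in the meantime
 *
 * A change in the counter between the two reads means the CPU executed at
 * least one extended-quiescent-state transition since the snapshot.
 */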

/*
 * Return true if the referenced integer is zero while the specified
 * CPU remains within a single extended quiescent state.
 */
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
{
	int snap;

	// If not quiescent, force back to earlier extended quiescent state.
	snap = ct_dynticks_cpu(cpu) & ~RCU_DYNTICKS_IDX;
	smp_rmb(); // Order ->dynticks and *vp reads.
	if (READ_ONCE(*vp))
		return false;  // Non-zero, so report failure;
	smp_rmb(); // Order *vp read and ->dynticks re-read.

	// If still in the same extended quiescent state, we are good!
	return snap == ct_dynticks_cpu(cpu);
}

/*
 * Let the RCU core know that this CPU has gone through the scheduler,
 * which is a quiescent state.  This is called when the need for a
 * quiescent state is urgent, so we burn an atomic operation and full
 * memory barriers to let the RCU core know about it, regardless of what
 * this CPU might (or might not) do in the near future.
 *
 * We inform the RCU core by emulating a zero-duration dyntick-idle period.
 *
 * The caller must have disabled interrupts and must not be idle.
 */
notrace void rcu_momentary_dyntick_idle(void)
{
	int seq;

	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
	seq = ct_state_inc(2 * RCU_DYNTICKS_IDX);
	/* It is illegal to call this from idle state. */
	WARN_ON_ONCE(!(seq & RCU_DYNTICKS_IDX));
	rcu_preempt_deferred_qs(current);
}
EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
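
/*
 * Why the ct_state_inc(2 * RCU_DYNTICKS_IDX) above emulates a zero-duration
 * dyntick-idle period: the RCU_DYNTICKS_IDX bit of the counter is what
 * rcu_dynticks_in_eqs() tests for idleness, so adding twice that value
 * changes the counter while leaving that bit (and thus the "non-idle"
 * indication) intact.  A later rcu_dynticks_in_eqs_since() comparison then
 * sees a changed counter and credits this CPU with a quiescent state, just
 * as if it had momentarily entered and exited idle.
 */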

/**
 * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle
 *
 * If the current CPU is idle and running at a first-level (not nested)
 * interrupt from idle, or running directly from idle, return true.
 *
 * The caller must have at least disabled IRQs.
 */
static int rcu_is_cpu_rrupt_from_idle(void)
{
	long nesting;

	/*
	 * Usually called from the tick; but also used from smp_function_call()
	 * for expedited grace periods. This latter can result in running from
	 * the idle task, instead of an actual IPI.
	 */
	lockdep_assert_irqs_disabled();

	/* Check for counter underflows */
	RCU_LOCKDEP_WARN(ct_dynticks_nesting() < 0,
			 "RCU dynticks_nesting counter underflow!");
	RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() <= 0,
			 "RCU dynticks_nmi_nesting counter underflow/zero!");

	/* Are we at first interrupt nesting level? */
	nesting = ct_dynticks_nmi_nesting();
	if (nesting > 1)
		return false;

	/*
	 * If we're not in an interrupt, we must be in the idle task!
	 */
	WARN_ON_ONCE(!nesting && !is_idle_task(current));

	/* Does CPU appear to be idle from an RCU standpoint? */
	return ct_dynticks_nesting() == 0;
}

#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
	// Maximum callbacks per rcu_do_batch ...
#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
static long blimit = DEFAULT_RCU_BLIMIT;
#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
static long qhimark = DEFAULT_RCU_QHIMARK;
#define DEFAULT_RCU_QLOMARK 100   // Once only this many pending, use blimit.
static long qlowmark = DEFAULT_RCU_QLOMARK;
#define DEFAULT_RCU_QOVLD_MULT 2
#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
static long qovld_calc = -1;	  // No pre-initialization lock acquisitions!

module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);
module_param(qovld, long, 0444);
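
/*
 * Rough sketch of how these knobs interact, using the defaults with
 * CONFIG_RCU_STRICT_GRACE_PERIOD=n: callback invocation normally processes
 * at most blimit=10 callbacks per pass.  Once a CPU has more than
 * qhimark=10000 callbacks pending, blimit is ignored (bounded only by
 * DEFAULT_MAX_RCU_BLIMIT=10000 per pass) until the backlog falls to
 * qlowmark=100 or fewer, at which point blimit-sized batching resumes.
 * At qovld=20000 pending callbacks, the CPU is considered overloaded and
 * quiescent states are forced ("hammered") more aggressively.
 */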

static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
static int rcu_divisor = 7;
module_param(rcu_divisor, int, 0644);

/* Force an exit from rcu_do_batch() after 3 milliseconds. */
static long rcu_resched_ns = 3 * NSEC_PER_MSEC;
module_param(rcu_resched_ns, long, 0644);

/*
 * How long the grace period must be before we start recruiting
 * quiescent-state help from rcu_note_context_switch().
 */
static ulong jiffies_till_sched_qs = ULONG_MAX;
module_param(jiffies_till_sched_qs, ulong, 0444);
static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */
module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */

/*
 * Make sure that we give the grace-period kthread time to detect any
 * idle CPUs before taking active measures to force quiescent states.
 * However, don't go below 100 milliseconds, adjusted upwards for really
 * large systems.
 */
static void adjust_jiffies_till_sched_qs(void)
{
	unsigned long j;

	/* If jiffies_till_sched_qs was specified, respect the request. */
	if (jiffies_till_sched_qs != ULONG_MAX) {
		WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
		return;
	}
	/* Otherwise, set to third fqs scan, but bound below on large system. */
	j = READ_ONCE(jiffies_till_first_fqs) +
		      2 * READ_ONCE(jiffies_till_next_fqs);
	if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
		j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
	pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j);
	WRITE_ONCE(jiffies_to_sched_qs, j);
}

static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
{
	ulong j;
	int ret = kstrtoul(val, 0, &j);

	if (!ret) {
		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
		adjust_jiffies_till_sched_qs();
	}
	return ret;
}

static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param *kp)
{
	ulong j;
	int ret = kstrtoul(val, 0, &j);

	if (!ret) {
		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
		adjust_jiffies_till_sched_qs();
	}
	return ret;
}

static const struct kernel_param_ops first_fqs_jiffies_ops = {
	.set = param_set_first_fqs_jiffies,
	.get = param_get_ulong,
};

static const struct kernel_param_ops next_fqs_jiffies_ops = {
	.set = param_set_next_fqs_jiffies,
	.get = param_get_ulong,
};

module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_first_fqs, 0644);
module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
module_param(rcu_kick_kthreads, bool, 0644);
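
/*
 * The custom kernel_param_ops above mean that, for example, writing
 * rcutree.jiffies_till_first_fqs=1000000 on the boot command line (or via
 * /sys/module/rcutree/parameters/jiffies_till_first_fqs) is silently
 * clamped to HZ, and that a zero value written to jiffies_till_next_fqs is
 * bumped up to 1.  Either write also re-derives jiffies_to_sched_qs via
 * adjust_jiffies_till_sched_qs().
 */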

static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
static int rcu_pending(int user);

/*
 * Return the number of RCU GPs completed thus far for debug & stats.
 */
unsigned long rcu_get_gp_seq(void)
{
	return READ_ONCE(rcu_state.gp_seq);
}
EXPORT_SYMBOL_GPL(rcu_get_gp_seq);

/*
 * Return the number of RCU expedited batches completed thus far for
 * debug & stats.  Odd numbers mean that a batch is in progress, even
 * numbers mean idle.  The value returned will thus be roughly double
 * the cumulative batches since boot.
 */
unsigned long rcu_exp_batches_completed(void)
{
	return rcu_state.expedited_sequence;
}
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
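
/*
 * For example, given the odd/even encoding described above, a debugging or
 * torture-test caller can detect an in-flight expedited grace period with:
 *
 *	bool exp_gp_in_progress = rcu_exp_batches_completed() & 0x1;
 */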

/*
 * Return the root node of the rcu_state structure.
 */
static struct rcu_node *rcu_get_root(void)
{
	return &rcu_state.node[0];
}

/*
 * Send along grace-period-related data for rcutorture diagnostics.
 */
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
			    unsigned long *gp_seq)
{
	switch (test_type) {
	case RCU_FLAVOR:
		*flags = READ_ONCE(rcu_state.gp_flags);
		*gp_seq = rcu_seq_current(&rcu_state.gp_seq);
		break;
	default:
		break;
	}
}
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);

#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
/*
 * An empty function that will trigger a reschedule on
 * IRQ tail once IRQs get re-enabled on userspace/guest resume.
 */
static void late_wakeup_func(struct irq_work *work)
{
}

static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
	IRQ_WORK_INIT(late_wakeup_func);

/*
 * If either:
 *
 * 1) the task is about to enter guest mode and $ARCH doesn't support KVM generic work
 * 2) the task is about to enter user mode and $ARCH doesn't support generic entry
 *
 * then the late RCU wakeups aren't supported in the resched loops and our
 * last resort is to fire a local irq_work that will trigger a reschedule once IRQs
 * get re-enabled again.
 */
noinstr void rcu_irq_work_resched(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU))
		return;

	if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU))
		return;

	instrumentation_begin();
	if (do_nocb_deferred_wakeup(rdp) && need_resched()) {
		irq_work_queue(this_cpu_ptr(&late_wakeup_work));
	}
	instrumentation_end();
}
#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) */

#ifdef CONFIG_PROVE_RCU
/**
 * rcu_irq_exit_check_preempt - Validate that scheduling is possible
 */
void rcu_irq_exit_check_preempt(void)
{
	lockdep_assert_irqs_disabled();

	RCU_LOCKDEP_WARN(ct_dynticks_nesting() <= 0,
			 "RCU dynticks_nesting counter underflow/zero!");
	RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() !=
			 DYNTICK_IRQ_NONIDLE,
			 "Bad RCU dynticks_nmi_nesting counter\n");
	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
			 "RCU in extended quiescent state!");
}
#endif /* #ifdef CONFIG_PROVE_RCU */

#ifdef CONFIG_NO_HZ_FULL
/**
 * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
 *
 * The scheduler tick is not normally enabled when CPUs enter the kernel
 * from nohz_full userspace execution.  After all, nohz_full userspace
 * execution is an RCU quiescent state and the time executing in the kernel
 * is quite short.  Except of course when it isn't.  And it is not hard to
 * cause a large system to spend tens of seconds or even minutes looping
 * in the kernel, which can cause a number of problems, including RCU CPU
 * stall warnings.
 *
 * Therefore, if a nohz_full CPU fails to report a quiescent state
 * in a timely manner, the RCU grace-period kthread sets that CPU's
 * ->rcu_urgent_qs flag with the expectation that the next interrupt or
 * exception will invoke this function, which will turn on the scheduler
 * tick, which will enable RCU to detect that CPU's quiescent states,
 * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.
 * The tick will be disabled once a quiescent state is reported for
 * this CPU.
 *
 * Of course, in carefully tuned systems, there might never be an
 * interrupt or exception.  In that case, the RCU grace-period kthread
 * will eventually cause one to happen.  However, in less carefully
 * controlled environments, this function allows RCU to get what it
 * needs without creating otherwise useless interruptions.
 */
void __rcu_irq_enter_check_tick(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	// If we're here from NMI there's nothing to do.
	if (in_nmi())
		return;

	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
			 "Illegal rcu_irq_enter_check_tick() from extended quiescent state");

	if (!tick_nohz_full_cpu(rdp->cpu) ||
	    !READ_ONCE(rdp->rcu_urgent_qs) ||
	    READ_ONCE(rdp->rcu_forced_tick)) {
		// RCU doesn't need nohz_full help from this CPU, or it is
		// already getting that help.
		return;
	}

	// We get here only when not in an extended quiescent state and
	// from interrupts (as opposed to NMIs).  Therefore, (1) RCU is
	// already watching and (2) The fact that we are in an interrupt
	// handler and that the rcu_node lock is an irq-disabled lock
	// prevents self-deadlock.  So we can safely recheck under the lock.
	// Note that the nohz_full state currently cannot change.
	raw_spin_lock_rcu_node(rdp->mynode);
	if (READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) {
		// A nohz_full CPU is in the kernel and RCU needs a
		// quiescent state.  Turn on the tick!
		WRITE_ONCE(rdp->rcu_forced_tick, true);
		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
	}
	raw_spin_unlock_rcu_node(rdp->mynode);
}
NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick);
#endif /* CONFIG_NO_HZ_FULL */

/*
 * Check to see if any future non-offloaded RCU-related work will need
 * to be done by the current CPU, even if none need be done immediately,
 * returning 1 if so.  This function is part of the RCU implementation;
 * it is -not- an exported member of the RCU API.  This is used by
 * the idle-entry code to figure out whether it is safe to disable the
 * scheduler-clock interrupt.
 *
 * Just check whether or not this CPU has non-offloaded RCU callbacks
 * queued.
 */
int rcu_needs_cpu(void)
{
	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}

/*
 * If any sort of urgency was applied to the current CPU (for example,
 * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
 * to get to a quiescent state, disable it.
 */
static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
{
	raw_lockdep_assert_held_rcu_node(rdp->mynode);
	WRITE_ONCE(rdp->rcu_urgent_qs, false);
	WRITE_ONCE(rdp->rcu_need_heavy_qs, false);
	if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
		tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
		WRITE_ONCE(rdp->rcu_forced_tick, false);
	}
}

/**
 * rcu_is_watching - RCU read-side critical sections permitted on current CPU?
 *
 * Return @true if RCU is watching the running CPU and @false otherwise.
 * A @true return means that this CPU can safely enter RCU read-side
 * critical sections.
 *
 * Although calls to rcu_is_watching() from most parts of the kernel
 * will return @true, there are important exceptions.  For example, if the
 * current CPU is deep within its idle loop, in kernel entry/exit code,
 * or offline, rcu_is_watching() will return @false.
 *
 * Make notrace because it can be called by the internal functions of
 * ftrace, and making this notrace removes unnecessary recursion calls.
 */
notrace bool rcu_is_watching(void)
{
	bool ret;

	preempt_disable_notrace();
	ret = !rcu_dynticks_curr_cpu_in_eqs();
	preempt_enable_notrace();
	return ret;
}
EXPORT_SYMBOL_GPL(rcu_is_watching);
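
/*
 * Illustrative usage sketch (not taken from this file): code that might run
 * from idle or early entry/exit paths, such as a tracer or probe handler,
 * can use rcu_is_watching() to bail out rather than entering an RCU
 * read-side critical section while RCU is not watching:
 *
 *	if (!rcu_is_watching())
 *		return;		// RCU would not protect the access below.
 *	rcu_read_lock();
 *	...dereference RCU-protected data...
 *	rcu_read_unlock();
 */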

/*
 * If a holdout task is actually running, request an urgent quiescent
 * state from its CPU.  This is unsynchronized, so migrations can cause
 * the request to go to the wrong CPU.  Which is OK, all that will happen
 * is that the CPU's next context switch will be a bit slower and next
 * time around this task will generate another request.
 */
void rcu_request_urgent_qs_task(struct task_struct *t)
{
	int cpu;

	barrier();
	cpu = task_cpu(t);
	if (!task_curr(t))
		return; /* This task is not running on that CPU. */
	smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
}

/*
 * When trying to report a quiescent state on behalf of some other CPU,
 * it is our responsibility to check for and handle potential overflow
 * of the rcu_node ->gp_seq counter with respect to the rcu_data counters.
 * After all, the CPU might be in deep idle state, and thus executing no
 * code whatsoever.
 */
static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
{
	raw_lockdep_assert_held_rcu_node(rnp);
	if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
			 rnp->gp_seq))
		WRITE_ONCE(rdp->gpwrap, true);
	if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
		rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
}
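
/*
 * The ULONG_MAX / 4 offsets above act as a wrap guard: if this CPU's
 * rcu_data counters have fallen more than a quarter of the counter space
 * behind the rcu_node ->gp_seq value, the CPU is assumed to have slept
 * through a counter wrap, so ->gpwrap is set and ->rcu_iw_gp_seq is pulled
 * forward rather than trusting the stale values.
 */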

/*
 * Snapshot the specified CPU's dynticks counter so that we can later
 * credit them with an implicit quiescent state.  Return 1 if this CPU
 * is in dynticks idle mode, which is an extended quiescent state.
 */
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
	rdp->dynticks_snap = rcu_dynticks_snap(rdp->cpu);
	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
		trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
		rcu_gpnum_ovf(rdp->mynode, rdp);
		return 1;
	}
	return 0;
}

/*
 * Returns positive if the specified CPU has passed through a quiescent state
 * by virtue of being in or having passed through a dynticks idle state since
 * the last call to dyntick_save_progress_counter() for this same CPU, or by
 * virtue of having been offline.
 *
 * Returns negative if the specified CPU needs a force resched.
 *
 * Returns zero otherwise.
 */
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
{
	unsigned long jtsq;
	int ret = 0;
	struct rcu_node *rnp = rdp->mynode;

	/*
	 * If the CPU passed through or entered a dynticks idle phase with
	 * no active irq/NMI handlers, then we can safely pretend that the CPU
	 * already acknowledged the request to pass through a quiescent
	 * state.  Either way, that CPU cannot possibly be in an RCU
	 * read-side critical section that started before the beginning
	 * of the current RCU grace period.
	 */
	if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap)) {
		trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
		rcu_gpnum_ovf(rnp, rdp);
		return 1;
	}
786a66ae8aeSPaul E. McKenney
7879b9500daSPaul E. McKenney /*
7889b9500daSPaul E. McKenney * Complain if a CPU that is considered to be offline from RCU's
7899b9500daSPaul E. McKenney * perspective has not yet reported a quiescent state. After all,
7909b9500daSPaul E. McKenney * the offline CPU should have reported a quiescent state during
7919b9500daSPaul E. McKenney * the CPU-offline process, or, failing that, by rcu_gp_init()
792a32e01eeSMatthew Wilcox * if it ran concurrently with either the CPU going offline or the
793a66ae8aeSPaul E. McKenney * last task on a leaf rcu_node structure exiting its RCU read-side
794a66ae8aeSPaul E. McKenney * critical section while all CPUs corresponding to that structure
7959b9500daSPaul E. McKenney * are offline. This added warning detects bugs in any of these
7968aa670cdSPaul E. McKenney * code paths.
7978aa670cdSPaul E. McKenney *
7989b9500daSPaul E. McKenney * The rcu_node structure's ->lock is held here, which excludes
7999b9500daSPaul E. McKenney * the relevant portions the CPU-hotplug code, the grace-period
8009b9500daSPaul E. McKenney * initialization code, and the rcu_read_unlock() code paths.
8014102adabSPaul E. McKenney *
8024102adabSPaul E. McKenney * For more detail, please refer to the "Hotplug CPU" section
8034102adabSPaul E. McKenney * of RCU's Requirements documentation.
8044102adabSPaul E. McKenney */
	if (WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp))) {
		struct rcu_node *rnp1;

		pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
			__func__, rnp->grplo, rnp->grphi, rnp->level,
			(long)rnp->gp_seq, (long)rnp->completedqs);
		for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
			pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
				__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
		pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
			__func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)],
			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
		return 1; /* Break things loose after complaining. */
	}

	/*
	 * A CPU running for an extended time within the kernel can
	 * delay RCU grace periods: (1) At age jiffies_to_sched_qs,
	 * set .rcu_urgent_qs, (2) At age 2*jiffies_to_sched_qs, set
	 * both .rcu_need_heavy_qs and .rcu_urgent_qs.  Note that the
	 * unsynchronized assignments to the per-CPU rcu_need_heavy_qs
	 * variable are safe because the assignments are repeated if this
	 * CPU failed to pass through a quiescent state.  This code
	 * also checks .jiffies_resched in case jiffies_to_sched_qs
	 * is set way high.
	 */
	jtsq = READ_ONCE(jiffies_to_sched_qs);
	if (!READ_ONCE(rdp->rcu_need_heavy_qs) &&
	    (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
	     time_after(jiffies, rcu_state.jiffies_resched) ||
	     rcu_state.cbovld)) {
		WRITE_ONCE(rdp->rcu_need_heavy_qs, true);
		/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
		smp_store_release(&rdp->rcu_urgent_qs, true);
	} else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {
		WRITE_ONCE(rdp->rcu_urgent_qs, true);
	}

	/*
	 * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq!
	 * The above code handles this, but only for straight cond_resched().
	 * And some in-kernel loops check need_resched() before calling
	 * cond_resched(), which defeats the above code for CPUs that are
	 * running in-kernel with scheduling-clock interrupts disabled.
	 * So hit them over the head with the resched_cpu() hammer!
	 */
	if (tick_nohz_full_cpu(rdp->cpu) &&
	    (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) ||
	     rcu_state.cbovld)) {
		WRITE_ONCE(rdp->rcu_urgent_qs, true);
		WRITE_ONCE(rdp->last_fqs_resched, jiffies);
		ret = -1;
	}

	/*
	 * If more than halfway to RCU CPU stall-warning time, invoke
	 * resched_cpu() more frequently to try to loosen things up a bit.
	 * Also check to see if the CPU is getting hammered with interrupts,
	 * but only once per grace period, just to keep the IPIs down to
	 * a dull roar.
	 */
	if (time_after(jiffies, rcu_state.jiffies_resched)) {
		if (time_after(jiffies,
			       READ_ONCE(rdp->last_fqs_resched) + jtsq)) {
			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
			ret = -1;
		}
		if (IS_ENABLED(CONFIG_IRQ_WORK) &&
		    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
		    (rnp->ffmask & rdp->grpmask)) {
			rdp->rcu_iw_pending = true;
			rdp->rcu_iw_gp_seq = rnp->gp_seq;
			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
		}

		if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) {
			int cpu = rdp->cpu;
			struct rcu_snap_record *rsrp;
			struct kernel_cpustat *kcsp;

			kcsp = &kcpustat_cpu(cpu);

			rsrp = &rdp->snap_record;
			rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
			rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
89188ee23efSPaul E. McKenney rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
8929226b10dSPaul E. McKenney rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
8939424b867SPaul E. McKenney rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
8947e28c5afSPaul E. McKenney rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
8959424b867SPaul E. McKenney rsrp->jiffies = jiffies;
8964a81e832SPaul E. McKenney rsrp->gp_seq = rdp->gp_seq;
8974914950aSPaul E. McKenney }
89828053bc7SPaul E. McKenney }
899c98cac60SPaul E. McKenney
900d3052109SPaul E. McKenney return ret;
901d3052109SPaul E. McKenney }
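/*
 * The deadline checks above (against ->jiffies_resched, ->last_fqs_resched,
 * and so on) all rely on the wraparound-safe time_after() idiom.  The
 * following standalone sketch re-implements that idiom locally (it is not
 * the <linux/jiffies.h> macro) just to show why the signed subtraction
 * keeps working when the counter wraps.
 */
#include <stdio.h>

/* Wraparound-safe "a is after b", same idea as the kernel's time_after(). */
#define sketch_time_after(a, b) ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long now = (unsigned long)-3;	/* counter about to wrap */
	unsigned long deadline = now + 10;	/* wraps past zero to a small value */

	for (int i = 0; i < 15; i++, now++)
		printf("now=%lu expired=%d\n", now,
		       sketch_time_after(now, deadline));
	return 0;
}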
902d3052109SPaul E. McKenney
903d3052109SPaul E. McKenney /* Trace-event wrapper function for trace_rcu_future_grace_period. */
904d3052109SPaul E. McKenney static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
90528053bc7SPaul E. McKenney unsigned long gp_seq_req, const char *s)
906d3052109SPaul E. McKenney {
907b2b00ddfSPaul E. McKenney trace_rcu_future_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
908b2b00ddfSPaul E. McKenney gp_seq_req, rnp->level,
9099424b867SPaul E. McKenney rnp->grplo, rnp->grphi, s);
91028053bc7SPaul E. McKenney }
911d3052109SPaul E. McKenney
912d3052109SPaul E. McKenney /*
913d3052109SPaul E. McKenney * rcu_start_this_gp - Request the start of a particular grace period
914d3052109SPaul E. McKenney * @rnp_start: The leaf node of the CPU from which to start.
915d3052109SPaul E. McKenney * @rdp: The rcu_data corresponding to the CPU from which to start.
916d3052109SPaul E. McKenney * @gp_seq_req: The gp_seq of the grace period to start.
917d3052109SPaul E. McKenney *
918d3052109SPaul E. McKenney * Start the specified grace period, as needed to handle newly arrived
919d3052109SPaul E. McKenney * callbacks. The required future grace periods are recorded in each
9204914950aSPaul E. McKenney * rcu_node structure's ->gp_seq_needed field.
922d3052109SPaul E. McKenney *
923d3052109SPaul E. McKenney * The caller must hold the specified rcu_node structure's ->lock, which
9244914950aSPaul E. McKenney * is why the caller is responsible for waking the grace-period kthread.
925d3052109SPaul E. McKenney *
926d3052109SPaul E. McKenney * Returns true if the grace-period kthread needs to be awakened, false otherwise.
9279b9500daSPaul E. McKenney */
9288aa670cdSPaul E. McKenney static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
9299b9500daSPaul E. McKenney unsigned long gp_seq_req)
9309b9500daSPaul E. McKenney {
9318aa670cdSPaul E. McKenney bool ret = false;
9329b9500daSPaul E. McKenney struct rcu_node *rnp;
9339b9500daSPaul E. McKenney
9349b9500daSPaul E. McKenney /*
9356193c76aSPaul E. McKenney * Use funnel locking to either acquire the root rcu_node
9364102adabSPaul E. McKenney * structure's lock or bail out if the need for this grace period
9374102adabSPaul E. McKenney * has already been recorded -- or if that grace period has in
9384102adabSPaul E. McKenney * fact already started. If there is already a grace period in
93941e80595SPaul E. McKenney * progress in a non-leaf node, no recording is needed because the
94041e80595SPaul E. McKenney * end of the grace period will scan the leaf rcu_node structures.
941b73de91dSJoel Fernandes * Note that rnp_start->lock must not be released.
9424102adabSPaul E. McKenney */
9430937d045SPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp_start);
9440937d045SPaul E. McKenney trace_rcu_this_gp(rnp_start, rdp, gp_seq_req, TPS("Startleaf"));
9450937d045SPaul E. McKenney for (rnp = rnp_start; 1; rnp = rnp->parent) {
9464102adabSPaul E. McKenney if (rnp != rnp_start)
9474102adabSPaul E. McKenney raw_spin_lock_rcu_node(rnp);
9484102adabSPaul E. McKenney if (ULONG_CMP_GE(rnp->gp_seq_needed, gp_seq_req) ||
949b73de91dSJoel Fernandes rcu_seq_started(&rnp->gp_seq, gp_seq_req) ||
950df2bf8f7SJoel Fernandes (Google) (rnp != rnp_start &&
951b73de91dSJoel Fernandes rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))) {
952b73de91dSJoel Fernandes trace_rcu_this_gp(rnp, rdp, gp_seq_req,
953b73de91dSJoel Fernandes TPS("Prestarted"));
95441e80595SPaul E. McKenney goto unlock_out;
9554102adabSPaul E. McKenney }
9567a1d0f23SPaul E. McKenney WRITE_ONCE(rnp->gp_seq_needed, gp_seq_req);
95748a7639cSPaul E. McKenney if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) {
9584102adabSPaul E. McKenney /*
959d5cd9685SPaul E. McKenney * We just marked the leaf or internal node, and a
960d5cd9685SPaul E. McKenney * grace period is in progress, which means that
961b73de91dSJoel Fernandes * rcu_gp_cleanup() will see the marking. Bail to
962b73de91dSJoel Fernandes * reduce contention.
9634102adabSPaul E. McKenney */
964df2bf8f7SJoel Fernandes (Google) trace_rcu_this_gp(rnp_start, rdp, gp_seq_req,
965b73de91dSJoel Fernandes TPS("Startedleaf"));
9664102adabSPaul E. McKenney goto unlock_out;
96748a7639cSPaul E. McKenney }
968df2bf8f7SJoel Fernandes (Google) if (rnp != rnp_start && rnp->parent != NULL)
9694102adabSPaul E. McKenney raw_spin_unlock_rcu_node(rnp);
970360e0da6SPaul E. McKenney if (!rnp->parent)
971360e0da6SPaul E. McKenney break; /* At root, and perhaps also leaf. */
972360e0da6SPaul E. McKenney }
973df2bf8f7SJoel Fernandes (Google)
974df2bf8f7SJoel Fernandes (Google) /* If GP already in progress, just leave, otherwise start one. */
975df2bf8f7SJoel Fernandes (Google) if (rcu_gp_in_progress()) {
976df2bf8f7SJoel Fernandes (Google) trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedleafroot"));
977df2bf8f7SJoel Fernandes (Google) goto unlock_out;
978360e0da6SPaul E. McKenney }
979df2bf8f7SJoel Fernandes (Google) trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot"));
980df2bf8f7SJoel Fernandes (Google) WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags | RCU_GP_FLAG_INIT);
981df2bf8f7SJoel Fernandes (Google) WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
982df2bf8f7SJoel Fernandes (Google) if (!READ_ONCE(rcu_state.gp_kthread)) {
983df2bf8f7SJoel Fernandes (Google) trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
984df2bf8f7SJoel Fernandes (Google) goto unlock_out;
985df2bf8f7SJoel Fernandes (Google) }
986df2bf8f7SJoel Fernandes (Google) trace_rcu_grace_period(rcu_state.name, data_race(rcu_state.gp_seq), TPS("newreq"));
987df2bf8f7SJoel Fernandes (Google) ret = true; /* Caller must wake GP kthread. */
988df2bf8f7SJoel Fernandes (Google) unlock_out:
989b73de91dSJoel Fernandes /* Push furthest requested GP to leaf node and rcu_data structure. */
9904102adabSPaul E. McKenney if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) {
9914102adabSPaul E. McKenney WRITE_ONCE(rnp_start->gp_seq_needed, rnp->gp_seq_needed);
9928ff37290SPaul E. McKenney WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
993226ca5e7SJoel Fernandes (Google) }
994a2165e41SPaul E. McKenney if (rnp != rnp_start)
995226ca5e7SJoel Fernandes (Google) raw_spin_unlock_rcu_node(rnp);
996226ca5e7SJoel Fernandes (Google) return ret;
997226ca5e7SJoel Fernandes (Google) }
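/*
 * A standalone sketch of the funnel-locking walk used by rcu_start_this_gp()
 * above: take each level's lock on the way from a leaf toward the root and
 * bail out as soon as the request is already recorded.  The node layout, the
 * request_gp() name, and the pthread mutexes are stand-ins for illustration;
 * unlike the kernel code, the sketch takes the leaf lock itself and omits the
 * "grace period already in progress" checks.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct gpnode {
	pthread_mutex_t lock;
	unsigned long seq_needed;	/* highest grace period requested here */
	struct gpnode *parent;		/* NULL at the root */
};

/* Funnel a request for grace period "req" from "leaf" toward the root. */
static bool request_gp(struct gpnode *leaf, unsigned long req)
{
	struct gpnode *n;
	bool kick = false;

	pthread_mutex_lock(&leaf->lock);		/* held for the whole walk */
	for (n = leaf; ; n = n->parent) {
		if (n != leaf)
			pthread_mutex_lock(&n->lock);
		if (n->seq_needed >= req)		/* already requested: bail early */
			goto unlock_out;
		n->seq_needed = req;
		if (n != leaf && n->parent)
			pthread_mutex_unlock(&n->lock);	/* done with this level */
		if (!n->parent)
			break;				/* reached the root */
	}
	kick = true;		/* new request reached the root: wake the GP thread */
unlock_out:
	if (n != leaf)
		pthread_mutex_unlock(&n->lock);		/* root or bail-out node */
	pthread_mutex_unlock(&leaf->lock);
	return kick;
}

int main(void)
{
	struct gpnode root = { PTHREAD_MUTEX_INITIALIZER, 0, NULL };
	struct gpnode leaf = { PTHREAD_MUTEX_INITIALIZER, 0, &root };

	printf("first request needs a wakeup: %d\n", request_gp(&leaf, 4));
	printf("duplicate request needs one:  %d\n", request_gp(&leaf, 4));
	return 0;
}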
998226ca5e7SJoel Fernandes (Google)
999a2165e41SPaul E. McKenney /*
1000df2bf8f7SJoel Fernandes (Google) * Clean up any old requests for the just-ended grace period. Also return
1001b73de91dSJoel Fernandes * whether any additional grace periods have been requested.
1002a2165e41SPaul E. McKenney */
1003a2165e41SPaul E. McKenney static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
1004df2bf8f7SJoel Fernandes (Google) {
1005df2bf8f7SJoel Fernandes (Google) bool needmore;
1006df2bf8f7SJoel Fernandes (Google) struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1007360e0da6SPaul E. McKenney
10084102adabSPaul E. McKenney needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
1009360e0da6SPaul E. McKenney if (!needmore)
1010360e0da6SPaul E. McKenney rnp->gp_seq_needed = rnp->gp_seq; /* Avoid counter wrap. */
1011de8e8730SPaul E. McKenney trace_rcu_this_gp(rnp, rdp, rnp->gp_seq,
1012df2bf8f7SJoel Fernandes (Google) needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1013360e0da6SPaul E. McKenney return needmore;
1014360e0da6SPaul E. McKenney }
1015df2bf8f7SJoel Fernandes (Google)
10169cbc5b97SPaul E. McKenney static void swake_up_one_online_ipi(void *arg)
10172906d215SPaul E. McKenney {
10185648d659SPaul E. McKenney struct swait_queue_head *wqh = arg;
1019df2bf8f7SJoel Fernandes (Google)
1020360e0da6SPaul E. McKenney swake_up_one(wqh);
1021360e0da6SPaul E. McKenney }
102262ae1951SPaul E. McKenney
1023360e0da6SPaul E. McKenney static void swake_up_one_online(struct swait_queue_head *wqh)
10244102adabSPaul E. McKenney {
1025ab5e869cSPaul E. McKenney int cpu = get_cpu();
1026df2bf8f7SJoel Fernandes (Google)
10278ff37290SPaul E. McKenney /*
10288ff37290SPaul E. McKenney * If called from rcutree_report_cpu_starting(), a wakeup
1029ab5e869cSPaul E. McKenney * is dangerous that late in the CPU-down hotplug process. The
1030df2bf8f7SJoel Fernandes (Google) * scheduler might queue an ignored hrtimer. Defer the wake up
1031df2bf8f7SJoel Fernandes (Google) * to an online CPU instead.
103248a7639cSPaul E. McKenney */
10334102adabSPaul E. McKenney if (unlikely(cpu_is_offline(cpu))) {
10344102adabSPaul E. McKenney int target;
10354102adabSPaul E. McKenney
10364102adabSPaul E. McKenney target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU),
1037d1e4f01dSPaul E. McKenney cpu_online_mask);
10384102adabSPaul E. McKenney
10393481f2eaSPaul E. McKenney smp_call_function_single(target, swake_up_one_online_ipi,
10404102adabSPaul E. McKenney wqh, 0);
1041fb31340fSPaul E. McKenney put_cpu();
1042da1df50dSPaul E. McKenney } else {
10434102adabSPaul E. McKenney put_cpu();
10447a1d0f23SPaul E. McKenney swake_up_one(wqh);
10457a1d0f23SPaul E. McKenney }
10467a1d0f23SPaul E. McKenney }
1047b73de91dSJoel Fernandes
10484102adabSPaul E. McKenney /*
10494102adabSPaul E. McKenney * Awaken the grace-period kthread. Don't do a self-awaken (unless in an
10504102adabSPaul E. McKenney * interrupt or softirq handler, in which case we just might immediately
10514102adabSPaul E. McKenney * sleep upon return, resulting in a grace-period hang), and don't bother
10524102adabSPaul E. McKenney * awakening when there is nothing for the grace-period kthread to do
10535648d659SPaul E. McKenney * (as in several CPUs raced to awaken, and we lost), and finally don't try
10545648d659SPaul E. McKenney * to awaken a kthread that has not yet been created. If all those checks
10555648d659SPaul E. McKenney * are passed, track some debug information and awaken.
10565648d659SPaul E. McKenney *
10575648d659SPaul E. McKenney * So why do the self-wakeup when in an interrupt or softirq handler
10585648d659SPaul E. McKenney * in the grace-period kthread's context? Because the kthread might have
10595648d659SPaul E. McKenney * been interrupted just as it was going to sleep, and just after the final
10601d1f898dSZhang, Jun * pre-sleep check of the awaken condition. In this case, a wakeup really
10611d1f898dSZhang, Jun * is required, and is therefore supplied.
10621d1f898dSZhang, Jun */
10631d1f898dSZhang, Jun static void rcu_gp_kthread_wake(void)
10641d1f898dSZhang, Jun {
10651d1f898dSZhang, Jun struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
106648a7639cSPaul E. McKenney
1067532c00c9SPaul E. McKenney if ((current == t && !in_hardirq() && !in_serving_softirq()) ||
106848a7639cSPaul E. McKenney !READ_ONCE(rcu_state.gp_flags) || !t)
10695648d659SPaul E. McKenney return;
10705648d659SPaul E. McKenney WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
10712407a64fSChangbin Du WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
10725648d659SPaul E. McKenney swake_up_one_online(&rcu_state.gp_wq);
107348a7639cSPaul E. McKenney }
1074fd897573SPaul E. McKenney
1075fd897573SPaul E. McKenney /*
1076532c00c9SPaul E. McKenney * If there is room, assign a ->gp_seq number to any callbacks on this
107748a7639cSPaul E. McKenney * CPU that have not already been assigned. Also accelerate any callbacks
107848a7639cSPaul E. McKenney * that were previously assigned a ->gp_seq number that has since proven
107948a7639cSPaul E. McKenney * to be too conservative, which can happen if callbacks get assigned a
108029365e56SPaul E. McKenney * ->gp_seq number while RCU is idle, but with reference to a non-root
108129365e56SPaul E. McKenney * rcu_node structure. This function is idempotent, so it does not hurt
108229365e56SPaul E. McKenney * to call it repeatedly. Returns a flag indicating whether we should awaken
108329365e56SPaul E. McKenney * the RCU grace-period kthread.
108429365e56SPaul E. McKenney *
108529365e56SPaul E. McKenney * The caller must hold rnp->lock with interrupts disabled.
108629365e56SPaul E. McKenney */
108729365e56SPaul E. McKenney static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
10884102adabSPaul E. McKenney {
10894102adabSPaul E. McKenney unsigned long gp_seq_req;
10904102adabSPaul E. McKenney bool ret = false;
109102f50142SPaul E. McKenney
10924102adabSPaul E. McKenney rcu_lockdep_assert_cblist_protected(rdp);
1093b73de91dSJoel Fernandes raw_lockdep_assert_held_rcu_node(rnp);
109415fecf89SPaul E. McKenney
10954102adabSPaul E. McKenney /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
1096d1b222c6SPaul E. McKenney if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1097a32e01eeSMatthew Wilcox return false;
1098c0b334c5SPaul E. McKenney
109915fecf89SPaul E. McKenney trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPreAcc"));
110015fecf89SPaul E. McKenney
110148a7639cSPaul E. McKenney /*
11024102adabSPaul E. McKenney * Callbacks are often registered with incomplete grace-period
11033afe7fa5SJoel Fernandes (Google) * information. Something about the fact that getting exact
11043afe7fa5SJoel Fernandes (Google) * information requires acquiring a global lock... RCU therefore
11054102adabSPaul E. McKenney * makes a conservative estimate of the grace period number at which
110615fecf89SPaul E. McKenney * a given callback will become ready to invoke. The following
110715fecf89SPaul E. McKenney * code checks this estimate and improves it when possible, thus
110815fecf89SPaul E. McKenney * accelerating callback invocation to an earlier grace-period
110915fecf89SPaul E. McKenney * number.
111015fecf89SPaul E. McKenney */
111115fecf89SPaul E. McKenney gp_seq_req = rcu_seq_snap(&rcu_state.gp_seq);
111215fecf89SPaul E. McKenney if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req))
111315fecf89SPaul E. McKenney ret = rcu_start_this_gp(rnp, rdp, gp_seq_req);
11144102adabSPaul E. McKenney
11159cbc5b97SPaul E. McKenney /* Trace depending on how much we were able to accelerate. */
1116b73de91dSJoel Fernandes if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
1117b73de91dSJoel Fernandes trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccWaitCB"));
11184102adabSPaul E. McKenney else
11194102adabSPaul E. McKenney trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB"));
112015fecf89SPaul E. McKenney
1121a7886e89SJoel Fernandes (Google) trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPostAcc"));
11224102adabSPaul E. McKenney
1123a7886e89SJoel Fernandes (Google) return ret;
1124a7886e89SJoel Fernandes (Google) }
11253afe7fa5SJoel Fernandes (Google)
11263afe7fa5SJoel Fernandes (Google) /*
112748a7639cSPaul E. McKenney * Similar to rcu_accelerate_cbs(), but does not require that the leaf
11284102adabSPaul E. McKenney * rcu_node structure's ->lock be held. It consults the cached value
11294102adabSPaul E. McKenney * of ->gp_seq_needed in the rcu_data structure, and if that indicates
11304102adabSPaul E. McKenney * that a new grace-period request should be made, invokes rcu_accelerate_cbs()
1131e44e73caSPaul E. McKenney * while holding the leaf rcu_node structure's ->lock.
1132e44e73caSPaul E. McKenney */
1133e44e73caSPaul E. McKenney static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
1134e44e73caSPaul E. McKenney struct rcu_data *rdp)
1135e44e73caSPaul E. McKenney {
1136e44e73caSPaul E. McKenney unsigned long c;
1137c6e09b97SPaul E. McKenney bool needwake;
1138e44e73caSPaul E. McKenney
1139e44e73caSPaul E. McKenney rcu_lockdep_assert_cblist_protected(rdp);
1140e44e73caSPaul E. McKenney c = rcu_seq_snap(&rcu_state.gp_seq);
1141e44e73caSPaul E. McKenney if (!READ_ONCE(rdp->gpwrap) && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
1142e44e73caSPaul E. McKenney /* Old request still live, so mark recent callbacks. */
1143d1b222c6SPaul E. McKenney (void)rcu_segcblist_accelerate(&rdp->cblist, c);
1144c6e09b97SPaul E. McKenney return;
1145a5b89501SPaul E. McKenney }
1146e44e73caSPaul E. McKenney raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
1147e44e73caSPaul E. McKenney needwake = rcu_accelerate_cbs(rnp, rdp);
1148e44e73caSPaul E. McKenney raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
1149e44e73caSPaul E. McKenney if (needwake)
1150e44e73caSPaul E. McKenney rcu_gp_kthread_wake();
115102f50142SPaul E. McKenney }
1152e44e73caSPaul E. McKenney
1153e44e73caSPaul E. McKenney /*
1154532c00c9SPaul E. McKenney * Move any callbacks whose grace period has completed to the
1155e44e73caSPaul E. McKenney * RCU_DONE_TAIL sublist, then compact the remaining sublists and
1156e44e73caSPaul E. McKenney * assign ->gp_seq numbers to any callbacks in the RCU_NEXT_TAIL
1157e44e73caSPaul E. McKenney * sublist. This function is idempotent, so it does not hurt to
11584102adabSPaul E. McKenney * invoke it repeatedly. As long as it is not invoked -too- often...
11594102adabSPaul E. McKenney * Returns true if the RCU grace-period kthread needs to be awakened.
116029365e56SPaul E. McKenney *
11614102adabSPaul E. McKenney * The caller must hold rnp->lock with interrupts disabled.
11624102adabSPaul E. McKenney */
116348a7639cSPaul E. McKenney static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
11644102adabSPaul E. McKenney {
11654102adabSPaul E. McKenney rcu_lockdep_assert_cblist_protected(rdp);
11664102adabSPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp);
1167834f56bfSPaul E. McKenney
11684102adabSPaul E. McKenney /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
1169d1b222c6SPaul E. McKenney if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1170a32e01eeSMatthew Wilcox return false;
1171c0b334c5SPaul E. McKenney
117215fecf89SPaul E. McKenney /*
117315fecf89SPaul E. McKenney * Find all callbacks whose ->gp_seq numbers indicate that they
117448a7639cSPaul E. McKenney * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
11754102adabSPaul E. McKenney */
11764102adabSPaul E. McKenney rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq);
117729365e56SPaul E. McKenney
11784102adabSPaul E. McKenney /* Classify any remaining callbacks. */
11794102adabSPaul E. McKenney return rcu_accelerate_cbs(rnp, rdp);
118029365e56SPaul E. McKenney }
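/*
 * A much-simplified standalone sketch of the bookkeeping behind
 * rcu_accelerate_cbs() and rcu_advance_cbs() above: each callback carries
 * the grace period it must wait for, "accelerate" tightens numbers that
 * turned out to be too conservative, and "advance" handles callbacks whose
 * grace period has completed.  The list layout and names are invented; the
 * kernel uses rcu_segcblist and moves ready callbacks to a done sublist
 * rather than invoking them directly.
 */
#include <stdbool.h>
#include <stddef.h>

struct sketch_cb {
	struct sketch_cb *next;
	unsigned long wait_gp;			/* grace period this callback waits for */
	void (*func)(struct sketch_cb *);
};

struct sketch_cblist {
	struct sketch_cb *head;
};

/* "Accelerate": lower wait numbers that were assigned too conservatively. */
static bool sketch_accelerate_cbs(struct sketch_cblist *cl, unsigned long gp_seq_req)
{
	bool need_gp = false;

	for (struct sketch_cb *p = cl->head; p; p = p->next) {
		if (p->wait_gp > gp_seq_req) {
			p->wait_gp = gp_seq_req;	/* ready after an earlier GP */
			need_gp = true;			/* caller should request that GP */
		}
	}
	return need_gp;
}

/* "Advance": process every callback whose grace period has completed. */
static void sketch_advance_cbs(struct sketch_cblist *cl, unsigned long completed_gp)
{
	struct sketch_cb **pp = &cl->head;

	while (*pp) {
		struct sketch_cb *p = *pp;

		if (p->wait_gp <= completed_gp) {
			*pp = p->next;			/* unlink, then invoke */
			p->func(p);
		} else {
			pp = &p->next;
		}
	}
}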
11814102adabSPaul E. McKenney
11824102adabSPaul E. McKenney /*
118302f50142SPaul E. McKenney * Move and classify callbacks, but only if doing so won't require
11844102adabSPaul E. McKenney * that the RCU grace-period kthread be awakened.
11854102adabSPaul E. McKenney */
11864102adabSPaul E. McKenney static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
11877f36ef82SPaul E. McKenney struct rcu_data *rdp)
11887f36ef82SPaul E. McKenney {
11897f36ef82SPaul E. McKenney rcu_lockdep_assert_cblist_protected(rdp);
11907f36ef82SPaul E. McKenney if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))
11917f36ef82SPaul E. McKenney return;
11927f36ef82SPaul E. McKenney // The grace period cannot end while we hold the rcu_node lock.
1193d1b222c6SPaul E. McKenney if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))
1194614ddad1SPaul E. McKenney WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
11957f36ef82SPaul E. McKenney raw_spin_unlock_rcu_node(rnp);
1196614ddad1SPaul E. McKenney }
1197614ddad1SPaul E. McKenney
11987f36ef82SPaul E. McKenney /*
11996608c3a0SPaul E. McKenney * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
12007f36ef82SPaul E. McKenney * quiescent state. This is intended to be invoked when the CPU notices
12017f36ef82SPaul E. McKenney * a new grace period.
12027f36ef82SPaul E. McKenney */
12031a2f5d57SPaul E. McKenney static void rcu_strict_gp_check_qs(void)
12041a2f5d57SPaul E. McKenney {
12051a2f5d57SPaul E. McKenney if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
12061a2f5d57SPaul E. McKenney rcu_read_lock();
12071a2f5d57SPaul E. McKenney rcu_read_unlock();
12081a2f5d57SPaul E. McKenney }
12091a2f5d57SPaul E. McKenney }
12101a2f5d57SPaul E. McKenney
12111a2f5d57SPaul E. McKenney /*
12121a2f5d57SPaul E. McKenney * Update CPU-local rcu_data state to record the beginnings and ends of
12131a2f5d57SPaul E. McKenney * grace periods. The caller must hold the ->lock of the leaf rcu_node
12141a2f5d57SPaul E. McKenney * structure corresponding to the current CPU, and must have irqs disabled.
12151a2f5d57SPaul E. McKenney * Returns true if the grace-period kthread needs to be awakened.
12164102adabSPaul E. McKenney */
12174102adabSPaul E. McKenney static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
12184102adabSPaul E. McKenney {
121948a7639cSPaul E. McKenney bool ret = false;
12204102adabSPaul E. McKenney bool need_qs;
1221c7e48f7bSPaul E. McKenney const bool offloaded = rcu_rdp_is_offloaded(rdp);
12224102adabSPaul E. McKenney
12235d6742b3SPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp);
1224b5ea0370SPaul E. McKenney
12253820b513SFrederic Weisbecker if (rdp->gp_seq == rnp->gp_seq)
122648a7639cSPaul E. McKenney return false; /* Nothing to do. */
1227a32e01eeSMatthew Wilcox
1228c0b334c5SPaul E. McKenney /* Handle the ends of any preceding grace periods first. */
122967e14c1eSPaul E. McKenney if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) ||
123067e14c1eSPaul E. McKenney unlikely(READ_ONCE(rdp->gpwrap))) {
123167e14c1eSPaul E. McKenney if (!offloaded)
12324102adabSPaul E. McKenney ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */
123367e14c1eSPaul E. McKenney rdp->core_needs_qs = false;
123467e14c1eSPaul E. McKenney trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend"));
12355d6742b3SPaul E. McKenney } else {
12365d6742b3SPaul E. McKenney if (!offloaded)
1237b5ea0370SPaul E. McKenney ret = rcu_accelerate_cbs(rnp, rdp); /* Recent CBs. */
12389cbc5b97SPaul E. McKenney if (rdp->core_needs_qs)
123967e14c1eSPaul E. McKenney rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
12405d6742b3SPaul E. McKenney }
12415d6742b3SPaul E. McKenney
1242b5ea0370SPaul E. McKenney /* Now handle the beginnings of any new-to-this-CPU grace periods. */
1243b5ea0370SPaul E. McKenney if (rcu_seq_new_gp(rdp->gp_seq, rnp->gp_seq) ||
12444102adabSPaul E. McKenney unlikely(READ_ONCE(rdp->gpwrap))) {
12454102adabSPaul E. McKenney /*
124667e14c1eSPaul E. McKenney * If the current grace period is waiting for this CPU,
124767e14c1eSPaul E. McKenney * set up to detect a quiescent state, otherwise don't
124867e14c1eSPaul E. McKenney * go looking for one.
12494102adabSPaul E. McKenney */
12504102adabSPaul E. McKenney trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
12514102adabSPaul E. McKenney need_qs = !!(rnp->qsmask & rdp->grpmask);
12524102adabSPaul E. McKenney rdp->cpu_no_qs.b.norm = need_qs;
12534102adabSPaul E. McKenney rdp->core_needs_qs = need_qs;
12549cbc5b97SPaul E. McKenney zero_cpu_stall_ticks(rdp);
1255b5ea0370SPaul E. McKenney }
1256b5ea0370SPaul E. McKenney rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */
1257b5ea0370SPaul E. McKenney if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
12584102adabSPaul E. McKenney WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
12594102adabSPaul E. McKenney if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap))
126067e14c1eSPaul E. McKenney WRITE_ONCE(rdp->last_sched_clock, jiffies);
126113dc7d0cSZhang, Jun WRITE_ONCE(rdp->gpwrap, false);
12628ff37290SPaul E. McKenney rcu_gpnum_ovf(rnp, rdp);
1263c708b08cSPaul E. McKenney return ret;
1264c708b08cSPaul E. McKenney }
12653d18469aSPaul E. McKenney
12663d18469aSPaul E. McKenney static void note_gp_changes(struct rcu_data *rdp)
126748a7639cSPaul E. McKenney {
12684102adabSPaul E. McKenney unsigned long flags;
12694102adabSPaul E. McKenney bool needwake;
127015cabdffSPaul E. McKenney struct rcu_node *rnp;
12714102adabSPaul E. McKenney
12724102adabSPaul E. McKenney local_irq_save(flags);
127348a7639cSPaul E. McKenney rnp = rdp->mynode;
12744102adabSPaul E. McKenney if ((rdp->gp_seq == rcu_seq_current(&rnp->gp_seq) &&
12754102adabSPaul E. McKenney !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
12764102adabSPaul E. McKenney !raw_spin_trylock_rcu_node(rnp)) { /* irqs already off, so later. */
12774102adabSPaul E. McKenney local_irq_restore(flags);
127867e14c1eSPaul E. McKenney return;
12797d0ae808SPaul E. McKenney }
12802a67e741SPeter Zijlstra needwake = __note_gp_changes(rnp, rdp);
12814102adabSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
12824102adabSPaul E. McKenney rcu_strict_gp_check_qs();
12834102adabSPaul E. McKenney if (needwake)
1284c7e48f7bSPaul E. McKenney rcu_gp_kthread_wake();
128567c583a7SBoqun Feng }
12861a2f5d57SPaul E. McKenney
128748a7639cSPaul E. McKenney static atomic_t *rcu_gp_slow_suppress;
1288532c00c9SPaul E. McKenney
12894102adabSPaul E. McKenney /* Register a counter to suppress debugging grace-period delays. */
12904102adabSPaul E. McKenney void rcu_gp_slow_register(atomic_t *rgssp)
129199d6a2acSPaul E. McKenney {
129299d6a2acSPaul E. McKenney WARN_ON_ONCE(rcu_gp_slow_suppress);
129399d6a2acSPaul E. McKenney
129499d6a2acSPaul E. McKenney WRITE_ONCE(rcu_gp_slow_suppress, rgssp);
129599d6a2acSPaul E. McKenney }
129699d6a2acSPaul E. McKenney EXPORT_SYMBOL_GPL(rcu_gp_slow_register);
129799d6a2acSPaul E. McKenney
129899d6a2acSPaul E. McKenney /* Unregister a counter, with NULL for not caring which. */
129999d6a2acSPaul E. McKenney void rcu_gp_slow_unregister(atomic_t *rgssp)
130099d6a2acSPaul E. McKenney {
130199d6a2acSPaul E. McKenney WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress && rcu_gp_slow_suppress != NULL);
130299d6a2acSPaul E. McKenney
130399d6a2acSPaul E. McKenney WRITE_ONCE(rcu_gp_slow_suppress, NULL);
130499d6a2acSPaul E. McKenney }
130599d6a2acSPaul E. McKenney EXPORT_SYMBOL_GPL(rcu_gp_slow_unregister);
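/*
 * Hypothetical usage of the two hooks above, for example from test code that
 * wants to suppress the gp_preinit_delay/gp_init_delay/gp_cleanup_delay debug
 * delays while it runs.  Only rcu_gp_slow_register()/rcu_gp_slow_unregister()
 * are real interfaces here; the counter and function names are invented, and
 * only one counter may be registered at a time.
 */
static atomic_t my_gp_slow_suppress;

static void my_test_suppress_gp_slow(void)
{
	rcu_gp_slow_register(&my_gp_slow_suppress);
	atomic_inc(&my_gp_slow_suppress);	/* nonzero => rcu_gp_slow() skips delays */
}

static void my_test_allow_gp_slow(void)
{
	atomic_dec(&my_gp_slow_suppress);
	rcu_gp_slow_unregister(&my_gp_slow_suppress);
}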
130699d6a2acSPaul E. McKenney
130799d6a2acSPaul E. McKenney static bool rcu_gp_slow_is_suppressed(void)
130899d6a2acSPaul E. McKenney {
130999d6a2acSPaul E. McKenney atomic_t *rgssp = READ_ONCE(rcu_gp_slow_suppress);
131099d6a2acSPaul E. McKenney
131199d6a2acSPaul E. McKenney return rgssp && atomic_read(rgssp);
131299d6a2acSPaul E. McKenney }
131399d6a2acSPaul E. McKenney
131499d6a2acSPaul E. McKenney static void rcu_gp_slow(int delay)
131599d6a2acSPaul E. McKenney {
131699d6a2acSPaul E. McKenney if (!rcu_gp_slow_is_suppressed() && delay > 0 &&
131799d6a2acSPaul E. McKenney !(rcu_seq_ctr(rcu_state.gp_seq) % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
131822212332SPaul E. McKenney schedule_timeout_idle(delay);
13190f41c0ddSPaul E. McKenney }
132099d6a2acSPaul E. McKenney
132199d6a2acSPaul E. McKenney static unsigned long sleep_duration;
132277865deaSPaul E. McKenney
13230f41c0ddSPaul E. McKenney /* Allow rcutorture to stall the grace-period kthread. */
13240f41c0ddSPaul E. McKenney void rcu_gp_set_torture_wait(int duration)
132555b2dcf5SPaul E. McKenney {
132655b2dcf5SPaul E. McKenney if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0)
132755b2dcf5SPaul E. McKenney WRITE_ONCE(sleep_duration, duration);
132855b2dcf5SPaul E. McKenney }
132955b2dcf5SPaul E. McKenney EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait);
133055b2dcf5SPaul E. McKenney
133155b2dcf5SPaul E. McKenney /* Actually implement the aforementioned wait. */
133255b2dcf5SPaul E. McKenney static void rcu_gp_torture_wait(void)
133355b2dcf5SPaul E. McKenney {
133455b2dcf5SPaul E. McKenney unsigned long duration;
133555b2dcf5SPaul E. McKenney
133655b2dcf5SPaul E. McKenney if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST))
133755b2dcf5SPaul E. McKenney return;
133855b2dcf5SPaul E. McKenney duration = xchg(&sleep_duration, 0UL);
133955b2dcf5SPaul E. McKenney if (duration > 0) {
134055b2dcf5SPaul E. McKenney pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);
134155b2dcf5SPaul E. McKenney schedule_timeout_idle(duration);
134255b2dcf5SPaul E. McKenney pr_alert("%s: Wait complete\n", __func__);
134355b2dcf5SPaul E. McKenney }
134455b2dcf5SPaul E. McKenney }
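/*
 * Hypothetical caller of rcu_gp_set_torture_wait(), showing the intended
 * calling pattern: a torture-test hook asks the next rcu_gp_torture_wait()
 * invocation (made from the grace-period kthread) to sleep.  Only
 * rcu_gp_set_torture_wait() and synchronize_rcu() are real interfaces here,
 * the function name is invented, and the request is a no-op unless
 * CONFIG_RCU_TORTURE_TEST=y.
 */
static void my_torture_stall_gp_kthread(int stall_jiffies)
{
	rcu_gp_set_torture_wait(stall_jiffies);

	/* A grace period driven through the FQS loop after this point
	 * should be delayed by roughly stall_jiffies. */
	synchronize_rcu();
}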
134577865deaSPaul E. McKenney
134655b2dcf5SPaul E. McKenney /*
134755b2dcf5SPaul E. McKenney * Handler for on_each_cpu() to invoke the target CPU's RCU core
134855b2dcf5SPaul E. McKenney * processing.
134955b2dcf5SPaul E. McKenney */
13504102adabSPaul E. McKenney static void rcu_strict_gp_boundary(void *unused)
1351933ada2cSPaul E. McKenney {
1352933ada2cSPaul E. McKenney invoke_rcu_core();
1353933ada2cSPaul E. McKenney }
1354933ada2cSPaul E. McKenney
1355933ada2cSPaul E. McKenney // Make the polled API aware of the beginning of a grace period.
1356933ada2cSPaul E. McKenney static void rcu_poll_gp_seq_start(unsigned long *snap)
1357933ada2cSPaul E. McKenney {
1358933ada2cSPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
1359bf95b2bcSPaul E. McKenney
1360bf95b2bcSPaul E. McKenney if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
1361bf95b2bcSPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp);
1362bf95b2bcSPaul E. McKenney
1363bf95b2bcSPaul E. McKenney // If RCU was idle, note beginning of GP.
1364bf95b2bcSPaul E. McKenney if (!rcu_seq_state(rcu_state.gp_seq_polled))
1365bf95b2bcSPaul E. McKenney rcu_seq_start(&rcu_state.gp_seq_polled);
1366bf95b2bcSPaul E. McKenney
1367bf95b2bcSPaul E. McKenney // Either way, record current state.
1368bf95b2bcSPaul E. McKenney *snap = rcu_state.gp_seq_polled;
1369bf95b2bcSPaul E. McKenney }
1370bf95b2bcSPaul E. McKenney
1371bf95b2bcSPaul E. McKenney // Make the polled API aware of the end of a grace period.
1372bf95b2bcSPaul E. McKenney static void rcu_poll_gp_seq_end(unsigned long *snap)
1373bf95b2bcSPaul E. McKenney {
1374bf95b2bcSPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
1375bf95b2bcSPaul E. McKenney
1376bf95b2bcSPaul E. McKenney if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
1377bf95b2bcSPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp);
1378bf95b2bcSPaul E. McKenney
1379bf95b2bcSPaul E. McKenney // If the previously noted GP is still in effect, record the
1380bf95b2bcSPaul E. McKenney // end of that GP. Either way, zero counter to avoid counter-wrap
1381bf95b2bcSPaul E. McKenney // problems.
1382bf95b2bcSPaul E. McKenney if (*snap && *snap == rcu_state.gp_seq_polled) {
1383bf95b2bcSPaul E. McKenney rcu_seq_end(&rcu_state.gp_seq_polled);
1384bf95b2bcSPaul E. McKenney rcu_state.gp_seq_polled_snap = 0;
1385bf95b2bcSPaul E. McKenney rcu_state.gp_seq_polled_exp_snap = 0;
1386bf95b2bcSPaul E. McKenney } else {
1387bf95b2bcSPaul E. McKenney *snap = 0;
1388bf95b2bcSPaul E. McKenney }
1389bf95b2bcSPaul E. McKenney }
1390bf95b2bcSPaul E. McKenney
1391bf95b2bcSPaul E. McKenney // Make the polled API aware of the beginning of a grace period, but
1392bf95b2bcSPaul E. McKenney // where caller does not hold the root rcu_node structure's lock.
1393bf95b2bcSPaul E. McKenney static void rcu_poll_gp_seq_start_unlocked(unsigned long *snap)
1394bf95b2bcSPaul E. McKenney {
1395bf95b2bcSPaul E. McKenney unsigned long flags;
1396dd041405SPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
1397bf95b2bcSPaul E. McKenney
1398bf95b2bcSPaul E. McKenney if (rcu_init_invoked()) {
1399bf95b2bcSPaul E. McKenney if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
1400bf95b2bcSPaul E. McKenney lockdep_assert_irqs_enabled();
1401bf95b2bcSPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags);
1402bf95b2bcSPaul E. McKenney }
1403bf95b2bcSPaul E. McKenney rcu_poll_gp_seq_start(snap);
1404bf95b2bcSPaul E. McKenney if (rcu_init_invoked())
1405bf95b2bcSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1406*31d8aaa8SPaul E. McKenney }
1407bf95b2bcSPaul E. McKenney
1408bf95b2bcSPaul E. McKenney // Make the polled API aware of the end of a grace period, but where
1409bf95b2bcSPaul E. McKenney // caller does not hold the root rcu_node structure's lock.
1410bf95b2bcSPaul E. McKenney static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
1411*31d8aaa8SPaul E. McKenney {
1412bf95b2bcSPaul E. McKenney unsigned long flags;
1413bf95b2bcSPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
1414bf95b2bcSPaul E. McKenney
1415*31d8aaa8SPaul E. McKenney if (rcu_init_invoked()) {
1416bf95b2bcSPaul E. McKenney if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
1417bf95b2bcSPaul E. McKenney lockdep_assert_irqs_enabled();
1418bf95b2bcSPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags);
1419bf95b2bcSPaul E. McKenney }
1420bf95b2bcSPaul E. McKenney rcu_poll_gp_seq_end(snap);
1421bf95b2bcSPaul E. McKenney if (rcu_init_invoked())
1422*31d8aaa8SPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1423bf95b2bcSPaul E. McKenney }
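/*
 * The gp_seq_polled updates above are what back RCU's polled grace-period
 * interface.  A sketch of a hypothetical user of that public interface:
 * get_state_synchronize_rcu(), poll_state_synchronize_rcu(), and kfree() are
 * real APIs, while the structure and function names are invented.
 */
struct my_deferred_free {
	unsigned long gp_snap;	/* cookie from get_state_synchronize_rcu() */
	void *ptr;
};

static void my_retire(struct my_deferred_free *df, void *ptr)
{
	/*
	 * Caller has already unpublished ptr from all RCU-protected
	 * structures; record the grace-period state needed to wait out
	 * any readers that might still hold a reference.
	 */
	df->ptr = ptr;
	df->gp_snap = get_state_synchronize_rcu();
}

static bool my_try_free(struct my_deferred_free *df)
{
	/* Free only once a full grace period has elapsed since my_retire(). */
	if (!poll_state_synchronize_rcu(df->gp_snap))
		return false;
	kfree(df->ptr);
	df->ptr = NULL;
	return true;
}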
1424bf95b2bcSPaul E. McKenney
1425bf95b2bcSPaul E. McKenney /*
1426bf95b2bcSPaul E. McKenney * Initialize a new grace period. Return false if no grace period required.
1427*31d8aaa8SPaul E. McKenney */
1428bf95b2bcSPaul E. McKenney static noinline_for_stack bool rcu_gp_init(void)
1429bf95b2bcSPaul E. McKenney {
1430bf95b2bcSPaul E. McKenney unsigned long flags;
1431*31d8aaa8SPaul E. McKenney unsigned long oldmask;
1432bf95b2bcSPaul E. McKenney unsigned long mask;
1433bf95b2bcSPaul E. McKenney struct rcu_data *rdp;
1434933ada2cSPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
143545fed3e7SPaul E. McKenney
14364102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_activity, jiffies);
1437f74126dcSPaul E. McKenney raw_spin_lock_irq_rcu_node(rnp);
14384102adabSPaul E. McKenney if (!READ_ONCE(rcu_state.gp_flags)) {
1439ec2c2976SPaul E. McKenney /* Spurious wakeup, tell caller to go back to sleep. */
14400aa04b05SPaul E. McKenney raw_spin_unlock_irq_rcu_node(rnp);
1441ec2c2976SPaul E. McKenney return false;
14424102adabSPaul E. McKenney }
1443336a4f6cSPaul E. McKenney WRITE_ONCE(rcu_state.gp_flags, 0); /* Clear all flags: New GP. */
14444102adabSPaul E. McKenney
14459cbc5b97SPaul E. McKenney if (WARN_ON_ONCE(rcu_gp_in_progress())) {
14462a67e741SPeter Zijlstra /*
14479cbc5b97SPaul E. McKenney * Grace period already in progress, don't start another.
14484102adabSPaul E. McKenney * Not supposed to be able to happen.
144967c583a7SBoqun Feng */
145045fed3e7SPaul E. McKenney raw_spin_unlock_irq_rcu_node(rnp);
14514102adabSPaul E. McKenney return false;
14529cbc5b97SPaul E. McKenney }
14534102adabSPaul E. McKenney
1454de8e8730SPaul E. McKenney /* Advance to a new grace period and initialize state. */
14554102adabSPaul E. McKenney record_gp_stall_check_time();
14564102adabSPaul E. McKenney /* Record GP times before starting GP, hence rcu_seq_start(). */
14574102adabSPaul E. McKenney rcu_seq_start(&rcu_state.gp_seq);
14584102adabSPaul E. McKenney ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
145967c583a7SBoqun Feng trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
146045fed3e7SPaul E. McKenney rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
14614102adabSPaul E. McKenney raw_spin_unlock_irq_rcu_node(rnp);
14624102adabSPaul E. McKenney
14634102adabSPaul E. McKenney /*
1464ad3832e9SPaul E. McKenney * Apply per-leaf buffered online and offline operations to
1465ff3bb6f4SPaul E. McKenney * the rcu_node tree. Note that this new grace period need not
14669cbc5b97SPaul E. McKenney * wait for subsequent online CPUs, and that RCU hooks in the CPU
146762ae1951SPaul E. McKenney * offlining path, when combined with checks in this function,
14689cbc5b97SPaul E. McKenney * will handle CPUs that are currently going offline or that will
1469bf95b2bcSPaul E. McKenney * go offline later. Please also refer to "Hotplug CPU" section
147067c583a7SBoqun Feng * of RCU's Requirements documentation.
14714102adabSPaul E. McKenney */
14724102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
1473f37599e6SJoel Fernandes (Google) /* Exclude CPU hotplug operations. */
1474f37599e6SJoel Fernandes (Google) rcu_for_each_leaf_node(rnp) {
1475f37599e6SJoel Fernandes (Google) local_irq_save(flags);
1476f37599e6SJoel Fernandes (Google) arch_spin_lock(&rcu_state.ofl_lock);
1477f37599e6SJoel Fernandes (Google) raw_spin_lock_rcu_node(rnp);
1478f37599e6SJoel Fernandes (Google) if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
1479f37599e6SJoel Fernandes (Google) !rnp->wait_blkd_tasks) {
14800aa04b05SPaul E. McKenney /* Nothing to do on this leaf rcu_node structure. */
1481683954e5SNeeraj Upadhyay raw_spin_unlock_rcu_node(rnp);
148282980b16SDavid Woodhouse arch_spin_unlock(&rcu_state.ofl_lock);
1483aedf4ba9SPaul E. McKenney local_irq_restore(flags);
148482980b16SDavid Woodhouse continue;
148582980b16SDavid Woodhouse }
148682980b16SDavid Woodhouse
14870aa04b05SPaul E. McKenney /* Record old state, apply changes to ->qsmaskinit field. */
14880aa04b05SPaul E. McKenney oldmask = rnp->qsmaskinit;
14890aa04b05SPaul E. McKenney rnp->qsmaskinit = rnp->qsmaskinitnext;
149082980b16SDavid Woodhouse
149182980b16SDavid Woodhouse /* If zero-ness of ->qsmaskinit changed, propagate up tree. */
149282980b16SDavid Woodhouse if (!oldmask != !rnp->qsmaskinit) {
14930aa04b05SPaul E. McKenney if (!oldmask) { /* First online CPU for rcu_node. */
14940aa04b05SPaul E. McKenney if (!rnp->wait_blkd_tasks) /* Ever offline? */
14950aa04b05SPaul E. McKenney rcu_init_new_rnp(rnp);
14960aa04b05SPaul E. McKenney } else if (rcu_preempt_has_tasks(rnp)) {
14970aa04b05SPaul E. McKenney rnp->wait_blkd_tasks = true; /* blocked tasks */
14980aa04b05SPaul E. McKenney } else { /* Last offline CPU and can propagate. */
14990aa04b05SPaul E. McKenney rcu_cleanup_dead_rnp(rnp);
15000aa04b05SPaul E. McKenney }
15010aa04b05SPaul E. McKenney }
1502962aff03SPaul E. McKenney
1503962aff03SPaul E. McKenney /*
15040aa04b05SPaul E. McKenney * If all waited-on tasks from prior grace period are
1505962aff03SPaul E. McKenney * done, and if all this rcu_node structure's CPUs are
1506962aff03SPaul E. McKenney * still offline, propagate up the rcu_node tree and
1507962aff03SPaul E. McKenney * clear ->wait_blkd_tasks. Otherwise, if one of this
15080aa04b05SPaul E. McKenney * rcu_node structure's CPUs has since come back online,
15090aa04b05SPaul E. McKenney * simply clear ->wait_blkd_tasks.
1510962aff03SPaul E. McKenney */
15110aa04b05SPaul E. McKenney if (rnp->wait_blkd_tasks &&
15120aa04b05SPaul E. McKenney (!rcu_preempt_has_tasks(rnp) || rnp->qsmaskinit)) {
15130aa04b05SPaul E. McKenney rnp->wait_blkd_tasks = false;
15140aa04b05SPaul E. McKenney if (!rnp->qsmaskinit)
15150aa04b05SPaul E. McKenney rcu_cleanup_dead_rnp(rnp);
15160aa04b05SPaul E. McKenney }
15170aa04b05SPaul E. McKenney
1518962aff03SPaul E. McKenney raw_spin_unlock_rcu_node(rnp);
15190aa04b05SPaul E. McKenney arch_spin_unlock(&rcu_state.ofl_lock);
15200aa04b05SPaul E. McKenney local_irq_restore(flags);
1521962aff03SPaul E. McKenney }
15220aa04b05SPaul E. McKenney rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
1523962aff03SPaul E. McKenney
15240aa04b05SPaul E. McKenney /*
15250aa04b05SPaul E. McKenney * Set the quiescent-state-needed bits in all the rcu_node
15260aa04b05SPaul E. McKenney * structures for all currently online CPUs in breadth-first
152782980b16SDavid Woodhouse * order, starting from the root rcu_node structure, relying on the
152882980b16SDavid Woodhouse * layout of the tree within the rcu_state.node[] array. Note that
152982980b16SDavid Woodhouse * other CPUs will access only the leaves of the hierarchy, thus
15300aa04b05SPaul E. McKenney * seeing that no grace period is in progress, at least until the
153122212332SPaul E. McKenney * corresponding leaf node has been initialized.
15324102adabSPaul E. McKenney *
15334102adabSPaul E. McKenney * The grace period cannot complete until the initialization
15344102adabSPaul E. McKenney * process finishes, because this kthread handles both.
15359cbc5b97SPaul E. McKenney */
15369cbc5b97SPaul E. McKenney WRITE_ONCE(rcu_state.gp_state, RCU_GP_INIT);
15379cbc5b97SPaul E. McKenney rcu_for_each_node_breadth_first(rnp) {
15389cbc5b97SPaul E. McKenney rcu_gp_slow(gp_init_delay);
15399cbc5b97SPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags);
15409cbc5b97SPaul E. McKenney rdp = this_cpu_ptr(&rcu_data);
15414102adabSPaul E. McKenney rcu_preempt_check_blocked_tasks(rnp);
15424102adabSPaul E. McKenney rnp->qsmask = rnp->qsmaskinit;
15434102adabSPaul E. McKenney WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq);
15444102adabSPaul E. McKenney if (rnp == rdp->mynode)
1545683954e5SNeeraj Upadhyay (void)__note_gp_changes(rnp, rdp);
1546aedf4ba9SPaul E. McKenney rcu_preempt_boost_start_gp(rnp);
154722212332SPaul E. McKenney trace_rcu_grace_period_init(rcu_state.name, rnp->gp_seq,
1548ec2c2976SPaul E. McKenney rnp->level, rnp->grplo,
1549da1df50dSPaul E. McKenney rnp->grphi, rnp->qsmask);
155081ab59a3SPaul E. McKenney /* Quiescent states for tasks on any now-offline CPUs. */
15514102adabSPaul E. McKenney mask = rnp->qsmask & ~rnp->qsmaskinitnext;
15529cbc5b97SPaul E. McKenney rnp->rcu_gp_init_mask = mask;
15534102adabSPaul E. McKenney if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp))
1554c7e48f7bSPaul E. McKenney rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
15554102adabSPaul E. McKenney else
15569cbc5b97SPaul E. McKenney raw_spin_unlock_irq_rcu_node(rnp);
15574102adabSPaul E. McKenney cond_resched_tasks_rcu_qs();
15584102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_activity, jiffies);
1559ec2c2976SPaul E. McKenney }
1560ec2c2976SPaul E. McKenney
1561f2e2df59SPaul E. McKenney // If strict, make all CPUs aware of new grace period.
1562ec2c2976SPaul E. McKenney if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
1563b50912d0SPaul E. McKenney on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
1564ec2c2976SPaul E. McKenney
156567c583a7SBoqun Feng return true;
1566cee43939SPaul E. McKenney }
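/*
 * rcu_gp_init() above seeds each rcu_node's ->qsmask with the CPUs (or child
 * nodes) that the new grace period must wait on.  The following standalone
 * sketch shows how clearing those bits propagates quiescent states up such a
 * tree, loosely mirroring rcu_report_qs_rnp(); the node layout and names are
 * invented and all locking is omitted.
 */
#include <stdbool.h>
#include <stdio.h>

struct sketch_qnode {
	unsigned long qsmask;	/* children/CPUs still blocking this GP */
	unsigned long grpmask;	/* this node's bit in its parent's qsmask */
	struct sketch_qnode *parent;
};

/* Report that the child/CPU owning "mask" under "np" passed a quiescent state. */
static bool sketch_report_qs(struct sketch_qnode *np, unsigned long mask)
{
	for (; np; np = np->parent) {
		np->qsmask &= ~mask;
		if (np->qsmask)
			return false;	/* this subtree is still being waited on */
		mask = np->grpmask;	/* subtree done: clear its bit in the parent */
	}
	return true;			/* root mask emptied: the GP may end */
}

int main(void)
{
	struct sketch_qnode root = { .qsmask = 0x3 };
	struct sketch_qnode leaf0 = { .qsmask = 0x3, .grpmask = 0x1, .parent = &root };
	struct sketch_qnode leaf1 = { .qsmask = 0x1, .grpmask = 0x2, .parent = &root };

	printf("%d\n", sketch_report_qs(&leaf0, 0x1));	/* 0: leaf0 still waits */
	printf("%d\n", sketch_report_qs(&leaf0, 0x2));	/* 0: root still waits on leaf1 */
	printf("%d\n", sketch_report_qs(&leaf1, 0x1));	/* 1: last QS, GP can end */
	return 0;
}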
15679cbc5b97SPaul E. McKenney
15684102adabSPaul E. McKenney /*
15694102adabSPaul E. McKenney * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
1570933ada2cSPaul E. McKenney * time.
1571933ada2cSPaul E. McKenney */
1572933ada2cSPaul E. McKenney static bool rcu_gp_fqs_check_wake(int *gfp)
1573933ada2cSPaul E. McKenney {
157445fed3e7SPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
15754102adabSPaul E. McKenney
15764102adabSPaul E. McKenney // If under overload conditions, force an immediate FQS scan.
15774102adabSPaul E. McKenney if (*gfp & RCU_GP_FLAG_OVLD)
1578b3dae109SPeter Zijlstra return true;
1579d5374226SLuis R. Rodriguez
1580b9a425cfSPaul E. McKenney // Someone like call_rcu() requested a force-quiescent-state scan.
15810854a05cSPaul E. McKenney *gfp = READ_ONCE(rcu_state.gp_flags);
1582b9a425cfSPaul E. McKenney if (*gfp & RCU_GP_FLAG_FQS)
1583336a4f6cSPaul E. McKenney return true;
1584b9a425cfSPaul E. McKenney
15851fca4d12SPaul E. McKenney // The current grace period has completed.
15861fca4d12SPaul E. McKenney if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
15871fca4d12SPaul E. McKenney return true;
15881fca4d12SPaul E. McKenney
15891fca4d12SPaul E. McKenney return false;
15900854a05cSPaul E. McKenney }
1591b9a425cfSPaul E. McKenney
1592b9a425cfSPaul E. McKenney /*
1593b9a425cfSPaul E. McKenney * Do one round of quiescent-state forcing.
15941fca4d12SPaul E. McKenney */
1595b9a425cfSPaul E. McKenney static void rcu_gp_fqs(bool first_time)
1596b9a425cfSPaul E. McKenney {
1597b9a425cfSPaul E. McKenney int nr_fqs = READ_ONCE(rcu_state.nr_fqs_jiffies_stall);
1598b9a425cfSPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
1599b9a425cfSPaul E. McKenney
1600b9a425cfSPaul E. McKenney WRITE_ONCE(rcu_state.gp_activity, jiffies);
1601b9a425cfSPaul E. McKenney WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1);
16024102adabSPaul E. McKenney
16034102adabSPaul E. McKenney WARN_ON_ONCE(nr_fqs > 3);
16040854a05cSPaul E. McKenney /* Only count down nr_fqs for stall purposes if jiffies moves. */
16054102adabSPaul E. McKenney if (nr_fqs) {
1606336a4f6cSPaul E. McKenney if (nr_fqs == 1) {
16074102adabSPaul E. McKenney WRITE_ONCE(rcu_state.jiffies_stall,
16089cbc5b97SPaul E. McKenney jiffies + rcu_jiffies_till_stall_check());
16092431774fSPaul E. McKenney }
161077f81fe0SPetr Mladek WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs);
16114102adabSPaul E. McKenney }
1612e9ecb780SPaul E. McKenney
16134102adabSPaul E. McKenney if (first_time) {
16144102adabSPaul E. McKenney /* Collect dyntick-idle snapshots. */
1615e9ecb780SPaul E. McKenney force_qs_rnp(dyntick_save_progress_counter);
16164102adabSPaul E. McKenney } else {
16174102adabSPaul E. McKenney /* Handle dyntick-idle and offline CPUs. */
16189cbc5b97SPaul E. McKenney force_qs_rnp(rcu_implicit_dynticks_qs);
16192a67e741SPeter Zijlstra }
16209cbc5b97SPaul E. McKenney /* Clear flag to prevent immediate re-entry. */
16219cbc5b97SPaul E. McKenney if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {
162267c583a7SBoqun Feng raw_spin_lock_irq_rcu_node(rnp);
16234102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_flags,
16244102adabSPaul E. McKenney READ_ONCE(rcu_state.gp_flags) & ~RCU_GP_FLAG_FQS);
16254102adabSPaul E. McKenney raw_spin_unlock_irq_rcu_node(rnp);
16264102adabSPaul E. McKenney }
1627c3854a05SPaul E. McKenney }
1628c3854a05SPaul E. McKenney
1629f74126dcSPaul E. McKenney /*
1630c3854a05SPaul E. McKenney * Loop doing repeated quiescent-state forcing until the grace period ends.
16319bdb5b3aSPaul E. McKenney */
16321fca4d12SPaul E. McKenney static noinline_for_stack void rcu_gp_fqs_loop(void)
1633c3854a05SPaul E. McKenney {
1634c3854a05SPaul E. McKenney bool first_gp_fqs = true;
1635c3854a05SPaul E. McKenney int gf = 0;
1636c3854a05SPaul E. McKenney unsigned long j;
1637c06aed0eSPaul E. McKenney int ret;
16381fca4d12SPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
16391fca4d12SPaul E. McKenney
1640c3854a05SPaul E. McKenney j = READ_ONCE(jiffies_till_first_fqs);
1641c3854a05SPaul E. McKenney if (rcu_state.cbovld)
1642fb77dccfSPaul E. McKenney gf = RCU_GP_FLAG_OVLD;
1643fb77dccfSPaul E. McKenney ret = 0;
1644fb77dccfSPaul E. McKenney for (;;) {
1645fb77dccfSPaul E. McKenney if (rcu_state.cbovld) {
1646fb77dccfSPaul E. McKenney j = (j + 2) / 3;
1647fb77dccfSPaul E. McKenney if (j <= 0)
1648683954e5SNeeraj Upadhyay j = 1;
1649683954e5SNeeraj Upadhyay }
1650683954e5SNeeraj Upadhyay if (!ret || time_before(jiffies + j, rcu_state.jiffies_force_qs)) {
1651683954e5SNeeraj Upadhyay WRITE_ONCE(rcu_state.jiffies_force_qs, jiffies + j);
1652683954e5SNeeraj Upadhyay /*
1653683954e5SNeeraj Upadhyay * jiffies_force_qs before RCU_GP_WAIT_FQS state
1654c3854a05SPaul E. McKenney * update; required for stall checks.
16559cf422a8SPaul E. McKenney */
1656c3854a05SPaul E. McKenney smp_wmb();
16570f11ad32SPaul E. McKenney WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
1658c3854a05SPaul E. McKenney jiffies + (j ? 3 * j : 2));
1659683954e5SNeeraj Upadhyay }
1660eb880949SLiu Song trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
1661eb880949SLiu Song TPS("fqswait"));
166255b2dcf5SPaul E. McKenney WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_FQS);
1663683954e5SNeeraj Upadhyay (void)swait_event_idle_timeout_exclusive(rcu_state.gp_wq,
1664c3854a05SPaul E. McKenney rcu_gp_fqs_check_wake(&gf), j);
1665a03ae49cSNeeraj Upadhyay rcu_gp_torture_wait();
1666a03ae49cSNeeraj Upadhyay WRITE_ONCE(rcu_state.gp_state, RCU_GP_DOING_FQS);
1667a03ae49cSNeeraj Upadhyay /* Locking provides needed memory barriers. */
1668a03ae49cSNeeraj Upadhyay /*
1669a03ae49cSNeeraj Upadhyay * Exit the loop if the root rcu_node structure indicates that the grace period
1670a03ae49cSNeeraj Upadhyay * has ended. The rcu_preempt_blocked_readers_cgp(rnp) check
1671a03ae49cSNeeraj Upadhyay * is required only for single-node rcu_node trees because readers blocking
1672a03ae49cSNeeraj Upadhyay * the current grace period are queued only on leaf rcu_node structures.
1673a03ae49cSNeeraj Upadhyay * For multi-node trees, checking the root node's ->qsmask suffices, because a
1674c3854a05SPaul E. McKenney * given root node's ->qsmask bit is cleared only when all CPUs and tasks from
1675c3854a05SPaul E. McKenney * the corresponding leaf nodes have passed through their quiescent state.
1676c3854a05SPaul E. McKenney */
1677c3854a05SPaul E. McKenney if (!READ_ONCE(rnp->qsmask) &&
167829ffebc5SPaul E. McKenney !rcu_preempt_blocked_readers_cgp(rnp))
16799c392453SNeeraj Upadhyay break;
16800f11ad32SPaul E. McKenney /* If time for quiescent-state forcing, do it. */
1681c3854a05SPaul E. McKenney if (!time_after(rcu_state.jiffies_force_qs, jiffies) ||
1682c3854a05SPaul E. McKenney (gf & (RCU_GP_FLAG_FQS | RCU_GP_FLAG_OVLD))) {
16831fca4d12SPaul E. McKenney trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
16841fca4d12SPaul E. McKenney TPS("fqsstart"));
1685c3854a05SPaul E. McKenney rcu_gp_fqs(first_gp_fqs);
16861fca4d12SPaul E. McKenney gf = 0;
16871fca4d12SPaul E. McKenney if (first_gp_fqs) {
16880f11ad32SPaul E. McKenney first_gp_fqs = false;
1689c3854a05SPaul E. McKenney gf = rcu_state.cbovld ? RCU_GP_FLAG_OVLD : 0;
1690c3854a05SPaul E. McKenney }
1691c3854a05SPaul E. McKenney trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
1692c3854a05SPaul E. McKenney TPS("fqsend"));
1693c06aed0eSPaul E. McKenney cond_resched_tasks_rcu_qs();
1694c3854a05SPaul E. McKenney WRITE_ONCE(rcu_state.gp_activity, jiffies);
1695c3854a05SPaul E. McKenney ret = 0; /* Force full wait till next FQS. */
1696c3854a05SPaul E. McKenney j = READ_ONCE(jiffies_till_next_fqs);
1697c3854a05SPaul E. McKenney } else {
1698c3854a05SPaul E. McKenney /* Deal with stray signal. */
16990f11ad32SPaul E. McKenney cond_resched_tasks_rcu_qs();
1700c3854a05SPaul E. McKenney WRITE_ONCE(rcu_state.gp_activity, jiffies);
1701c3854a05SPaul E. McKenney WARN_ON(signal_pending(current));
1702c3854a05SPaul E. McKenney trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
1703c3854a05SPaul E. McKenney TPS("fqswaitsig"));
1704c3854a05SPaul E. McKenney ret = 1; /* Keep old FQS timing. */
1705c3854a05SPaul E. McKenney j = jiffies;
1706c3854a05SPaul E. McKenney if (time_after(jiffies, rcu_state.jiffies_force_qs))
17071fca4d12SPaul E. McKenney j = 1;
1708c3854a05SPaul E. McKenney else
1709c3854a05SPaul E. McKenney j = rcu_state.jiffies_force_qs - j;
1710c3854a05SPaul E. McKenney gf = 0;
1711c3854a05SPaul E. McKenney }
1712c3854a05SPaul E. McKenney }
17134102adabSPaul E. McKenney }
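/*
 * A standalone pthread sketch of the wait-or-force pattern implemented by
 * rcu_gp_fqs_loop() above: sleep until either someone requests a scan or a
 * deadline expires, then force one.  A condition variable stands in for the
 * swait queue and all names are invented; overload handling, stall
 * bookkeeping, and the dynamic recalculation of the timeout are omitted.
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t fqs_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t fqs_wq = PTHREAD_COND_INITIALIZER;
static bool fqs_requested;	/* analogous to RCU_GP_FLAG_FQS */
static bool gp_ended;		/* analogous to an emptied root ->qsmask */

static void force_scan(void)
{
	/* Placeholder for the actual quiescent-state scan. */
}

/* Grace-period thread: wait for a request or a timeout, then force a scan. */
static void fqs_loop(long timeout_sec)
{
	pthread_mutex_lock(&fqs_lock);
	while (!gp_ended) {
		struct timespec deadline;

		clock_gettime(CLOCK_REALTIME, &deadline);
		deadline.tv_sec += timeout_sec;
		while (!fqs_requested && !gp_ended &&
		       pthread_cond_timedwait(&fqs_wq, &fqs_lock,
					      &deadline) != ETIMEDOUT)
			;		/* spurious wakeup: keep waiting */
		if (gp_ended)
			break;
		fqs_requested = false;
		pthread_mutex_unlock(&fqs_lock);
		force_scan();		/* like rcu_gp_fqs() */
		pthread_mutex_lock(&fqs_lock);
	}
	pthread_mutex_unlock(&fqs_lock);
}

/* Callers needing an earlier scan (say, under callback overload) poke the thread. */
static void request_fqs(void)
{
	pthread_mutex_lock(&fqs_lock);
	fqs_requested = true;
	pthread_cond_signal(&fqs_wq);
	pthread_mutex_unlock(&fqs_lock);
}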
17144102adabSPaul E. McKenney
17152f20de99SPaul E. McKenney /*
17164102adabSPaul E. McKenney * Clean up after the old grace period.
1717b2b00ddfSPaul E. McKenney */
171848a7639cSPaul E. McKenney static noinline void rcu_gp_cleanup(void)
1719b2b00ddfSPaul E. McKenney {
1720de30ad51SPaul E. McKenney int cpu;
17215d6742b3SPaul E. McKenney bool needgp = false;
17224102adabSPaul E. McKenney unsigned long gp_duration;
1723336a4f6cSPaul E. McKenney unsigned long new_gp_seq;
1724abedf8e2SPaul Gortmaker bool offloaded;
17254102adabSPaul E. McKenney struct rcu_data *rdp;
17269cbc5b97SPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
17272a67e741SPeter Zijlstra struct swait_queue_head *sq;
1728c51d7b5eSPaul E. McKenney
1729c51d7b5eSPaul E. McKenney WRITE_ONCE(rcu_state.gp_activity, jiffies);
17309cbc5b97SPaul E. McKenney raw_spin_lock_irq_rcu_node(rnp);
17319cbc5b97SPaul E. McKenney rcu_state.gp_end = jiffies;
17324102adabSPaul E. McKenney gp_duration = rcu_state.gp_end - rcu_state.gp_start;
17334102adabSPaul E. McKenney if (gp_duration > rcu_state.gp_max)
17344102adabSPaul E. McKenney rcu_state.gp_max = gp_duration;
17354102adabSPaul E. McKenney
17364102adabSPaul E. McKenney /*
17374102adabSPaul E. McKenney * We know the grace period is complete, but to everyone else
17384102adabSPaul E. McKenney * it appears to still be ongoing. But it is also the case
17394102adabSPaul E. McKenney * that to everyone else it looks like there is nothing that
17404102adabSPaul E. McKenney * they can do to advance the grace period. It is therefore
1741bf95b2bcSPaul E. McKenney * safe for us to drop the lock in order to mark the grace
174267c583a7SBoqun Feng * period as completed in all of the rcu_node structures.
17434102adabSPaul E. McKenney */
17444102adabSPaul E. McKenney rcu_poll_gp_seq_end(&rcu_state.gp_seq_polled_snap);
1745ff3bb6f4SPaul E. McKenney raw_spin_unlock_irq_rcu_node(rnp);
1746ff3bb6f4SPaul E. McKenney
1747ff3bb6f4SPaul E. McKenney /*
1748ff3bb6f4SPaul E. McKenney * Propagate new ->gp_seq value to rcu_node structures so that
1749ff3bb6f4SPaul E. McKenney * other CPUs don't have to wait until the start of the next grace
1750ff3bb6f4SPaul E. McKenney * period to process their callbacks. This also avoids some nasty
1751ff3bb6f4SPaul E. McKenney * RCU grace-period initialization races by forcing the end of
17524102adabSPaul E. McKenney * the current grace period to be completely recorded in all of
17539cbc5b97SPaul E. McKenney * the rcu_node structures before the beginning of the next grace
1754de30ad51SPaul E. McKenney * period is recorded in any of the rcu_node structures.
1755aedf4ba9SPaul E. McKenney */
17562a67e741SPeter Zijlstra new_gp_seq = rcu_state.gp_seq;
17574bc8d555SPaul E. McKenney rcu_seq_end(&new_gp_seq);
175881ab59a3SPaul E. McKenney rcu_for_each_node_breadth_first(rnp) {
17595c60d25fSPaul E. McKenney raw_spin_lock_irq_rcu_node(rnp);
1760de30ad51SPaul E. McKenney if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
17613fdefca9SPaul E. McKenney dump_blkd_tasks(rnp, 10);
17623fdefca9SPaul E. McKenney WARN_ON_ONCE(rnp->qsmask);
1763da1df50dSPaul E. McKenney WRITE_ONCE(rnp->gp_seq, new_gp_seq);
17644102adabSPaul E. McKenney if (!rnp->parent)
1765c7e48f7bSPaul E. McKenney smp_mb(); // Order against failing poll_state_synchronize_rcu_full().
176678e4bc34SPaul E. McKenney rdp = this_cpu_ptr(&rcu_data);
17673481f2eaSPaul E. McKenney if (rnp == rdp->mynode)
1768b2b00ddfSPaul E. McKenney needgp = __note_gp_changes(rnp, rdp) || needgp;
1769b2b00ddfSPaul E. McKenney /* smp_mb() provided by prior unlock-lock pair. */
1770b2b00ddfSPaul E. McKenney needgp = rcu_future_gp_cleanup(rnp) || needgp;
1771b2b00ddfSPaul E. McKenney // Reset overload indication for CPUs no longer overloaded
1772b2b00ddfSPaul E. McKenney if (rcu_is_leaf_node(rnp))
1773b2b00ddfSPaul E. McKenney for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) {
1774065bb78cSDaniel Wagner rdp = per_cpu_ptr(&rcu_data, cpu);
177567c583a7SBoqun Feng check_cb_ovld_locked(rdp, rnp);
1776065bb78cSDaniel Wagner }
1777cee43939SPaul E. McKenney sq = rcu_nocb_gp_get(rnp);
17789cbc5b97SPaul E. McKenney raw_spin_unlock_irq_rcu_node(rnp);
177922212332SPaul E. McKenney rcu_nocb_gp_cleanup(sq);
17804102adabSPaul E. McKenney cond_resched_tasks_rcu_qs();
1781336a4f6cSPaul E. McKenney WRITE_ONCE(rcu_state.gp_activity, jiffies);
17829cbc5b97SPaul E. McKenney rcu_gp_slow(gp_cleanup_delay);
17834102adabSPaul E. McKenney }
17840a89e5a4SPaul E. McKenney rnp = rcu_get_root();
17859cbc5b97SPaul E. McKenney raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */
17860a89e5a4SPaul E. McKenney
178762ae1951SPaul E. McKenney /* Declare grace period done, trace first to use old GP number. */
1788683954e5SNeeraj Upadhyay trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
1789fb31340fSPaul E. McKenney rcu_seq_end(&rcu_state.gp_seq);
1790da1df50dSPaul E. McKenney ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
17915b55072fSJoel Fernandes (Google) WRITE_ONCE(rcu_state.gp_state, RCU_GP_IDLE);
1792abd13fddSPaul E. McKenney /* Check for GP requests since above loop. */
1793fb31340fSPaul E. McKenney rdp = this_cpu_ptr(&rcu_data);
1794fb31340fSPaul E. McKenney if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
1795fb31340fSPaul E. McKenney trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed,
179648a7639cSPaul E. McKenney TPS("CleanupMore"));
17973820b513SFrederic Weisbecker needgp = true;
17985d6742b3SPaul E. McKenney }
179975182a4eSPaul E. McKenney /* Advance CBs to reduce false positives below. */
180075182a4eSPaul E. McKenney offloaded = rcu_rdp_is_offloaded(rdp);
180175182a4eSPaul E. McKenney if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
180275182a4eSPaul E. McKenney
180375182a4eSPaul E. McKenney // We get here if a grace period was needed ("needgp")
180475182a4eSPaul E. McKenney // and the above call to rcu_accelerate_cbs() did not set
180575182a4eSPaul E. McKenney // the RCU_GP_FLAG_INIT bit in ->gp_flags (which records
180675182a4eSPaul E. McKenney // the need for another grace period). The purpose
180775182a4eSPaul E. McKenney // of the "offloaded" check is to avoid invoking
180875182a4eSPaul E. McKenney // rcu_accelerate_cbs() on an offloaded CPU because we do not
180975182a4eSPaul E. McKenney // hold the ->nocb_lock needed to safely access an offloaded
18109cbc5b97SPaul E. McKenney // ->cblist. We do not want to acquire that lock because
18112906d215SPaul E. McKenney // it can be heavily contended during callback floods.
181275182a4eSPaul E. McKenney
181318390aeaSPaul E. McKenney WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
181475182a4eSPaul E. McKenney WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
181575182a4eSPaul E. McKenney trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
181675182a4eSPaul E. McKenney } else {
181775182a4eSPaul E. McKenney
181875182a4eSPaul E. McKenney // We get here either if there is no need for an
181975182a4eSPaul E. McKenney // additional grace period or if rcu_accelerate_cbs() has
182075182a4eSPaul E. McKenney // already set the RCU_GP_FLAG_INIT bit in ->gp_flags.
182175182a4eSPaul E. McKenney // So all we need to do is to clear all of the other
182218390aeaSPaul E. McKenney // ->gp_flags bits.
182367c583a7SBoqun Feng
18244e025f52SPaul E. McKenney WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags & RCU_GP_FLAG_INIT);
18254e025f52SPaul E. McKenney }
18264e025f52SPaul E. McKenney raw_spin_unlock_irq_rcu_node(rnp);
18274e025f52SPaul E. McKenney
18284102adabSPaul E. McKenney // If strict, make all CPUs aware of the end of the old grace period.
18294102adabSPaul E. McKenney if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
18304102adabSPaul E. McKenney on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
18314102adabSPaul E. McKenney }
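/*
 * Illustration of the ->gp_seq handoff above, assuming the usual
 * encoding in which the bottom two bits hold the grace-period phase
 * and the remaining bits hold a counter: a grace period whose
 * ->gp_seq is 0x101 (counter 0x40, phase "in progress") is ended by
 * rcu_seq_end() rounding up to 0x104 (counter 0x41, phase idle).  The
 * breadth-first loop pushes that ended value into every rcu_node's
 * ->gp_seq before rcu_state.gp_seq itself is advanced, so no CPU can
 * see a new grace period begin before the old one's end has been
 * fully recorded.
 */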
18324102adabSPaul E. McKenney
18330854a05cSPaul E. McKenney /*
18344102adabSPaul E. McKenney * Body of kthread that handles grace periods.
18355871968dSPaul E. McKenney */
18364102adabSPaul E. McKenney static int __noreturn rcu_gp_kthread(void *unused)
18374102adabSPaul E. McKenney {
18384102adabSPaul E. McKenney rcu_bind_gp_kthread();
18394102adabSPaul E. McKenney for (;;) {
18400f11ad32SPaul E. McKenney
18414102adabSPaul E. McKenney /* Handle grace-period start. */
1842683954e5SNeeraj Upadhyay for (;;) {
18439cbc5b97SPaul E. McKenney trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
18449cbc5b97SPaul E. McKenney TPS("reqwait"));
18454102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_GPS);
184655b2dcf5SPaul E. McKenney swait_event_idle_exclusive(rcu_state.gp_wq,
1847683954e5SNeeraj Upadhyay READ_ONCE(rcu_state.gp_flags) &
184878e4bc34SPaul E. McKenney RCU_GP_FLAG_INIT);
18490854a05cSPaul E. McKenney rcu_gp_torture_wait();
18504102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_state, RCU_GP_DONE_GPS);
1851cee43939SPaul E. McKenney /* Locking provides needed memory barrier. */
18529cbc5b97SPaul E. McKenney if (rcu_gp_init())
185373a860cdSPaul E. McKenney break;
18540f11ad32SPaul E. McKenney cond_resched_tasks_rcu_qs();
18554102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_activity, jiffies);
18564102adabSPaul E. McKenney WARN_ON(signal_pending(current));
18574102adabSPaul E. McKenney trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
18584102adabSPaul E. McKenney TPS("reqwaitsig"));
1859c3854a05SPaul E. McKenney }
18604102adabSPaul E. McKenney
18614102adabSPaul E. McKenney /* Handle quiescent-state forcing. */
1862683954e5SNeeraj Upadhyay rcu_gp_fqs_loop();
18630854a05cSPaul E. McKenney
1864683954e5SNeeraj Upadhyay /* Handle grace-period end. */
18654102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_state, RCU_GP_CLEANUP);
18664102adabSPaul E. McKenney rcu_gp_cleanup();
18674102adabSPaul E. McKenney WRITE_ONCE(rcu_state.gp_state, RCU_GP_CLEANED);
18684102adabSPaul E. McKenney }
186949918a54SPaul E. McKenney }
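/*
 * In outline, each pass through the outer loop above is one grace
 * period: sleep in RCU_GP_WAIT_GPS until RCU_GP_FLAG_INIT is set, run
 * rcu_gp_init(), drive quiescent-state forcing from rcu_gp_fqs_loop(),
 * and finish in rcu_gp_cleanup(), with rcu_state.gp_state recording
 * the current stage for diagnostics.
 */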
187049918a54SPaul E. McKenney
187149918a54SPaul E. McKenney /*
187249918a54SPaul E. McKenney * Report a full set of quiescent states to the rcu_state data structure.
187349918a54SPaul E. McKenney * Invoke rcu_gp_kthread_wake() to awaken the grace-period kthread if
187449918a54SPaul E. McKenney * another grace period is required. Whether we wake the grace-period
187549918a54SPaul E. McKenney * kthread or it awakens itself for the next round of quiescent-state
18764102adabSPaul E. McKenney * forcing, that kthread will clean up after the just-completed grace
1877aff4e9edSPaul E. McKenney * period. Note that the caller must hold rnp->lock, which is released
1878336a4f6cSPaul E. McKenney * before return.
18794102adabSPaul E. McKenney */
1880336a4f6cSPaul E. McKenney static void rcu_report_qs_rsp(unsigned long flags)
1881de8e8730SPaul E. McKenney __releases(rcu_get_root()->lock)
18829cbc5b97SPaul E. McKenney {
18839cbc5b97SPaul E. McKenney raw_lockdep_assert_held_rcu_node(rcu_get_root());
1884336a4f6cSPaul E. McKenney WARN_ON_ONCE(!rcu_gp_in_progress());
1885532c00c9SPaul E. McKenney WRITE_ONCE(rcu_state.gp_flags,
18864102adabSPaul E. McKenney READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);
18874102adabSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(), flags);
18884102adabSPaul E. McKenney rcu_gp_kthread_wake();
18894102adabSPaul E. McKenney }
18904102adabSPaul E. McKenney
18914102adabSPaul E. McKenney /*
1892654e9533SPaul E. McKenney * Similar to rcu_report_qs_rdp(), for which it is a helper function.
1893654e9533SPaul E. McKenney * Allows quiescent states for a group of CPUs to be reported at one go
1894654e9533SPaul E. McKenney * to the specified rcu_node structure, though all the CPUs in the group
1895c9a24e2dSPaul E. McKenney * must be represented by the same rcu_node structure (which need not be a
1896654e9533SPaul E. McKenney * leaf rcu_node structure, though it often will be). The gps parameter
1897ec2c2976SPaul E. McKenney * is the grace-period snapshot, which means that the quiescent states
1898ec2c2976SPaul E. McKenney * are valid only if rnp->gp_seq is equal to gps. That structure's lock
1899ec2c2976SPaul E. McKenney * must be held upon entry, and it is released before return.
1900ec2c2976SPaul E. McKenney *
19014102adabSPaul E. McKenney * As a special case, if mask is zero, the bit-already-cleared check is
1902b50912d0SPaul E. McKenney * disabled. This allows propagating quiescent state due to resumed tasks
1903b50912d0SPaul E. McKenney * during grace-period initialization.
19044102adabSPaul E. McKenney */
19054102adabSPaul E. McKenney static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
1906654e9533SPaul E. McKenney unsigned long gps, unsigned long flags)
19074102adabSPaul E. McKenney __releases(rnp->lock)
19084102adabSPaul E. McKenney {
1909a32e01eeSMatthew Wilcox unsigned long oldmask = 0;
1910c0b334c5SPaul E. McKenney struct rcu_node *rnp_c;
19114102adabSPaul E. McKenney
19124102adabSPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp);
1913ec2c2976SPaul E. McKenney
19144102adabSPaul E. McKenney /* Walk up the rcu_node hierarchy. */
1915654e9533SPaul E. McKenney for (;;) {
1916654e9533SPaul E. McKenney if ((!(rnp->qsmask & mask) && mask) || rnp->gp_seq != gps) {
1917654e9533SPaul E. McKenney
1918654e9533SPaul E. McKenney /*
191967c583a7SBoqun Feng * Our bit has already been cleared, or the
19204102adabSPaul E. McKenney * relevant grace period is already over, so done.
19214102adabSPaul E. McKenney */
1922654e9533SPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
19235b4c11d5SPaul E. McKenney return;
19242dee9404SPaul E. McKenney }
19257672d647SPaul E. McKenney WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
192667a0edbfSPaul E. McKenney WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
19274102adabSPaul E. McKenney rcu_preempt_blocked_readers_cgp(rnp));
19284102adabSPaul E. McKenney WRITE_ONCE(rnp->qsmask, rnp->qsmask & ~mask);
19294102adabSPaul E. McKenney trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq,
19304102adabSPaul E. McKenney mask, rnp->qsmask, rnp->level,
19314102adabSPaul E. McKenney rnp->grplo, rnp->grphi,
19324102adabSPaul E. McKenney !!rnp->gp_tasks);
193367c583a7SBoqun Feng if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
19344102adabSPaul E. McKenney
19354102adabSPaul E. McKenney /* Other bits still set at this level, so done. */
1936d43a5d32SPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
19374102adabSPaul E. McKenney return;
19384102adabSPaul E. McKenney }
19394102adabSPaul E. McKenney rnp->completedqs = rnp->gp_seq;
19404102adabSPaul E. McKenney mask = rnp->grpmask;
19414102adabSPaul E. McKenney if (rnp->parent == NULL) {
19424102adabSPaul E. McKenney
19434102adabSPaul E. McKenney /* No more levels. Exit loop holding root lock. */
194467c583a7SBoqun Feng
19454102adabSPaul E. McKenney break;
19464102adabSPaul E. McKenney }
19472a67e741SPeter Zijlstra raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
19480937d045SPaul E. McKenney rnp_c = rnp;
19494102adabSPaul E. McKenney rnp = rnp->parent;
19504102adabSPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags);
19514102adabSPaul E. McKenney oldmask = READ_ONCE(rnp_c->qsmask);
19524102adabSPaul E. McKenney }
19534102adabSPaul E. McKenney
19544102adabSPaul E. McKenney /*
19554102adabSPaul E. McKenney * Get here if we are the last CPU to pass through a quiescent
1956aff4e9edSPaul E. McKenney * state for this grace period. Invoke rcu_report_qs_rsp()
19574102adabSPaul E. McKenney * to clean up and start the next grace period if one is needed.
19584102adabSPaul E. McKenney */
19594102adabSPaul E. McKenney rcu_report_qs_rsp(flags); /* releases rnp->lock. */
1960cc99a310SPaul E. McKenney }
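/*
 * For example, in a two-level tree a leaf rcu_node whose ->qsmask has
 * just gone to zero clears its own ->grpmask bit in its parent: the
 * loop above drops the leaf's ->lock, takes the parent's, and repeats
 * the bit-clearing one level up.  Only the caller that clears the last
 * bit at the root (with no readers still blocking the grace period)
 * falls through to rcu_report_qs_rsp() and wakes the grace-period
 * kthread to run cleanup.
 */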
1961cc99a310SPaul E. McKenney
196249918a54SPaul E. McKenney /*
1963cc99a310SPaul E. McKenney * Record a quiescent state for all tasks that were previously queued
1964cc99a310SPaul E. McKenney * on the specified rcu_node structure and that were blocking the current
1965cc99a310SPaul E. McKenney * RCU grace period. The caller must hold the corresponding rnp->lock with
196617a8212bSPaul E. McKenney * irqs disabled, and this lock is released upon return, but irqs remain
1967139ad4daSPaul E. McKenney * disabled.
1968cc99a310SPaul E. McKenney */
1969cc99a310SPaul E. McKenney static void __maybe_unused
1970654e9533SPaul E. McKenney rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
1971cc99a310SPaul E. McKenney __releases(rnp->lock)
1972cc99a310SPaul E. McKenney {
1973cc99a310SPaul E. McKenney unsigned long gps;
1974a32e01eeSMatthew Wilcox unsigned long mask;
1975c130d2dcSLai Jiangshan struct rcu_node *rnp_p;
1976c74859d1SPaul E. McKenney
1977c74859d1SPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp);
197867c583a7SBoqun Feng if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RCU)) ||
1979cc99a310SPaul E. McKenney WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
1980cc99a310SPaul E. McKenney rnp->qsmask != 0) {
1981cc99a310SPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
198277cfc7bfSPaul E. McKenney return; /* Still need more quiescent states! */
1983cc99a310SPaul E. McKenney }
1984cc99a310SPaul E. McKenney
1985cc99a310SPaul E. McKenney rnp->completedqs = rnp->gp_seq;
1986a77da14cSPaul E. McKenney rnp_p = rnp->parent;
1987a77da14cSPaul E. McKenney if (rnp_p == NULL) {
1988cc99a310SPaul E. McKenney /*
1989aff4e9edSPaul E. McKenney * Only one rcu_node structure in the tree, so don't
1990cc99a310SPaul E. McKenney * try to report up to its nonexistent parent!
1991cc99a310SPaul E. McKenney */
1992cc99a310SPaul E. McKenney rcu_report_qs_rsp(flags);
1993c9a24e2dSPaul E. McKenney return;
1994c9a24e2dSPaul E. McKenney }
1995cc99a310SPaul E. McKenney
199667c583a7SBoqun Feng /* Report up the rest of the hierarchy, tracking current ->gp_seq. */
19972a67e741SPeter Zijlstra gps = rnp->gp_seq;
1998b50912d0SPaul E. McKenney mask = rnp->grpmask;
1999cc99a310SPaul E. McKenney raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
2000cc99a310SPaul E. McKenney raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */
2001cc99a310SPaul E. McKenney rcu_report_qs_rnp(mask, rnp_p, gps, flags);
20024102adabSPaul E. McKenney }
20034b455dc3SPaul E. McKenney
20044102adabSPaul E. McKenney /*
20054102adabSPaul E. McKenney * Record a quiescent state for the specified CPU to that CPU's rcu_data
2006cfeac397SPaul E. McKenney * structure. This must be called from the specified CPU.
20074102adabSPaul E. McKenney */
20084102adabSPaul E. McKenney static void
20094102adabSPaul E. McKenney rcu_report_qs_rdp(struct rcu_data *rdp)
20105d6742b3SPaul E. McKenney {
2011b3bb02feSFrederic Weisbecker unsigned long flags;
20124102adabSPaul E. McKenney unsigned long mask;
20134102adabSPaul E. McKenney bool needacc = false;
2014cfeac397SPaul E. McKenney struct rcu_node *rnp;
20154102adabSPaul E. McKenney
20162a67e741SPeter Zijlstra WARN_ON_ONCE(rdp->cpu != smp_processor_id());
2017c9a24e2dSPaul E. McKenney rnp = rdp->mynode;
2018c9a24e2dSPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags);
20194102adabSPaul E. McKenney if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
20204102adabSPaul E. McKenney rdp->gpwrap) {
20214102adabSPaul E. McKenney
20224102adabSPaul E. McKenney /*
20234102adabSPaul E. McKenney * The grace period in which this quiescent state was
20244102adabSPaul E. McKenney * recorded has ended, so don't report it upwards.
20254102adabSPaul E. McKenney * We will instead need a new quiescent state that lies
20265b74c458SPaul E. McKenney * within the current grace period.
202767c583a7SBoqun Feng */
20284102adabSPaul E. McKenney rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */
20294102adabSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
20304102adabSPaul E. McKenney return;
2031b5ea0370SPaul E. McKenney }
20324102adabSPaul E. McKenney mask = rdp->grpmask;
203367c583a7SBoqun Feng rdp->core_needs_qs = false;
20344102adabSPaul E. McKenney if ((rnp->qsmask & mask) == 0) {
20354102adabSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
20364102adabSPaul E. McKenney } else {
20374102adabSPaul E. McKenney /*
203824ee940dSThomas Gleixner * This GP can't end until cpu checks in, so all of our
2039b3bb02feSFrederic Weisbecker * callbacks can be processed during the next GP.
20404102adabSPaul E. McKenney *
2041b3bb02feSFrederic Weisbecker * NOCB kthreads have their own way to deal with that...
204202f50142SPaul E. McKenney */
2043b3bb02feSFrederic Weisbecker if (!rcu_rdp_is_offloaded(rdp)) {
2044b3bb02feSFrederic Weisbecker /*
2045b3bb02feSFrederic Weisbecker * The current GP has not yet ended, so it
2046b3bb02feSFrederic Weisbecker * should not be possible for rcu_accelerate_cbs()
2047b3bb02feSFrederic Weisbecker * to return true. So complain, but don't awaken.
2048b3bb02feSFrederic Weisbecker */
2049b3bb02feSFrederic Weisbecker WARN_ON_ONCE(rcu_accelerate_cbs(rnp, rdp));
20504102adabSPaul E. McKenney } else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
2051516e5ae0SJoel Fernandes (Google) /*
2052b50912d0SPaul E. McKenney * ...but NOCB kthreads may miss or delay callback acceleration
2053654e9533SPaul E. McKenney * if in the middle of a (de-)offloading process.
205448a7639cSPaul E. McKenney */
2055532c00c9SPaul E. McKenney needacc = true;
2056b3bb02feSFrederic Weisbecker }
2057b3bb02feSFrederic Weisbecker
2058b3bb02feSFrederic Weisbecker rcu_disable_urgency_upon_qs(rdp);
2059b3bb02feSFrederic Weisbecker rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
2060b3bb02feSFrederic Weisbecker /* ^^^ Released rnp->lock */
2061b3bb02feSFrederic Weisbecker
20624102adabSPaul E. McKenney if (needacc) {
20634102adabSPaul E. McKenney rcu_nocb_lock_irqsave(rdp, flags);
20644102adabSPaul E. McKenney rcu_accelerate_cbs_unlocked(rnp, rdp);
20654102adabSPaul E. McKenney rcu_nocb_unlock_irqrestore(rdp, flags);
20664102adabSPaul E. McKenney }
20674102adabSPaul E. McKenney }
20684102adabSPaul E. McKenney }
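/*
 * Note the ordering above: the quiescent state is pushed up the tree
 * with ->nocb_lock not held, and only afterwards, for a CPU caught
 * mid-(de-)offload, is ->nocb_lock taken to catch up on any callback
 * acceleration the rcuo kthreads might have missed.
 */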
20694102adabSPaul E. McKenney
20704102adabSPaul E. McKenney /*
20714102adabSPaul E. McKenney * Check to see if there is a new grace period of which this CPU
20728087d3e3SPaul E. McKenney * is not yet aware, and if so, set up local rcu_data state for it.
20734102adabSPaul E. McKenney * Otherwise, see if this CPU has just passed through its first
20744102adabSPaul E. McKenney * quiescent state for this grace period, and record that fact if so.
207515cabdffSPaul E. McKenney */
20764102adabSPaul E. McKenney static void
20774102adabSPaul E. McKenney rcu_check_quiescent_state(struct rcu_data *rdp)
20784102adabSPaul E. McKenney {
20794102adabSPaul E. McKenney /* Check for grace-period ends and beginnings. */
20804102adabSPaul E. McKenney note_gp_changes(rdp);
208197c668b8SPaul E. McKenney
20824102adabSPaul E. McKenney /*
20834102adabSPaul E. McKenney * Does this CPU still need to do its part for current grace period?
20844102adabSPaul E. McKenney * If no, return and let the other CPUs do their part as well.
20854102adabSPaul E. McKenney */
20864102adabSPaul E. McKenney if (!rdp->core_needs_qs)
20874102adabSPaul E. McKenney return;
20883a19b46aSPaul E. McKenney
20894102adabSPaul E. McKenney /*
20904102adabSPaul E. McKenney * Was there a quiescent state since the beginning of the grace
20914102adabSPaul E. McKenney * period? If no, then exit and wait for the next call.
20924102adabSPaul E. McKenney */
20934102adabSPaul E. McKenney if (rdp->cpu_no_qs.b.norm)
20944102adabSPaul E. McKenney return;
2095cfeac397SPaul E. McKenney
20964102adabSPaul E. McKenney /*
20974102adabSPaul E. McKenney * Tell RCU we are done (but rcu_report_qs_rdp() will be the
20984102adabSPaul E. McKenney * judge of that).
2099780cd590SPaul E. McKenney */
2100780cd590SPaul E. McKenney rcu_report_qs_rdp(rdp);
21014102adabSPaul E. McKenney }
2102780cd590SPaul E. McKenney
21034102adabSPaul E. McKenney /* Return true if callback-invocation time limit exceeded. */
21044f5fbd78SYafang Shao static bool rcu_do_batch_check_time(long count, long tlimit,
21054aa846f9SPaul E. McKenney bool jlimit_check, unsigned long jlimit)
21064f5fbd78SYafang Shao {
21074102adabSPaul E. McKenney // Invoke local_clock() only once per 32 consecutive callbacks.
2108ea46351cSPaul E. McKenney return unlikely(tlimit) &&
2109780cd590SPaul E. McKenney (!likely(count & 31) ||
2110ea46351cSPaul E. McKenney (IS_ENABLED(CONFIG_RCU_DOUBLE_CHECK_CB_TIME) &&
21114f5fbd78SYafang Shao jlimit_check && time_after(jiffies, jlimit))) &&
21120937d045SPaul E. McKenney local_clock() >= tlimit;
211347fcbc8dSNeeraj Upadhyay }
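/*
 * For example, with count = 64 the "count & 31" term is zero, so the
 * relatively expensive local_clock() is read and compared against
 * tlimit; with count = 65 that read is skipped unless the cheaper
 * jiffies-based backstop (CONFIG_RCU_DOUBLE_CHECK_CB_TIME) has already
 * expired.  The intent is at most one fine-grained clock read per 32
 * callbacks on the common path.
 */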
2114780cd590SPaul E. McKenney
21154102adabSPaul E. McKenney /*
21164102adabSPaul E. McKenney * Invoke any RCU callbacks that have made it to the end of their grace
21174102adabSPaul E. McKenney * period. Throttle as specified by rdp->blimit.
21188af3a5e7SPaul E. McKenney */
21198af3a5e7SPaul E. McKenney static void rcu_do_batch(struct rcu_data *rdp)
21208af3a5e7SPaul E. McKenney {
21218af3a5e7SPaul E. McKenney long bl;
21228af3a5e7SPaul E. McKenney long count = 0;
21238af3a5e7SPaul E. McKenney int div;
21248af3a5e7SPaul E. McKenney bool __maybe_unused empty;
21258af3a5e7SPaul E. McKenney unsigned long flags;
2126c50cbe53SPaul E. McKenney unsigned long jlimit;
21278af3a5e7SPaul E. McKenney bool jlimit_check = false;
21288af3a5e7SPaul E. McKenney long pending;
21298af3a5e7SPaul E. McKenney struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
21308af3a5e7SPaul E. McKenney struct rcu_head *rhp;
21318af3a5e7SPaul E. McKenney long tlimit = 0;
21328af3a5e7SPaul E. McKenney
21338af3a5e7SPaul E. McKenney /* If no callbacks are ready, just return. */
21348af3a5e7SPaul E. McKenney if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
21358af3a5e7SPaul E. McKenney trace_rcu_batch_start(rcu_state.name,
21368af3a5e7SPaul E. McKenney rcu_segcblist_n_cbs(&rdp->cblist), 0);
21378af3a5e7SPaul E. McKenney trace_rcu_batch_end(rcu_state.name, 0,
21388af3a5e7SPaul E. McKenney !rcu_segcblist_empty(&rdp->cblist),
2139962aff03SPaul E. McKenney need_resched(), is_idle_task(current),
2140ea46351cSPaul E. McKenney rcu_is_callbacks_kthread(rdp));
2141962aff03SPaul E. McKenney return;
2142962aff03SPaul E. McKenney }
21438af3a5e7SPaul E. McKenney
21448af3a5e7SPaul E. McKenney /*
21458af3a5e7SPaul E. McKenney * Extract the list of ready callbacks, disabling IRQs to prevent
21468af3a5e7SPaul E. McKenney * races with call_rcu() from interrupt handlers. Leave the
21478af3a5e7SPaul E. McKenney * callback counts, as rcu_barrier() needs to be conservative.
21488af3a5e7SPaul E. McKenney */
21492a67e741SPeter Zijlstra rcu_nocb_lock_irqsave(rdp, flags);
21508af3a5e7SPaul E. McKenney WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
2151962aff03SPaul E. McKenney pending = rcu_segcblist_get_seglen(&rdp->cblist, RCU_DONE_TAIL);
2152962aff03SPaul E. McKenney div = READ_ONCE(rcu_divisor);
21538af3a5e7SPaul E. McKenney div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
215467c583a7SBoqun Feng bl = max(rdp->blimit, pending >> div);
215567c583a7SBoqun Feng if ((in_serving_softirq() || rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING) &&
21568af3a5e7SPaul E. McKenney (IS_ENABLED(CONFIG_RCU_DOUBLE_CHECK_CB_TIME) || unlikely(bl > 100))) {
21578af3a5e7SPaul E. McKenney const long npj = NSEC_PER_SEC / HZ;
215867c583a7SBoqun Feng long rrn = READ_ONCE(rcu_resched_ns);
21598af3a5e7SPaul E. McKenney
21608af3a5e7SPaul E. McKenney rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
21618af3a5e7SPaul E. McKenney tlimit = local_clock() + rrn;
21628af3a5e7SPaul E. McKenney jlimit = jiffies + (rrn + npj + 1) / npj;
21634102adabSPaul E. McKenney jlimit_check = true;
2164a58163d8SPaul E. McKenney }
2165a58163d8SPaul E. McKenney trace_rcu_batch_start(rcu_state.name,
2166a58163d8SPaul E. McKenney rcu_segcblist_n_cbs(&rdp->cblist), bl);
21674102adabSPaul E. McKenney rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
2168780cd590SPaul E. McKenney if (rcu_rdp_is_offloaded(rdp))
21694102adabSPaul E. McKenney rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
2170da1df50dSPaul E. McKenney
21714102adabSPaul E. McKenney trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
21724102adabSPaul E. McKenney rcu_nocb_unlock_irqrestore(rdp, flags);
2173ea46351cSPaul E. McKenney
2174780cd590SPaul E. McKenney /* Invoke callbacks. */
2175ea46351cSPaul E. McKenney tick_dep_set_task(current, TICK_DEP_BIT_RCU);
2176ed73860cSNeeraj Upadhyay rhp = rcu_cblist_dequeue(&rcl);
21774102adabSPaul E. McKenney
21784102adabSPaul E. McKenney for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
217996926686SPaul E. McKenney rcu_callback_t f;
218096926686SPaul E. McKenney
2181780cd590SPaul E. McKenney count++;
21824102adabSPaul E. McKenney debug_rcu_head_unqueue(rhp);
21834102adabSPaul E. McKenney
21844102adabSPaul E. McKenney rcu_lock_acquire(&rcu_callback_map);
21854102adabSPaul E. McKenney trace_rcu_invoke_callback(rcu_state.name, rhp);
2186a616aec9SIngo Molnar
21874102adabSPaul E. McKenney f = rhp->func;
21885bb5d09cSPaul E. McKenney debug_rcu_head_callback(rhp);
21894102adabSPaul E. McKenney WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
2190b5374b2dSPaul E. McKenney f(rhp);
2191b4e6039eSJoel Fernandes (Google)
21924102adabSPaul E. McKenney rcu_lock_release(&rcu_callback_map);
219315fecf89SPaul E. McKenney
219415fecf89SPaul E. McKenney /*
21956bc33582SJoel Fernandes (Google) * Stop only if limit reached and CPU has something to do.
2196cfcdef5eSEric Dumazet */
21974102adabSPaul E. McKenney if (in_serving_softirq()) {
21984102adabSPaul E. McKenney if (count >= bl && (need_resched() || !is_idle_task(current)))
219915fecf89SPaul E. McKenney break;
22003c779dfeSPaul E. McKenney /*
220115fecf89SPaul E. McKenney * Make sure we don't spend too much time here and deprive other
22023c779dfeSPaul E. McKenney * softirq vectors of CPU cycles.
220315fecf89SPaul E. McKenney */
22044102adabSPaul E. McKenney if (rcu_do_batch_check_time(count, tlimit, jlimit_check, jlimit))
220551038506SZqiang break;
22064102adabSPaul E. McKenney } else {
22074102adabSPaul E. McKenney // In rcuc/rcuoc context, so no worries about
22084102adabSPaul E. McKenney // depriving other softirq vectors of CPU cycles.
22094102adabSPaul E. McKenney local_bh_enable();
22107b65dfa3SFrederic Weisbecker lockdep_assert_irqs_enabled();
221115fecf89SPaul E. McKenney cond_resched_tasks_rcu_qs();
221215fecf89SPaul E. McKenney lockdep_assert_irqs_enabled();
22134102adabSPaul E. McKenney local_bh_disable();
22147b65dfa3SFrederic Weisbecker // But rcuc kthreads can delay quiescent-state
22154102adabSPaul E. McKenney // reporting, so check time limits for them.
2216cfcdef5eSEric Dumazet if (rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING &&
2217b5374b2dSPaul E. McKenney rcu_do_batch_check_time(count, tlimit, jlimit_check, jlimit)) {
2218b5374b2dSPaul E. McKenney rdp->rcu_cpu_has_work = 1;
2219b5374b2dSPaul E. McKenney break;
2220a554ba28SFrederic Weisbecker }
2221a2b354b9SPaul E. McKenney }
2222a2b354b9SPaul E. McKenney }
2223a2b354b9SPaul E. McKenney
2224a2b354b9SPaul E. McKenney rcu_nocb_lock_irqsave(rdp, flags);
2225a2b354b9SPaul E. McKenney rdp->n_cbs_invoked += count;
22263c779dfeSPaul E. McKenney trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
222715fecf89SPaul E. McKenney is_idle_task(current), rcu_is_callbacks_kthread(rdp));
222815fecf89SPaul E. McKenney
2229344e219dSFrederic Weisbecker /* Update counts and requeue any remaining callbacks. */
22307f36ef82SPaul E. McKenney rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
22313afe7fa5SJoel Fernandes (Google) rcu_segcblist_add_len(&rdp->cblist, -count);
22323afe7fa5SJoel Fernandes (Google)
22335d6742b3SPaul E. McKenney /* Reinstate batch limit if we have worked down the excess. */
22344102adabSPaul E. McKenney count = rcu_segcblist_n_cbs(&rdp->cblist);
22354102adabSPaul E. McKenney if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark)
22366a949b7aSPaul E. McKenney rdp->blimit = blimit;
223715fecf89SPaul E. McKenney
22383afe7fa5SJoel Fernandes (Google) /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
223915fecf89SPaul E. McKenney if (count == 0 && rdp->qlen_last_fqs_check != 0) {
224077a40f97SJoel Fernandes (Google) rdp->qlen_last_fqs_check = 0;
224177a40f97SJoel Fernandes (Google) rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);
22426bc33582SJoel Fernandes (Google) } else if (count < rdp->qlen_last_fqs_check - qhimark)
224315fecf89SPaul E. McKenney rdp->qlen_last_fqs_check = count;
224477a40f97SJoel Fernandes (Google)
224577a40f97SJoel Fernandes (Google) /*
224677a40f97SJoel Fernandes (Google) * The following usually indicates a double call_rcu(). To track
224777a40f97SJoel Fernandes (Google) * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.
224877a40f97SJoel Fernandes (Google) */
224977a40f97SJoel Fernandes (Google) empty = rcu_segcblist_empty(&rdp->cblist);
225077a40f97SJoel Fernandes (Google) WARN_ON_ONCE(count == 0 && !empty);
225177a40f97SJoel Fernandes (Google) WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
225277a40f97SJoel Fernandes (Google) count != 0 && empty);
225377a40f97SJoel Fernandes (Google) WARN_ON_ONCE(count == 0 && rcu_segcblist_n_segment_cbs(&rdp->cblist) != 0);
225415fecf89SPaul E. McKenney WARN_ON_ONCE(!empty && rcu_segcblist_n_segment_cbs(&rdp->cblist) == 0);
225515fecf89SPaul E. McKenney
225615fecf89SPaul E. McKenney rcu_nocb_unlock_irqrestore(rdp, flags);
22573e61e95eSFrederic Weisbecker
22583e61e95eSFrederic Weisbecker tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
22594102adabSPaul E. McKenney }
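/*
 * A worked example of the batch-limit math near the top of
 * rcu_do_batch(), assuming the usual defaults (blimit of 10,
 * rcu_divisor of 7): a CPU with 10000 ready callbacks gets
 * bl = max(10, 10000 >> 7) = 78, so the per-pass limit scales roughly
 * with the backlog during callback floods while lightly loaded CPUs
 * stay at the small default and yield quickly.
 */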
22603e61e95eSFrederic Weisbecker
22613e61e95eSFrederic Weisbecker /*
22623e61e95eSFrederic Weisbecker * This function is invoked from each scheduling-clock interrupt,
22633e61e95eSFrederic Weisbecker * and checks to see if this CPU is in a non-context-switch quiescent
2264cfcdef5eSEric Dumazet * state, for example, user mode or idle loop. It also schedules RCU
2265cfcdef5eSEric Dumazet * core processing. If the current grace period has gone on too long,
22666bc33582SJoel Fernandes (Google) * it will ask the scheduler to manufacture a context switch for the sole
2267cfcdef5eSEric Dumazet * purpose of providing the needed quiescent state.
2268cfcdef5eSEric Dumazet */
2269cfcdef5eSEric Dumazet void rcu_sched_clock_irq(int user)
2270cfcdef5eSEric Dumazet {
2271a554ba28SFrederic Weisbecker unsigned long j;
22725d6742b3SPaul E. McKenney
22735d6742b3SPaul E. McKenney if (IS_ENABLED(CONFIG_PROVE_RCU)) {
22745d6742b3SPaul E. McKenney j = jiffies;
22755d6742b3SPaul E. McKenney WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
22765d6742b3SPaul E. McKenney __this_cpu_write(rcu_data.last_sched_clock, j);
22775d6742b3SPaul E. McKenney }
22784102adabSPaul E. McKenney trace_rcu_utilization(TPS("Start scheduler-tick"));
22794102adabSPaul E. McKenney lockdep_assert_irqs_disabled();
22807b65dfa3SFrederic Weisbecker raw_cpu_inc(rcu_data.ticks_this_gp);
2281e816d56fSPaul E. McKenney /* The load-acquire pairs with the store-release setting to true. */
22823c779dfeSPaul E. McKenney if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
228351038506SZqiang /* Idle and userspace execution already are quiescent states. */
22844102adabSPaul E. McKenney if (!rcu_is_cpu_rrupt_from_idle() && !user) {
228515fecf89SPaul E. McKenney set_tsk_need_resched(current);
228615fecf89SPaul E. McKenney set_preempt_need_resched();
22876bc33582SJoel Fernandes (Google) }
22884102adabSPaul E. McKenney __this_cpu_write(rcu_data.rcu_urgent_qs, false);
22894102adabSPaul E. McKenney }
229015fecf89SPaul E. McKenney rcu_flavor_sched_clock_irq(user);
2291d5a9a8c3SPaul E. McKenney if (rcu_pending(user))
22924102adabSPaul E. McKenney invoke_rcu_core();
22934102adabSPaul E. McKenney if (user || rcu_is_cpu_rrupt_from_idle())
22944102adabSPaul E. McKenney rcu_note_voluntary_context_switch(current);
229515fecf89SPaul E. McKenney lockdep_assert_irqs_disabled();
22964102adabSPaul E. McKenney
22972431774fSPaul E. McKenney trace_rcu_utilization(TPS("End scheduler-tick"));
229815fecf89SPaul E. McKenney }
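/*
 * The tick path above is deliberately lightweight: it samples the
 * rcu_urgent_qs hint, asks the scheduler for a context switch only
 * when a quiescent state is urgently needed and the interrupt did not
 * arrive from idle or userspace, and defers all heavier processing to
 * rcu_core() by raising it through invoke_rcu_core() only when
 * rcu_pending() says there is work.
 */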
229915fecf89SPaul E. McKenney
2300efd88b02SPaul E. McKenney /*
2301efd88b02SPaul E. McKenney * Scan the leaf rcu_node structures. For each structure on which all
2302efd88b02SPaul E. McKenney * CPUs have reported a quiescent state and on which there are tasks
2303efd88b02SPaul E. McKenney * blocking the current grace period, initiate RCU priority boosting.
2304efd88b02SPaul E. McKenney * Otherwise, invoke the specified function to check dyntick state for
2305b4e6039eSJoel Fernandes (Google) * each CPU that has not yet reported a quiescent state.
2306b4e6039eSJoel Fernandes (Google) */
2307d1b222c6SPaul E. McKenney static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
2308b4e6039eSJoel Fernandes (Google) {
2309b4e6039eSJoel Fernandes (Google) int cpu;
2310b4e6039eSJoel Fernandes (Google) unsigned long flags;
23114102adabSPaul E. McKenney struct rcu_node *rnp;
23125d6742b3SPaul E. McKenney
23134102adabSPaul E. McKenney rcu_state.cbovld = rcu_state.cbovldnext;
23146a949b7aSPaul E. McKenney rcu_state.cbovldnext = false;
23154102adabSPaul E. McKenney rcu_for_each_leaf_node(rnp) {
23164102adabSPaul E. McKenney unsigned long mask = 0;
23174102adabSPaul E. McKenney unsigned long rsmask = 0;
2318c98cac60SPaul E. McKenney
2319c98cac60SPaul E. McKenney cond_resched_tasks_rcu_qs();
2320c98cac60SPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags);
2321c98cac60SPaul E. McKenney rcu_state.cbovldnext |= !!rnp->cbovldmask;
2322c98cac60SPaul E. McKenney if (rnp->qsmask == 0) {
2323277ffe1bSZhouyi Zhou if (rcu_preempt_blocked_readers_cgp(rnp)) {
23244102adabSPaul E. McKenney /*
2325c98cac60SPaul E. McKenney * No point in scanning bits because they
23264102adabSPaul E. McKenney * are all zero. But we might need to
2327c708b08cSPaul E. McKenney * priority-boost blocked readers.
2328c708b08cSPaul E. McKenney */
2329c708b08cSPaul E. McKenney rcu_initiate_boost(rnp, flags);
2330c708b08cSPaul E. McKenney /* rcu_initiate_boost() releases rnp->lock */
2331c708b08cSPaul E. McKenney continue;
2332c708b08cSPaul E. McKenney }
2333c708b08cSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
23344102adabSPaul E. McKenney continue;
2335a649d25dSPaul E. McKenney }
23364e95020cSPaul E. McKenney for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) {
233792aa39e9SPaul E. McKenney struct rcu_data *rdp;
23382dba13f0SPaul E. McKenney int ret;
233992aa39e9SPaul E. McKenney
2340a0ef9ec2SPaul E. McKenney rdp = per_cpu_ptr(&rcu_data, cpu);
234192aa39e9SPaul E. McKenney ret = f(rdp);
234292aa39e9SPaul E. McKenney if (ret > 0) {
23434102adabSPaul E. McKenney mask |= rdp->grpmask;
23442dba13f0SPaul E. McKenney rcu_disable_urgency_upon_qs(rdp);
234592aa39e9SPaul E. McKenney }
2346c98cac60SPaul E. McKenney if (ret < 0)
2347dd7dafd1SPaul E. McKenney rsmask |= rdp->grpmask;
23484102adabSPaul E. McKenney }
2349528262f5SZqiang if (mask != 0) {
2350528262f5SZqiang /* Idle/offline CPUs, report (releases rnp->lock). */
2351a649d25dSPaul E. McKenney rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
235207f27570SByungchul Park } else {
23534102adabSPaul E. McKenney /* Nothing to do here, so just drop the lock. */
23544102adabSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
23554102adabSPaul E. McKenney }
23564102adabSPaul E. McKenney
23575d8a752eSZhouyi Zhou for_each_leaf_node_cpu_mask(rnp, cpu, rsmask)
23585d8a752eSZhouyi Zhou resched_cpu(cpu);
23595d8a752eSZhouyi Zhou }
23605d8a752eSZhouyi Zhou }
23615d8a752eSZhouyi Zhou
23624102adabSPaul E. McKenney /*
23638ff0b907SPaul E. McKenney * Force quiescent states on reluctant CPUs, and also detect which
23644102adabSPaul E. McKenney * CPUs are in dyntick-idle mode.
23654102adabSPaul E. McKenney */
23664102adabSPaul E. McKenney void rcu_force_quiescent_state(void)
23674102adabSPaul E. McKenney {
236866e4c33bSPaul E. McKenney unsigned long flags;
23694102adabSPaul E. McKenney bool ret;
23704102adabSPaul E. McKenney struct rcu_node *rnp;
2371b2b00ddfSPaul E. McKenney struct rcu_node *rnp_old = NULL;
2372b2b00ddfSPaul E. McKenney
2373aedf4ba9SPaul E. McKenney /* Funnel through hierarchy to reduce memory contention. */
2374cee43939SPaul E. McKenney rnp = raw_cpu_read(rcu_data.mynode);
23754102adabSPaul E. McKenney for (; rnp != NULL; rnp = rnp->parent) {
23762a67e741SPeter Zijlstra ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
2377b2b00ddfSPaul E. McKenney !raw_spin_trylock(&rnp->fqslock);
23784102adabSPaul E. McKenney if (rnp_old != NULL)
23799b1ce0acSNeeraj Upadhyay raw_spin_unlock(&rnp_old->fqslock);
2380a77da14cSPaul E. McKenney if (ret)
2381a77da14cSPaul E. McKenney return;
2382a77da14cSPaul E. McKenney rnp_old = rnp;
2383a77da14cSPaul E. McKenney }
2384a77da14cSPaul E. McKenney /* rnp_old == rcu_get_root(), rnp == NULL. */
2385a77da14cSPaul E. McKenney
2386a77da14cSPaul E. McKenney /* Reached the root of the rcu_node tree, acquire lock. */
23874102adabSPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
23884102adabSPaul E. McKenney raw_spin_unlock(&rnp_old->fqslock);
238992816435SPaul E. McKenney if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {
239092816435SPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
2391a77da14cSPaul E. McKenney return; /* Someone beat us to it. */
23927441e766SPaul E. McKenney }
239366e4c33bSPaul E. McKenney WRITE_ONCE(rcu_state.gp_flags,
239466e4c33bSPaul E. McKenney READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);
23957441e766SPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
2396516e5ae0SJoel Fernandes (Google) rcu_gp_kthread_wake();
239766e4c33bSPaul E. McKenney }
23984102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
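/*
 * The funnel locking above is purely a contention filter: each caller
 * trylocks ->fqslock on its own leaf, then on each ancestor in turn,
 * dropping the previous level as it climbs.  A caller that loses a
 * trylock, or that sees RCU_GP_FLAG_FQS already set, just returns, so
 * even a storm of rcu_force_quiescent_state() calls funnels down to a
 * handful of callers contending for the root, with only one of them
 * actually setting the flag and waking the grace-period kthread.
 */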
23994102adabSPaul E. McKenney
2400c9a24e2dSPaul E. McKenney // Workqueue handler for an RCU reader for kernels enforcing strict RCU
2401b50912d0SPaul E. McKenney // grace periods.
24020aa04b05SPaul E. McKenney static void strict_work_handler(struct work_struct *work)
24030aa04b05SPaul E. McKenney {
240467c583a7SBoqun Feng rcu_read_lock();
24054102adabSPaul E. McKenney rcu_read_unlock();
24064102adabSPaul E. McKenney }
24070aa04b05SPaul E. McKenney
24084102adabSPaul E. McKenney /* Perform RCU core processing work for the current CPU. */
24094102adabSPaul E. McKenney static __latent_entropy void rcu_core(void)
24104102adabSPaul E. McKenney {
24114102adabSPaul E. McKenney unsigned long flags;
24124102adabSPaul E. McKenney struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
2413cd920e5aSPaul E. McKenney struct rcu_node *rnp = rdp->mynode;
24144102adabSPaul E. McKenney /*
24154102adabSPaul E. McKenney * On RT rcu_core() can be preempted when IRQs aren't disabled.
24164102adabSPaul E. McKenney * Therefore this function can race with concurrent NOCB (de-)offloading
24174102adabSPaul E. McKenney * on this CPU and the below condition must be considered volatile.
24184102adabSPaul E. McKenney * However if we race with:
24194102adabSPaul E. McKenney *
24204102adabSPaul E. McKenney * _ Offloading: In the worst case we accelerate or process callbacks
2421da1df50dSPaul E. McKenney * concurrently with NOCB kthreads. We are guaranteed to
24224102adabSPaul E. McKenney * call rcu_nocb_lock() if that happens.
242367a0edbfSPaul E. McKenney *
24244102adabSPaul E. McKenney * _ Deoffloading: In the worst case we miss callbacks acceleration or
24254102adabSPaul E. McKenney * processing. This is fine because the early stage
24264102adabSPaul E. McKenney * of deoffloading invokes rcu_core() after setting
2427d62df573SPaul E. McKenney * SEGCBLIST_RCU_CORE. So we guarantee that we'll process
24284102adabSPaul E. McKenney * what could have been dismissed without the need to wait
24294102adabSPaul E. McKenney * for the next rcu_pending() check in the next jiffy.
24304102adabSPaul E. McKenney */
2431336a4f6cSPaul E. McKenney const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);
24324102adabSPaul E. McKenney
24334102adabSPaul E. McKenney if (cpu_is_offline(smp_processor_id()))
24342a67e741SPeter Zijlstra return;
24354102adabSPaul E. McKenney trace_rcu_utilization(TPS("Start RCU core"));
243667a0edbfSPaul E. McKenney WARN_ON_ONCE(!rdp->beenonline);
243767c583a7SBoqun Feng
24384102adabSPaul E. McKenney /* Report any deferred quiescent states if preemption enabled. */
24394102adabSPaul E. McKenney if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) {
244067a0edbfSPaul E. McKenney rcu_preempt_deferred_qs(current);
244167a0edbfSPaul E. McKenney } else if (rcu_preempt_need_deferred_qs(current)) {
244267c583a7SBoqun Feng set_tsk_need_resched(current);
2443532c00c9SPaul E. McKenney set_preempt_need_resched();
24444102adabSPaul E. McKenney }
2445cd920e5aSPaul E. McKenney
24464102adabSPaul E. McKenney /* Update RCU state based on any recent quiescent states. */
2447a657f261SPaul E. McKenney rcu_check_quiescent_state(rdp);
2448a657f261SPaul E. McKenney
2449a657f261SPaul E. McKenney /* No grace period and unregistered callbacks? */
2450a657f261SPaul E. McKenney if (!rcu_gp_in_progress() &&
2451a657f261SPaul E. McKenney rcu_segcblist_is_enabled(&rdp->cblist) && do_batch) {
2452a657f261SPaul E. McKenney rcu_nocb_lock_irqsave(rdp, flags);
2453a657f261SPaul E. McKenney if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
2454a657f261SPaul E. McKenney rcu_accelerate_cbs_unlocked(rnp, rdp);
2455fb60e533SPaul E. McKenney rcu_nocb_unlock_irqrestore(rdp, flags);
245648d07c04SSebastian Andrzej Siewior }
24574102adabSPaul E. McKenney
24584102adabSPaul E. McKenney rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
2459da1df50dSPaul E. McKenney
24604102adabSPaul E. McKenney /* If there are callbacks ready, invoke them. */
2461fbb94cbdSFrederic Weisbecker if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) &&
2462fbb94cbdSFrederic Weisbecker likely(READ_ONCE(rcu_scheduler_fully_active))) {
2463fbb94cbdSFrederic Weisbecker rcu_do_batch(rdp);
2464fbb94cbdSFrederic Weisbecker /* Re-invoke RCU core processing if there are callbacks remaining. */
2465fbb94cbdSFrederic Weisbecker if (rcu_segcblist_ready_cbs(&rdp->cblist))
2466fbb94cbdSFrederic Weisbecker invoke_rcu_core();
2467fbb94cbdSFrederic Weisbecker }
2468fbb94cbdSFrederic Weisbecker
2469fbb94cbdSFrederic Weisbecker /* Do any needed deferred wakeups of rcuo kthreads. */
2470fbb94cbdSFrederic Weisbecker do_nocb_deferred_wakeup(rdp);
2471fbb94cbdSFrederic Weisbecker trace_rcu_utilization(TPS("End RCU core"));
2472fbb94cbdSFrederic Weisbecker
2473fbb94cbdSFrederic Weisbecker // If strict GPs, schedule an RCU reader in a clean environment.
2474fbb94cbdSFrederic Weisbecker if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
2475fbb94cbdSFrederic Weisbecker queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
2476fbb94cbdSFrederic Weisbecker }
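/*
 * In summary, one rcu_core() pass: report any deferred quiescent
 * state, note grace-period beginnings and ends, start a new grace
 * period if callbacks are waiting for one, check for stalled
 * grace-period starts, invoke ready callbacks (re-raising itself if
 * more remain), and finally handle any deferred rcuo wakeups.
 */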
2477fbb94cbdSFrederic Weisbecker
247832aa2f41SFrederic Weisbecker static void rcu_core_si(struct softirq_action *h)
24794102adabSPaul E. McKenney {
24804102adabSPaul E. McKenney rcu_core();
24814102adabSPaul E. McKenney }
24824102adabSPaul E. McKenney
24834102adabSPaul E. McKenney static void rcu_wake_cond(struct task_struct *t, int status)
24844102adabSPaul E. McKenney {
24853e310098SPaul E. McKenney /*
2486790da248SPaul E. McKenney * If the thread is yielding, only wake it when this
24873e310098SPaul E. McKenney * is invoked from idle
2488fced9c8cSPaul E. McKenney */
2489fced9c8cSPaul E. McKenney if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
2490fced9c8cSPaul E. McKenney wake_up_process(t);
2491fced9c8cSPaul E. McKenney }
24923e310098SPaul E. McKenney
24934102adabSPaul E. McKenney static void invoke_rcu_core_kthread(void)
24948087d3e3SPaul E. McKenney {
24954102adabSPaul E. McKenney struct task_struct *t;
24964102adabSPaul E. McKenney unsigned long flags;
2497de8e8730SPaul E. McKenney
2498634954c2SFrederic Weisbecker local_irq_save(flags);
2499634954c2SFrederic Weisbecker __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
25004102adabSPaul E. McKenney t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
2501c6e09b97SPaul E. McKenney if (t != NULL && t != current)
2502634954c2SFrederic Weisbecker rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
25034102adabSPaul E. McKenney local_irq_restore(flags);
25049910affaSYao Dongdong }
2505791416c4SPaul E. McKenney
25064102adabSPaul E. McKenney /*
25074102adabSPaul E. McKenney * Wake up this CPU's rcuc kthread to do RCU core processing.
250832aa2f41SFrederic Weisbecker */
25090598a4d4SFrederic Weisbecker static void invoke_rcu_core(void)
251043e903adSPaul E. McKenney {
25110598a4d4SFrederic Weisbecker if (!cpu_online(smp_processor_id()))
25120598a4d4SFrederic Weisbecker return;
25130598a4d4SFrederic Weisbecker if (use_softirq)
25140598a4d4SFrederic Weisbecker raise_softirq(RCU_SOFTIRQ);
25154102adabSPaul E. McKenney else
25164102adabSPaul E. McKenney invoke_rcu_core_kthread();
25174102adabSPaul E. McKenney }
25184102adabSPaul E. McKenney
2519a657f261SPaul E. McKenney static void rcu_cpu_kthread_park(unsigned int cpu)
2520a657f261SPaul E. McKenney {
2521a657f261SPaul E. McKenney per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
2522a657f261SPaul E. McKenney }
25234102adabSPaul E. McKenney
25244102adabSPaul E. McKenney static int rcu_cpu_kthread_should_run(unsigned int cpu)
252548d07c04SSebastian Andrzej Siewior {
252648d07c04SSebastian Andrzej Siewior return __this_cpu_read(rcu_data.rcu_cpu_has_work);
252748d07c04SSebastian Andrzej Siewior }
252848d07c04SSebastian Andrzej Siewior
252948d07c04SSebastian Andrzej Siewior /*
253048d07c04SSebastian Andrzej Siewior * Per-CPU kernel thread that invokes RCU callbacks. This replaces
253148d07c04SSebastian Andrzej Siewior * the RCU softirq used in configurations of RCU that do not support RCU
25324102adabSPaul E. McKenney * priority boosting.
253348d07c04SSebastian Andrzej Siewior */
253448d07c04SSebastian Andrzej Siewior static void rcu_cpu_kthread(unsigned int cpu)
253548d07c04SSebastian Andrzej Siewior {
253648d07c04SSebastian Andrzej Siewior unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
253748d07c04SSebastian Andrzej Siewior char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
253848d07c04SSebastian Andrzej Siewior unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
253948d07c04SSebastian Andrzej Siewior int spincnt;
254048d07c04SSebastian Andrzej Siewior
254148d07c04SSebastian Andrzej Siewior trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
254248d07c04SSebastian Andrzej Siewior for (spincnt = 0; spincnt < 10; spincnt++) {
254348d07c04SSebastian Andrzej Siewior WRITE_ONCE(*j, jiffies);
254448d07c04SSebastian Andrzej Siewior local_bh_disable();
254548d07c04SSebastian Andrzej Siewior *statusp = RCU_KTHREAD_RUNNING;
254648d07c04SSebastian Andrzej Siewior local_irq_disable();
254748d07c04SSebastian Andrzej Siewior work = *workp;
254848d07c04SSebastian Andrzej Siewior WRITE_ONCE(*workp, 0);
254948d07c04SSebastian Andrzej Siewior local_irq_enable();
255048d07c04SSebastian Andrzej Siewior if (work)
255148d07c04SSebastian Andrzej Siewior rcu_core();
255248d07c04SSebastian Andrzej Siewior local_bh_enable();
255348d07c04SSebastian Andrzej Siewior if (!READ_ONCE(*workp)) {
255448d07c04SSebastian Andrzej Siewior trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
255548d07c04SSebastian Andrzej Siewior *statusp = RCU_KTHREAD_WAITING;
25564102adabSPaul E. McKenney return;
25574102adabSPaul E. McKenney }
255848d07c04SSebastian Andrzej Siewior }
255948d07c04SSebastian Andrzej Siewior *statusp = RCU_KTHREAD_YIELDING;
256048d07c04SSebastian Andrzej Siewior trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
25614102adabSPaul E. McKenney schedule_timeout_idle(2);
256248d07c04SSebastian Andrzej Siewior trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
256348d07c04SSebastian Andrzej Siewior *statusp = RCU_KTHREAD_WAITING;
25644102adabSPaul E. McKenney WRITE_ONCE(*j, jiffies);
25654102adabSPaul E. McKenney }
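/*
 * The rcuc kthread above mirrors the softirq path: up to ten
 * back-to-back passes through rcu_core() while work keeps arriving,
 * then a short RCU_KTHREAD_YIELDING nap via schedule_timeout_idle(2)
 * so that a callback flood cannot monopolize the CPU, with
 * ->rcuc_activity timestamped for use by the CPU stall warnings.
 */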
256648d07c04SSebastian Andrzej Siewior
256748d07c04SSebastian Andrzej Siewior static struct smp_hotplug_thread rcu_cpu_thread_spec = {
256848d07c04SSebastian Andrzej Siewior .store = &rcu_data.rcu_cpu_kthread_task,
256948d07c04SSebastian Andrzej Siewior .thread_should_run = rcu_cpu_kthread_should_run,
257048d07c04SSebastian Andrzej Siewior .thread_fn = rcu_cpu_kthread,
257148d07c04SSebastian Andrzej Siewior .thread_comm = "rcuc/%u",
257248d07c04SSebastian Andrzej Siewior .setup = rcu_cpu_kthread_setup,
257348d07c04SSebastian Andrzej Siewior .park = rcu_cpu_kthread_park,
257448d07c04SSebastian Andrzej Siewior };
257548d07c04SSebastian Andrzej Siewior
257648d07c04SSebastian Andrzej Siewior /*
257748d07c04SSebastian Andrzej Siewior * Spawn per-CPU RCU core processing kthreads.
257848d07c04SSebastian Andrzej Siewior */
257948d07c04SSebastian Andrzej Siewior static int __init rcu_spawn_core_kthreads(void)
258048d07c04SSebastian Andrzej Siewior {
258148d07c04SSebastian Andrzej Siewior int cpu;
258248d07c04SSebastian Andrzej Siewior
258348d07c04SSebastian Andrzej Siewior for_each_possible_cpu(cpu)
258448d07c04SSebastian Andrzej Siewior per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
2585c9515875SZqiang if (use_softirq)
258648d07c04SSebastian Andrzej Siewior return 0;
258748d07c04SSebastian Andrzej Siewior WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
25882488a5e6SLai Jiangshan "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
258948d07c04SSebastian Andrzej Siewior return 0;
2590c9515875SZqiang }
259148d07c04SSebastian Andrzej Siewior
259248d07c04SSebastian Andrzej Siewior /*
259348d07c04SSebastian Andrzej Siewior * Handle any core-RCU processing required by a call_rcu() invocation.
259448d07c04SSebastian Andrzej Siewior */
259548d07c04SSebastian Andrzej Siewior static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
259648d07c04SSebastian Andrzej Siewior unsigned long flags)
259748d07c04SSebastian Andrzej Siewior {
259848d07c04SSebastian Andrzej Siewior /*
259948d07c04SSebastian Andrzej Siewior * If called from an extended quiescent state, invoke the RCU
260048d07c04SSebastian Andrzej Siewior * core in order to force a re-evaluation of RCU's idleness.
260148d07c04SSebastian Andrzej Siewior */
260248d07c04SSebastian Andrzej Siewior if (!rcu_is_watching())
260348d07c04SSebastian Andrzej Siewior invoke_rcu_core();
260448d07c04SSebastian Andrzej Siewior
260548d07c04SSebastian Andrzej Siewior /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
260648d07c04SSebastian Andrzej Siewior if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
260748d07c04SSebastian Andrzej Siewior return;
260877865deaSPaul E. McKenney
260948d07c04SSebastian Andrzej Siewior /*
261048d07c04SSebastian Andrzej Siewior * Force the grace period if too many callbacks or too long waiting.
2611c9515875SZqiang * Enforce hysteresis, and don't invoke rcu_force_quiescent_state()
261248d07c04SSebastian Andrzej Siewior * if some other CPU has recently done so. Also, don't bother
261348d07c04SSebastian Andrzej Siewior * invoking rcu_force_quiescent_state() if the newly enqueued callback
261448d07c04SSebastian Andrzej Siewior * is the only one waiting for a grace period to complete.
261548d07c04SSebastian Andrzej Siewior */
261648d07c04SSebastian Andrzej Siewior if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
261748d07c04SSebastian Andrzej Siewior rdp->qlen_last_fqs_check + qhimark)) {
261848d07c04SSebastian Andrzej Siewior
261948d07c04SSebastian Andrzej Siewior /* Are we ignoring a completed grace period? */
262048d07c04SSebastian Andrzej Siewior note_gp_changes(rdp);
262148d07c04SSebastian Andrzej Siewior
262248d07c04SSebastian Andrzej Siewior /* Start a new grace period if one not already started. */
262348d07c04SSebastian Andrzej Siewior if (!rcu_gp_in_progress()) {
262448d07c04SSebastian Andrzej Siewior rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);
262548d07c04SSebastian Andrzej Siewior } else {
262648d07c04SSebastian Andrzej Siewior /* Give the grace period a kick. */
262748d07c04SSebastian Andrzej Siewior rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;
262848d07c04SSebastian Andrzej Siewior if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap &&
262948d07c04SSebastian Andrzej Siewior rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
263048d07c04SSebastian Andrzej Siewior rcu_force_quiescent_state();
263148d07c04SSebastian Andrzej Siewior rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);
26324b4399b2SZqiang rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
263348d07c04SSebastian Andrzej Siewior }
263448d07c04SSebastian Andrzej Siewior }
263548d07c04SSebastian Andrzej Siewior }
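/*
 * A numeric illustration of the hysteresis above, assuming the default
 * qhimark of 10000: once this CPU's callback count exceeds
 * ->qlen_last_fqs_check + 10000, it starts a grace period if none is
 * in progress; otherwise it raises ->blimit, possibly kicks
 * quiescent-state forcing, and resets ->qlen_last_fqs_check to the
 * current count so that roughly another 10000 callbacks must pile up
 * before it complains again.
 */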
263648d07c04SSebastian Andrzej Siewior
263748d07c04SSebastian Andrzej Siewior /*
263848d07c04SSebastian Andrzej Siewior * RCU callback function to leak a callback.
26394102adabSPaul E. McKenney */
26404102adabSPaul E. McKenney static void rcu_leak_callback(struct rcu_head *rhp)
26414102adabSPaul E. McKenney {
26425c7d8967SPaul E. McKenney }
26435c7d8967SPaul E. McKenney
26444102adabSPaul E. McKenney /*
26454102adabSPaul E. McKenney * Check and if necessary update the leaf rcu_node structure's
26464102adabSPaul E. McKenney * ->cbovldmask bit corresponding to the current CPU based on that CPU's
26474102adabSPaul E. McKenney * number of queued RCU callbacks. The caller must hold the leaf rcu_node
26484102adabSPaul E. McKenney * structure's ->lock.
26494102adabSPaul E. McKenney */
26504102adabSPaul E. McKenney static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp)
26514102adabSPaul E. McKenney {
26524102adabSPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp);
26534102adabSPaul E. McKenney if (qovld_calc <= 0)
26544102adabSPaul E. McKenney return; // Early boot and wildcard value set.
26554102adabSPaul E. McKenney if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc)
26564102adabSPaul E. McKenney WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask);
26574102adabSPaul E. McKenney else
2658cd920e5aSPaul E. McKenney WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask);
26594102adabSPaul E. McKenney }
2660cd920e5aSPaul E. McKenney
26614102adabSPaul E. McKenney /*
26624102adabSPaul E. McKenney * Check and if necessary update the leaf rcu_node structure's
266315fecf89SPaul E. McKenney * ->cbovldmask bit corresponding to the current CPU based on that CPU's
266415fecf89SPaul E. McKenney * number of queued RCU callbacks. No locks need be held, but the
26654102adabSPaul E. McKenney * caller must have disabled interrupts.
26664102adabSPaul E. McKenney *
266715cabdffSPaul E. McKenney * Note that this function ignores the possibility that there are a lot
26684102adabSPaul E. McKenney * of callbacks all of which have already seen the end of their respective
26694102adabSPaul E. McKenney * grace periods. This omission is due to the need for no-CBs CPUs to
2670de8e8730SPaul E. McKenney * be holding ->nocb_lock to do this check, which is too heavy for a
2671c6e09b97SPaul E. McKenney * common-case operation.
26724102adabSPaul E. McKenney */
26734102adabSPaul E. McKenney static void check_cb_ovld(struct rcu_data *rdp)
2674d5a9a8c3SPaul E. McKenney {
26752431774fSPaul E. McKenney struct rcu_node *const rnp = rdp->mynode;
267615fecf89SPaul E. McKenney
2677cd920e5aSPaul E. McKenney if (qovld_calc <= 0 ||
26782431774fSPaul E. McKenney ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) ==
267915fecf89SPaul E. McKenney !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask)))
26804102adabSPaul E. McKenney return; // Early boot wildcard value or already set correctly.
26814102adabSPaul E. McKenney raw_spin_lock_rcu_node(rnp);
26824102adabSPaul E. McKenney check_cb_ovld_locked(rdp, rnp);
26834102adabSPaul E. McKenney raw_spin_unlock_rcu_node(rnp);
26844102adabSPaul E. McKenney }
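/*
 * Both helpers above maintain the per-leaf ->cbovldmask bits that feed
 * rcu_state.cbovld: once a CPU's callback count reaches qovld_calc
 * (by default a small multiple of qhimark, assuming the boot
 * parameters are left alone), quiescent-state forcing treats that CPU
 * as callback-overloaded, and rcu_gp_cleanup() clears the bit again
 * once the backlog drains below the threshold.
 */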
26854102adabSPaul E. McKenney
26864102adabSPaul E. McKenney static void
26874102adabSPaul E. McKenney __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
26884102adabSPaul E. McKenney {
26894102adabSPaul E. McKenney static atomic_t doublefrees;
26904102adabSPaul E. McKenney unsigned long flags;
26914102adabSPaul E. McKenney bool lazy;
2692b2b00ddfSPaul E. McKenney struct rcu_data *rdp;
2693b2b00ddfSPaul E. McKenney bool was_alldone;
2694b2b00ddfSPaul E. McKenney
2695b2b00ddfSPaul E. McKenney /* Misaligned rcu_head! */
26964102adabSPaul E. McKenney WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
2697b2b00ddfSPaul E. McKenney
2698b2b00ddfSPaul E. McKenney if (debug_rcu_head_queue(head)) {
2699b2b00ddfSPaul E. McKenney /*
2700b2b00ddfSPaul E. McKenney * Probable double call_rcu(), so leak the callback.
2701b2b00ddfSPaul E. McKenney * Use rcu:rcu_callback trace event to find the previous
2702b2b00ddfSPaul E. McKenney * time callback was passed to call_rcu().
2703b2b00ddfSPaul E. McKenney */
2704b2b00ddfSPaul E. McKenney if (atomic_inc_return(&doublefrees) < 4) {
2705b2b00ddfSPaul E. McKenney pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
2706b2b00ddfSPaul E. McKenney mem_dump_obj(head);
2707b2b00ddfSPaul E. McKenney }
2708b2b00ddfSPaul E. McKenney WRITE_ONCE(head->func, rcu_leak_callback);
2709b2b00ddfSPaul E. McKenney return;
2710b2b00ddfSPaul E. McKenney }
2711b2b00ddfSPaul E. McKenney head->func = func;
2712b2b00ddfSPaul E. McKenney head->next = NULL;
2713b2b00ddfSPaul E. McKenney kasan_record_aux_stack_noalloc(head);
2714b2b00ddfSPaul E. McKenney local_irq_save(flags);
2715b2b00ddfSPaul E. McKenney rdp = this_cpu_ptr(&rcu_data);
2716b2b00ddfSPaul E. McKenney lazy = lazy_in && !rcu_async_should_hurry();
2717b2b00ddfSPaul E. McKenney
2718b2b00ddfSPaul E. McKenney /* Add the callback to our list. */
2719b2b00ddfSPaul E. McKenney if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) {
2720b2b00ddfSPaul E. McKenney // This can trigger due to call_rcu() from offline CPU:
2721b2b00ddfSPaul E. McKenney WARN_ON_ONCE(rcu_scheduler_active != RCU_SCHEDULER_INACTIVE);
2722b2b00ddfSPaul E. McKenney WARN_ON_ONCE(!rcu_is_watching());
2723b2b00ddfSPaul E. McKenney // Very early boot, before rcu_init(). Initialize if needed
2724b2b00ddfSPaul E. McKenney // and then drop through to queue the callback.
2725b2b00ddfSPaul E. McKenney if (rcu_segcblist_empty(&rdp->cblist))
2726b2b00ddfSPaul E. McKenney rcu_segcblist_init(&rdp->cblist);
2727b2b00ddfSPaul E. McKenney }
2728b2b00ddfSPaul E. McKenney
2729b2b00ddfSPaul E. McKenney check_cb_ovld(rdp);
2730b2b00ddfSPaul E. McKenney if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) {
2731b2b00ddfSPaul E. McKenney local_irq_restore(flags);
2732b2b00ddfSPaul E. McKenney return; // Enqueued onto ->nocb_bypass, so just leave.
2733a68a2bb2SPaul E. McKenney }
273445975c7dSPaul E. McKenney // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
2735a68a2bb2SPaul E. McKenney rcu_segcblist_enqueue(&rdp->cblist, head);
2736a68a2bb2SPaul E. McKenney if (__is_kvfree_rcu_offset((unsigned long)func))
2737a68a2bb2SPaul E. McKenney trace_rcu_kvfree_callback(rcu_state.name, head,
2738a68a2bb2SPaul E. McKenney (unsigned long)func,
273945975c7dSPaul E. McKenney rcu_segcblist_n_cbs(&rdp->cblist));
274045975c7dSPaul E. McKenney else
274145975c7dSPaul E. McKenney trace_rcu_callback(rcu_state.name, head,
27421893afd6SPaul E. McKenney rcu_segcblist_n_cbs(&rdp->cblist));
27431893afd6SPaul E. McKenney
27441893afd6SPaul E. McKenney trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
27451893afd6SPaul E. McKenney
27461893afd6SPaul E. McKenney /* Go handle any RCU core processing required. */
27471893afd6SPaul E. McKenney if (unlikely(rcu_rdp_is_offloaded(rdp))) {
27481893afd6SPaul E. McKenney __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
27491893afd6SPaul E. McKenney } else {
275027fdb35fSPaul E. McKenney __call_rcu_core(rdp, head, flags);
275145975c7dSPaul E. McKenney }
275245975c7dSPaul E. McKenney local_irq_restore(flags);
275345975c7dSPaul E. McKenney }
275445975c7dSPaul E. McKenney
275545975c7dSPaul E. McKenney #ifdef CONFIG_RCU_LAZY
275645975c7dSPaul E. McKenney /**
275745975c7dSPaul E. McKenney * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
275845975c7dSPaul E. McKenney * flush all lazy callbacks (including the new one) to the main ->cblist while
275945975c7dSPaul E. McKenney * doing so.
276045975c7dSPaul E. McKenney *
276145975c7dSPaul E. McKenney * @head: structure to be used for queueing the RCU updates.
2762a68a2bb2SPaul E. McKenney * @func: actual callback function to be invoked after the grace period
276345975c7dSPaul E. McKenney *
276445975c7dSPaul E. McKenney * The callback function will be invoked some time after a full grace
276545975c7dSPaul E. McKenney * period elapses, in other words after all pre-existing RCU read-side
276645975c7dSPaul E. McKenney * critical sections have completed.
276745975c7dSPaul E. McKenney *
276845975c7dSPaul E. McKenney * Use this API instead of call_rcu() if you don't want the callback to be
27693d3a0d1bSPaul E. McKenney * invoked after very long periods of time, which can happen on systems without
27703d3a0d1bSPaul E. McKenney * memory pressure and on systems which are lightly loaded or mostly idle.
27713d3a0d1bSPaul E. McKenney * This function will cause callbacks to be invoked sooner rather than later at the
27724102adabSPaul E. McKenney * expense of extra power. Other than that, this function is identical to, and
277345975c7dSPaul E. McKenney * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory
27744102adabSPaul E. McKenney * ordering and other functionality.
27754102adabSPaul E. McKenney */
27764102adabSPaul E. McKenney void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
27774102adabSPaul E. McKenney {
27784102adabSPaul E. McKenney return __call_rcu_common(head, func, false);
27794102adabSPaul E. McKenney }
27804102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(call_rcu_hurry);
27814102adabSPaul E. McKenney #endif
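
/*
 * For illustration only, a minimal hypothetical sketch: assuming a
 * structure with an embedded rcu_head and a reclaim callback (the names
 * struct foo, fp, and foo_reclaim() below are made up for this example),
 * a caller that needs timely reclamation would use:
 *
 *	call_rcu_hurry(&fp->rcu, foo_reclaim);
 *
 * rather than call_rcu(), trading extra power for having the callback
 * run soon after the grace period instead of being lazily batched.
 */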
27824102adabSPaul E. McKenney
27834102adabSPaul E. McKenney /**
27844102adabSPaul E. McKenney * call_rcu() - Queue an RCU callback for invocation after a grace period.
27854102adabSPaul E. McKenney * By default the callbacks are 'lazy' and are kept hidden from the main
27864102adabSPaul E. McKenney * ->cblist to prevent grace periods from starting too soon.
27871fe09ebeSPaul E. McKenney * If you desire grace periods to start very soon, use call_rcu_hurry().
27884102adabSPaul E. McKenney *
27894102adabSPaul E. McKenney * @head: structure to be used for queueing the RCU updates.
27904102adabSPaul E. McKenney * @func: actual callback function to be invoked after the grace period
27914102adabSPaul E. McKenney *
27924102adabSPaul E. McKenney * The callback function will be invoked some time after a full grace
27934102adabSPaul E. McKenney * period elapses, in other words after all pre-existing RCU read-side
27944102adabSPaul E. McKenney * critical sections have completed. However, the callback function
27954102adabSPaul E. McKenney * might well execute concurrently with RCU read-side critical sections
27964102adabSPaul E. McKenney * that started after call_rcu() was invoked.
27974102adabSPaul E. McKenney *
27984102adabSPaul E. McKenney * RCU read-side critical sections are delimited by rcu_read_lock()
2799d818cc76SZqiang * and rcu_read_unlock(), and may be nested. In addition, but only in
28004102adabSPaul E. McKenney * v5.0 and later, regions of code across which interrupts, preemption,
28014102adabSPaul E. McKenney * or softirqs have been disabled also serve as RCU read-side critical
28024102adabSPaul E. McKenney * sections. This includes hardware interrupt handlers, softirq handlers,
28034102adabSPaul E. McKenney * and NMI handlers.
28044102adabSPaul E. McKenney *
28054102adabSPaul E. McKenney * Note that all CPUs must agree that the grace period extended beyond
28064102adabSPaul E. McKenney * all pre-existing RCU read-side critical sections. On systems with more
28074102adabSPaul E. McKenney * than one CPU, this means that when "func()" is invoked, each CPU is
28084102adabSPaul E. McKenney * guaranteed to have executed a full memory barrier since the end of its
28094102adabSPaul E. McKenney * last RCU read-side critical section whose beginning preceded the call
28104102adabSPaul E. McKenney * to call_rcu(). It also means that each CPU executing an RCU read-side
28114102adabSPaul E. McKenney * critical section that continues beyond the start of "func()" must have
28124102adabSPaul E. McKenney * executed a memory barrier after the call_rcu() but before the beginning
28134102adabSPaul E. McKenney * of that RCU read-side critical section. Note that these guarantees
28144102adabSPaul E. McKenney * include CPUs that are offline, idle, or executing in user mode, as
28154102adabSPaul E. McKenney * well as CPUs that are executing in the kernel.
28164102adabSPaul E. McKenney *
28174102adabSPaul E. McKenney * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
28184102adabSPaul E. McKenney * resulting RCU callback function "func()", then both CPU A and CPU B are
28194102adabSPaul E. McKenney * guaranteed to execute a full memory barrier during the time interval
28204102adabSPaul E. McKenney * between the call to call_rcu() and the invocation of "func()" -- even
28214102adabSPaul E. McKenney * if CPU A and CPU B are the same CPU (but again only if the system has
28224102adabSPaul E. McKenney * more than one CPU).
28234102adabSPaul E. McKenney *
28244102adabSPaul E. McKenney * Implementation of these memory-ordering guarantees is described here:
28254102adabSPaul E. McKenney * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
28264102adabSPaul E. McKenney */
28274102adabSPaul E. McKenney void call_rcu(struct rcu_head *head, rcu_callback_t func)
28284102adabSPaul E. McKenney {
28294102adabSPaul E. McKenney return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
28304102adabSPaul E. McKenney }
28314102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(call_rcu);
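
/*
 * For illustration only: a minimal sketch of the usual call_rcu() usage
 * pattern.  The names struct foo, foo_reclaim(), and foo_release() are
 * made up for this example and do not exist elsewhere in the kernel:
 *
 *	struct foo {
 *		int data;
 *		struct rcu_head rcu;
 *	};
 *
 *	static void foo_reclaim(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct foo, rcu));
 *	}
 *
 *	static void foo_release(struct foo *fp)
 *	{
 *		// The caller must already have removed all globally
 *		// visible references to fp, for example by replacing
 *		// an RCU-protected pointer via rcu_assign_pointer().
 *		call_rcu(&fp->rcu, foo_reclaim);
 *	}
 */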
28324102adabSPaul E. McKenney
28334102adabSPaul E. McKenney /* Maximum number of jiffies to wait before draining a batch. */
28344102adabSPaul E. McKenney #define KFREE_DRAIN_JIFFIES (5 * HZ)
28354102adabSPaul E. McKenney #define KFREE_N_BATCHES 2
283645975c7dSPaul E. McKenney #define FREE_N_CHANNELS 2
28374102adabSPaul E. McKenney
2838a35d1690SByungchul Park /**
2839a35d1690SByungchul Park * struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers
284051824b78SUladzislau Rezki (Sony) * @list: List node. All blocks are linked between each other
28410392bebeSJoel Fernandes (Google) * @gp_snap: Snapshot of RCU state for objects placed to this bulk
28425f3c8d62SUladzislau Rezki (Sony) * @nr_records: Number of active pointers in the array
28430392bebeSJoel Fernandes (Google) * @records: Array of the kvfree_rcu() pointers
284434c88174SUladzislau Rezki (Sony) */
28455f3c8d62SUladzislau Rezki (Sony) struct kvfree_rcu_bulk_data {
284634c88174SUladzislau Rezki (Sony) struct list_head list;
284734c88174SUladzislau Rezki (Sony) struct rcu_gp_oldstate gp_snap;
28485f3c8d62SUladzislau Rezki (Sony) unsigned long nr_records;
284934c88174SUladzislau Rezki (Sony) void *records[];
28505f3c8d62SUladzislau Rezki (Sony) };
285134c88174SUladzislau Rezki (Sony)
28525f3c8d62SUladzislau Rezki (Sony) /*
28533af84862SUladzislau Rezki (Sony) * This macro defines how many entries the "records" array
285434c88174SUladzislau Rezki (Sony) * will contain. It is chosen so that the size of the
28550392bebeSJoel Fernandes (Google) * kvfree_rcu_bulk_data structure is exactly one page.
285634c88174SUladzislau Rezki (Sony) */
285734c88174SUladzislau Rezki (Sony) #define KVFREE_BULK_MAX_ENTR \
285834c88174SUladzislau Rezki (Sony) ((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
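
/*
 * Worked example (illustrative only, assuming a 64-bit kernel with
 * 4 KiB pages): the fixed part of struct kvfree_rcu_bulk_data is a
 * list_head (16 bytes), an rcu_gp_oldstate (16 bytes), and nr_records
 * (8 bytes), or 40 bytes in total, so:
 *
 *	KVFREE_BULK_MAX_ENTR = (4096 - 40) / 8 = 507
 *
 * pointers per page.  Other page sizes or pointer widths change the
 * result accordingly.
 */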
28595f3c8d62SUladzislau Rezki (Sony)
286034c88174SUladzislau Rezki (Sony) /**
28615f3c8d62SUladzislau Rezki (Sony) * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
28625f3c8d62SUladzislau Rezki (Sony) * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
286334c88174SUladzislau Rezki (Sony) * @head_free: List of kfree_rcu() objects waiting for a grace period
28640392bebeSJoel Fernandes (Google) * @head_free_gp_snap: Grace-period snapshot to check for attempted premature frees.
28650392bebeSJoel Fernandes (Google) * @bulk_head_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
28660392bebeSJoel Fernandes (Google) * @krcp: Pointer to @kfree_rcu_cpu structure
28670392bebeSJoel Fernandes (Google) */
28685f3c8d62SUladzislau Rezki (Sony)
28690392bebeSJoel Fernandes (Google) struct kfree_rcu_cpu_work {
28700392bebeSJoel Fernandes (Google) struct rcu_work rcu_work;
28710392bebeSJoel Fernandes (Google) struct rcu_head *head_free;
28720392bebeSJoel Fernandes (Google) struct rcu_gp_oldstate head_free_gp_snap;
28730392bebeSJoel Fernandes (Google) struct list_head bulk_head_free[FREE_N_CHANNELS];
28740392bebeSJoel Fernandes (Google) struct kfree_rcu_cpu *krcp;
28755f3c8d62SUladzislau Rezki (Sony) };
28760392bebeSJoel Fernandes (Google)
28770392bebeSJoel Fernandes (Google) /**
2878a35d1690SByungchul Park * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
2879a35d1690SByungchul Park * @head: List of kfree_rcu() objects not yet waiting for a grace period
2880a35d1690SByungchul Park * @head_gp_snap: Snapshot of RCU state for objects placed to "@head"
2881a35d1690SByungchul Park * @bulk_head: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
28825f3c8d62SUladzislau Rezki (Sony) * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
28830392bebeSJoel Fernandes (Google) * @lock: Synchronize access to this structure
2884a35d1690SByungchul Park * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
2885a35d1690SByungchul Park * @initialized: The @rcu_work fields have been initialized
288669f08d39SSebastian Andrzej Siewior * @head_count: Number of objects in rcu_head singular list
28878e11690dSMauro Carvalho Chehab * @bulk_count: Number of objects in bulk-list
288872a2fbdaSMauro Carvalho Chehab * @bkvcache:
288972a2fbdaSMauro Carvalho Chehab * A simple cache list that contains objects kept for reuse.
289072a2fbdaSMauro Carvalho Chehab * To save some per-CPU space, the list is singly linked.
289172a2fbdaSMauro Carvalho Chehab * Even though it is lockless, accesses to it must be protected by the
289272a2fbdaSMauro Carvalho Chehab * per-cpu lock.
289356292e86SUladzislau Rezki (Sony) * @page_cache_work: A work to refill the cache when it is empty
2894d0bfa8b3SZhang Qiang * @backoff_page_cache_fill: Delay cache refills
289556292e86SUladzislau Rezki (Sony) * @work_in_progress: Indicates that page_cache_work is running
289656292e86SUladzislau Rezki (Sony) * @hrtimer: A hrtimer for scheduling a page_cache_work
289772a2fbdaSMauro Carvalho Chehab * @nr_bkv_objs: number of allocated objects at @bkvcache.
2898a35d1690SByungchul Park *
2899a35d1690SByungchul Park * This is a per-CPU structure. The reason that it is not included in
2900a35d1690SByungchul Park * the rcu_data structure is to permit this code to be extracted from
2901a35d1690SByungchul Park * the RCU files. Such extraction could allow further optimization of
2902a35d1690SByungchul Park * the interactions with the slab allocators.
2903495aa969SAndreea-Cristina Bernat */
2904a35d1690SByungchul Park struct kfree_rcu_cpu {
2905a35d1690SByungchul Park // Objects queued on a linked list
29065f3c8d62SUladzislau Rezki (Sony) // through their rcu_head structures.
29070392bebeSJoel Fernandes (Google) struct rcu_head *head;
29088ac88f71SJoel Fernandes (Google) unsigned long head_gp_snap;
2909a35d1690SByungchul Park atomic_t head_count;
2910a35d1690SByungchul Park
29119154244cSJoel Fernandes (Google) // Objects queued on a bulk-list.
291256292e86SUladzislau Rezki (Sony) struct list_head bulk_head[FREE_N_CHANNELS];
2913d0bfa8b3SZhang Qiang atomic_t bulk_count[FREE_N_CHANNELS];
2914d0bfa8b3SZhang Qiang
291556292e86SUladzislau Rezki (Sony) struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
291656292e86SUladzislau Rezki (Sony) raw_spinlock_t lock;
291756292e86SUladzislau Rezki (Sony) struct delayed_work monitor_work;
291853c72b59SUladzislau Rezki (Sony) bool initialized;
291953c72b59SUladzislau Rezki (Sony)
2920a35d1690SByungchul Park struct delayed_work page_cache_work;
2921a35d1690SByungchul Park atomic_t backoff_page_cache_fill;
292269f08d39SSebastian Andrzej Siewior atomic_t work_in_progress;
292369f08d39SSebastian Andrzej Siewior struct hrtimer hrtimer;
292469f08d39SSebastian Andrzej Siewior
2925a35d1690SByungchul Park struct llist_head bkvcache;
292634c88174SUladzislau Rezki (Sony) int nr_bkv_objs;
29275f3c8d62SUladzislau Rezki (Sony) };
292834c88174SUladzislau Rezki (Sony)
292934c88174SUladzislau Rezki (Sony) static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {
2930446044ebSJoel Fernandes (Google) .lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock),
2931446044ebSJoel Fernandes (Google) };
2932446044ebSJoel Fernandes (Google)
2933446044ebSJoel Fernandes (Google) static __always_inline void
293434c88174SUladzislau Rezki (Sony) debug_rcu_bhead_unqueue(struct kvfree_rcu_bulk_data *bhead)
293534c88174SUladzislau Rezki (Sony) {
293634c88174SUladzislau Rezki (Sony) #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
2937952371d6SUladzislau Rezki (Sony) int i;
2938952371d6SUladzislau Rezki (Sony)
2939952371d6SUladzislau Rezki (Sony) for (i = 0; i < bhead->nr_records; i++)
2940952371d6SUladzislau Rezki (Sony) debug_rcu_head_unqueue((struct rcu_head *)(bhead->records[i]));
2941952371d6SUladzislau Rezki (Sony) #endif
2942952371d6SUladzislau Rezki (Sony) }
2943952371d6SUladzislau Rezki (Sony)
2944952371d6SUladzislau Rezki (Sony) static inline struct kfree_rcu_cpu *
2945952371d6SUladzislau Rezki (Sony) krc_this_cpu_lock(unsigned long *flags)
2946952371d6SUladzislau Rezki (Sony) {
2947952371d6SUladzislau Rezki (Sony) struct kfree_rcu_cpu *krcp;
2948952371d6SUladzislau Rezki (Sony)
2949952371d6SUladzislau Rezki (Sony) local_irq_save(*flags); // For safely calling this_cpu_ptr().
2950952371d6SUladzislau Rezki (Sony) krcp = this_cpu_ptr(&krc);
2951952371d6SUladzislau Rezki (Sony) raw_spin_lock(&krcp->lock);
29527ffc9ec8SPaul E. McKenney
2953952371d6SUladzislau Rezki (Sony) return krcp;
2954952371d6SUladzislau Rezki (Sony) }
29555f3c8d62SUladzislau Rezki (Sony)
295653c72b59SUladzislau Rezki (Sony) static inline void
295753c72b59SUladzislau Rezki (Sony) krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags)
295853c72b59SUladzislau Rezki (Sony) {
295953c72b59SUladzislau Rezki (Sony) raw_spin_unlock_irqrestore(&krcp->lock, flags);
296053c72b59SUladzislau Rezki (Sony) }
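
/*
 * Typical usage pattern of the two helpers above (a sketch only; the
 * placeholder body stands in for real callers in this file such as
 * add_ptr_to_bulk_krc_lock()):
 *
 *	unsigned long flags;
 *	struct kfree_rcu_cpu *krcp;
 *
 *	krcp = krc_this_cpu_lock(&flags);
 *	// ... access krcp fields with irqs off and ->lock held ...
 *	krc_this_cpu_unlock(krcp, flags);
 */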
2961ac7625ebSUladzislau Rezki (Sony)
29625f3c8d62SUladzislau Rezki (Sony) static inline struct kvfree_rcu_bulk_data *
296353c72b59SUladzislau Rezki (Sony) get_cached_bnode(struct kfree_rcu_cpu *krcp)
296453c72b59SUladzislau Rezki (Sony) {
296553c72b59SUladzislau Rezki (Sony) if (!krcp->nr_bkv_objs)
296653c72b59SUladzislau Rezki (Sony) return NULL;
296753c72b59SUladzislau Rezki (Sony)
29685f3c8d62SUladzislau Rezki (Sony) WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs - 1);
296953c72b59SUladzislau Rezki (Sony) return (struct kvfree_rcu_bulk_data *)
297053c72b59SUladzislau Rezki (Sony) llist_del_first(&krcp->bkvcache);
297153c72b59SUladzislau Rezki (Sony) }
297253c72b59SUladzislau Rezki (Sony)
297353c72b59SUladzislau Rezki (Sony) static inline bool
297453c72b59SUladzislau Rezki (Sony) put_cached_bnode(struct kfree_rcu_cpu *krcp,
2975ac7625ebSUladzislau Rezki (Sony) struct kvfree_rcu_bulk_data *bnode)
297653c72b59SUladzislau Rezki (Sony) {
297753c72b59SUladzislau Rezki (Sony) // Check the limit.
297853c72b59SUladzislau Rezki (Sony) if (krcp->nr_bkv_objs >= rcu_min_cached_objs)
2979d0bfa8b3SZhang Qiang return false;
2980d0bfa8b3SZhang Qiang
2981d0bfa8b3SZhang Qiang llist_add((struct llist_node *) bnode, &krcp->bkvcache);
2982d0bfa8b3SZhang Qiang WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs + 1);
2983d0bfa8b3SZhang Qiang return true;
2984d0bfa8b3SZhang Qiang }
2985d0bfa8b3SZhang Qiang
2986d0bfa8b3SZhang Qiang static int
2987d0bfa8b3SZhang Qiang drain_page_cache(struct kfree_rcu_cpu *krcp)
2988ac7625ebSUladzislau Rezki (Sony) {
2989d0bfa8b3SZhang Qiang unsigned long flags;
2990d0bfa8b3SZhang Qiang struct llist_node *page_list, *pos, *n;
2991d0bfa8b3SZhang Qiang int freed = 0;
2992d0bfa8b3SZhang Qiang
2993d0bfa8b3SZhang Qiang if (!rcu_min_cached_objs)
2994d0bfa8b3SZhang Qiang return 0;
2995d0bfa8b3SZhang Qiang
2996d0bfa8b3SZhang Qiang raw_spin_lock_irqsave(&krcp->lock, flags);
299753c72b59SUladzislau Rezki (Sony) page_list = llist_del_all(&krcp->bkvcache);
299853c72b59SUladzislau Rezki (Sony) WRITE_ONCE(krcp->nr_bkv_objs, 0);
29994102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&krcp->lock, flags);
3000a35d1690SByungchul Park
3001277ffe1bSZhouyi Zhou llist_for_each_safe(pos, n, page_list) {
3002a35d1690SByungchul Park free_page((unsigned long)pos);
3003a35d1690SByungchul Park freed++;
3004a35d1690SByungchul Park }
3005a35d1690SByungchul Park
30065f3c8d62SUladzislau Rezki (Sony) return freed;
3007a35d1690SByungchul Park }
3008a35d1690SByungchul Park
30090392bebeSJoel Fernandes (Google) static void
30105f3c8d62SUladzislau Rezki (Sony) kvfree_rcu_bulk(struct kfree_rcu_cpu *krcp,
3011a35d1690SByungchul Park struct kvfree_rcu_bulk_data *bnode, int idx)
30120392bebeSJoel Fernandes (Google) {
30130392bebeSJoel Fernandes (Google) unsigned long flags;
30140392bebeSJoel Fernandes (Google) int i;
30155f3c8d62SUladzislau Rezki (Sony)
30168ac88f71SJoel Fernandes (Google) if (!WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&bnode->gp_snap))) {
30175f3c8d62SUladzislau Rezki (Sony) debug_rcu_bhead_unqueue(bnode);
30185f3c8d62SUladzislau Rezki (Sony) rcu_lock_acquire(&rcu_callback_map);
30195f3c8d62SUladzislau Rezki (Sony) if (idx == 0) { // kmalloc() / kfree().
30205f3c8d62SUladzislau Rezki (Sony) trace_rcu_invoke_kfree_bulk_callback(
30215f3c8d62SUladzislau Rezki (Sony) rcu_state.name, bnode->nr_records,
30225f3c8d62SUladzislau Rezki (Sony) bnode->records);
30235f3c8d62SUladzislau Rezki (Sony)
30240392bebeSJoel Fernandes (Google) kfree_bulk(bnode->nr_records, bnode->records);
30250392bebeSJoel Fernandes (Google) } else { // vmalloc() / vfree().
30268ac88f71SJoel Fernandes (Google) for (i = 0; i < bnode->nr_records; i++) {
3027a35d1690SByungchul Park trace_rcu_invoke_kvfree_callback(
3028277ffe1bSZhouyi Zhou rcu_state.name, bnode->records[i], 0);
30295f3c8d62SUladzislau Rezki (Sony)
30305f3c8d62SUladzislau Rezki (Sony) vfree(bnode->records[i]);
30315f3c8d62SUladzislau Rezki (Sony) }
30325f3c8d62SUladzislau Rezki (Sony) }
303334c88174SUladzislau Rezki (Sony) rcu_lock_release(&rcu_callback_map);
303434c88174SUladzislau Rezki (Sony) }
30355f3c8d62SUladzislau Rezki (Sony)
30365f3c8d62SUladzislau Rezki (Sony) raw_spin_lock_irqsave(&krcp->lock, flags);
30375f3c8d62SUladzislau Rezki (Sony) if (put_cached_bnode(krcp, bnode))
30385f3c8d62SUladzislau Rezki (Sony) bnode = NULL;
303961370792SUladzislau Rezki (Sony) raw_spin_unlock_irqrestore(&krcp->lock, flags);
30405f3c8d62SUladzislau Rezki (Sony)
30415f3c8d62SUladzislau Rezki (Sony) if (bnode)
30425f3c8d62SUladzislau Rezki (Sony) free_page((unsigned long) bnode);
30435f3c8d62SUladzislau Rezki (Sony)
3044c408b215SUladzislau Rezki (Sony) cond_resched_tasks_rcu_qs();
30455f3c8d62SUladzislau Rezki (Sony) }
30465f3c8d62SUladzislau Rezki (Sony)
30475f3c8d62SUladzislau Rezki (Sony) static void
30485f3c8d62SUladzislau Rezki (Sony) kvfree_rcu_list(struct rcu_head *head)
30495f3c8d62SUladzislau Rezki (Sony) {
30505f3c8d62SUladzislau Rezki (Sony) struct rcu_head *next;
305134c88174SUladzislau Rezki (Sony)
305234c88174SUladzislau Rezki (Sony) for (; head; head = next) {
305356292e86SUladzislau Rezki (Sony) void *ptr = (void *) head->func;
30545f3c8d62SUladzislau Rezki (Sony) unsigned long offset = (void *) head - ptr;
30555f3c8d62SUladzislau Rezki (Sony)
305656292e86SUladzislau Rezki (Sony) next = head->next;
305753c72b59SUladzislau Rezki (Sony) debug_rcu_head_unqueue((struct rcu_head *)ptr);
30585f3c8d62SUladzislau Rezki (Sony) rcu_lock_acquire(&rcu_callback_map);
30595f3c8d62SUladzislau Rezki (Sony) trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset);
306034c88174SUladzislau Rezki (Sony)
306134c88174SUladzislau Rezki (Sony) if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
306234c88174SUladzislau Rezki (Sony) kvfree(ptr);
30635f3c8d62SUladzislau Rezki (Sony)
306434c88174SUladzislau Rezki (Sony) rcu_lock_release(&rcu_callback_map);
306534c88174SUladzislau Rezki (Sony) cond_resched_tasks_rcu_qs();
3066d8628f35SUladzislau Rezki (Sony) }
3067d8628f35SUladzislau Rezki (Sony) }
3068d8628f35SUladzislau Rezki (Sony)
3069d8628f35SUladzislau Rezki (Sony) /*
3070d8628f35SUladzislau Rezki (Sony) * This function is invoked in workqueue context after a grace period.
307134c88174SUladzislau Rezki (Sony) * It frees all the objects queued on ->bulk_head_free or ->head_free.
3072a35d1690SByungchul Park */
307377a40f97SJoel Fernandes (Google) static void kfree_rcu_work(struct work_struct *work)
3074446044ebSJoel Fernandes (Google) {
307577a40f97SJoel Fernandes (Google) unsigned long flags;
3076a35d1690SByungchul Park struct kvfree_rcu_bulk_data *bnode, *n;
3077446044ebSJoel Fernandes (Google) struct list_head bulk_head[FREE_N_CHANNELS];
307877a40f97SJoel Fernandes (Google) struct rcu_head *head;
3079c408b215SUladzislau Rezki (Sony) struct kfree_rcu_cpu *krcp;
308077a40f97SJoel Fernandes (Google) struct kfree_rcu_cpu_work *krwp;
3081c408b215SUladzislau Rezki (Sony) struct rcu_gp_oldstate head_gp_snap;
30825f3c8d62SUladzislau Rezki (Sony) int i;
308377a40f97SJoel Fernandes (Google)
308477a40f97SJoel Fernandes (Google) krwp = container_of(to_rcu_work(work),
3085a35d1690SByungchul Park struct kfree_rcu_cpu_work, rcu_work);
3086a35d1690SByungchul Park krcp = krwp->krcp;
3087a35d1690SByungchul Park
3088a35d1690SByungchul Park raw_spin_lock_irqsave(&krcp->lock, flags);
308982d26c36SJoel Fernandes (Google) // Channels 1 and 2.
309082d26c36SJoel Fernandes (Google) for (i = 0; i < FREE_N_CHANNELS; i++)
309182d26c36SJoel Fernandes (Google) list_replace_init(&krwp->bulk_head_free[i], &bulk_head[i]);
309282d26c36SJoel Fernandes (Google)
309382d26c36SJoel Fernandes (Google) // Channel 3.
309482d26c36SJoel Fernandes (Google) head = krwp->head_free;
309582d26c36SJoel Fernandes (Google) krwp->head_free = NULL;
309682d26c36SJoel Fernandes (Google) head_gp_snap = krwp->head_free_gp_snap;
309782d26c36SJoel Fernandes (Google) raw_spin_unlock_irqrestore(&krcp->lock, flags);
309882d26c36SJoel Fernandes (Google)
309982d26c36SJoel Fernandes (Google) // Handle the first two channels.
310082d26c36SJoel Fernandes (Google) for (i = 0; i < FREE_N_CHANNELS; i++) {
310151824b78SUladzislau Rezki (Sony) // Start from the tail page, since a GP has most likely already elapsed for it.
310251824b78SUladzislau Rezki (Sony) list_for_each_entry_safe(bnode, n, &bulk_head[i], list)
310351824b78SUladzislau Rezki (Sony) kvfree_rcu_bulk(krcp, bnode, i);
310451824b78SUladzislau Rezki (Sony) }
310551824b78SUladzislau Rezki (Sony)
310651824b78SUladzislau Rezki (Sony) /*
310751824b78SUladzislau Rezki (Sony) * This is used when the "bulk" path cannot be used for the
310851824b78SUladzislau Rezki (Sony) * double-argument form of kvfree_rcu(). This happens when the
310951824b78SUladzislau Rezki (Sony) * page-cache is empty, which means that objects are instead
311051824b78SUladzislau Rezki (Sony) * queued on a linked list through their rcu_head structures.
311151824b78SUladzislau Rezki (Sony) * This list is named "Channel 3".
311251824b78SUladzislau Rezki (Sony) */
311351824b78SUladzislau Rezki (Sony) if (head && !WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&head_gp_snap)))
311451824b78SUladzislau Rezki (Sony) kvfree_rcu_list(head);
311551824b78SUladzislau Rezki (Sony) }
3116a35d1690SByungchul Park
3117a78d4a2aSUladzislau Rezki (Sony) static bool
3118a35d1690SByungchul Park need_offload_krc(struct kfree_rcu_cpu *krcp)
3119a78d4a2aSUladzislau Rezki (Sony) {
3120a35d1690SByungchul Park int i;
3121a78d4a2aSUladzislau Rezki (Sony)
3122a78d4a2aSUladzislau Rezki (Sony) for (i = 0; i < FREE_N_CHANNELS; i++)
3123a78d4a2aSUladzislau Rezki (Sony) if (!list_empty(&krcp->bulk_head[i]))
31245f3c8d62SUladzislau Rezki (Sony) return true;
31250392bebeSJoel Fernandes (Google)
3126a78d4a2aSUladzislau Rezki (Sony) return !!READ_ONCE(krcp->head);
312734c88174SUladzislau Rezki (Sony) }
3128a78d4a2aSUladzislau Rezki (Sony)
312934c88174SUladzislau Rezki (Sony) static bool
3130a78d4a2aSUladzislau Rezki (Sony) need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp)
313134c88174SUladzislau Rezki (Sony) {
3132a78d4a2aSUladzislau Rezki (Sony) int i;
3133a78d4a2aSUladzislau Rezki (Sony)
3134a78d4a2aSUladzislau Rezki (Sony) for (i = 0; i < FREE_N_CHANNELS; i++)
3135a78d4a2aSUladzislau Rezki (Sony) if (!list_empty(&krwp->bulk_head_free[i]))
3136a78d4a2aSUladzislau Rezki (Sony) return true;
31375f3c8d62SUladzislau Rezki (Sony)
31385f3c8d62SUladzislau Rezki (Sony) return !!krwp->head_free;
313934c88174SUladzislau Rezki (Sony) }
3140d8628f35SUladzislau Rezki (Sony)
3141d8628f35SUladzislau Rezki (Sony) static int krc_count(struct kfree_rcu_cpu *krcp)
31425f3c8d62SUladzislau Rezki (Sony) {
31435f3c8d62SUladzislau Rezki (Sony) int sum = atomic_read(&krcp->head_count);
31445f3c8d62SUladzislau Rezki (Sony) int i;
31455f3c8d62SUladzislau Rezki (Sony)
31465f3c8d62SUladzislau Rezki (Sony) for (i = 0; i < FREE_N_CHANNELS; i++)
31470392bebeSJoel Fernandes (Google) sum += atomic_read(&krcp->bulk_count[i]);
3148a35d1690SByungchul Park
3149d8628f35SUladzislau Rezki (Sony) return sum;
3150d8628f35SUladzislau Rezki (Sony) }
315134c88174SUladzislau Rezki (Sony)
31520392bebeSJoel Fernandes (Google) static void
3153a35d1690SByungchul Park __schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
315434c88174SUladzislau Rezki (Sony) {
315534c88174SUladzislau Rezki (Sony) long delay, delay_left;
3156a6a82ce1SJoel Fernandes (Google)
31579154244cSJoel Fernandes (Google) delay = krc_count(krcp) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
3158a78d4a2aSUladzislau Rezki (Sony) if (delayed_work_pending(&krcp->monitor_work)) {
3159a78d4a2aSUladzislau Rezki (Sony) delay_left = krcp->monitor_work.timer.expires - jiffies;
3160a78d4a2aSUladzislau Rezki (Sony) if (delay < delay_left)
3161a78d4a2aSUladzislau Rezki (Sony) mod_delayed_work(system_wq, &krcp->monitor_work, delay);
3162a78d4a2aSUladzislau Rezki (Sony) return;
31630392bebeSJoel Fernandes (Google) }
316434c88174SUladzislau Rezki (Sony) queue_delayed_work(system_wq, &krcp->monitor_work, delay);
3165594aa597SUladzislau Rezki (Sony) }
3166594aa597SUladzislau Rezki (Sony)
3167a78d4a2aSUladzislau Rezki (Sony) static void
3168a78d4a2aSUladzislau Rezki (Sony) schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
3169a78d4a2aSUladzislau Rezki (Sony) {
3170a78d4a2aSUladzislau Rezki (Sony) unsigned long flags;
3171a78d4a2aSUladzislau Rezki (Sony)
317282d26c36SJoel Fernandes (Google) raw_spin_lock_irqsave(&krcp->lock, flags);
317351824b78SUladzislau Rezki (Sony) __schedule_delayed_monitor_work(krcp);
3174a78d4a2aSUladzislau Rezki (Sony) raw_spin_unlock_irqrestore(&krcp->lock, flags);
31758ac88f71SJoel Fernandes (Google) }
3176a35d1690SByungchul Park
3177a35d1690SByungchul Park static void
317856292e86SUladzislau Rezki (Sony) kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
317956292e86SUladzislau Rezki (Sony) {
318056292e86SUladzislau Rezki (Sony) struct list_head bulk_ready[FREE_N_CHANNELS];
318156292e86SUladzislau Rezki (Sony) struct kvfree_rcu_bulk_data *bnode, *n;
318256292e86SUladzislau Rezki (Sony) struct rcu_head *head_ready = NULL;
318356292e86SUladzislau Rezki (Sony) unsigned long flags;
3184d0bfa8b3SZhang Qiang int i;
318556292e86SUladzislau Rezki (Sony)
318656292e86SUladzislau Rezki (Sony) raw_spin_lock_irqsave(&krcp->lock, flags);
318756292e86SUladzislau Rezki (Sony) for (i = 0; i < FREE_N_CHANNELS; i++) {
318856292e86SUladzislau Rezki (Sony) INIT_LIST_HEAD(&bulk_ready[i]);
318956292e86SUladzislau Rezki (Sony)
319056292e86SUladzislau Rezki (Sony) list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) {
319156292e86SUladzislau Rezki (Sony) if (!poll_state_synchronize_rcu_full(&bnode->gp_snap))
319256292e86SUladzislau Rezki (Sony) break;
3193d0bfa8b3SZhang Qiang
319456292e86SUladzislau Rezki (Sony) atomic_sub(bnode->nr_records, &krcp->bulk_count[i]);
3195d0bfa8b3SZhang Qiang list_move(&bnode->list, &bulk_ready[i]);
319656292e86SUladzislau Rezki (Sony) }
319756292e86SUladzislau Rezki (Sony) }
319856292e86SUladzislau Rezki (Sony)
3199d0bfa8b3SZhang Qiang if (krcp->head && poll_state_synchronize_rcu(krcp->head_gp_snap)) {
3200d0bfa8b3SZhang Qiang head_ready = krcp->head;
3201d0bfa8b3SZhang Qiang atomic_set(&krcp->head_count, 0);
3202d0bfa8b3SZhang Qiang WRITE_ONCE(krcp->head, NULL);
320356292e86SUladzislau Rezki (Sony) }
3204ee6ddf58SUladzislau Rezki (Sony) raw_spin_unlock_irqrestore(&krcp->lock, flags);
320556292e86SUladzislau Rezki (Sony)
3206093590c1SMichal Hocko for (i = 0; i < FREE_N_CHANNELS; i++) {
3207093590c1SMichal Hocko list_for_each_entry_safe(bnode, n, &bulk_ready[i], list)
3208093590c1SMichal Hocko kvfree_rcu_bulk(krcp, bnode, i);
320956292e86SUladzislau Rezki (Sony) }
321056292e86SUladzislau Rezki (Sony)
321156292e86SUladzislau Rezki (Sony) if (head_ready)
321256292e86SUladzislau Rezki (Sony) kvfree_rcu_list(head_ready);
321356292e86SUladzislau Rezki (Sony) }
321456292e86SUladzislau Rezki (Sony)
321556292e86SUladzislau Rezki (Sony) /*
321656292e86SUladzislau Rezki (Sony) * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
321756292e86SUladzislau Rezki (Sony) */
321856292e86SUladzislau Rezki (Sony) static void kfree_rcu_monitor(struct work_struct *work)
321956292e86SUladzislau Rezki (Sony) {
3220d0bfa8b3SZhang Qiang struct kfree_rcu_cpu *krcp = container_of(work,
322156292e86SUladzislau Rezki (Sony) struct kfree_rcu_cpu, monitor_work.work);
322256292e86SUladzislau Rezki (Sony) unsigned long flags;
322356292e86SUladzislau Rezki (Sony) int i, j;
322456292e86SUladzislau Rezki (Sony)
322556292e86SUladzislau Rezki (Sony) // Drain ready for reclaim.
322656292e86SUladzislau Rezki (Sony) kvfree_rcu_drain_ready(krcp);
322756292e86SUladzislau Rezki (Sony)
3228d0bfa8b3SZhang Qiang raw_spin_lock_irqsave(&krcp->lock, flags);
3229d0bfa8b3SZhang Qiang
3230d0bfa8b3SZhang Qiang // Attempt to start a new batch.
3231d0bfa8b3SZhang Qiang for (i = 0; i < KFREE_N_BATCHES; i++) {
3232d0bfa8b3SZhang Qiang struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]);
3233d0bfa8b3SZhang Qiang
323456292e86SUladzislau Rezki (Sony) // Try to detach bulk_head or head and attach it, but only when
323556292e86SUladzislau Rezki (Sony) // all channels are free. A channel that is not free means that
323656292e86SUladzislau Rezki (Sony) // krwp still has in-flight RCU work handling its previous batch.
323756292e86SUladzislau Rezki (Sony) if (need_wait_for_krwp_work(krwp))
3238d0bfa8b3SZhang Qiang continue;
323956292e86SUladzislau Rezki (Sony)
3240148e3731SUladzislau Rezki (Sony) // kvfree_rcu_drain_ready() might handle this krcp, if so give up.
3241148e3731SUladzislau Rezki (Sony) if (need_offload_krc(krcp)) {
3242148e3731SUladzislau Rezki (Sony) // Channel 1 corresponds to the SLAB-pointer bulk path.
3243148e3731SUladzislau Rezki (Sony) // Channel 2 corresponds to vmalloc-pointer bulk path.
3244148e3731SUladzislau Rezki (Sony) for (j = 0; j < FREE_N_CHANNELS; j++) {
3245148e3731SUladzislau Rezki (Sony) if (list_empty(&krwp->bulk_head_free[j])) {
324634c88174SUladzislau Rezki (Sony) atomic_set(&krcp->bulk_count[j], 0);
3247148e3731SUladzislau Rezki (Sony) list_replace_init(&krcp->bulk_head[j],
3248148e3731SUladzislau Rezki (Sony) &krwp->bulk_head_free[j]);
324934c88174SUladzislau Rezki (Sony) }
32505f3c8d62SUladzislau Rezki (Sony) }
32515f3c8d62SUladzislau Rezki (Sony)
325234c88174SUladzislau Rezki (Sony) // Channel 3 corresponds to both SLAB and vmalloc
3253148e3731SUladzislau Rezki (Sony) // objects queued on the linked list.
3254148e3731SUladzislau Rezki (Sony) if (!krwp->head_free) {
325534c88174SUladzislau Rezki (Sony) krwp->head_free = krcp->head;
325634c88174SUladzislau Rezki (Sony) get_state_synchronize_rcu_full(&krwp->head_free_gp_snap);
32575f3c8d62SUladzislau Rezki (Sony) atomic_set(&krcp->head_count, 0);
325834c88174SUladzislau Rezki (Sony) WRITE_ONCE(krcp->head, NULL);
325934c88174SUladzislau Rezki (Sony) }
3260148e3731SUladzislau Rezki (Sony)
3261148e3731SUladzislau Rezki (Sony) // There is one work item per batch, and each batch
3262148e3731SUladzislau Rezki (Sony) // covers all three "free channels". The work may
3263148e3731SUladzislau Rezki (Sony) // already be in the pending state if the channels
3264148e3731SUladzislau Rezki (Sony) // were detached one after another before it got a
32653e7ce7a1SUladzislau Rezki (Sony) // chance to run.
32663e7ce7a1SUladzislau Rezki (Sony) queue_rcu_work(system_wq, &krwp->rcu_work);
32673e7ce7a1SUladzislau Rezki (Sony) }
32683e7ce7a1SUladzislau Rezki (Sony) }
32693e7ce7a1SUladzislau Rezki (Sony)
32703e7ce7a1SUladzislau Rezki (Sony) raw_spin_unlock_irqrestore(&krcp->lock, flags);
32713e7ce7a1SUladzislau Rezki (Sony)
32723e7ce7a1SUladzislau Rezki (Sony) // If there was nothing to detach, the job here is done.
32733e7ce7a1SUladzislau Rezki (Sony) // However, if at least one channel is still busy because
32743e7ce7a1SUladzislau Rezki (Sony) // previous batches are still in progress, rearm the work
32753e7ce7a1SUladzislau Rezki (Sony) // so that another attempt is made once those batches have
32763e7ce7a1SUladzislau Rezki (Sony) // completed.
3277148e3731SUladzislau Rezki (Sony) if (need_offload_krc(krcp))
32783e7ce7a1SUladzislau Rezki (Sony) schedule_delayed_monitor_work(krcp);
3279148e3731SUladzislau Rezki (Sony) }
3280148e3731SUladzislau Rezki (Sony)
3281148e3731SUladzislau Rezki (Sony) static enum hrtimer_restart
328256292e86SUladzislau Rezki (Sony) schedule_page_work_fn(struct hrtimer *t)
328334c88174SUladzislau Rezki (Sony) {
328434c88174SUladzislau Rezki (Sony) struct kfree_rcu_cpu *krcp =
328534c88174SUladzislau Rezki (Sony) container_of(t, struct kfree_rcu_cpu, hrtimer);
328634c88174SUladzislau Rezki (Sony)
3287148e3731SUladzislau Rezki (Sony) queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, 0);
328834c88174SUladzislau Rezki (Sony) return HRTIMER_NORESTART;
328934c88174SUladzislau Rezki (Sony) }
3290148e3731SUladzislau Rezki (Sony)
329134c88174SUladzislau Rezki (Sony) static void fill_page_cache_func(struct work_struct *work)
329234c88174SUladzislau Rezki (Sony) {
329334c88174SUladzislau Rezki (Sony) struct kvfree_rcu_bulk_data *bnode;
3294148e3731SUladzislau Rezki (Sony) struct kfree_rcu_cpu *krcp =
3295148e3731SUladzislau Rezki (Sony) container_of(work, struct kfree_rcu_cpu,
329634c88174SUladzislau Rezki (Sony) page_cache_work.work);
329734c88174SUladzislau Rezki (Sony) unsigned long flags;
329834c88174SUladzislau Rezki (Sony) int nr_pages;
329934c88174SUladzislau Rezki (Sony) bool pushed;
3300a35d1690SByungchul Park int i;
3301277ffe1bSZhouyi Zhou
3302277ffe1bSZhouyi Zhou nr_pages = atomic_read(&krcp->backoff_page_cache_fill) ?
3303277ffe1bSZhouyi Zhou 1 : rcu_min_cached_objs;
3304277ffe1bSZhouyi Zhou
3305277ffe1bSZhouyi Zhou for (i = READ_ONCE(krcp->nr_bkv_objs); i < nr_pages; i++) {
3306a35d1690SByungchul Park bnode = (struct kvfree_rcu_bulk_data *)
3307c408b215SUladzislau Rezki (Sony) __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
330834c88174SUladzislau Rezki (Sony)
330934c88174SUladzislau Rezki (Sony) if (!bnode)
33105f3c8d62SUladzislau Rezki (Sony) break;
3311495aa969SAndreea-Cristina Bernat
3312c408b215SUladzislau Rezki (Sony) raw_spin_lock_irqsave(&krcp->lock, flags);
3313495aa969SAndreea-Cristina Bernat pushed = put_cached_bnode(krcp, bnode);
3314a35d1690SByungchul Park raw_spin_unlock_irqrestore(&krcp->lock, flags);
3315a35d1690SByungchul Park
33163042f83fSUladzislau Rezki (Sony) if (!pushed) {
3317446044ebSJoel Fernandes (Google) free_page((unsigned long) bnode);
3318a35d1690SByungchul Park break;
33193042f83fSUladzislau Rezki (Sony) }
3320446044ebSJoel Fernandes (Google) }
33213042f83fSUladzislau Rezki (Sony)
33223042f83fSUladzislau Rezki (Sony) atomic_set(&krcp->work_in_progress, 0);
33233042f83fSUladzislau Rezki (Sony) atomic_set(&krcp->backoff_page_cache_fill, 0);
33243042f83fSUladzislau Rezki (Sony) }
33253042f83fSUladzislau Rezki (Sony)
33263042f83fSUladzislau Rezki (Sony) static void
33273042f83fSUladzislau Rezki (Sony) run_page_cache_worker(struct kfree_rcu_cpu *krcp)
33283042f83fSUladzislau Rezki (Sony) {
33293042f83fSUladzislau Rezki (Sony) // If cache disabled, bail out.
33303042f83fSUladzislau Rezki (Sony) if (!rcu_min_cached_objs)
33313042f83fSUladzislau Rezki (Sony) return;
33323042f83fSUladzislau Rezki (Sony)
3333a35d1690SByungchul Park if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
3334446044ebSJoel Fernandes (Google) !atomic_xchg(&krcp->work_in_progress, 1)) {
3335e99637beSJoel Fernandes (Google) if (atomic_read(&krcp->backoff_page_cache_fill)) {
3336e99637beSJoel Fernandes (Google) queue_delayed_work(system_wq,
3337e99637beSJoel Fernandes (Google) &krcp->page_cache_work,
33383042f83fSUladzislau Rezki (Sony) msecs_to_jiffies(rcu_delay_page_cache_fill_msec));
33393042f83fSUladzislau Rezki (Sony) } else {
3340148e3731SUladzislau Rezki (Sony) hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3341e99637beSJoel Fernandes (Google) krcp->hrtimer.function = schedule_page_work_fn;
334234c88174SUladzislau Rezki (Sony) hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
3343300c0c5eSJun Miao }
3344148e3731SUladzislau Rezki (Sony) }
33453042f83fSUladzislau Rezki (Sony) }
334656292e86SUladzislau Rezki (Sony)
334756292e86SUladzislau Rezki (Sony) // Record ptr in a page managed by krcp, with the pre-krc_this_cpu_lock()
33483042f83fSUladzislau Rezki (Sony) // state specified by flags. If can_alloc is true, the caller must
33493042f83fSUladzislau Rezki (Sony) // be schedulable and not be holding any locks or mutexes that might be
33503042f83fSUladzislau Rezki (Sony) // acquired by the memory allocator or anything that it might invoke.
33513042f83fSUladzislau Rezki (Sony) // Returns true if ptr was successfully recorded, else the caller must
3352a35d1690SByungchul Park // use a fallback.
3353a35d1690SByungchul Park static inline bool
3354a35d1690SByungchul Park add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
33553042f83fSUladzislau Rezki (Sony) unsigned long *flags, void *ptr, bool can_alloc)
335634c88174SUladzislau Rezki (Sony) {
3357a35d1690SByungchul Park struct kvfree_rcu_bulk_data *bnode;
3358a6a82ce1SJoel Fernandes (Google) int idx;
33599154244cSJoel Fernandes (Google)
3360a35d1690SByungchul Park *krcp = krc_this_cpu_lock(flags);
336182d26c36SJoel Fernandes (Google) if (unlikely(!(*krcp)->initialized))
336251824b78SUladzislau Rezki (Sony) return false;
3363a35d1690SByungchul Park
3364e99637beSJoel Fernandes (Google) idx = !!is_vmalloc_addr(ptr);
3365952371d6SUladzislau Rezki (Sony) bnode = list_first_entry_or_null(&(*krcp)->bulk_head[idx],
33663042f83fSUladzislau Rezki (Sony) struct kvfree_rcu_bulk_data, list);
33673042f83fSUladzislau Rezki (Sony)
33683042f83fSUladzislau Rezki (Sony) /* Check if a new block is required. */
33693042f83fSUladzislau Rezki (Sony) if (!bnode || bnode->nr_records == KVFREE_BULK_MAX_ENTR) {
33703042f83fSUladzislau Rezki (Sony) bnode = get_cached_bnode(*krcp);
33713042f83fSUladzislau Rezki (Sony) if (!bnode && can_alloc) {
33723042f83fSUladzislau Rezki (Sony) krc_this_cpu_unlock(*krcp, *flags);
33733042f83fSUladzislau Rezki (Sony)
33743042f83fSUladzislau Rezki (Sony) // __GFP_NORETRY - permits only light-weight direct reclaim,
33753042f83fSUladzislau Rezki (Sony) // which keeps the cost of reaching the fallback path low.
3376495aa969SAndreea-Cristina Bernat // It also forbids invoking the OOM killer, which is desirable
3377495aa969SAndreea-Cristina Bernat // given that memory is about to be released soon anyway.
3378c408b215SUladzislau Rezki (Sony) //
3379495aa969SAndreea-Cristina Bernat // __GFP_NOMEMALLOC - prevents consuming all of the
33809154244cSJoel Fernandes (Google) // memory reserves. Please note a fallback path exists.
33819154244cSJoel Fernandes (Google) //
33829154244cSJoel Fernandes (Google) // __GFP_NOWARN - suppresses allocation-failure warnings,
33839154244cSJoel Fernandes (Google) // since failure is expected under low-memory or
3384a6a82ce1SJoel Fernandes (Google) // high-memory-pressure scenarios.
33859154244cSJoel Fernandes (Google) bnode = (struct kvfree_rcu_bulk_data *)
33869154244cSJoel Fernandes (Google) __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
338770060b87SZqiang raw_spin_lock_irqsave(&(*krcp)->lock, *flags);
33889154244cSJoel Fernandes (Google) }
33899154244cSJoel Fernandes (Google)
3390a6a82ce1SJoel Fernandes (Google) if (!bnode)
3391ac7625ebSUladzislau Rezki (Sony) return false;
3392d0bfa8b3SZhang Qiang
33939154244cSJoel Fernandes (Google) // Initialize the new block and attach it.
33949154244cSJoel Fernandes (Google) bnode->nr_records = 0;
339538269096SJoel Fernandes (Google) list_add(&bnode->list, &(*krcp)->bulk_head[idx]);
33969154244cSJoel Fernandes (Google) }
33979154244cSJoel Fernandes (Google)
33989154244cSJoel Fernandes (Google) // Finally insert and update the GP for this page.
33999154244cSJoel Fernandes (Google) bnode->records[bnode->nr_records++] = ptr;
34009154244cSJoel Fernandes (Google) get_state_synchronize_rcu_full(&bnode->gp_snap);
34019154244cSJoel Fernandes (Google) atomic_inc(&(*krcp)->bulk_count[idx]);
34029154244cSJoel Fernandes (Google)
340370060b87SZqiang return true;
34049154244cSJoel Fernandes (Google) }
34059154244cSJoel Fernandes (Google)
34069154244cSJoel Fernandes (Google) /*
34079154244cSJoel Fernandes (Google) * Queue a request for lazy invocation of the appropriate free routine
3408d0bfa8b3SZhang Qiang * after a grace period. Please note that three paths are maintained,
34097fe1da33SUladzislau Rezki (Sony) * two for the common case using arrays of pointers and a third one that
34109154244cSJoel Fernandes (Google) * is used only when the main paths cannot be used, for example, due to
34119154244cSJoel Fernandes (Google) * memory pressure.
34129154244cSJoel Fernandes (Google) *
34139154244cSJoel Fernandes (Google) * Each kvfree_call_rcu() request is added to a batch. The batch is drained
34149154244cSJoel Fernandes (Google) * every KFREE_DRAIN_JIFFIES. All objects in the batch are freed in workqueue
34159154244cSJoel Fernandes (Google) * context, which allows requests to be batched together and thus reduces the
34169154244cSJoel Fernandes (Google) * number of grace periods during heavy kfree_rcu()/kvfree_rcu() load.
34179154244cSJoel Fernandes (Google) */
3418c6dfd72bSPeter Enderborg void kvfree_call_rcu(struct rcu_head *head, void *ptr)
34199154244cSJoel Fernandes (Google) {
34209154244cSJoel Fernandes (Google) unsigned long flags;
34219154244cSJoel Fernandes (Google) struct kfree_rcu_cpu *krcp;
34229154244cSJoel Fernandes (Google) bool success;
34239154244cSJoel Fernandes (Google)
34249154244cSJoel Fernandes (Google) /*
34259154244cSJoel Fernandes (Google) * Please note that the head-less variant has a limitation,
34269154244cSJoel Fernandes (Google) * which is why there is a clear rule for such objects: they
34279154244cSJoel Fernandes (Google) * may be used only from a context in which sleeping is
3428a35d1690SByungchul Park * legal (might_sleep()). Everywhere else, embed an rcu_head
3429a35d1690SByungchul Park * in your data structure.
3430a35d1690SByungchul Park */
3431a35d1690SByungchul Park if (!head)
3432a35d1690SByungchul Park might_sleep();
343370060b87SZqiang
3434a35d1690SByungchul Park // Queue the object but don't yet schedule the batch.
3435a35d1690SByungchul Park if (debug_rcu_head_queue(ptr)) {
34368ac88f71SJoel Fernandes (Google) // Probable double kfree_rcu(), just leak.
343782d26c36SJoel Fernandes (Google) WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
343851824b78SUladzislau Rezki (Sony) __func__, head);
34398ac88f71SJoel Fernandes (Google)
3440a35d1690SByungchul Park // Mark as success and leave.
3441a35d1690SByungchul Park return;
3442a35d1690SByungchul Park }
3443e5bc3af7SPaul E. McKenney
3444e5bc3af7SPaul E. McKenney kasan_record_aux_stack_noalloc(ptr);
3445258f887aSPaul E. McKenney success = add_ptr_to_bulk_krc_lock(&krcp, &flags, ptr, !head);
3446e5bc3af7SPaul E. McKenney if (!success) {
3447258f887aSPaul E. McKenney run_page_cache_worker(krcp);
3448258f887aSPaul E. McKenney
3449258f887aSPaul E. McKenney if (head == NULL)
3450258f887aSPaul E. McKenney // Inline if kvfree_rcu(one_arg) call.
3451258f887aSPaul E. McKenney goto unlock_return;
3452e5bc3af7SPaul E. McKenney
3453e5bc3af7SPaul E. McKenney head->func = ptr;
3454e5bc3af7SPaul E. McKenney head->next = krcp->head;
3455258f887aSPaul E. McKenney WRITE_ONCE(krcp->head, head);
3456258f887aSPaul E. McKenney atomic_inc(&krcp->head_count);
3457e5bc3af7SPaul E. McKenney
3458258f887aSPaul E. McKenney // Take a snapshot for this krcp.
3459e5bc3af7SPaul E. McKenney krcp->head_gp_snap = get_state_synchronize_rcu();
3460e5bc3af7SPaul E. McKenney success = true;
3461e5bc3af7SPaul E. McKenney }
3462e5bc3af7SPaul E. McKenney
3463e5bc3af7SPaul E. McKenney /*
3464e5bc3af7SPaul E. McKenney * The kvfree_rcu() caller considers the pointer freed at this point
3465e5bc3af7SPaul E. McKenney * and likely removes any references to it. Since the actual slab
3466e5bc3af7SPaul E. McKenney * freeing (and kmemleak_free()) is deferred, tell kmemleak to ignore
3467e5bc3af7SPaul E. McKenney * this object (no scanning or false positives reporting).
3468e5bc3af7SPaul E. McKenney */
34691893afd6SPaul E. McKenney kmemleak_ignore(ptr);
34701893afd6SPaul E. McKenney
34711893afd6SPaul E. McKenney // Set timer to drain after KFREE_DRAIN_JIFFIES.
34721893afd6SPaul E. McKenney if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
34731893afd6SPaul E. McKenney __schedule_delayed_monitor_work(krcp);
34741893afd6SPaul E. McKenney
3475e5bc3af7SPaul E. McKenney unlock_return:
3476e5bc3af7SPaul E. McKenney krc_this_cpu_unlock(krcp, flags);
3477e5bc3af7SPaul E. McKenney
3478e5bc3af7SPaul E. McKenney /*
3479e5bc3af7SPaul E. McKenney * Fall back to an inline kvfree() after synchronize_rcu().
3480e5bc3af7SPaul E. McKenney * This is legal only from a sleepable context, so the
3481e5bc3af7SPaul E. McKenney * current CPU is able to pass through a quiescent state.
3482e5bc3af7SPaul E. McKenney */
3483e5bc3af7SPaul E. McKenney if (!success) {
3484e5bc3af7SPaul E. McKenney debug_rcu_head_unqueue((struct rcu_head *) ptr);
3485e5bc3af7SPaul E. McKenney synchronize_rcu();
3486e5bc3af7SPaul E. McKenney kvfree(ptr);
3487e5bc3af7SPaul E. McKenney }
3488e5bc3af7SPaul E. McKenney }
3489e5bc3af7SPaul E. McKenney EXPORT_SYMBOL_GPL(kvfree_call_rcu);
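
/*
 * For illustration only: callers normally reach kvfree_call_rcu()
 * through the kfree_rcu()/kvfree_rcu() macros rather than invoking it
 * directly.  Assuming a hypothetical struct foo with an embedded
 * rcu_head member named "rcu" and a pointer fp to such an object:
 *
 *	kfree_rcu(fp, rcu);		// two-argument form, never sleeps
 *
 * The head-less variant, legal only where sleeping is allowed, queues
 * just the pointer and may fall back to synchronize_rcu():
 *
 *	kvfree_rcu_mightsleep(fp);	// single-argument form, may sleep
 */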
3490e5bc3af7SPaul E. McKenney
3491e5bc3af7SPaul E. McKenney static unsigned long
3492e5bc3af7SPaul E. McKenney kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
3493e5bc3af7SPaul E. McKenney {
3494e5bc3af7SPaul E. McKenney int cpu;
34953d3a0d1bSPaul E. McKenney unsigned long count = 0;
34963d3a0d1bSPaul E. McKenney
34973d3a0d1bSPaul E. McKenney /* Snapshot count of all CPUs */
3498e5bc3af7SPaul E. McKenney for_each_possible_cpu(cpu) {
3499e5bc3af7SPaul E. McKenney struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
3500e5bc3af7SPaul E. McKenney
3501910e1209SPaul E. McKenney count += krc_count(krcp);
3502910e1209SPaul E. McKenney count += READ_ONCE(krcp->nr_bkv_objs);
3503910e1209SPaul E. McKenney atomic_set(&krcp->backoff_page_cache_fill, 1);
3504e5bc3af7SPaul E. McKenney }
3505e5bc3af7SPaul E. McKenney
3506e5bc3af7SPaul E. McKenney return count == 0 ? SHRINK_EMPTY : count;
3507e5bc3af7SPaul E. McKenney }
3508910e1209SPaul E. McKenney
3509e5bc3af7SPaul E. McKenney static unsigned long
3510e5bc3af7SPaul E. McKenney kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
3511e5bc3af7SPaul E. McKenney {
3512e5bc3af7SPaul E. McKenney int cpu, freed = 0;
3513910e1209SPaul E. McKenney
3514910e1209SPaul E. McKenney for_each_possible_cpu(cpu) {
3515910e1209SPaul E. McKenney int count;
3516910e1209SPaul E. McKenney struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
3517910e1209SPaul E. McKenney
3518910e1209SPaul E. McKenney count = krc_count(krcp);
3519910e1209SPaul E. McKenney count += drain_page_cache(krcp);
3520910e1209SPaul E. McKenney kfree_rcu_monitor(&krcp->monitor_work.work);
3521910e1209SPaul E. McKenney
3522910e1209SPaul E. McKenney sc->nr_to_scan -= count;
3523910e1209SPaul E. McKenney freed += count;
3524910e1209SPaul E. McKenney
3525d761de8aSPaul E. McKenney if (sc->nr_to_scan <= 0)
3526d761de8aSPaul E. McKenney break;
3527d761de8aSPaul E. McKenney }
3528d761de8aSPaul E. McKenney
3529910e1209SPaul E. McKenney return freed == 0 ? SHRINK_STOP : freed;
3530910e1209SPaul E. McKenney }
3531910e1209SPaul E. McKenney
3532d761de8aSPaul E. McKenney static struct shrinker kfree_rcu_shrinker = {
3533910e1209SPaul E. McKenney .count_objects = kfree_rcu_shrink_count,
3534910e1209SPaul E. McKenney .scan_objects = kfree_rcu_shrink_scan,
3535e5bc3af7SPaul E. McKenney .batch = 0,
3536e5bc3af7SPaul E. McKenney .seeks = DEFAULT_SEEKS,
3537e5bc3af7SPaul E. McKenney };
3538765a3f4fSPaul E. McKenney
353991a967fdSPaul E. McKenney void __init kfree_rcu_scheduler_running(void)
354091a967fdSPaul E. McKenney {
354191a967fdSPaul E. McKenney int cpu;
354291a967fdSPaul E. McKenney
354391a967fdSPaul E. McKenney for_each_possible_cpu(cpu) {
354491a967fdSPaul E. McKenney struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
354591a967fdSPaul E. McKenney
354691a967fdSPaul E. McKenney if (need_offload_krc(krcp))
354791a967fdSPaul E. McKenney schedule_delayed_monitor_work(krcp);
354891a967fdSPaul E. McKenney }
354991a967fdSPaul E. McKenney }
355091a967fdSPaul E. McKenney
355191a967fdSPaul E. McKenney /*
355291a967fdSPaul E. McKenney * During early boot, any blocking grace-period wait automatically
355391a967fdSPaul E. McKenney * implies a grace period.
3554765a3f4fSPaul E. McKenney *
3555765a3f4fSPaul E. McKenney * Later on, this could in theory be the case for kernels built with
3556765a3f4fSPaul E. McKenney * CONFIG_SMP=y && CONFIG_PREEMPTION=y running on a single CPU, but this
35577abb18bdSPaul E. McKenney * is not a common case. Furthermore, this optimization would cause
35587abb18bdSPaul E. McKenney * the rcu_gp_oldstate structure to expand by 50%, so this potential
3559765a3f4fSPaul E. McKenney * grace-period optimization is ignored once the scheduler is running.
3560765a3f4fSPaul E. McKenney */
3561765a3f4fSPaul E. McKenney static int rcu_blocking_is_gp(void)
3562765a3f4fSPaul E. McKenney {
3563765a3f4fSPaul E. McKenney if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE) {
3564e4be81a2SPaul E. McKenney might_sleep();
3565765a3f4fSPaul E. McKenney return false;
3566765a3f4fSPaul E. McKenney }
3567bf95b2bcSPaul E. McKenney return true;
3568765a3f4fSPaul E. McKenney }
3569765a3f4fSPaul E. McKenney
3570765a3f4fSPaul E. McKenney /**
3571765a3f4fSPaul E. McKenney * synchronize_rcu - wait until a grace period has elapsed.
35723fdefca9SPaul E. McKenney *
35733fdefca9SPaul E. McKenney * Control will return to the caller some time after a full grace
35747abb18bdSPaul E. McKenney * period has elapsed, in other words after all currently executing RCU
35753fdefca9SPaul E. McKenney * read-side critical sections have completed. Note, however, that
35763fdefca9SPaul E. McKenney * upon return from synchronize_rcu(), the caller might well be executing
35773fdefca9SPaul E. McKenney * concurrently with new RCU read-side critical sections that began while
35783fdefca9SPaul E. McKenney * synchronize_rcu() was waiting.
35793fdefca9SPaul E. McKenney *
35803fdefca9SPaul E. McKenney * RCU read-side critical sections are delimited by rcu_read_lock()
35813fdefca9SPaul E. McKenney * and rcu_read_unlock(), and may be nested. In addition, but only in
35823fdefca9SPaul E. McKenney * v5.0 and later, regions of code across which interrupts, preemption,
35837abb18bdSPaul E. McKenney * or softirqs have been disabled also serve as RCU read-side critical
35843fdefca9SPaul E. McKenney * sections. This includes hardware interrupt handlers, softirq handlers,
35853fdefca9SPaul E. McKenney * and NMI handlers.
35867abb18bdSPaul E. McKenney *
35873fdefca9SPaul E. McKenney * Note that this guarantee implies further memory-ordering guarantees.
35883fdefca9SPaul E. McKenney * On systems with more than one CPU, when synchronize_rcu() returns,
35893fdefca9SPaul E. McKenney * each CPU is guaranteed to have executed a full memory barrier since
35903fdefca9SPaul E. McKenney * the end of its last RCU read-side critical section whose beginning
35913fdefca9SPaul E. McKenney * preceded the call to synchronize_rcu(). In addition, each CPU having
35923fdefca9SPaul E. McKenney * an RCU read-side critical section that extends beyond the return from
35933fdefca9SPaul E. McKenney * synchronize_rcu() is guaranteed to have executed a full memory barrier
35943fdefca9SPaul E. McKenney * after the beginning of synchronize_rcu() and before the beginning of
35953fdefca9SPaul E. McKenney * that RCU read-side critical section. Note that these guarantees include
35963fdefca9SPaul E. McKenney * CPUs that are offline, idle, or executing in user mode, as well as CPUs
35973fdefca9SPaul E. McKenney * that are executing in the kernel.
35983fdefca9SPaul E. McKenney *
35993fdefca9SPaul E. McKenney * Furthermore, if CPU A invoked synchronize_rcu(), which returned
36003fdefca9SPaul E. McKenney * to its caller on CPU B, then both CPU A and CPU B are guaranteed
360176ea3641SPaul E. McKenney * to have executed a full memory barrier during the execution of
360276ea3641SPaul E. McKenney * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
360376ea3641SPaul E. McKenney * again only if the system has more than one CPU).
36047abb18bdSPaul E. McKenney *
360576ea3641SPaul E. McKenney * Implementation of these memory-ordering guarantees is described here:
36067abb18bdSPaul E. McKenney * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
36077abb18bdSPaul E. McKenney */
36087abb18bdSPaul E. McKenney void synchronize_rcu(void)
36097abb18bdSPaul E. McKenney {
36107abb18bdSPaul E. McKenney unsigned long flags;
36117abb18bdSPaul E. McKenney struct rcu_node *rnp;
36127abb18bdSPaul E. McKenney
36137abb18bdSPaul E. McKenney RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
36147abb18bdSPaul E. McKenney lock_is_held(&rcu_lock_map) ||
36157abb18bdSPaul E. McKenney lock_is_held(&rcu_sched_lock_map),
36167abb18bdSPaul E. McKenney "Illegal synchronize_rcu() in RCU read-side critical section");
3617bf95b2bcSPaul E. McKenney if (!rcu_blocking_is_gp()) {
3618bf95b2bcSPaul E. McKenney if (rcu_gp_is_expedited())
3619bf95b2bcSPaul E. McKenney synchronize_rcu_expedited();
3620bf95b2bcSPaul E. McKenney else
3621bf95b2bcSPaul E. McKenney wait_rcu_gp(call_rcu_hurry);
3622bf95b2bcSPaul E. McKenney return;
3623bf95b2bcSPaul E. McKenney }
36247abb18bdSPaul E. McKenney
36257abb18bdSPaul E. McKenney // Context allows vacuous grace periods.
36267abb18bdSPaul E. McKenney // Note well that this code runs with !PREEMPT && !SMP.
362776ea3641SPaul E. McKenney // In addition, all code that advances grace periods runs at
362876ea3641SPaul E. McKenney // process level. Therefore, this normal GP overlaps with other
362976ea3641SPaul E. McKenney // normal GPs only by being fully nested within them, which allows
363076ea3641SPaul E. McKenney // reuse of ->gp_seq_polled_snap.
363176ea3641SPaul E. McKenney rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap);
363276ea3641SPaul E. McKenney rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap);
363376ea3641SPaul E. McKenney
363476ea3641SPaul E. McKenney // Update the normal grace-period counters to record
363576ea3641SPaul E. McKenney // this grace period, but only those used by the boot CPU.
363676ea3641SPaul E. McKenney // The rcu_scheduler_starting() will take care of the rest of
363776ea3641SPaul E. McKenney // these counters.
363876ea3641SPaul E. McKenney local_irq_save(flags);
363976ea3641SPaul E. McKenney WARN_ON_ONCE(num_online_cpus() > 1);
364076ea3641SPaul E. McKenney rcu_state.gp_seq += (1 << RCU_SEQ_CTR_SHIFT);
364176ea3641SPaul E. McKenney for (rnp = this_cpu_ptr(&rcu_data)->mynode; rnp; rnp = rnp->parent)
364276ea3641SPaul E. McKenney rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq;
364376ea3641SPaul E. McKenney local_irq_restore(flags);
364476ea3641SPaul E. McKenney }
364576ea3641SPaul E. McKenney EXPORT_SYMBOL_GPL(synchronize_rcu);
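/*
 * For illustration, a minimal update-side sketch.  Everything named here
 * (struct foo, gbl_foo, foo_mutex, foo_update()) is hypothetical caller
 * code, not part of this file; the caller is assumed to hold foo_mutex
 * across the update:
 *
 *	struct foo {
 *		int a;
 *	};
 *	static struct foo __rcu *gbl_foo;
 *	static DEFINE_MUTEX(foo_mutex);
 *
 *	void foo_update(struct foo *newp)
 *	{
 *		struct foo *oldp;
 *
 *		oldp = rcu_dereference_protected(gbl_foo,
 *						 lockdep_is_held(&foo_mutex));
 *		rcu_assign_pointer(gbl_foo, newp);
 *		synchronize_rcu();	// Wait out all pre-existing readers.
 *		kfree(oldp);		// Nobody can still be using oldp.
 *	}
 */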
36467abb18bdSPaul E. McKenney
36477abb18bdSPaul E. McKenney /**
36487abb18bdSPaul E. McKenney * get_completed_synchronize_rcu_full - Return a full pre-completed polled state cookie
36497abb18bdSPaul E. McKenney * @rgosp: Place to put state cookie
36507abb18bdSPaul E. McKenney *
365176ea3641SPaul E. McKenney * Stores into @rgosp a value that will always be treated by functions
365276ea3641SPaul E. McKenney * like poll_state_synchronize_rcu_full() as a cookie whose grace period
36537abb18bdSPaul E. McKenney * has already completed.
365476ea3641SPaul E. McKenney */
365576ea3641SPaul E. McKenney void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
365676ea3641SPaul E. McKenney {
365776ea3641SPaul E. McKenney rgosp->rgos_norm = RCU_GET_STATE_COMPLETED;
365876ea3641SPaul E. McKenney rgosp->rgos_exp = RCU_GET_STATE_COMPLETED;
365976ea3641SPaul E. McKenney }
366076ea3641SPaul E. McKenney EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full);
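/*
 * For illustration, a minimal sketch of hypothetical caller code that
 * initializes a stored cookie to the "grace period already completed"
 * value, so that the very first poll of that cookie succeeds without
 * waiting:
 *
 *	struct rcu_gp_oldstate gos;
 *
 *	get_completed_synchronize_rcu_full(&gos);
 *	WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&gos));
 */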
366176ea3641SPaul E. McKenney
366276ea3641SPaul E. McKenney /**
366376ea3641SPaul E. McKenney * get_state_synchronize_rcu - Snapshot current RCU state
366476ea3641SPaul E. McKenney *
366576ea3641SPaul E. McKenney * Returns a cookie that is used by a later call to cond_synchronize_rcu()
366676ea3641SPaul E. McKenney * or poll_state_synchronize_rcu() to determine whether or not a full
366776ea3641SPaul E. McKenney * grace period has elapsed in the meantime.
366876ea3641SPaul E. McKenney */
366976ea3641SPaul E. McKenney unsigned long get_state_synchronize_rcu(void)
367076ea3641SPaul E. McKenney {
367176ea3641SPaul E. McKenney /*
367276ea3641SPaul E. McKenney * Any prior manipulation of RCU-protected data must happen
367391a967fdSPaul E. McKenney * before the load from ->gp_seq.
36743d3a0d1bSPaul E. McKenney */
36757abb18bdSPaul E. McKenney smp_mb(); /* ^^^ */
36767abb18bdSPaul E. McKenney return rcu_seq_snap(&rcu_state.gp_seq_polled);
3677f21e0143SPaul E. McKenney }
3678a616aec9SIngo Molnar EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
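/*
 * For illustration, a minimal sketch of hypothetical caller code: snapshot
 * the grace-period state right after unpublishing an object ("obj" and its
 * removal are hypothetical), then avoid a needless wait when freeing it:
 *
 *	unsigned long cookie;
 *
 *	// ... remove obj from all RCU-visible structures ...
 *	cookie = get_state_synchronize_rcu();
 *	// ... later, when obj is finally to be freed ...
 *	if (!poll_state_synchronize_rcu(cookie))
 *		synchronize_rcu();	// No full grace period yet, so wait.
 *	kfree(obj);
 */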
36797abb18bdSPaul E. McKenney
36807abb18bdSPaul E. McKenney /**
36817abb18bdSPaul E. McKenney * get_state_synchronize_rcu_full - Snapshot RCU state, both normal and expedited
36827abb18bdSPaul E. McKenney * @rgosp: location to place combined normal/expedited grace-period state
36837abb18bdSPaul E. McKenney *
36847abb18bdSPaul E. McKenney * Places the normal and expedited grace-period states in @rgosp. This
36852403e804SPaul E. McKenney * state value can be passed to a later call to cond_synchronize_rcu_full()
36867abb18bdSPaul E. McKenney * or poll_state_synchronize_rcu_full() to determine whether or not a
368791a967fdSPaul E. McKenney * grace period (whether normal or expedited) has elapsed in the meantime.
368891a967fdSPaul E. McKenney * The rcu_gp_oldstate structure takes up twice the memory of an unsigned
368991a967fdSPaul E. McKenney * long, but is guaranteed to see all grace periods. In contrast, the
369091a967fdSPaul E. McKenney * combined state occupies less memory, but can sometimes fail to take
36913d3a0d1bSPaul E. McKenney * grace periods into account.
36923d3a0d1bSPaul E. McKenney *
36933d3a0d1bSPaul E. McKenney * This does not guarantee that the needed grace period will actually
36943d3a0d1bSPaul E. McKenney * start.
36953d3a0d1bSPaul E. McKenney */
36967abb18bdSPaul E. McKenney void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
36977abb18bdSPaul E. McKenney {
36987abb18bdSPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
3699414c1238SPaul E. McKenney
3700bf95b2bcSPaul E. McKenney /*
37017abb18bdSPaul E. McKenney * Any prior manipulation of RCU-protected data must happen
37027abb18bdSPaul E. McKenney * before the loads from ->gp_seq and ->expedited_sequence.
37037abb18bdSPaul E. McKenney */
37047abb18bdSPaul E. McKenney smp_mb(); /* ^^^ */
37057abb18bdSPaul E. McKenney rgosp->rgos_norm = rcu_seq_snap(&rnp->gp_seq);
37067abb18bdSPaul E. McKenney rgosp->rgos_exp = rcu_seq_snap(&rcu_state.expedited_sequence);
37077abb18bdSPaul E. McKenney }
37087abb18bdSPaul E. McKenney EXPORT_SYMBOL_GPL(get_state_synchronize_rcu_full);
370991a967fdSPaul E. McKenney
371091a967fdSPaul E. McKenney /*
3711765a3f4fSPaul E. McKenney * Helper function for start_poll_synchronize_rcu() and
371291a967fdSPaul E. McKenney * start_poll_synchronize_rcu_full().
371391a967fdSPaul E. McKenney */
371491a967fdSPaul E. McKenney static void start_poll_synchronize_rcu_common(void)
371591a967fdSPaul E. McKenney {
371691a967fdSPaul E. McKenney unsigned long flags;
371791a967fdSPaul E. McKenney bool needwake;
371891a967fdSPaul E. McKenney struct rcu_data *rdp;
371991a967fdSPaul E. McKenney struct rcu_node *rnp;
372091a967fdSPaul E. McKenney
372191a967fdSPaul E. McKenney lockdep_assert_irqs_enabled();
372291a967fdSPaul E. McKenney local_irq_save(flags);
372391a967fdSPaul E. McKenney rdp = this_cpu_ptr(&rcu_data);
372491a967fdSPaul E. McKenney rnp = rdp->mynode;
372591a967fdSPaul E. McKenney raw_spin_lock_rcu_node(rnp); // irqs already disabled.
372691a967fdSPaul E. McKenney // Note it is possible for a grace period to have elapsed between
372791a967fdSPaul E. McKenney // the above call to get_state_synchronize_rcu() and the below call
372891a967fdSPaul E. McKenney // to rcu_seq_snap(). This is OK: the worst that happens is that we
372991a967fdSPaul E. McKenney // get a grace period that no one needed. These accesses are ordered
373091a967fdSPaul E. McKenney // by smp_mb(), and we are accessing them in the opposite order
373191a967fdSPaul E. McKenney // from which they are updated at grace-period start, as required.
373291a967fdSPaul E. McKenney needwake = rcu_start_this_gp(rnp, rdp, rcu_seq_snap(&rcu_state.gp_seq));
373391a967fdSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
373491a967fdSPaul E. McKenney if (needwake)
373591a967fdSPaul E. McKenney rcu_gp_kthread_wake();
373691a967fdSPaul E. McKenney }
373791a967fdSPaul E. McKenney
373891a967fdSPaul E. McKenney /**
373991a967fdSPaul E. McKenney * start_poll_synchronize_rcu - Snapshot and start RCU grace period
374091a967fdSPaul E. McKenney *
374191a967fdSPaul E. McKenney * Returns a cookie that is used by a later call to cond_synchronize_rcu()
374291a967fdSPaul E. McKenney * or poll_state_synchronize_rcu() to determine whether or not a full
374391a967fdSPaul E. McKenney * grace period has elapsed in the meantime. If the needed grace period
374491a967fdSPaul E. McKenney * is not already slated to start, notifies RCU core of the need for that
374591a967fdSPaul E. McKenney * grace period.
374691a967fdSPaul E. McKenney *
374791a967fdSPaul E. McKenney * Interrupts must be enabled for the case where it is necessary to awaken
374891a967fdSPaul E. McKenney * the grace-period kthread.
37497ecef087SPaul E. McKenney */
375091a967fdSPaul E. McKenney unsigned long start_poll_synchronize_rcu(void)
375191a967fdSPaul E. McKenney {
375291a967fdSPaul E. McKenney unsigned long gp_seq = get_state_synchronize_rcu();
375391a967fdSPaul E. McKenney
375491a967fdSPaul E. McKenney start_poll_synchronize_rcu_common();
375591a967fdSPaul E. McKenney return gp_seq;
375691a967fdSPaul E. McKenney }
375791a967fdSPaul E. McKenney EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
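/*
 * For illustration, a minimal sketch of hypothetical caller code that
 * starts a grace period now and checks back later instead of blocking
 * ("obj", its ->gp_cookie field, and the deferred-free list are all
 * hypothetical):
 *
 *	// At retirement time, after unlinking obj from RCU-visible data:
 *	obj->gp_cookie = start_poll_synchronize_rcu();
 *	list_add(&obj->deferred, &deferred_free_list);
 *
 *	// On a later pass over deferred_free_list:
 *	if (poll_state_synchronize_rcu(obj->gp_cookie)) {
 *		list_del(&obj->deferred);
 *		kfree(obj);	// Grace period elapsed, safe to free.
 *	}
 */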
3758765a3f4fSPaul E. McKenney
3759d96c52feSPaul E. McKenney /**
3760765a3f4fSPaul E. McKenney * start_poll_synchronize_rcu_full - Take a full snapshot and start RCU grace period
3761765a3f4fSPaul E. McKenney * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full()
37627abb18bdSPaul E. McKenney *
37637abb18bdSPaul E. McKenney * Places the normal and expedited grace-period states in *@rgosp. This
3764765a3f4fSPaul E. McKenney * state value can be passed to a later call to cond_synchronize_rcu_full()
3765d96c52feSPaul E. McKenney * or poll_state_synchronize_rcu_full() to determine whether or not a
3766d96c52feSPaul E. McKenney * grace period (whether normal or expedited) has elapsed in the meantime.
3767765a3f4fSPaul E. McKenney * If the needed grace period is not already slated to start, notifies
3768d96c52feSPaul E. McKenney * RCU core of the need for that grace period.
37693d3a0d1bSPaul E. McKenney *
37703d3a0d1bSPaul E. McKenney * Interrupts must be enabled for the case where it is necessary to awaken
37713d3a0d1bSPaul E. McKenney * the grace-period kthread.
3772d96c52feSPaul E. McKenney */
37733d3a0d1bSPaul E. McKenney void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
3774765a3f4fSPaul E. McKenney {
3775765a3f4fSPaul E. McKenney get_state_synchronize_rcu_full(rgosp);
3776765a3f4fSPaul E. McKenney
37777abb18bdSPaul E. McKenney start_poll_synchronize_rcu_common();
3778765a3f4fSPaul E. McKenney }
3779765a3f4fSPaul E. McKenney EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_full);
3780765a3f4fSPaul E. McKenney
3781765a3f4fSPaul E. McKenney /**
3782b6fe4917SPaul E. McKenney * poll_state_synchronize_rcu - Has the specified RCU grace period completed?
3783b6fe4917SPaul E. McKenney * @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu()
3784b6fe4917SPaul E. McKenney *
3785b6fe4917SPaul E. McKenney * If a full RCU grace period has elapsed since the earlier call from
3786b6fe4917SPaul E. McKenney * which @oldstate was obtained, return @true, otherwise return @false.
3787b6fe4917SPaul E. McKenney * If @false is returned, it is the caller's responsibility to invoke this
3788b6fe4917SPaul E. McKenney * function later on until it does return @true. Alternatively, the caller
3789b6fe4917SPaul E. McKenney * can explicitly wait for a grace period, for example, by passing @oldstate
3790b6fe4917SPaul E. McKenney * to either cond_synchronize_rcu() or cond_synchronize_rcu_expedited()
3791b6fe4917SPaul E. McKenney * on the one hand or by directly invoking either synchronize_rcu() or
3792b6fe4917SPaul E. McKenney * synchronize_rcu_expedited() on the other.
3793b6fe4917SPaul E. McKenney *
3794b6fe4917SPaul E. McKenney * Yes, this function does not take counter wrap into account.
3795b6fe4917SPaul E. McKenney * But counter wrap is harmless. If the counter wraps, we have waited for
3796b6fe4917SPaul E. McKenney * more than a billion grace periods (and way more on a 64-bit system!).
3797b6fe4917SPaul E. McKenney * Those needing to keep old state values for very long time periods
3798b6fe4917SPaul E. McKenney * (many hours even on 32-bit systems) should check them occasionally and
3799b6fe4917SPaul E. McKenney * either refresh them or set a flag indicating that the grace period has
3800b6fe4917SPaul E. McKenney * completed. Alternatively, they can use get_completed_synchronize_rcu()
3801b6fe4917SPaul E. McKenney * to get a guaranteed-completed grace-period state.
3802b6fe4917SPaul E. McKenney *
3803b6fe4917SPaul E. McKenney * In addition, because oldstate compresses the grace-period state for
3804b6fe4917SPaul E. McKenney * both normal and expedited grace periods into a single unsigned long,
3805b6fe4917SPaul E. McKenney * it can miss a grace period when synchronize_rcu() runs concurrently
3806b6fe4917SPaul E. McKenney * with synchronize_rcu_expedited(). If this is unacceptable, please
3807b6fe4917SPaul E. McKenney * instead use the _full() variant of these polling APIs.
3808b6fe4917SPaul E. McKenney *
380924560056SPaul E. McKenney * This function provides the same memory-ordering guarantees that
381098ece508SPaul E. McKenney * would be provided by a synchronize_rcu() that was invoked at the call
381149918a54SPaul E. McKenney * to the function that provided @oldstate, and that returned at the end
381249918a54SPaul E. McKenney * of this function.
381349918a54SPaul E. McKenney */
381449918a54SPaul E. McKenney bool poll_state_synchronize_rcu(unsigned long oldstate)
381524560056SPaul E. McKenney {
3816dd7dafd1SPaul E. McKenney if (oldstate == RCU_GET_STATE_COMPLETED ||
381724560056SPaul E. McKenney rcu_seq_done_exact(&rcu_state.gp_seq_polled, oldstate)) {
3818ed93dfc6SPaul E. McKenney smp_mb(); /* Ensure GP ends before subsequent accesses. */
381998ece508SPaul E. McKenney return true;
38204102adabSPaul E. McKenney }
38214102adabSPaul E. McKenney return false;
3822a649d25dSPaul E. McKenney }
3823a649d25dSPaul E. McKenney EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
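/*
 * For illustration, a minimal sketch of hypothetical caller code gating
 * object reuse on a previously recorded cookie ("cache", its ->gp_cookie
 * field, and the helpers are all hypothetical):
 *
 *	if (poll_state_synchronize_rcu(cache->gp_cookie))
 *		reuse_object(cache);	// All old readers are done.
 *	else
 *		defer_reuse(cache);	// Check again on a later pass.
 */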
38244102adabSPaul E. McKenney
3825ea12ff2bSPaul E. McKenney /**
38264102adabSPaul E. McKenney * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed?
382785f69b32SPaul E. McKenney * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full()
382887090516SFrederic Weisbecker *
382985f69b32SPaul E. McKenney * If a full RCU grace period has elapsed since the earlier call from
383085f69b32SPaul E. McKenney * which *rgosp was obtained, return @true, otherwise return @false.
3831dd7dafd1SPaul E. McKenney * If @false is returned, it is the caller's responsibility to invoke this
3832dd7dafd1SPaul E. McKenney * function later on until it does return @true. Alternatively, the caller
3833a096932fSPaul E. McKenney * can explicitly wait for a grace period, for example, by passing @rgosp
3834a096932fSPaul E. McKenney * to cond_synchronize_rcu_full() or by directly invoking synchronize_rcu().
38354102adabSPaul E. McKenney *
3836ed93dfc6SPaul E. McKenney * Yes, this function does not take counter wrap into account.
3837ed93dfc6SPaul E. McKenney * But counter wrap is harmless. If the counter wraps, we have waited
38384102adabSPaul E. McKenney * for more than a billion grace periods (and way more on a 64-bit
38394102adabSPaul E. McKenney * system!). Those needing to keep rcu_gp_oldstate values for very
38404102adabSPaul E. McKenney * long time periods (many hours even on 32-bit systems) should check
38413820b513SFrederic Weisbecker * them occasionally and either refresh them or set a flag indicating
3842bd56e0a4SJoel Fernandes (Google) * that the grace period has completed. Alternatively, they can use
38434102adabSPaul E. McKenney * get_completed_synchronize_rcu_full() to get a guaranteed-completed
38444102adabSPaul E. McKenney * grace-period state.
38454102adabSPaul E. McKenney *
3846ed93dfc6SPaul E. McKenney * This function provides the same memory-ordering guarantees that would
38473820b513SFrederic Weisbecker * be provided by a synchronize_rcu() that was invoked at the call to
3848c1935209SPaul E. McKenney * the function that provided @rgosp, and that returned at the end of this
38494102adabSPaul E. McKenney * function. And this guarantee requires that the root rcu_node structure's
38504102adabSPaul E. McKenney * ->gp_seq field be checked instead of that of the rcu_state structure.
385167e14c1eSPaul E. McKenney * The problem is that the just-ending grace-period's callbacks can be
385267e14c1eSPaul E. McKenney * invoked between the time that the root rcu_node structure's ->gp_seq
385301c495f7SPaul E. McKenney * field is updated and the time that the rcu_state structure's ->gp_seq
38544102adabSPaul E. McKenney * field is updated. Therefore, if a single synchronize_rcu() is to
38554102adabSPaul E. McKenney * cause a subsequent poll_state_synchronize_rcu_full() to return @true,
38564102adabSPaul E. McKenney * then the root rcu_node structure is the one that needs to be polled.
38574102adabSPaul E. McKenney */
38584102adabSPaul E. McKenney bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
38594102adabSPaul E. McKenney {
38604102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root();
3861dd46a788SPaul E. McKenney
38624102adabSPaul E. McKenney smp_mb(); // Order against root rcu_node structure grace-period cleanup.
38634102adabSPaul E. McKenney if (rgosp->rgos_norm == RCU_GET_STATE_COMPLETED ||
3864dd46a788SPaul E. McKenney rcu_seq_done_exact(&rnp->gp_seq, rgosp->rgos_norm) ||
38654102adabSPaul E. McKenney rgosp->rgos_exp == RCU_GET_STATE_COMPLETED ||
38668344b871SPaul E. McKenney rcu_seq_done_exact(&rcu_state.expedited_sequence, rgosp->rgos_exp)) {
38678344b871SPaul E. McKenney smp_mb(); /* Ensure GP ends before subsequent accesses. */
38684102adabSPaul E. McKenney return true;
38694102adabSPaul E. McKenney }
38704102adabSPaul E. McKenney return false;
3871dd46a788SPaul E. McKenney }
3872dd46a788SPaul E. McKenney EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu_full);
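/*
 * For illustration, a minimal sketch of hypothetical caller code using the
 * full-sized cookie, which also gives credit for expedited grace periods
 * ("obj" and its unpublication are hypothetical):
 *
 *	struct rcu_gp_oldstate gos;
 *
 *	// ... remove obj from all RCU-visible structures ...
 *	get_state_synchronize_rcu_full(&gos);
 *	// ... later ...
 *	if (poll_state_synchronize_rcu_full(&gos))
 *		kfree(obj);	// Otherwise, retry on a later pass.
 */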
3873aa24f937SPaul E. McKenney
3874aa24f937SPaul E. McKenney /**
3875aa24f937SPaul E. McKenney * cond_synchronize_rcu - Conditionally wait for an RCU grace period
3876aa24f937SPaul E. McKenney * @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited()
3877aa24f937SPaul E. McKenney *
3878aa24f937SPaul E. McKenney * If a full RCU grace period has elapsed since the earlier call to
38794102adabSPaul E. McKenney * get_state_synchronize_rcu() or start_poll_synchronize_rcu(), just return.
38804102adabSPaul E. McKenney * Otherwise, invoke synchronize_rcu() to wait for a full grace period.
38814102adabSPaul E. McKenney *
3882aa24f937SPaul E. McKenney * Yes, this function does not take counter wrap into account.
3883aa24f937SPaul E. McKenney * But counter wrap is harmless. If the counter wraps, we have waited for
3884ec9f5835SPaul E. McKenney * more than 2 billion grace periods (and way more on a 64-bit system!),
3885aa24f937SPaul E. McKenney * so waiting for a couple of additional grace periods should be just fine.
3886ec9f5835SPaul E. McKenney *
38874102adabSPaul E. McKenney * This function provides the same memory-ordering guarantees that
3888aa24f937SPaul E. McKenney * would be provided by a synchronize_rcu() that was invoked at the call
38894102adabSPaul E. McKenney * to the function that provided @oldstate and that returned at the end
38904102adabSPaul E. McKenney * of this function.
38914102adabSPaul E. McKenney */
38924102adabSPaul E. McKenney void cond_synchronize_rcu(unsigned long oldstate)
3893a16578ddSPaul E. McKenney {
38944102adabSPaul E. McKenney if (!poll_state_synchronize_rcu(oldstate))
3895a16578ddSPaul E. McKenney synchronize_rcu();
38964102adabSPaul E. McKenney }
3897a16578ddSPaul E. McKenney EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
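/*
 * For illustration, a minimal sketch of hypothetical caller code: record a
 * cookie when an object is retired, then pay for a grace period only if
 * one has not already elapsed by the time the object is recycled ("obj",
 * its ->gp_cookie field, and reinit_object() are hypothetical):
 *
 *	obj->gp_cookie = get_state_synchronize_rcu();	// At retirement.
 *	// ... much later, just before reuse ...
 *	cond_synchronize_rcu(obj->gp_cookie);	// Often a no-op by now.
 *	reinit_object(obj);
 */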
3898a16578ddSPaul E. McKenney
38994102adabSPaul E. McKenney /**
390080b3fd47SPaul E. McKenney * cond_synchronize_rcu_full - Conditionally wait for an RCU grace period
3901a16578ddSPaul E. McKenney * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full()
3902a16578ddSPaul E. McKenney *
3903dd46a788SPaul E. McKenney * If a full RCU grace period has elapsed since the call to
3904f92c734fSPaul E. McKenney * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(),
3905f92c734fSPaul E. McKenney * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was
39065d6742b3SPaul E. McKenney * obtained, just return. Otherwise, invoke synchronize_rcu() to wait
3907d1b222c6SPaul E. McKenney * for a full grace period.
390877a40f97SJoel Fernandes (Google) *
3909ec9f5835SPaul E. McKenney * Yes, this function does not take counter wrap into account.
3910f92c734fSPaul E. McKenney * But counter wrap is harmless. If the counter wraps, we have waited for
3911f92c734fSPaul E. McKenney * more than 2 billion grace periods (and way more on a 64-bit system!),
3912a16578ddSPaul E. McKenney * so waiting for a couple of additional grace periods should be just fine.
3913f92c734fSPaul E. McKenney *
39145d6742b3SPaul E. McKenney * This function provides the same memory-ordering guarantees that
3915a16578ddSPaul E. McKenney * would be provided by a synchronize_rcu() that was invoked at the call
3916a16578ddSPaul E. McKenney * to the function that provided @rgosp and that returned at the end of
3917a16578ddSPaul E. McKenney * this function.
3918a16578ddSPaul E. McKenney */
3919a16578ddSPaul E. McKenney void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
3920a16578ddSPaul E. McKenney {
3921a16578ddSPaul E. McKenney if (!poll_state_synchronize_rcu_full(rgosp))
3922a16578ddSPaul E. McKenney synchronize_rcu();
3923a16578ddSPaul E. McKenney }
3924a16578ddSPaul E. McKenney EXPORT_SYMBOL_GPL(cond_synchronize_rcu_full);
3925a16578ddSPaul E. McKenney
3926a16578ddSPaul E. McKenney /*
3927a16578ddSPaul E. McKenney * Check to see if there is any immediate RCU-related work to be done by
3928a16578ddSPaul E. McKenney * the current CPU, returning 1 if so and zero otherwise. The checks are
392980b3fd47SPaul E. McKenney * in order of increasing expense: checks that can be carried out against
3930a16578ddSPaul E. McKenney * CPU-local state are performed first. However, we must check for CPU
393180b3fd47SPaul E. McKenney * stalls first, else we might not get a chance.
39324102adabSPaul E. McKenney */
39334102adabSPaul E. McKenney static int rcu_pending(int user)
3934dd46a788SPaul E. McKenney {
3935dd46a788SPaul E. McKenney bool gp_in_progress;
3936dd46a788SPaul E. McKenney struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
3937dd46a788SPaul E. McKenney struct rcu_node *rnp = rdp->mynode;
3938dd46a788SPaul E. McKenney
3939dd46a788SPaul E. McKenney lockdep_assert_irqs_disabled();
3940dd46a788SPaul E. McKenney
39414102adabSPaul E. McKenney /* Check for CPU stalls, if enabled. */
3942dd46a788SPaul E. McKenney check_cpu_stall(rdp);
39434102adabSPaul E. McKenney
3944127e2981SPaul E. McKenney /* Does this CPU need a deferred NOCB wakeup? */
3945a16578ddSPaul E. McKenney if (rcu_nocb_need_deferred_wakeup(rdp, RCU_NOCB_WAKE))
3946a16578ddSPaul E. McKenney return 1;
39474102adabSPaul E. McKenney
3948ec9f5835SPaul E. McKenney /* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */
39494102adabSPaul E. McKenney if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu())
3950dd46a788SPaul E. McKenney return 0;
39514102adabSPaul E. McKenney
39524102adabSPaul E. McKenney /* Is the RCU core waiting for a quiescent state from this CPU? */
3953ec9f5835SPaul E. McKenney gp_in_progress = rcu_gp_in_progress();
39544102adabSPaul E. McKenney if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress)
39554f525a52SPaul E. McKenney return 1;
3956ec9f5835SPaul E. McKenney
39570cabb47aSPaul E. McKenney /* Does this CPU have callbacks ready to invoke? */
39584102adabSPaul E. McKenney if (!rcu_rdp_is_offloaded(rdp) &&
3959ec9f5835SPaul E. McKenney rcu_segcblist_ready_cbs(&rdp->cblist))
39604102adabSPaul E. McKenney return 1;
39614102adabSPaul E. McKenney
39624102adabSPaul E. McKenney /* Has RCU gone idle with this CPU needing another grace period? */
39634f525a52SPaul E. McKenney if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
396480b3fd47SPaul E. McKenney !rcu_rdp_is_offloaded(rdp) &&
3965ec9f5835SPaul E. McKenney !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
3966a16578ddSPaul E. McKenney return 1;
3967dd46a788SPaul E. McKenney
39684102adabSPaul E. McKenney /* Have RCU grace period completed or started? */
39694102adabSPaul E. McKenney if (rcu_seq_current(&rnp->gp_seq) != rdp->gp_seq ||
3970127e2981SPaul E. McKenney unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */
3971127e2981SPaul E. McKenney return 1;
3972127e2981SPaul E. McKenney
3973127e2981SPaul E. McKenney /* nothing to do */
3974127e2981SPaul E. McKenney return 0;
39754102adabSPaul E. McKenney }
3976ec9f5835SPaul E. McKenney
3977127e2981SPaul E. McKenney /*
397880b3fd47SPaul E. McKenney * Helper function for rcu_barrier() tracing. If tracing is disabled,
39794102adabSPaul E. McKenney * the compiler is expected to optimize this away.
39804102adabSPaul E. McKenney */
39814102adabSPaul E. McKenney static void rcu_barrier_trace(const char *s, int cpu, unsigned long done)
39824102adabSPaul E. McKenney {
39834102adabSPaul E. McKenney trace_rcu_barrier(rcu_state.name, s, cpu,
39844102adabSPaul E. McKenney atomic_read(&rcu_state.barrier_cpu_count), done);
39854102adabSPaul E. McKenney }
3986da1df50dSPaul E. McKenney
3987a16578ddSPaul E. McKenney /*
3988a16578ddSPaul E. McKenney * RCU callback function for rcu_barrier(). If we are last, wake
3989ce5215c1SPaul E. McKenney * up the task executing rcu_barrier().
399080b3fd47SPaul E. McKenney *
39910cabb47aSPaul E. McKenney * Note that the value of rcu_state.barrier_sequence must be captured
3992a16578ddSPaul E. McKenney * before the atomic_dec_and_test(). Otherwise, if this CPU is not last,
399380b3fd47SPaul E. McKenney * other CPUs might count the value down to zero before this CPU gets
39940cabb47aSPaul E. McKenney * around to invoking rcu_barrier_trace(), which might result in bogus
3995ce5215c1SPaul E. McKenney * data from the next instance of rcu_barrier().
39964102adabSPaul E. McKenney */
3997a16578ddSPaul E. McKenney static void rcu_barrier_callback(struct rcu_head *rhp)
3998a16578ddSPaul E. McKenney {
3999a16578ddSPaul E. McKenney unsigned long __maybe_unused s = rcu_state.barrier_sequence;
400080b3fd47SPaul E. McKenney
40010cabb47aSPaul E. McKenney if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {
4002a16578ddSPaul E. McKenney rcu_barrier_trace(TPS("LastCB"), -1, s);
40034102adabSPaul E. McKenney complete(&rcu_state.barrier_completion);
400480b3fd47SPaul E. McKenney } else {
4005a16578ddSPaul E. McKenney rcu_barrier_trace(TPS("CB"), -1, s);
4006a16578ddSPaul E. McKenney }
4007a16578ddSPaul E. McKenney }
4008a16578ddSPaul E. McKenney
4009a16578ddSPaul E. McKenney /*
4010a16578ddSPaul E. McKenney * If needed, entrain an rcu_barrier() callback on rdp->cblist.
40114102adabSPaul E. McKenney */
40124102adabSPaul E. McKenney static void rcu_barrier_entrain(struct rcu_data *rdp)
40134102adabSPaul E. McKenney {
40144102adabSPaul E. McKenney unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
40154102adabSPaul E. McKenney unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
40164102adabSPaul E. McKenney bool wake_nocb = false;
4017127e2981SPaul E. McKenney bool was_alldone = false;
4018ec9f5835SPaul E. McKenney
40194102adabSPaul E. McKenney lockdep_assert_held(&rcu_state.barrier_lock);
40204102adabSPaul E. McKenney if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
4021ec9f5835SPaul E. McKenney return;
40224102adabSPaul E. McKenney rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
40234f525a52SPaul E. McKenney rdp->barrier_head.func = rcu_barrier_callback;
4024dd46a788SPaul E. McKenney debug_rcu_head_queue(&rdp->barrier_head);
4025ec9f5835SPaul E. McKenney rcu_nocb_lock(rdp);
4026a16578ddSPaul E. McKenney /*
4027a16578ddSPaul E. McKenney * Flush the bypass list and wake up rcuog if we add callbacks to an empty
4028a16578ddSPaul E. McKenney * regular queue. This way we don't wait for the bypass timer, which can
4029a16578ddSPaul E. McKenney * reach seconds if the bypass is fully lazy.
4030a16578ddSPaul E. McKenney */
4031a16578ddSPaul E. McKenney was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
40324f525a52SPaul E. McKenney WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
40334102adabSPaul E. McKenney wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
4034ec9f5835SPaul E. McKenney if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
40354102adabSPaul E. McKenney atomic_inc(&rcu_state.barrier_cpu_count);
403645975c7dSPaul E. McKenney } else {
40374102adabSPaul E. McKenney debug_rcu_head_unqueue(&rdp->barrier_head);
40384102adabSPaul E. McKenney rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
40390aa04b05SPaul E. McKenney }
40400aa04b05SPaul E. McKenney rcu_nocb_unlock(rdp);
4041a616aec9SIngo Molnar if (wake_nocb)
40420aa04b05SPaul E. McKenney wake_nocb_gp(rdp, false);
40430aa04b05SPaul E. McKenney smp_store_release(&rdp->barrier_seq_snap, gseq);
40440aa04b05SPaul E. McKenney }
40450aa04b05SPaul E. McKenney
40460aa04b05SPaul E. McKenney /*
40478d672fa6SPaul E. McKenney * Called with preemption disabled, and from cross-cpu IRQ context.
40480aa04b05SPaul E. McKenney */
40490aa04b05SPaul E. McKenney static void rcu_barrier_handler(void *cpu_in)
40508d672fa6SPaul E. McKenney {
4051962aff03SPaul E. McKenney uintptr_t cpu = (uintptr_t)cpu_in;
40520aa04b05SPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
40530aa04b05SPaul E. McKenney
40540aa04b05SPaul E. McKenney lockdep_assert_irqs_disabled();
40550aa04b05SPaul E. McKenney WARN_ON_ONCE(cpu != rdp->cpu);
40560aa04b05SPaul E. McKenney WARN_ON_ONCE(cpu != smp_processor_id());
40576cf10081SPaul E. McKenney raw_spin_lock(&rcu_state.barrier_lock);
40588d672fa6SPaul E. McKenney rcu_barrier_entrain(rdp);
40590aa04b05SPaul E. McKenney raw_spin_unlock(&rcu_state.barrier_lock);
406067c583a7SBoqun Feng }
40618d672fa6SPaul E. McKenney
40628d672fa6SPaul E. McKenney /**
40630aa04b05SPaul E. McKenney * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
40640aa04b05SPaul E. McKenney *
40650aa04b05SPaul E. McKenney * Note that this primitive does not necessarily wait for an RCU grace period
40660aa04b05SPaul E. McKenney * to complete. For example, if there are no RCU callbacks queued anywhere
40674102adabSPaul E. McKenney * in the system, then rcu_barrier() is within its rights to return
40684102adabSPaul E. McKenney * immediately, without waiting for anything, much less an RCU grace period.
40694102adabSPaul E. McKenney */
407053b46303SPaul E. McKenney void rcu_barrier(void)
40714102adabSPaul E. McKenney {
4072904e600eSFrederic Weisbecker uintptr_t cpu;
4073da1df50dSPaul E. McKenney unsigned long flags;
40744102adabSPaul E. McKenney unsigned long gseq;
40754102adabSPaul E. McKenney struct rcu_data *rdp;
4076bc75e999SMark Rutland unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
4077a657f261SPaul E. McKenney
4078904e600eSFrederic Weisbecker rcu_barrier_trace(TPS("Begin"), -1, s);
407962e2412dSFrederic Weisbecker
4080a16578ddSPaul E. McKenney /* Take mutex to serialize concurrent rcu_barrier() requests. */
408153b46303SPaul E. McKenney mutex_lock(&rcu_state.barrier_mutex);
408257738942SPaul E. McKenney
408353b46303SPaul E. McKenney /* Did someone else do our work for us? */
408457738942SPaul E. McKenney if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
4085c708b08cSPaul E. McKenney rcu_barrier_trace(TPS("EarlyExit"), -1, rcu_state.barrier_sequence);
40864102adabSPaul E. McKenney smp_mb(); /* caller's subsequent code after above check. */
40874102adabSPaul E. McKenney mutex_unlock(&rcu_state.barrier_mutex);
40884102adabSPaul E. McKenney return;
40894102adabSPaul E. McKenney }
40904102adabSPaul E. McKenney
409153b46303SPaul E. McKenney /* Mark the start of the barrier operation. */
409253b46303SPaul E. McKenney raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
409353b46303SPaul E. McKenney rcu_seq_start(&rcu_state.barrier_sequence);
409453b46303SPaul E. McKenney gseq = rcu_state.barrier_sequence;
4095ff3bb6f4SPaul E. McKenney rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
4096ff3bb6f4SPaul E. McKenney
4097e83e73f5SPaul E. McKenney /*
4098e83e73f5SPaul E. McKenney * Initialize the count to two rather than to zero in order
40994102adabSPaul E. McKenney * to avoid a too-soon return to zero in case of an immediate
410053b46303SPaul E. McKenney * invocation of the just-enqueued callback (or preemption of
41014102adabSPaul E. McKenney * this task). Exclude CPU-hotplug operations to ensure that no
41024102adabSPaul E. McKenney * offline non-offloaded CPU has callbacks queued.
4103904e600eSFrederic Weisbecker */
4104da1df50dSPaul E. McKenney init_completion(&rcu_state.barrier_completion);
4105336a4f6cSPaul E. McKenney atomic_set(&rcu_state.barrier_cpu_count, 2);
41064102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
41074102adabSPaul E. McKenney
41086cf10081SPaul E. McKenney /*
41094102adabSPaul E. McKenney * Force each CPU with callbacks to register a new callback.
41102431774fSPaul E. McKenney * When that callback is invoked, we will know that all of the
41114102adabSPaul E. McKenney * corresponding CPU's preceding callbacks have been invoked.
4112904e600eSFrederic Weisbecker */
411367c583a7SBoqun Feng for_each_possible_cpu(cpu) {
4114ec711bc1SFrederic Weisbecker rdp = per_cpu_ptr(&rcu_data, cpu);
4115126d9d49SFrederic Weisbecker retry:
4116ec711bc1SFrederic Weisbecker if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
4117ec711bc1SFrederic Weisbecker continue;
4118126d9d49SFrederic Weisbecker raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
4119ec711bc1SFrederic Weisbecker if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
4120126d9d49SFrederic Weisbecker WRITE_ONCE(rdp->barrier_seq_snap, gseq);
41214102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
41220aa04b05SPaul E. McKenney rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
41230aa04b05SPaul E. McKenney continue;
41240aa04b05SPaul E. McKenney }
41250aa04b05SPaul E. McKenney if (!rcu_rdp_cpu_online(rdp)) {
41260aa04b05SPaul E. McKenney rcu_barrier_entrain(rdp);
41274102adabSPaul E. McKenney WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
41282a67e741SPeter Zijlstra raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
4129b9585e94SPaul E. McKenney rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
41308ff37290SPaul E. McKenney continue;
41318ff37290SPaul E. McKenney }
41325b74c458SPaul E. McKenney raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
413397c668b8SPaul E. McKenney if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) {
41349b9500daSPaul E. McKenney schedule_timeout_uninterruptible(1);
41357a9f50a0SPeter Zijlstra goto retry;
41368ff37290SPaul E. McKenney }
413753b46303SPaul E. McKenney WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
413867c583a7SBoqun Feng rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
41393ef5a1c3SPaul E. McKenney }
4140ad368d15SPaul E. McKenney
4141ed73860cSNeeraj Upadhyay /*
41424df83742SThomas Gleixner * Now that we have an rcu_barrier_callback() callback on each
41434df83742SThomas Gleixner * CPU, and thus each counted, remove the initial count.
41444df83742SThomas Gleixner */
41454df83742SThomas Gleixner if (atomic_sub_and_test(2, &rcu_state.barrier_cpu_count))
4146deb34f36SPaul E. McKenney complete(&rcu_state.barrier_completion);
4147deb34f36SPaul E. McKenney
4148deb34f36SPaul E. McKenney /* Wait for all rcu_barrier_callback() callbacks to be invoked. */
41494df83742SThomas Gleixner wait_for_completion(&rcu_state.barrier_completion);
41504df83742SThomas Gleixner
4151da1df50dSPaul E. McKenney /* Mark the end of the barrier operation. */
41524df83742SThomas Gleixner rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
41534df83742SThomas Gleixner rcu_seq_end(&rcu_state.barrier_sequence);
41544df83742SThomas Gleixner gseq = rcu_state.barrier_sequence;
41554df83742SThomas Gleixner for_each_possible_cpu(cpu) {
4156deb34f36SPaul E. McKenney rdp = per_cpu_ptr(&rcu_data, cpu);
4157deb34f36SPaul E. McKenney
4158deb34f36SPaul E. McKenney WRITE_ONCE(rdp->barrier_seq_snap, gseq);
4159deb34f36SPaul E. McKenney }
41604df83742SThomas Gleixner
41614df83742SThomas Gleixner /* Other rcu_barrier() invocations can now safely proceed. */
41629b9500daSPaul E. McKenney mutex_unlock(&rcu_state.barrier_mutex);
41639b9500daSPaul E. McKenney }
41649b9500daSPaul E. McKenney EXPORT_SYMBOL_GPL(rcu_barrier);
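/*
 * For illustration, a minimal module-exit sketch (foo_exit() and the foo
 * interfaces are hypothetical): a module whose callbacks were posted via
 * call_rcu() must wait for them to be invoked before the module text that
 * implements those callbacks can be unloaded:
 *
 *	static void __exit foo_exit(void)
 *	{
 *		// First prevent new call_rcu() invocations, for example by
 *		// unregistering the interfaces that post foo's callbacks.
 *		unregister_foo_interfaces();
 *		// Then wait for all already-posted callbacks to be invoked.
 *		rcu_barrier();
 *		// Now it is safe to free foo's data structures and return.
 *	}
 */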
41659b9500daSPaul E. McKenney
4166da1df50dSPaul E. McKenney /*
41679b9500daSPaul E. McKenney * Compute the mask of online CPUs for the specified rcu_node structure.
41689b9500daSPaul E. McKenney * This will not be stable unless the rcu_node structure's ->lock is
41699b9500daSPaul E. McKenney * held, but the bit corresponding to the current CPU will be stable
41709b9500daSPaul E. McKenney * in most contexts.
41719b9500daSPaul E. McKenney */
41729b9500daSPaul E. McKenney static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
41739b9500daSPaul E. McKenney {
41749b9500daSPaul E. McKenney return READ_ONCE(rnp->qsmaskinitnext);
417596926686SPaul E. McKenney }
417696926686SPaul E. McKenney
417796926686SPaul E. McKenney /*
41784df83742SThomas Gleixner * Is the CPU corresponding to the specified rcu_data structure online
41794df83742SThomas Gleixner * from RCU's perspective? This perspective is given by that structure's
41804df83742SThomas Gleixner * ->qsmaskinitnext field rather than by the global cpu_online_mask.
4181deb34f36SPaul E. McKenney */
4182deb34f36SPaul E. McKenney static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
4183deb34f36SPaul E. McKenney {
4184deb34f36SPaul E. McKenney return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode));
41854df83742SThomas Gleixner }
41864df83742SThomas Gleixner
41879b9500daSPaul E. McKenney bool rcu_cpu_online(int cpu)
41889b9500daSPaul E. McKenney {
41899b9500daSPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
41909b9500daSPaul E. McKenney
4191da1df50dSPaul E. McKenney return rcu_rdp_cpu_online(rdp);
41929b9500daSPaul E. McKenney }
41939b9500daSPaul E. McKenney
41949b9500daSPaul E. McKenney #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
41959b9500daSPaul E. McKenney
41969b9500daSPaul E. McKenney /*
41974df83742SThomas Gleixner * Is the current CPU online as far as RCU is concerned?
419896926686SPaul E. McKenney *
419996926686SPaul E. McKenney * Disable preemption to avoid false positives that could otherwise
420096926686SPaul E. McKenney * happen due to the current CPU number being sampled, this task being
42014df83742SThomas Gleixner * preempted, its old CPU being taken offline, resuming on some other CPU,
42024df83742SThomas Gleixner * then determining that its old CPU is now offline.
42034df83742SThomas Gleixner *
42047ec99de3SPaul E. McKenney * Disable checking if in an NMI handler because we cannot safely
42057ec99de3SPaul E. McKenney * report errors from NMI handlers anyway. In addition, it is OK to use
42067ec99de3SPaul E. McKenney * RCU on an offline processor during initial boot, hence the check for
42077ec99de3SPaul E. McKenney * rcu_scheduler_fully_active.
42087ec99de3SPaul E. McKenney */
42097ec99de3SPaul E. McKenney bool rcu_lockdep_current_cpu_online(void)
4210deb34f36SPaul E. McKenney {
4211deb34f36SPaul E. McKenney struct rcu_data *rdp;
4212deb34f36SPaul E. McKenney bool ret = false;
4213deb34f36SPaul E. McKenney
42147ec99de3SPaul E. McKenney if (in_nmi() || !rcu_scheduler_fully_active)
42157ec99de3SPaul E. McKenney return true;
42167ec99de3SPaul E. McKenney preempt_disable_notrace();
42177ec99de3SPaul E. McKenney rdp = this_cpu_ptr(&rcu_data);
42187ec99de3SPaul E. McKenney /*
42197ec99de3SPaul E. McKenney * Strictly, we care here about the case where the current CPU is
42207ec99de3SPaul E. McKenney * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
4221abfce041SWei Yang * not being up to date. So arch_spin_is_locked() might have a
42227ec99de3SPaul E. McKenney * false positive if it's held by some *other* CPU, but that's
4223da1df50dSPaul E. McKenney * OK because that just means a false *negative* on the warning.
4224c0f97f20SPaul E. McKenney */
4225c0f97f20SPaul E. McKenney if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock))
4226c0f97f20SPaul E. McKenney ret = true;
4227c0f97f20SPaul E. McKenney preempt_enable_notrace();
42287ec99de3SPaul E. McKenney return ret;
42297ec99de3SPaul E. McKenney }
423082980b16SDavid Woodhouse EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
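/*
 * For illustration, a minimal sketch of the sort of debug assertion this
 * helper is intended to back (the message text is hypothetical):
 *
 *	RCU_LOCKDEP_WARN(!rcu_lockdep_current_cpu_online(),
 *			 "RCU used on an offline CPU");
 */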
423182980b16SDavid Woodhouse
42322caebefbSPaul E. McKenney #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
423380b3fd47SPaul E. McKenney
423482980b16SDavid Woodhouse // Has rcu_init() been invoked? This is used (for example) to determine
4235105abf82SPaul E. McKenney // whether spinlocks may be acquired safely.
423680b3fd47SPaul E. McKenney static bool rcu_init_invoked(void)
4237abfce041SWei Yang {
42387ec99de3SPaul E. McKenney return !!rcu_state.n_online_cpus;
4239313517fcSPaul E. McKenney }
4240abfce041SWei Yang
42412f084695SPaul E. McKenney /*
4242e05121baSPaul E. McKenney * Near the end of the offline process. Trace the fact that this CPU
4243eb7a6653SPaul E. McKenney * is going offline.
4244eb7a6653SPaul E. McKenney */
42459f866dacSJoel Fernandes (Google) int rcutree_dying_cpu(unsigned int cpu)
42469f866dacSJoel Fernandes (Google) {
42479f866dacSJoel Fernandes (Google) bool blkd;
424882980b16SDavid Woodhouse struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
424982980b16SDavid Woodhouse struct rcu_node *rnp = rdp->mynode;
425082980b16SDavid Woodhouse
425182980b16SDavid Woodhouse if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
4252516e5ae0SJoel Fernandes (Google) return 0;
425399990da1SPaul E. McKenney
425482980b16SDavid Woodhouse blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
425599990da1SPaul E. McKenney trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
425682980b16SDavid Woodhouse blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
42577ec99de3SPaul E. McKenney return 0;
425882980b16SDavid Woodhouse }
425982980b16SDavid Woodhouse
4260313517fcSPaul E. McKenney /*
42617ec99de3SPaul E. McKenney * All CPUs for the specified rcu_node structure have gone offline,
42627ec99de3SPaul E. McKenney * and all tasks that were preempted within an RCU read-side critical
426327d50c7eSThomas Gleixner * section while running on one of those CPUs have since exited their RCU
4264deb34f36SPaul E. McKenney * read-side critical section. Some other CPU is reporting this fact with
426553b46303SPaul E. McKenney * the specified rcu_node structure's ->lock held and interrupts disabled.
4266deb34f36SPaul E. McKenney * This function therefore goes up the tree of rcu_node structures,
4267deb34f36SPaul E. McKenney * clearing the corresponding bits in the ->qsmaskinit fields. Note that
4268deb34f36SPaul E. McKenney * the leaf rcu_node structure's ->qsmaskinit field has already been
4269deb34f36SPaul E. McKenney * updated.
4270deb34f36SPaul E. McKenney *
427127d50c7eSThomas Gleixner * This function does check that the specified rcu_node structure has
427227d50c7eSThomas Gleixner * all CPUs offline and no blocked tasks, so it is OK to invoke it
427382980b16SDavid Woodhouse * prematurely. That said, invoking it after the fact will cost you
427453b46303SPaul E. McKenney * a needless lock acquisition. So once it has done its work, don't
427553b46303SPaul E. McKenney * invoke it again.
427653b46303SPaul E. McKenney */
427727d50c7eSThomas Gleixner static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
4278147c6852SPaul E. McKenney {
4279147c6852SPaul E. McKenney long mask;
4280147c6852SPaul E. McKenney struct rcu_node *rnp = rnp_leaf;
428149918a54SPaul E. McKenney
4282768f5d50SPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp_leaf);
42833e310098SPaul E. McKenney if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
428453b46303SPaul E. McKenney WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
428553b46303SPaul E. McKenney WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
428653b46303SPaul E. McKenney return;
428782980b16SDavid Woodhouse for (;;) {
428882980b16SDavid Woodhouse mask = rnp->grpmask;
428953b46303SPaul E. McKenney rnp = rnp->parent;
429053b46303SPaul E. McKenney if (!rnp)
429153b46303SPaul E. McKenney break;
429253b46303SPaul E. McKenney raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
429353b46303SPaul E. McKenney rnp->qsmaskinit &= ~mask;
4294e2bb1288SZqiang /* Between grace periods, so better already be zero! */
429553b46303SPaul E. McKenney WARN_ON_ONCE(rnp->qsmask);
429653b46303SPaul E. McKenney if (rnp->qsmaskinit) {
429753b46303SPaul E. McKenney raw_spin_unlock_rcu_node(rnp);
4298105abf82SPaul E. McKenney /* irqs remain disabled. */
429953b46303SPaul E. McKenney return;
430082980b16SDavid Woodhouse }
430182980b16SDavid Woodhouse raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
4302f64c6013SPeter Zijlstra }
4303c0f97f20SPaul E. McKenney }
430427d50c7eSThomas Gleixner
4305a58163d8SPaul E. McKenney /*
430604e613deSWill Deacon * The CPU has been completely removed, and some other CPU is reporting
430753b46303SPaul E. McKenney * this fact from process context. Do the remainder of the cleanup.
430853b46303SPaul E. McKenney * There can only be one CPU hotplug operation at a time, so no need for
430953b46303SPaul E. McKenney * explicit locking.
431053b46303SPaul E. McKenney */
431153b46303SPaul E. McKenney int rcutree_dead_cpu(unsigned int cpu)
431253b46303SPaul E. McKenney {
4313a58163d8SPaul E. McKenney if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
4314a58163d8SPaul E. McKenney return 0;
4315b1a2d79fSPaul E. McKenney
4316c00045beSPaul E. McKenney WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
4317da1df50dSPaul E. McKenney // Stop-machine done, so allow nohz_full to disable tick.
4318ec4eacceSPaul E. McKenney tick_dep_clear(TICK_DEP_BIT_RCU);
4319a58163d8SPaul E. McKenney return 0;
43203820b513SFrederic Weisbecker }
4321ce5215c1SPaul E. McKenney
432295335c03SPaul E. McKenney /*
432395335c03SPaul E. McKenney * Propagate ->qsmaskinit bits up the rcu_node tree to account for the
432480b3fd47SPaul E. McKenney * first CPU in a given leaf rcu_node structure coming online. The caller
4325a16578ddSPaul E. McKenney * must hold the corresponding leaf rcu_node ->lock with interrupts
4326a16578ddSPaul E. McKenney * disabled.
4327da1df50dSPaul E. McKenney */
4328c00045beSPaul E. McKenney static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
43295d6742b3SPaul E. McKenney {
4330d1b222c6SPaul E. McKenney long mask;
4331c00045beSPaul E. McKenney long oldmask;
4332ec4eacceSPaul E. McKenney struct rcu_node *rnp = rnp_leaf;
4333c00045beSPaul E. McKenney
4334c00045beSPaul E. McKenney raw_lockdep_assert_held_rcu_node(rnp_leaf);
4335f2dbe4a5SPaul E. McKenney WARN_ON_ONCE(rnp->wait_blkd_tasks);
433680b3fd47SPaul E. McKenney for (;;) {
433723651d9bSPaul E. McKenney mask = rnp->grpmask;
4338c035280fSPaul E. McKenney rnp = rnp->parent;
4339a16578ddSPaul E. McKenney if (rnp == NULL)
434052c1d81eSZqiang return;
43413820b513SFrederic Weisbecker raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
43425d6742b3SPaul E. McKenney oldmask = rnp->qsmaskinit;
43435d6742b3SPaul E. McKenney rnp->qsmaskinit |= mask;
43445d6742b3SPaul E. McKenney raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */
43455d6742b3SPaul E. McKenney if (oldmask)
4346c00045beSPaul E. McKenney return;
43475d6742b3SPaul E. McKenney }
4348ec4eacceSPaul E. McKenney }
4349532c00c9SPaul E. McKenney
43505d6742b3SPaul E. McKenney /*
4351a58163d8SPaul E. McKenney * Do boot-time initialization of a CPU's per-CPU RCU data.
4352a58163d8SPaul E. McKenney */
4353a58163d8SPaul E. McKenney static void __init
4354a58163d8SPaul E. McKenney rcu_boot_init_percpu_data(int cpu)
4355a58163d8SPaul E. McKenney {
4356a58163d8SPaul E. McKenney struct context_tracking *ct = this_cpu_ptr(&context_tracking);
435727d50c7eSThomas Gleixner struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
435827d50c7eSThomas Gleixner
4359deb34f36SPaul E. McKenney /* Set up local state, ensuring consistent view of global state. */
4360deb34f36SPaul E. McKenney rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
4361deb34f36SPaul E. McKenney INIT_WORK(&rdp->strict_work, strict_work_handler);
4362deb34f36SPaul E. McKenney WARN_ON_ONCE(ct->dynticks_nesting != 1);
43634102adabSPaul E. McKenney WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)));
43644102adabSPaul E. McKenney rdp->barrier_seq_snap = rcu_state.barrier_sequence;
43654102adabSPaul E. McKenney rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
43664102adabSPaul E. McKenney rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
43674102adabSPaul E. McKenney rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
43684102adabSPaul E. McKenney rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
43695afff48bSPaul E. McKenney rdp->last_sched_clock = jiffies;
43704102adabSPaul E. McKenney rdp->cpu = cpu;
43714102adabSPaul E. McKenney rcu_boot_init_nocb_percpu_data(rdp);
43724102adabSPaul E. McKenney }
43735afff48bSPaul E. McKenney
43744102adabSPaul E. McKenney /*
43754102adabSPaul E. McKenney * Invoked early in the CPU-online process, when pretty much all services
43764102adabSPaul E. McKenney * are available. The incoming CPU is not present.
43774102adabSPaul E. McKenney *
43784102adabSPaul E. McKenney * Initializes a CPU's per-CPU RCU data. Note that only one online or
43794102adabSPaul E. McKenney * offline event can be happening at a given time. Note also that we can
43804102adabSPaul E. McKenney * accept some slop in the rcu_state.gp_seq access due to the fact that this
43819621fbeeSKalesh Singh * CPU cannot possibly have any non-offloaded RCU callbacks in flight yet.
43829621fbeeSKalesh Singh * And any offloaded callbacks are being numbered elsewhere.
43839621fbeeSKalesh Singh */
43844102adabSPaul E. McKenney int rcutree_prepare_cpu(unsigned int cpu)
43859621fbeeSKalesh Singh {
43869621fbeeSKalesh Singh unsigned long flags;
43879621fbeeSKalesh Singh struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
43889621fbeeSKalesh Singh struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
43899621fbeeSKalesh Singh struct rcu_node *rnp = rcu_get_root();
43909621fbeeSKalesh Singh
43919621fbeeSKalesh Singh /* Set up local state, ensuring consistent view of global state. */
43929621fbeeSKalesh Singh raw_spin_lock_irqsave_rcu_node(rnp, flags);
43939621fbeeSKalesh Singh rdp->qlen_last_fqs_check = 0;
43949621fbeeSKalesh Singh rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);
43959621fbeeSKalesh Singh rdp->blimit = blimit;
43969621fbeeSKalesh Singh ct->dynticks_nesting = 1; /* CPU not up, no tearing. */
43979621fbeeSKalesh Singh raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
43989621fbeeSKalesh Singh
43999621fbeeSKalesh Singh /*
44009621fbeeSKalesh Singh * Only non-NOCB CPUs that didn't have early-boot callbacks need to be
44019621fbeeSKalesh Singh * (re-)initialized.
44029621fbeeSKalesh Singh */
44039621fbeeSKalesh Singh if (!rcu_segcblist_is_enabled(&rdp->cblist))
44049621fbeeSKalesh Singh rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */
44059621fbeeSKalesh Singh
44069621fbeeSKalesh Singh /*
44079621fbeeSKalesh Singh * Add CPU to leaf rcu_node pending-online bitmask. Any needed
44089621fbeeSKalesh Singh * propagation up the rcu_node tree will happen at the beginning
44099621fbeeSKalesh Singh * of the next grace period.
44109621fbeeSKalesh Singh */
44119621fbeeSKalesh Singh rnp = rdp->mynode;
44129621fbeeSKalesh Singh raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
44139621fbeeSKalesh Singh rdp->gp_seq = READ_ONCE(rnp->gp_seq);
44149621fbeeSKalesh Singh rdp->gp_seq_needed = rdp->gp_seq;
44159621fbeeSKalesh Singh rdp->cpu_no_qs.b.norm = true;
44169621fbeeSKalesh Singh rdp->core_needs_qs = false;
44179621fbeeSKalesh Singh rdp->rcu_iw_pending = false;
44189621fbeeSKalesh Singh rdp->rcu_iw = IRQ_WORK_INIT_HARD(rcu_iw_handler);
44199621fbeeSKalesh Singh rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;
44209621fbeeSKalesh Singh trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
44219621fbeeSKalesh Singh raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
44229621fbeeSKalesh Singh rcu_spawn_one_boost_kthread(rnp);
44239621fbeeSKalesh Singh rcu_spawn_cpu_nocb_kthread(cpu);
44249621fbeeSKalesh Singh WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
44259621fbeeSKalesh Singh
44264102adabSPaul E. McKenney return 0;
442749918a54SPaul E. McKenney }
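
/*
 * Note (assumption, not taken from this file): besides being wired into
 * the CPU-hotplug state machine as its "RCU/tree:prepare" step,
 * rcutree_prepare_cpu() is invoked directly for the boot CPU from
 * rcu_init() below.  A minimal caller-side sketch, mirroring rcu_init():
 *
 *	int cpu = smp_processor_id();
 *
 *	rcutree_prepare_cpu(cpu);	// per-CPU RCU data ready
 *	rcu_cpu_starting(cpu);		// mark the CPU online to RCU
 *	rcutree_online_cpu(cpu);	// late-online processing
 */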
44284102adabSPaul E. McKenney
44294102adabSPaul E. McKenney /*
44304102adabSPaul E. McKenney * Update RCU priority boot kthread affinity for CPU-hotplug changes.
44314102adabSPaul E. McKenney */
44324102adabSPaul E. McKenney static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
4433a94844b2SPaul E. McKenney {
44344102adabSPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
44353352911fSFrederic Weisbecker
44364102adabSPaul E. McKenney rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
44379386c0b7SPaul E. McKenney }
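
/*
 * Note: rcutree_online_cpu() below passes -1 for @outgoing (no CPU
 * excluded), while rcutree_offline_cpu() passes the departing CPU's
 * number.  That rcu_boost_kthread_setaffinity() treats @outgoing as a
 * CPU to exclude is an assumption about its definition elsewhere, not
 * something visible in this file.
 */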
4438b97d23c5SPaul E. McKenney
443908543bdaSPaul E. McKenney /*
444008543bdaSPaul E. McKenney * Has the specified (known valid) CPU ever been fully online?
4441a94844b2SPaul E. McKenney */
4442a94844b2SPaul E. McKenney bool rcu_cpu_beenfullyonline(int cpu)
4443a94844b2SPaul E. McKenney {
4444a94844b2SPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
44454102adabSPaul E. McKenney
44464102adabSPaul E. McKenney return smp_load_acquire(&rdp->beenonline);
44475648d659SPaul E. McKenney }
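
/*
 * Note: the smp_load_acquire() above pairs with the smp_store_release()
 * of ->beenonline at the end of rcu_cpu_starting(), so a "true" return
 * guarantees that the onlining work done there is visible to the caller.
 * Illustrative (hypothetical) caller-side use:
 *
 *	if (!rcu_cpu_beenfullyonline(cpu))
 *		return -EAGAIN;		// hypothetical: caller retries later
 */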
44485648d659SPaul E. McKenney
44495648d659SPaul E. McKenney /*
44505648d659SPaul E. McKenney * Near the end of the CPU-online process. Pretty much all services
445167c583a7SBoqun Feng * enabled, and the CPU is now very much alive.
4452e11f1335SPeter Zijlstra */
44533352911fSFrederic Weisbecker int rcutree_online_cpu(unsigned int cpu)
44543352911fSFrederic Weisbecker {
445587c5adf0SFrederic Weisbecker unsigned long flags;
445687c5adf0SFrederic Weisbecker struct rcu_data *rdp;
445787c5adf0SFrederic Weisbecker struct rcu_node *rnp;
445887c5adf0SFrederic Weisbecker
445987c5adf0SFrederic Weisbecker rdp = per_cpu_ptr(&rcu_data, cpu);
44603352911fSFrederic Weisbecker rnp = rdp->mynode;
44618e4b1d2bSPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags);
44629621fbeeSKalesh Singh rnp->ffmask |= rdp->grpmask;
44639621fbeeSKalesh Singh raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
44644102adabSPaul E. McKenney if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
44654102adabSPaul E. McKenney return 0; /* Too early in boot for scheduler work. */
44664102adabSPaul E. McKenney sync_sched_exp_online_cleanup(cpu);
44674102adabSPaul E. McKenney rcutree_affinity_setting(cpu, -1);
44684102adabSPaul E. McKenney
446952d7e48bSPaul E. McKenney // Stop-machine done, so allow nohz_full to disable tick.
447052d7e48bSPaul E. McKenney tick_dep_clear(TICK_DEP_BIT_RCU);
447152d7e48bSPaul E. McKenney return 0;
447252d7e48bSPaul E. McKenney }
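
/*
 * Note: the tick_dep_clear(TICK_DEP_BIT_RCU) above undoes the
 * tick_dep_set(TICK_DEP_BIT_RCU) in rcutree_offline_cpu() below, so the
 * scheduler tick is forced on only while CPU-hotplug stop-machine
 * processing might need it.  Like rcutree_prepare_cpu(), this function
 * is also called directly for the boot CPU from rcu_init().
 */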
447352d7e48bSPaul E. McKenney
447452d7e48bSPaul E. McKenney /*
4475900b1028SPaul E. McKenney * Near the beginning of the process. The CPU is still very much alive
447652d7e48bSPaul E. McKenney * with pretty much all services enabled.
44774102adabSPaul E. McKenney */
44784102adabSPaul E. McKenney int rcutree_offline_cpu(unsigned int cpu)
44794102adabSPaul E. McKenney {
4480d761de8aSPaul E. McKenney unsigned long flags;
4481d761de8aSPaul E. McKenney struct rcu_data *rdp;
4482d761de8aSPaul E. McKenney struct rcu_node *rnp;
44834102adabSPaul E. McKenney
44844102adabSPaul E. McKenney rdp = per_cpu_ptr(&rcu_data, cpu);
448552d7e48bSPaul E. McKenney rnp = rdp->mynode;
4486d761de8aSPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags);
4487d761de8aSPaul E. McKenney rnp->ffmask &= ~rdp->grpmask;
4488d761de8aSPaul E. McKenney raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4489d761de8aSPaul E. McKenney
4490d761de8aSPaul E. McKenney rcutree_affinity_setting(cpu, cpu);
4491d761de8aSPaul E. McKenney
4492d761de8aSPaul E. McKenney // nohz_full CPUs need the tick for stop-machine to work quickly
4493d761de8aSPaul E. McKenney tick_dep_set(TICK_DEP_BIT_RCU);
449452d7e48bSPaul E. McKenney return 0;
449552d7e48bSPaul E. McKenney }
44964102adabSPaul E. McKenney
44974102adabSPaul E. McKenney /*
44984102adabSPaul E. McKenney * Mark the specified CPU as being online so that subsequent grace periods
449949918a54SPaul E. McKenney * (both expedited and normal) will wait on it. Note that this means that
45004102adabSPaul E. McKenney * incoming CPUs are not allowed to use RCU read-side critical sections
4501b8bb1f63SPaul E. McKenney * until this function is called. Failing to observe this restriction
45024102adabSPaul E. McKenney * will result in lockdep splats.
4503cb007102SAlexander Gordeev *
4504cb007102SAlexander Gordeev * Note that this function is special in that it is invoked directly
45053dc5dbe9SPaul E. McKenney * from the incoming CPU rather than from the cpuhp_step mechanism.
45063dc5dbe9SPaul E. McKenney * This is because this function must be invoked at a precise location.
4507199977bfSAlexander Gordeev * This incoming CPU must not have enabled interrupts yet.
4508199977bfSAlexander Gordeev */
45094102adabSPaul E. McKenney void rcu_cpu_starting(unsigned int cpu)
45104102adabSPaul E. McKenney {
45114102adabSPaul E. McKenney unsigned long mask;
45124102adabSPaul E. McKenney struct rcu_data *rdp;
45134102adabSPaul E. McKenney struct rcu_node *rnp;
451405b84aecSAlexander Gordeev bool newcpu;
45154102adabSPaul E. McKenney
45163eaaaf6cSPaul E. McKenney lockdep_assert_irqs_disabled();
45173eaaaf6cSPaul E. McKenney rdp = per_cpu_ptr(&rcu_data, cpu);
45183eaaaf6cSPaul E. McKenney if (rdp->cpu_started)
45194102adabSPaul E. McKenney return;
45204102adabSPaul E. McKenney rdp->cpu_started = true;
45214102adabSPaul E. McKenney
45224102adabSPaul E. McKenney rnp = rdp->mynode;
4523eb7a6653SPaul E. McKenney mask = rdp->grpmask;
4524eb7a6653SPaul E. McKenney arch_spin_lock(&rcu_state.ofl_lock);
452541f5c631SPaul E. McKenney rcu_dynticks_eqs_online();
45264102adabSPaul E. McKenney raw_spin_lock(&rcu_state.barrier_lock);
45274102adabSPaul E. McKenney raw_spin_lock_rcu_node(rnp);
45284102adabSPaul E. McKenney WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
45294102adabSPaul E. McKenney raw_spin_unlock(&rcu_state.barrier_lock);
4530199977bfSAlexander Gordeev newcpu = !(rnp->expmaskinitnext & mask);
4531eb7a6653SPaul E. McKenney rnp->expmaskinitnext |= mask;
453241f5c631SPaul E. McKenney /* Allow lockless access for expedited grace periods. */
453367c583a7SBoqun Feng smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + newcpu); /* ^^^ */
453467c583a7SBoqun Feng ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);
45354102adabSPaul E. McKenney rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
45364102adabSPaul E. McKenney rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
45374102adabSPaul E. McKenney rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
45384102adabSPaul E. McKenney
4539eb7a6653SPaul E. McKenney /* An incoming CPU should never be blocking a grace period. */
4540eb7a6653SPaul E. McKenney if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
4541eb7a6653SPaul E. McKenney /* rcu_report_qs_rnp() *really* wants some flags to restore */
45424102adabSPaul E. McKenney unsigned long flags;
45434102adabSPaul E. McKenney
45444102adabSPaul E. McKenney local_irq_save(flags);
45454102adabSPaul E. McKenney rcu_disable_urgency_upon_qs(rdp);
4546595f3900SHimangi Saraogi /* Report QS -after- changing ->qsmaskinitnext! */
4547595f3900SHimangi Saraogi rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
45484102adabSPaul E. McKenney } else {
45494102adabSPaul E. McKenney raw_spin_unlock_rcu_node(rnp);
45504102adabSPaul E. McKenney }
45514102adabSPaul E. McKenney arch_spin_unlock(&rcu_state.ofl_lock);
45524102adabSPaul E. McKenney smp_store_release(&rdp->beenonline, true);
4553199977bfSAlexander Gordeev smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
4554df63fa5bSPaul E. McKenney }
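
/*
 * Note (summary of the code above): rcu_state.ofl_lock is acquired first,
 * then rcu_state.barrier_lock, then the leaf rnp->lock; barrier_lock is
 * released as soon as ->qsmaskinitnext has been updated, before any
 * pending quiescent state is reported.  The final smp_store_release() of
 * ->beenonline publishes all of this to rcu_cpu_beenfullyonline().
 */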
4555eb7a6653SPaul E. McKenney
4556199977bfSAlexander Gordeev /*
45574102adabSPaul E. McKenney * The outgoing CPU has no further need of RCU, so remove it from
45584102adabSPaul E. McKenney * the rcu_node tree's ->qsmaskinitnext bit masks.
45594102adabSPaul E. McKenney *
45604102adabSPaul E. McKenney * Note that this function is special in that it is invoked directly
4561f6a12f34SPaul E. McKenney * from the outgoing CPU rather than from the cpuhp_step mechanism.
4562f6a12f34SPaul E. McKenney * This is because this function must be invoked at a precise location.
45633b5f668eSPaul E. McKenney */
45643b5f668eSPaul E. McKenney void rcu_report_dead(unsigned int cpu)
4565f6a12f34SPaul E. McKenney {
4566218b957aSDavid Woodhouse unsigned long flags, seq_flags;
4567d96c52feSPaul E. McKenney unsigned long mask;
4568d96c52feSPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
4569d96c52feSPaul E. McKenney struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
45704102adabSPaul E. McKenney
45714102adabSPaul E. McKenney // Do any dangling deferred wakeups.
45724102adabSPaul E. McKenney do_nocb_deferred_wakeup(rdp);
4573eb7a6653SPaul E. McKenney
4574eb7a6653SPaul E. McKenney rcu_preempt_deferred_qs(current);
4575aedf4ba9SPaul E. McKenney
45764102adabSPaul E. McKenney /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
45774102adabSPaul E. McKenney mask = rdp->grpmask;
45784102adabSPaul E. McKenney local_irq_save(seq_flags);
4579da1df50dSPaul E. McKenney arch_spin_lock(&rcu_state.ofl_lock);
458053b46303SPaul E. McKenney raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
45814102adabSPaul E. McKenney rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
45824102adabSPaul E. McKenney rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
45834102adabSPaul E. McKenney if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */
45844102adabSPaul E. McKenney /* Report quiescent state -before- changing ->qsmaskinitnext! */
4585c8db27ddSAlison Chaiken rcu_disable_urgency_upon_qs(rdp);
4586c8db27ddSAlison Chaiken rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
4587c8db27ddSAlison Chaiken raw_spin_lock_irqsave_rcu_node(rnp, flags);
4588c8db27ddSAlison Chaiken }
4589c8db27ddSAlison Chaiken WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
4590c8db27ddSAlison Chaiken raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4591c8db27ddSAlison Chaiken arch_spin_unlock(&rcu_state.ofl_lock);
4592c8db27ddSAlison Chaiken local_irq_restore(seq_flags);
4593c8db27ddSAlison Chaiken
4594c8db27ddSAlison Chaiken rdp->cpu_started = false;
4595c8db27ddSAlison Chaiken }
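
/*
 * Note: once rcu_report_dead() has cleared this CPU's bit in
 * ->qsmaskinitnext, later grace periods no longer wait on it; any
 * callbacks it still holds are adopted by a surviving CPU via
 * rcutree_migrate_callbacks() below.
 */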
4596c8db27ddSAlison Chaiken
4597c8db27ddSAlison Chaiken #ifdef CONFIG_HOTPLUG_CPU
4598c8db27ddSAlison Chaiken /*
4599c8db27ddSAlison Chaiken * The outgoing CPU has just passed through the dying-idle state, and we
4600c8db27ddSAlison Chaiken * are being invoked from the CPU that was IPIed to continue the offline
4601c8db27ddSAlison Chaiken * operation. Migrate the outgoing CPU's callbacks to the current CPU.
4602c8db27ddSAlison Chaiken */
4603c8db27ddSAlison Chaiken void rcutree_migrate_callbacks(int cpu)
4604c8db27ddSAlison Chaiken {
4605c8db27ddSAlison Chaiken unsigned long flags;
4606c8db27ddSAlison Chaiken struct rcu_data *my_rdp;
46074102adabSPaul E. McKenney struct rcu_node *my_rnp;
46084102adabSPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
46094102adabSPaul E. McKenney bool needwake;
46104102adabSPaul E. McKenney
4611b5befe84SFrederic Weisbecker if (rcu_rdp_is_offloaded(rdp))
46124102adabSPaul E. McKenney return;
46134102adabSPaul E. McKenney
46144102adabSPaul E. McKenney raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
4615b5befe84SFrederic Weisbecker if (rcu_segcblist_empty(&rdp->cblist)) {
461605b84aecSAlexander Gordeev raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
4617b5befe84SFrederic Weisbecker return; /* No callbacks to migrate. */
4618b5befe84SFrederic Weisbecker }
4619b5befe84SFrederic Weisbecker
4620b5befe84SFrederic Weisbecker WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
4621b5befe84SFrederic Weisbecker rcu_barrier_entrain(rdp);
4622b5befe84SFrederic Weisbecker my_rdp = this_cpu_ptr(&rcu_data);
4623b5befe84SFrederic Weisbecker my_rnp = my_rdp->mynode;
4624b5befe84SFrederic Weisbecker rcu_nocb_lock(my_rdp); /* irqs already disabled. */
4625b5befe84SFrederic Weisbecker WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
4626b5befe84SFrederic Weisbecker raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
4627b5befe84SFrederic Weisbecker /* Leverage recent GPs and set GP for new callbacks. */
4628b5befe84SFrederic Weisbecker needwake = rcu_advance_cbs(my_rnp, rdp) ||
4629b5befe84SFrederic Weisbecker rcu_advance_cbs(my_rnp, my_rdp);
46304102adabSPaul E. McKenney rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
46314102adabSPaul E. McKenney raw_spin_unlock(&rcu_state.barrier_lock); /* irqs remain disabled. */
46324102adabSPaul E. McKenney needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
46334102adabSPaul E. McKenney rcu_segcblist_disable(&rdp->cblist);
46344102adabSPaul E. McKenney WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist));
46354102adabSPaul E. McKenney check_cb_ovld_locked(my_rdp, my_rnp);
46364102adabSPaul E. McKenney if (rcu_rdp_is_offloaded(my_rdp)) {
46374102adabSPaul E. McKenney raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
46384102adabSPaul E. McKenney __call_rcu_nocb_wake(my_rdp, true, flags);
46394102adabSPaul E. McKenney } else {
46404102adabSPaul E. McKenney rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */
46414102adabSPaul E. McKenney raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
46424102adabSPaul E. McKenney }
4643c06aed0eSPaul E. McKenney local_irq_restore(flags);
46444102adabSPaul E. McKenney if (needwake)
46454102adabSPaul E. McKenney rcu_gp_kthread_wake();
464647d631afSPaul E. McKenney lockdep_assert_irqs_enabled();
46474102adabSPaul E. McKenney WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
46484102adabSPaul E. McKenney !rcu_segcblist_empty(&rdp->cblist),
4649a7538352SJoe Perches "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
465039479098SPaul E. McKenney cpu, rcu_segcblist_n_cbs(&rdp->cblist),
46514102adabSPaul E. McKenney rcu_segcblist_first_cb(&rdp->cblist));
46524102adabSPaul E. McKenney }
4653ee968ac6SPaul E. McKenney #endif
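
/*
 * Note: rcutree_migrate_callbacks() returns early for offloaded rcu_data
 * structures, whose callbacks are handled by their rcuo kthreads rather
 * than by the outgoing CPU (an assumption about the NOCB design, not
 * stated in this file).  Otherwise, the outgoing CPU's ->cblist is merged
 * into this CPU's list and then disabled, as the final WARN_ONCE() checks.
 */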
4654ee968ac6SPaul E. McKenney
4655ee968ac6SPaul E. McKenney /*
4656ee968ac6SPaul E. McKenney * On non-huge systems, use expedited RCU grace periods to make suspend
46574102adabSPaul E. McKenney * and hibernation run faster.
4658ee968ac6SPaul E. McKenney */
465975cf15a4SAlexander Gordeev static int rcu_pm_notify(struct notifier_block *self,
466013bd6494SPaul E. McKenney unsigned long action, void *hcpu)
46614102adabSPaul E. McKenney {
46624102adabSPaul E. McKenney switch (action) {
46634102adabSPaul E. McKenney case PM_HIBERNATION_PREPARE:
46644102adabSPaul E. McKenney case PM_SUSPEND_PREPARE:
466575cf15a4SAlexander Gordeev rcu_async_hurry();
46664102adabSPaul E. McKenney rcu_expedite_gp();
46679618138bSAlexander Gordeev break;
46684102adabSPaul E. McKenney case PM_POST_HIBERNATION:
46699618138bSAlexander Gordeev case PM_POST_SUSPEND:
467005b84aecSAlexander Gordeev rcu_unexpedite_gp();
46714102adabSPaul E. McKenney rcu_async_relax();
46724102adabSPaul E. McKenney break;
46734102adabSPaul E. McKenney default:
467475cf15a4SAlexander Gordeev break;
4675ee968ac6SPaul E. McKenney }
46764102adabSPaul E. McKenney return NOTIFY_OK;
4677ee968ac6SPaul E. McKenney }
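
/*
 * Note: rcu_pm_notify() is registered below in rcu_init() via
 * pm_notifier(), so suspend/hibernation entry is bracketed by
 * rcu_async_hurry()/rcu_expedite_gp() and the corresponding exit by
 * rcu_unexpedite_gp()/rcu_async_relax().
 */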
4678ee968ac6SPaul E. McKenney
4679ee968ac6SPaul E. McKenney #ifdef CONFIG_RCU_EXP_KTHREAD
4680ee968ac6SPaul E. McKenney struct kthread_worker *rcu_exp_gp_kworker;
4681ee968ac6SPaul E. McKenney struct kthread_worker *rcu_exp_par_gp_kworker;
46824102adabSPaul E. McKenney
4683679f9858SAlexander Gordeev static void __init rcu_start_exp_gp_kworkers(void)
46849618138bSAlexander Gordeev {
4685372b0ec2SAlexander Gordeev const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
46869618138bSAlexander Gordeev const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
4687679f9858SAlexander Gordeev struct sched_param param = { .sched_priority = kthread_prio };
46884102adabSPaul E. McKenney
4689679f9858SAlexander Gordeev rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
46909618138bSAlexander Gordeev if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
4691679f9858SAlexander Gordeev pr_err("Failed to create %s!\n", gp_kworker_name);
46924102adabSPaul E. McKenney rcu_exp_gp_kworker = NULL;
46934102adabSPaul E. McKenney return;
46944102adabSPaul E. McKenney }
46954102adabSPaul E. McKenney
4696679f9858SAlexander Gordeev rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
46974102adabSPaul E. McKenney if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
46984102adabSPaul E. McKenney pr_err("Failed to create %s!\n", par_gp_kworker_name);
46994102adabSPaul E. McKenney rcu_exp_par_gp_kworker = NULL;
4700a3dc2948SPaul E. McKenney kthread_destroy_worker(rcu_exp_gp_kworker);
4701a3dc2948SPaul E. McKenney rcu_exp_gp_kworker = NULL;
470249918a54SPaul E. McKenney return;
4703a3dc2948SPaul E. McKenney }
4704b8bb1f63SPaul E. McKenney
4705a3dc2948SPaul E. McKenney sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
4706a3dc2948SPaul E. McKenney sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
4707a3dc2948SPaul E. McKenney &param);
4708a3dc2948SPaul E. McKenney }
4709a3dc2948SPaul E. McKenney
4710a3dc2948SPaul E. McKenney static inline void rcu_alloc_par_gp_wq(void)
4711aedf4ba9SPaul E. McKenney {
4712a3dc2948SPaul E. McKenney }
4713a3dc2948SPaul E. McKenney #else /* !CONFIG_RCU_EXP_KTHREAD */
4714a3dc2948SPaul E. McKenney struct workqueue_struct *rcu_par_gp_wq;
4715a3dc2948SPaul E. McKenney
4716a3dc2948SPaul E. McKenney static void __init rcu_start_exp_gp_kworkers(void)
4717a3dc2948SPaul E. McKenney {
4718a3dc2948SPaul E. McKenney }
4719a3dc2948SPaul E. McKenney
4720a3dc2948SPaul E. McKenney static inline void rcu_alloc_par_gp_wq(void)
4721a3dc2948SPaul E. McKenney {
4722ad7c946bSPaul E. McKenney rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
4723ad7c946bSPaul E. McKenney WARN_ON(!rcu_par_gp_wq);
4724a35d1690SByungchul Park }
4725a35d1690SByungchul Park #endif /* CONFIG_RCU_EXP_KTHREAD */
4726a35d1690SByungchul Park
47270392bebeSJoel Fernandes (Google) /*
4728a35d1690SByungchul Park * Spawn the kthreads that handle RCU's grace periods.
4729d0bfa8b3SZhang Qiang */
4730d0bfa8b3SZhang Qiang static int __init rcu_spawn_gp_kthread(void)
4731d0bfa8b3SZhang Qiang {
4732d0bfa8b3SZhang Qiang unsigned long flags;
4733d0bfa8b3SZhang Qiang struct rcu_node *rnp;
4734d0bfa8b3SZhang Qiang struct sched_param sp;
4735d0bfa8b3SZhang Qiang struct task_struct *t;
4736d0bfa8b3SZhang Qiang struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
4737d0bfa8b3SZhang Qiang
4738d0bfa8b3SZhang Qiang rcu_scheduler_fully_active = 1;
4739d0bfa8b3SZhang Qiang t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
4740d0bfa8b3SZhang Qiang if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__))
4741a35d1690SByungchul Park return 0;
4742a35d1690SByungchul Park if (kthread_prio) {
4743a35d1690SByungchul Park sp.sched_priority = kthread_prio;
474434c88174SUladzislau Rezki (Sony) sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
474534c88174SUladzislau Rezki (Sony) }
47460392bebeSJoel Fernandes (Google) rnp = rcu_get_root();
474734c88174SUladzislau Rezki (Sony) raw_spin_lock_irqsave_rcu_node(rnp, flags);
474834c88174SUladzislau Rezki (Sony) WRITE_ONCE(rcu_state.gp_activity, jiffies);
4749a35d1690SByungchul Park WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
4750d0bfa8b3SZhang Qiang // Reset .gp_activity and .gp_req_activity before setting .gp_kthread.
4751a35d1690SByungchul Park smp_store_release(&rcu_state.gp_kthread, t); /* ^^^ */
4752a35d1690SByungchul Park raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4753e33c267aSRoman Gushchin wake_up_process(t);
47549154244cSJoel Fernandes (Google) /* This is a pre-SMP initcall, we expect a single CPU */
4755a35d1690SByungchul Park WARN_ON(num_online_cpus() > 1);
4756a35d1690SByungchul Park /*
47574102adabSPaul E. McKenney * Those kthreads could not be created earlier, from rcu_init() ->
47584102adabSPaul E. McKenney * rcutree_prepare_cpu(), because rcu_scheduler_fully_active was not yet set.
47592eed973aSFrederic Weisbecker */
47604102adabSPaul E. McKenney rcu_spawn_cpu_nocb_kthread(smp_processor_id());
476147627678SPaul E. McKenney rcu_spawn_one_boost_kthread(rdp->mynode);
476247627678SPaul E. McKenney rcu_spawn_core_kthreads();
4763a35d1690SByungchul Park /* Create kthread worker for expedited GPs */
47644102adabSPaul E. McKenney rcu_start_exp_gp_kworkers();
4765c8db27ddSAlison Chaiken return 0;
47664102adabSPaul E. McKenney }
4767b8bb1f63SPaul E. McKenney early_initcall(rcu_spawn_gp_kthread);
4768a3dc2948SPaul E. McKenney
4769b8bb1f63SPaul E. McKenney /*
477048d07c04SSebastian Andrzej Siewior * This function is invoked towards the end of the scheduler's
477148d07c04SSebastian Andrzej Siewior * initialization process. Before this is called, the idle task might
47724102adabSPaul E. McKenney * contain synchronous grace-period primitives (during which time, this idle
47734102adabSPaul E. McKenney * task is booting the system, and such primitives are no-ops). After this
47744102adabSPaul E. McKenney * function is called, any synchronous grace-period primitives are run as
47754102adabSPaul E. McKenney * expedited, with the requesting task driving the grace period forward.
47764102adabSPaul E. McKenney * A later core_initcall() rcu_set_runtime_mode() will switch to full
47774102adabSPaul E. McKenney * runtime RCU functionality.
47784102adabSPaul E. McKenney */
47792eed973aSFrederic Weisbecker void rcu_scheduler_starting(void)
47804df83742SThomas Gleixner {
47817ec99de3SPaul E. McKenney unsigned long flags;
47829b9500daSPaul E. McKenney struct rcu_node *rnp;
4783ad7c946bSPaul E. McKenney
4784277ffe1bSZhouyi Zhou WARN_ON(num_online_cpus() != 1);
4785ad7c946bSPaul E. McKenney WARN_ON(nr_context_switches() > 0);
4786ad7c946bSPaul E. McKenney rcu_test_sync_prims();
47879621fbeeSKalesh Singh
4788b2b00ddfSPaul E. McKenney // Fix up the ->gp_seq counters.
4789b2b00ddfSPaul E. McKenney local_irq_save(flags);
4790b2b00ddfSPaul E. McKenney rcu_for_each_node_breadth_first(rnp)
4791b2b00ddfSPaul E. McKenney rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq;
4792b2b00ddfSPaul E. McKenney local_irq_restore(flags);
4793b2b00ddfSPaul E. McKenney
4794b2b00ddfSPaul E. McKenney // Switch out of early boot mode.
4795d96c52feSPaul E. McKenney rcu_scheduler_active = RCU_SCHEDULER_INIT;
4796d96c52feSPaul E. McKenney rcu_test_sync_prims();
4797d96c52feSPaul E. McKenney }
4798d96c52feSPaul E. McKenney
47994102adabSPaul E. McKenney /*
48004102adabSPaul E. McKenney * Helper function for rcu_init() that initializes the rcu_state structure.
480110462d6fSPaul E. McKenney */
48023549c2bcSPaul E. McKenney static void __init rcu_init_one(void)
4803dfcb2754SFrederic Weisbecker {
48044102adabSPaul E. McKenney static const char * const buf[] = RCU_NODE_NAME_INIT;
4805 static const char * const fqs[] = RCU_FQS_NAME_INIT;
4806 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
4807 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
4808
4809 int levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
4810 int cpustride = 1;
4811 int i;
4812 int j;
4813 struct rcu_node *rnp;
4814
4815 BUILD_BUG_ON(RCU_NUM_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
4816
4817 /* Silence gcc 4.8 false positive about array index out of range. */
4818 if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS)
4819 panic("rcu_init_one: rcu_num_lvls out of range");
4820
4821 /* Initialize the level-tracking arrays. */
4822
4823 for (i = 1; i < rcu_num_lvls; i++)
4824 rcu_state.level[i] =
4825 rcu_state.level[i - 1] + num_rcu_lvl[i - 1];
4826 rcu_init_levelspread(levelspread, num_rcu_lvl);
4827
4828 /* Initialize the elements themselves, starting from the leaves. */
4829
4830 for (i = rcu_num_lvls - 1; i >= 0; i--) {
4831 cpustride *= levelspread[i];
4832 rnp = rcu_state.level[i];
4833 for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) {
4834 raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
4835 lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
4836 &rcu_node_class[i], buf[i]);
4837 raw_spin_lock_init(&rnp->fqslock);
4838 lockdep_set_class_and_name(&rnp->fqslock,
4839 &rcu_fqs_class[i], fqs[i]);
4840 rnp->gp_seq = rcu_state.gp_seq;
4841 rnp->gp_seq_needed = rcu_state.gp_seq;
4842 rnp->completedqs = rcu_state.gp_seq;
4843 rnp->qsmask = 0;
4844 rnp->qsmaskinit = 0;
4845 rnp->grplo = j * cpustride;
4846 rnp->grphi = (j + 1) * cpustride - 1;
4847 if (rnp->grphi >= nr_cpu_ids)
4848 rnp->grphi = nr_cpu_ids - 1;
4849 if (i == 0) {
4850 rnp->grpnum = 0;
4851 rnp->grpmask = 0;
4852 rnp->parent = NULL;
4853 } else {
4854 rnp->grpnum = j % levelspread[i - 1];
4855 rnp->grpmask = BIT(rnp->grpnum);
4856 rnp->parent = rcu_state.level[i - 1] +
4857 j / levelspread[i - 1];
4858 }
4859 rnp->level = i;
4860 INIT_LIST_HEAD(&rnp->blkd_tasks);
4861 rcu_init_one_nocb(rnp);
4862 init_waitqueue_head(&rnp->exp_wq[0]);
4863 init_waitqueue_head(&rnp->exp_wq[1]);
4864 init_waitqueue_head(&rnp->exp_wq[2]);
4865 init_waitqueue_head(&rnp->exp_wq[3]);
4866 spin_lock_init(&rnp->exp_lock);
4867 mutex_init(&rnp->boost_kthread_mutex);
4868 raw_spin_lock_init(&rnp->exp_poll_lock);
4869 rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED;
4870 INIT_WORK(&rnp->exp_poll_wq, sync_rcu_do_polled_gp);
4871 }
4872 }
4873
4874 init_swait_queue_head(&rcu_state.gp_wq);
4875 init_swait_queue_head(&rcu_state.expedited_wq);
4876 rnp = rcu_first_leaf_node();
4877 for_each_possible_cpu(i) {
4878 while (i > rnp->grphi)
4879 rnp++;
4880 per_cpu_ptr(&rcu_data, i)->mynode = rnp;
4881 rcu_boot_init_percpu_data(i);
4882 }
4883 }
4884
4885 /*
4886 * Force priority from the kernel command-line into range.
4887 */
4888 static void __init sanitize_kthread_prio(void)
4889 {
4890 int kthread_prio_in = kthread_prio;
4891
4892 if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
4893 && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
4894 kthread_prio = 2;
4895 else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
4896 kthread_prio = 1;
4897 else if (kthread_prio < 0)
4898 kthread_prio = 0;
4899 else if (kthread_prio > 99)
4900 kthread_prio = 99;
4901
4902 if (kthread_prio != kthread_prio_in)
4903 pr_alert("%s: Limited prio to %d from %d\n",
4904 __func__, kthread_prio, kthread_prio_in);
4905 }
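
/*
 * Worked example (derived from the checks above): booting with
 * rcutree.kthread_prio=150 clamps kthread_prio to 99 and emits a one-line
 * pr_alert() noting the adjustment; with CONFIG_RCU_BOOST=y and a
 * built-in rcutorture, a value of 0 or 1 is instead raised to 2.
 */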
4906
4907 /*
4908 * Compute the rcu_node tree geometry from kernel parameters. This cannot
4909 * replace the definitions in tree.h because those are needed to size
4910 * the ->node array in the rcu_state structure.
4911 */
4912 void rcu_init_geometry(void)
4913 {
4914 ulong d;
4915 int i;
4916 static unsigned long old_nr_cpu_ids;
4917 int rcu_capacity[RCU_NUM_LVLS];
4918 static bool initialized;
4919
4920 if (initialized) {
4921 /*
4922 * Warn if setup_nr_cpu_ids() had not yet been invoked,
4923 * unless nr_cpu_ids == NR_CPUS, in which case who cares?
4924 */
4925 WARN_ON_ONCE(old_nr_cpu_ids != nr_cpu_ids);
4926 return;
4927 }
4928
4929 old_nr_cpu_ids = nr_cpu_ids;
4930 initialized = true;
4931
4932 /*
4933 * Initialize any unspecified boot parameters.
4934 * The default values of jiffies_till_first_fqs and
4935 * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
4936 * value, which is a function of HZ, plus one jiffy for each
4937 * RCU_JIFFIES_FQS_DIV CPUs that might be on the system.
4938 */
4939 d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
4940 if (jiffies_till_first_fqs == ULONG_MAX)
4941 jiffies_till_first_fqs = d;
4942 if (jiffies_till_next_fqs == ULONG_MAX)
4943 jiffies_till_next_fqs = d;
4944 adjust_jiffies_till_sched_qs();
4945
4946 /* If the compile-time values are accurate, just leave. */
4947 if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
4948 nr_cpu_ids == NR_CPUS)
4949 return;
4950 pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
4951 rcu_fanout_leaf, nr_cpu_ids);
4952
4953 /*
4954 * The boot-time rcu_fanout_leaf parameter must be at least two
4955 * and cannot exceed the number of bits in the rcu_node masks.
4956 * Complain and fall back to the compile-time values if this
4957 * limit is exceeded.
4958 */
4959 if (rcu_fanout_leaf < 2 ||
4960 rcu_fanout_leaf > sizeof(unsigned long) * 8) {
4961 rcu_fanout_leaf = RCU_FANOUT_LEAF;
4962 WARN_ON(1);
4963 return;
4964 }
4965
4966 /*
4967 * Compute number of nodes that can be handled by an rcu_node tree
4968 * with the given number of levels.
4969 */
4970 rcu_capacity[0] = rcu_fanout_leaf;
4971 for (i = 1; i < RCU_NUM_LVLS; i++)
4972 rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;
4973
4974 /*
4975 * The tree must be able to accommodate the configured number of CPUs.
4976 * If this limit is exceeded, fall back to the compile-time values.
4977 */
4978 if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) {
4979 rcu_fanout_leaf = RCU_FANOUT_LEAF;
4980 WARN_ON(1);
4981 return;
4982 }
4983
4984 /* Calculate the number of levels in the tree. */
4985 for (i = 0; nr_cpu_ids > rcu_capacity[i]; i++) {
4986 }
4987 rcu_num_lvls = i + 1;
4988
4989 /* Calculate the number of rcu_nodes at each level of the tree. */
4990 for (i = 0; i < rcu_num_lvls; i++) {
4991 int cap = rcu_capacity[(rcu_num_lvls - 1) - i];
4992 num_rcu_lvl[i] = DIV_ROUND_UP(nr_cpu_ids, cap);
4993 }
4994
4995 /* Calculate the total number of rcu_node structures. */
4996 rcu_num_nodes = 0;
4997 for (i = 0; i < rcu_num_lvls; i++)
4998 rcu_num_nodes += num_rcu_lvl[i];
4999 }
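
/*
 * Worked example (assuming a 64-bit build where RCU_FANOUT is 64): with
 * rcu_fanout_leaf=16 and nr_cpu_ids=100, rcu_capacity[] starts {16, 1024},
 * so rcu_num_lvls=2, num_rcu_lvl[] = {1, 7}, and rcu_num_nodes=8: one root
 * rcu_node with seven leaves covering up to 16 CPUs each.
 */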
5000
5001 /*
5002 * Dump out the structure of the rcu_node combining tree associated
5003 * with the rcu_state structure.
5004 */
5005 static void __init rcu_dump_rcu_node_tree(void)
5006 {
5007 int level = 0;
5008 struct rcu_node *rnp;
5009
5010 pr_info("rcu_node tree layout dump\n");
5011 pr_info(" ");
5012 rcu_for_each_node_breadth_first(rnp) {
5013 if (rnp->level != level) {
5014 pr_cont("\n");
5015 pr_info(" ");
5016 level = rnp->level;
5017 }
5018 pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum);
5019 }
5020 pr_cont("\n");
5021 }
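
/*
 * For the two-level example geometry above (one root, seven 16-CPU leaves
 * covering 100 CPUs), the dump would look roughly like this (illustrative
 * output, not captured from a real boot):
 *
 *	rcu_node tree layout dump
 *	 0:99 ^0
 *	 0:15 ^0 16:31 ^1 32:47 ^2 48:63 ^3 64:79 ^4 80:95 ^5 96:99 ^6
 */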
5022
5023 struct workqueue_struct *rcu_gp_wq;
5024
5025 static void __init kfree_rcu_batch_init(void)
5026 {
5027 int cpu;
5028 int i, j;
5029
5030 /* Clamp it to [0:100] seconds interval. */
5031 if (rcu_delay_page_cache_fill_msec < 0 ||
5032 rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) {
5033
5034 rcu_delay_page_cache_fill_msec =
5035 clamp(rcu_delay_page_cache_fill_msec, 0,
5036 (int) (100 * MSEC_PER_SEC));
5037
5038 pr_info("Adjusting rcutree.rcu_delay_page_cache_fill_msec to %d ms.\n",
5039 rcu_delay_page_cache_fill_msec);
5040 }
5041
5042 for_each_possible_cpu(cpu) {
5043 struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
5044
5045 for (i = 0; i < KFREE_N_BATCHES; i++) {
5046 INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
5047 krcp->krw_arr[i].krcp = krcp;
5048
5049 for (j = 0; j < FREE_N_CHANNELS; j++)
5050 INIT_LIST_HEAD(&krcp->krw_arr[i].bulk_head_free[j]);
5051 }
5052
5053 for (i = 0; i < FREE_N_CHANNELS; i++)
5054 INIT_LIST_HEAD(&krcp->bulk_head[i]);
5055
5056 INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
5057 INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
5058 krcp->initialized = true;
5059 }
5060 if (register_shrinker(&kfree_rcu_shrinker, "rcu-kfree"))
5061 pr_err("Failed to register kfree_rcu() shrinker!\n");
5062 }
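
/*
 * Worked example (derived from the clamp above): booting with
 * rcutree.rcu_delay_page_cache_fill_msec=150000 reduces the value to
 * 100000 ms (100 seconds) and logs the adjustment via pr_info().
 */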
5063
5064 void __init rcu_init(void)
5065 {
5066 int cpu = smp_processor_id();
5067
5068 rcu_early_boot_tests();
5069
5070 kfree_rcu_batch_init();
5071 rcu_bootup_announce();
5072 sanitize_kthread_prio();
5073 rcu_init_geometry();
5074 rcu_init_one();
5075 if (dump_tree)
5076 rcu_dump_rcu_node_tree();
5077 if (use_softirq)
5078 open_softirq(RCU_SOFTIRQ, rcu_core_si);
5079
5080 /*
5081 * We don't need protection against CPU-hotplug here because
5082 * this is called early in boot, before either interrupts
5083 * or the scheduler are operational.
5084 */
5085 pm_notifier(rcu_pm_notify, 0);
5086 WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
5087 rcutree_prepare_cpu(cpu);
5088 rcu_cpu_starting(cpu);
5089 rcutree_online_cpu(cpu);
5090
5091 /* Create workqueue for Tree SRCU and for expedited GPs. */
5092 rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
5093 WARN_ON(!rcu_gp_wq);
5094 rcu_alloc_par_gp_wq();
5095
5096 /* Fill in default value for rcutree.qovld boot parameter. */
5097 /* -After- the rcu_node ->lock fields are initialized! */
5098 if (qovld < 0)
5099 qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark;
5100 else
5101 qovld_calc = qovld;
5102
5103 // Kick-start in case any polled grace periods started early.
5104 (void)start_poll_synchronize_rcu_expedited();
5105
5106 rcu_test_sync_prims();
5107 }
5108
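/*
 * The #includes below pull the remaining Tree-RCU components (stall
 * warnings, expedited grace periods, NOCB handling, and the
 * preemptible-RCU/boost plugin) into this translation unit so that they
 * can use the static functions and data defined above.
 */
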
5109 #include "tree_stall.h"
5110 #include "tree_exp.h"
5111 #include "tree_nocb.h"
5112 #include "tree_plugin.h"
5113