Lines matching: lock, detect, function, integer, n, enable (kernel/rcu/tree.c)

1 // SPDX-License-Identifier: GPL-2.0+
3 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
14 * For detailed explanation of Read-Copy Update mechanism see -
67 #include "../time/tick-internal.h"
88 .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
106 /* Control rcu_node-tree auto-balancing at boot time. */
124 * to detect real grace periods. This variable is also used to suppress
125 * boot-time false positives from lockdep-RCU error checking. Finally, it
161 * real-time priority (enabling/disabling) is controlled by
167 /* Delay in jiffies for grace-period initialization delays, debug only. */
183 * This rcu parameter is runtime-read-only. It reflects
185 * per-CPU. Object size is equal to one page. This value
194 // defer page-cache filling for a short time period.
213 * for non-zero delays, the overall slowdown of grace periods is constant
222 * permit this function to be invoked without holding the root rcu_node
223 * structure's ->lock, but of course results can be subject to change.
238 if (rcu_segcblist_is_enabled(&rdp->cblist)) in rcu_get_n_cbs_cpu()
239 return rcu_segcblist_n_cbs(&rdp->cblist); in rcu_get_n_cbs_cpu()
251 * Reset the current CPU's ->dynticks counter to indicate that the
254 * to the next non-quiescent value.
256 * The non-atomic test/increment sequence works because the upper bits
257 * of the ->dynticks counter are manipulated only by the corresponding CPU,
268 * Snapshot the ->dynticks counter with full ordering so as to allow
293 return snap != rcu_dynticks_snap(rdp->cpu); in rcu_dynticks_in_eqs_since()
297 * Return true if the referenced integer is zero while the specified
306 smp_rmb(); // Order ->dynticks and *vp reads. in rcu_dynticks_zero_in_eqs()
308 return false; // Non-zero, so report failure; in rcu_dynticks_zero_in_eqs()
309 smp_rmb(); // Order *vp read and ->dynticks re-read. in rcu_dynticks_zero_in_eqs()
322 * We inform the RCU core by emulating a zero-duration dyntick-idle period.
339 * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle
341 * If the current CPU is idle and running at a first-level (not nested)
388 static long qovld_calc = -1; // No pre-initialization lock acquisitions!
407 * quiescent-state help from rcu_note_context_switch().
415 * Make sure that we give the grace-period kthread time to detect any
434 pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j); in adjust_jiffies_till_sched_qs()
444 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j); in param_set_first_fqs_jiffies()
456 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1)); in param_set_next_fqs_jiffies()
509 * Send along grace-period-related data for rcutorture diagnostics.
527 * An empty function that will trigger a reschedule on
528 * IRQ tail once IRQs get re-enabled on userspace/guest resume.
545 * get re-enabled again.
551 if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU)) in rcu_irq_work_resched()
554 if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) in rcu_irq_work_resched()
567 * rcu_irq_exit_check_preempt - Validate that scheduling is possible
577 "Bad RCU dynticks_nmi_nesting counter\n"); in rcu_irq_exit_check_preempt()
585 * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
596 * in a timely manner, the RCU grace-period kthread sets that CPU's
597 * ->rcu_urgent_qs flag with the expectation that the next interrupt or
598 * exception will invoke this function, which will turn on the scheduler
599 * tick, which will enable RCU to detect that CPU's quiescent states,
600 * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.
605 * interrupt or exception. In that case, the RCU grace-period kthread
607 * controlled environments, this function allows RCU to get what it
621 if (!tick_nohz_full_cpu(rdp->cpu) || in __rcu_irq_enter_check_tick()
622 !READ_ONCE(rdp->rcu_urgent_qs) || in __rcu_irq_enter_check_tick()
623 READ_ONCE(rdp->rcu_forced_tick)) { in __rcu_irq_enter_check_tick()
632 // handler and that the rcu_node lock is an irq-disabled lock in __rcu_irq_enter_check_tick()
633 // prevents self-deadlock. So we can safely recheck under the lock. in __rcu_irq_enter_check_tick()
635 raw_spin_lock_rcu_node(rdp->mynode); in __rcu_irq_enter_check_tick()
636 if (READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) { in __rcu_irq_enter_check_tick()
639 WRITE_ONCE(rdp->rcu_forced_tick, true); in __rcu_irq_enter_check_tick()
640 tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU); in __rcu_irq_enter_check_tick()
642 raw_spin_unlock_rcu_node(rdp->mynode); in __rcu_irq_enter_check_tick()
648 * Check to see if any future non-offloaded RCU-related work will need
650 * returning 1 if so. This function is part of the RCU implementation;
651 * it is -not- an exported member of the RCU API. This is used by
652 * the idle-entry code to figure out whether it is safe to disable the
653 * scheduler-clock interrupt.
655 * Just check whether or not this CPU has non-offloaded RCU callbacks
660 return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && in rcu_needs_cpu()
666 * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
671 raw_lockdep_assert_held_rcu_node(rdp->mynode); in rcu_disable_urgency_upon_qs()
672 WRITE_ONCE(rdp->rcu_urgent_qs, false); in rcu_disable_urgency_upon_qs()
673 WRITE_ONCE(rdp->rcu_need_heavy_qs, false); in rcu_disable_urgency_upon_qs()
674 if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) { in rcu_disable_urgency_upon_qs()
675 tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU); in rcu_disable_urgency_upon_qs()
676 WRITE_ONCE(rdp->rcu_forced_tick, false); in rcu_disable_urgency_upon_qs()
681 * rcu_is_watching - RCU read-side critical sections permitted on current CPU?
684 * An @true return means that this CPU can safely enter RCU read-side
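
For reference, a minimal sketch of how callers typically consume rcu_is_watching(): the lockdep hooks behind rcu_read_lock() use it to flag read-side critical sections entered while RCU is not watching, roughly like this:

    /* Debug-only check before entering an RCU read-side critical section. */
    RCU_LOCKDEP_WARN(!rcu_is_watching(),
                     "rcu_read_lock() used illegally while idle");
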
727 * of the rcu_node ->gp_seq counter with respect to the rcu_data counters.
734 if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4, in rcu_gpnum_ovf()
735 rnp->gp_seq)) in rcu_gpnum_ovf()
736 WRITE_ONCE(rdp->gpwrap, true); in rcu_gpnum_ovf()
737 if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq)) in rcu_gpnum_ovf()
738 rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4; in rcu_gpnum_ovf()
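
For reference, the ULONG_CMP_LT() checks above rely on the wrap-tolerant comparison helpers; a short sketch of the idea (see include/linux/rcupdate.h for the authoritative definitions):

    /* Wrap-tolerant comparisons for unsigned long sequence counters. */
    #define ULONG_CMP_GE(a, b)  (ULONG_MAX / 2 >= (a) - (b))
    #define ULONG_CMP_LT(a, b)  (ULONG_MAX / 2 < (a) - (b))

    /*
     * Worked example: if the per-CPU snapshot (a) sits near ULONG_MAX and
     * rnp->gp_seq (b) has since wrapped around to a small value, (a) - (b)
     * is still larger than ULONG_MAX / 2, so the snapshot is correctly
     * treated as older than the current counter.  rcu_gpnum_ovf() offsets
     * the snapshot by ULONG_MAX / 4 so that ->gpwrap is set once the
     * rcu_node counter has run a quarter of the counter space ahead of it.
     */
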
748 rdp->dynticks_snap = rcu_dynticks_snap(rdp->cpu); in dyntick_save_progress_counter()
749 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { in dyntick_save_progress_counter()
750 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti")); in dyntick_save_progress_counter()
751 rcu_gpnum_ovf(rdp->mynode, rdp); in dyntick_save_progress_counter()
775 struct rcu_node *rnp = rdp->mynode; in rcu_implicit_dynticks_qs()
782 * read-side critical section that started before the beginning in rcu_implicit_dynticks_qs()
785 if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap)) { in rcu_implicit_dynticks_qs()
786 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti")); in rcu_implicit_dynticks_qs()
795 * the CPU-offline process, or, failing that, by rcu_gp_init() in rcu_implicit_dynticks_qs()
797 * last task on a leaf rcu_node structure exiting its RCU read-side in rcu_implicit_dynticks_qs()
802 * The rcu_node structure's ->lock is held here, which excludes in rcu_implicit_dynticks_qs()
803 * the relevant portions of the CPU-hotplug code, the grace-period in rcu_implicit_dynticks_qs()
812 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", in rcu_implicit_dynticks_qs()
813 __func__, rnp->grplo, rnp->grphi, rnp->level, in rcu_implicit_dynticks_qs()
814 (long)rnp->gp_seq, (long)rnp->completedqs); in rcu_implicit_dynticks_qs()
815 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent) in rcu_implicit_dynticks_qs()
816 …pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n in rcu_implicit_dynticks_qs()
817 …__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rc… in rcu_implicit_dynticks_qs()
818 pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n", in rcu_implicit_dynticks_qs()
819 __func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)], in rcu_implicit_dynticks_qs()
820 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags, in rcu_implicit_dynticks_qs()
821 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags); in rcu_implicit_dynticks_qs()
830 * unsynchronized assignments to the per-CPU rcu_need_heavy_qs in rcu_implicit_dynticks_qs()
837 if (!READ_ONCE(rdp->rcu_need_heavy_qs) && in rcu_implicit_dynticks_qs()
841 WRITE_ONCE(rdp->rcu_need_heavy_qs, true); in rcu_implicit_dynticks_qs()
843 smp_store_release(&rdp->rcu_urgent_qs, true); in rcu_implicit_dynticks_qs()
845 WRITE_ONCE(rdp->rcu_urgent_qs, true); in rcu_implicit_dynticks_qs()
849 * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq! in rcu_implicit_dynticks_qs()
851 * And some in-kernel loops check need_resched() before calling in rcu_implicit_dynticks_qs()
853 * running in-kernel with scheduling-clock interrupts disabled. in rcu_implicit_dynticks_qs()
856 if (tick_nohz_full_cpu(rdp->cpu) && in rcu_implicit_dynticks_qs()
857 (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || in rcu_implicit_dynticks_qs()
859 WRITE_ONCE(rdp->rcu_urgent_qs, true); in rcu_implicit_dynticks_qs()
860 WRITE_ONCE(rdp->last_fqs_resched, jiffies); in rcu_implicit_dynticks_qs()
861 ret = -1; in rcu_implicit_dynticks_qs()
865 * If more than halfway to RCU CPU stall-warning time, invoke in rcu_implicit_dynticks_qs()
873 READ_ONCE(rdp->last_fqs_resched) + jtsq)) { in rcu_implicit_dynticks_qs()
874 WRITE_ONCE(rdp->last_fqs_resched, jiffies); in rcu_implicit_dynticks_qs()
875 ret = -1; in rcu_implicit_dynticks_qs()
878 !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && in rcu_implicit_dynticks_qs()
879 (rnp->ffmask & rdp->grpmask)) { in rcu_implicit_dynticks_qs()
880 rdp->rcu_iw_pending = true; in rcu_implicit_dynticks_qs()
881 rdp->rcu_iw_gp_seq = rnp->gp_seq; in rcu_implicit_dynticks_qs()
882 irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); in rcu_implicit_dynticks_qs()
885 if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) { in rcu_implicit_dynticks_qs()
886 int cpu = rdp->cpu; in rcu_implicit_dynticks_qs()
892 rsrp = &rdp->snap_record; in rcu_implicit_dynticks_qs()
893 rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); in rcu_implicit_dynticks_qs()
894 rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); in rcu_implicit_dynticks_qs()
895 rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); in rcu_implicit_dynticks_qs()
896 rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu); in rcu_implicit_dynticks_qs()
897 rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu); in rcu_implicit_dynticks_qs()
898 rsrp->nr_csw = nr_context_switches_cpu(cpu); in rcu_implicit_dynticks_qs()
899 rsrp->jiffies = jiffies; in rcu_implicit_dynticks_qs()
900 rsrp->gp_seq = rdp->gp_seq; in rcu_implicit_dynticks_qs()
907 /* Trace-event wrapper function for trace_rcu_future_grace_period. */
911 trace_rcu_future_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), in trace_rcu_this_gp()
912 gp_seq_req, rnp->level, in trace_rcu_this_gp()
913 rnp->grplo, rnp->grphi, s); in trace_rcu_this_gp()
917 * rcu_start_this_gp - Request the start of a particular grace period
924 * rcu_node structure's ->gp_seq_needed field. Returns true if there
925 * is reason to awaken the grace-period kthread.
927 * The caller must hold the specified rcu_node structure's ->lock, which
928 * is why the caller is responsible for waking the grace-period kthread.
940 * structure's lock or bail out if the need for this grace period in rcu_start_this_gp()
941 * has already been recorded -- or if that grace period has in in rcu_start_this_gp()
943 * progress in a non-leaf node, no recording is needed because the in rcu_start_this_gp()
945 * Note that rnp_start->lock must not be released. in rcu_start_this_gp()
949 for (rnp = rnp_start; 1; rnp = rnp->parent) { in rcu_start_this_gp()
952 if (ULONG_CMP_GE(rnp->gp_seq_needed, gp_seq_req) || in rcu_start_this_gp()
953 rcu_seq_started(&rnp->gp_seq, gp_seq_req) || in rcu_start_this_gp()
955 rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))) { in rcu_start_this_gp()
960 WRITE_ONCE(rnp->gp_seq_needed, gp_seq_req); in rcu_start_this_gp()
961 if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) { in rcu_start_this_gp()
972 if (rnp != rnp_start && rnp->parent != NULL) in rcu_start_this_gp()
974 if (!rnp->parent) in rcu_start_this_gp()
994 if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) { in rcu_start_this_gp()
995 WRITE_ONCE(rnp_start->gp_seq_needed, rnp->gp_seq_needed); in rcu_start_this_gp()
996 WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); in rcu_start_this_gp()
1004 * Clean up any old requests for the just-ended grace period. Also return
1012 needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed); in rcu_future_gp_cleanup()
1014 rnp->gp_seq_needed = rnp->gp_seq; /* Avoid counter wrap. */ in rcu_future_gp_cleanup()
1015 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq, in rcu_future_gp_cleanup()
1033 * is dangerous that late in the CPU-down hotplug process. The in swake_up_one_online()
1053 * Awaken the grace-period kthread. Don't do a self-awaken (unless in an
1055 * sleep upon return, resulting in a grace-period hang), and don't bother
1056 * awakening when there is nothing for the grace-period kthread to do
1061 * So why do the self-wakeup when in an interrupt or softirq handler
1062 * in the grace-period kthread's context? Because the kthread might have
1064 * pre-sleep check of the awaken condition. In this case, a wakeup really
1080 * If there is room, assign a ->gp_seq number to any callbacks on this
1082 * that were previously assigned a ->gp_seq number that has since proven
1084 * ->gp_seq number while RCU is idle, but with reference to a non-root
1085 * rcu_node structure. This function is idempotent, so it does not hurt
1087 * the RCU grace-period kthread.
1089 * The caller must hold rnp->lock with interrupts disabled.
1100 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) in rcu_accelerate_cbs()
1103 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPreAcc")); in rcu_accelerate_cbs()
1106 * Callbacks are often registered with incomplete grace-period in rcu_accelerate_cbs()
1108 * information requires acquiring a global lock... RCU therefore in rcu_accelerate_cbs()
1112 * accelerating callback invocation to an earlier grace-period in rcu_accelerate_cbs()
1116 if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req)) in rcu_accelerate_cbs()
1120 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) in rcu_accelerate_cbs()
1125 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPostAcc")); in rcu_accelerate_cbs()
1132 * rcu_node structure's ->lock be held. It consults the cached value
1133 * of ->gp_seq_needed in the rcu_data structure, and if that indicates
1134 * that a new grace-period request be made, invokes rcu_accelerate_cbs()
1135 * while holding the leaf rcu_node structure's ->lock.
1145 if (!READ_ONCE(rdp->gpwrap) && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { in rcu_accelerate_cbs_unlocked()
1147 (void)rcu_segcblist_accelerate(&rdp->cblist, c); in rcu_accelerate_cbs_unlocked()
1160 * assign ->gp_seq numbers to any callbacks in the RCU_NEXT_TAIL
1161 * sublist. This function is idempotent, so it does not hurt to
1162 * invoke it repeatedly. As long as it is not invoked -too- often...
1163 * Returns true if the RCU grace-period kthread needs to be awakened.
1165 * The caller must hold rnp->lock with interrupts disabled.
1173 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) in rcu_advance_cbs()
1177 * Find all callbacks whose ->gp_seq numbers indicate that they in rcu_advance_cbs()
1180 rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq); in rcu_advance_cbs()
1188 * that the RCU grace-period kthread be awakened.
1194 if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp)) in rcu_advance_cbs_nowake()
1196 // The grace period cannot end while we hold the rcu_node lock. in rcu_advance_cbs_nowake()
1197 if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) in rcu_advance_cbs_nowake()
1216 * Update CPU-local rcu_data state to record the beginnings and ends of
1217 * grace periods. The caller must hold the ->lock of the leaf rcu_node
1219 * Returns true if the grace-period kthread needs to be awakened.
1229 if (rdp->gp_seq == rnp->gp_seq) in __note_gp_changes()
1233 if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) || in __note_gp_changes()
1234 unlikely(READ_ONCE(rdp->gpwrap))) { in __note_gp_changes()
1237 rdp->core_needs_qs = false; in __note_gp_changes()
1238 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend")); in __note_gp_changes()
1242 if (rdp->core_needs_qs) in __note_gp_changes()
1243 rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask); in __note_gp_changes()
1246 /* Now handle the beginnings of any new-to-this-CPU grace periods. */ in __note_gp_changes()
1247 if (rcu_seq_new_gp(rdp->gp_seq, rnp->gp_seq) || in __note_gp_changes()
1248 unlikely(READ_ONCE(rdp->gpwrap))) { in __note_gp_changes()
1251 * set up to detect a quiescent state, otherwise don't in __note_gp_changes()
1254 trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart")); in __note_gp_changes()
1255 need_qs = !!(rnp->qsmask & rdp->grpmask); in __note_gp_changes()
1256 rdp->cpu_no_qs.b.norm = need_qs; in __note_gp_changes()
1257 rdp->core_needs_qs = need_qs; in __note_gp_changes()
1260 rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */ in __note_gp_changes()
1261 if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap) in __note_gp_changes()
1262 WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); in __note_gp_changes()
1263 if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap)) in __note_gp_changes()
1264 WRITE_ONCE(rdp->last_sched_clock, jiffies); in __note_gp_changes()
1265 WRITE_ONCE(rdp->gpwrap, false); in __note_gp_changes()
1277 rnp = rdp->mynode; in note_gp_changes()
1278 if ((rdp->gp_seq == rcu_seq_current(&rnp->gp_seq) && in note_gp_changes()
1279 !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */ in note_gp_changes()
1293 /* Register a counter to suppress debugging grace-period delays. */
1327 /* Allow rcutorture to stall the grace-period kthread. */
1344 pr_alert("%s: Waiting %lu jiffies\n", __func__, duration); in rcu_gp_torture_wait()
1346 pr_alert("%s: Wait complete\n", __func__); in rcu_gp_torture_wait()
1384 // end of that GP. Either way, zero counter to avoid counter-wrap in rcu_poll_gp_seq_end()
1396 // where caller does not hold the root rcu_node structure's lock.
1413 // caller does not hold the root rcu_node structure's lock.
1468 * Apply per-leaf buffered online and offline operations to in rcu_gp_init()
1471 * offlining path, when combined with checks in this function, in rcu_gp_init()
1482 if (rnp->qsmaskinit == rnp->qsmaskinitnext && in rcu_gp_init()
1483 !rnp->wait_blkd_tasks) { in rcu_gp_init()
1491 /* Record old state, apply changes to ->qsmaskinit field. */ in rcu_gp_init()
1492 oldmask = rnp->qsmaskinit; in rcu_gp_init()
1493 rnp->qsmaskinit = rnp->qsmaskinitnext; in rcu_gp_init()
1495 /* If zero-ness of ->qsmaskinit changed, propagate up tree. */ in rcu_gp_init()
1496 if (!oldmask != !rnp->qsmaskinit) { in rcu_gp_init()
1498 if (!rnp->wait_blkd_tasks) /* Ever offline? */ in rcu_gp_init()
1501 rnp->wait_blkd_tasks = true; /* blocked tasks */ in rcu_gp_init()
1508 * If all waited-on tasks from prior grace period are in rcu_gp_init()
1511 * clear ->wait_blkd_tasks. Otherwise, if one of this in rcu_gp_init()
1513 * simply clear ->wait_blkd_tasks. in rcu_gp_init()
1515 if (rnp->wait_blkd_tasks && in rcu_gp_init()
1516 (!rcu_preempt_has_tasks(rnp) || rnp->qsmaskinit)) { in rcu_gp_init()
1517 rnp->wait_blkd_tasks = false; in rcu_gp_init()
1518 if (!rnp->qsmaskinit) in rcu_gp_init()
1529 * Set the quiescent-state-needed bits in all the rcu_node in rcu_gp_init()
1530 * structures for all currently online CPUs in breadth-first in rcu_gp_init()
1546 rnp->qsmask = rnp->qsmaskinit; in rcu_gp_init()
1547 WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq); in rcu_gp_init()
1548 if (rnp == rdp->mynode) in rcu_gp_init()
1551 trace_rcu_grace_period_init(rcu_state.name, rnp->gp_seq, in rcu_gp_init()
1552 rnp->level, rnp->grplo, in rcu_gp_init()
1553 rnp->grphi, rnp->qsmask); in rcu_gp_init()
1554 /* Quiescent states for tasks on any now-offline CPUs. */ in rcu_gp_init()
1555 mask = rnp->qsmask & ~rnp->qsmaskinitnext; in rcu_gp_init()
1556 rnp->rcu_gp_init_mask = mask; in rcu_gp_init()
1557 if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp)) in rcu_gp_init()
1558 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in rcu_gp_init()
1573 * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
1584 // Someone like call_rcu() requested a force-quiescent-state scan. in rcu_gp_fqs_check_wake()
1590 if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)) in rcu_gp_fqs_check_wake()
1597 * Do one round of quiescent-state forcing.
1614 WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs); in rcu_gp_fqs()
1618 /* Collect dyntick-idle snapshots. */ in rcu_gp_fqs()
1621 /* Handle dyntick-idle and offline CPUs. */ in rcu_gp_fqs()
1624 /* Clear flag to prevent immediate re-entry. */ in rcu_gp_fqs()
1634 * Loop doing repeated quiescent-state forcing until the grace period ends.
1675 * is required only for single-node rcu_node trees because readers blocking in rcu_gp_fqs_loop()
1677 * For multi-node trees, checking the root node's ->qsmask suffices, because a in rcu_gp_fqs_loop()
1678 * given root node's ->qsmask bit is cleared only when all CPUs and tasks from in rcu_gp_fqs_loop()
1681 if (!READ_ONCE(rnp->qsmask) && in rcu_gp_fqs_loop()
1684 /* If time for quiescent-state forcing, do it. */ in rcu_gp_fqs_loop()
1713 j = rcu_state.jiffies_force_qs - j; in rcu_gp_fqs_loop()
1736 gp_duration = rcu_state.gp_end - rcu_state.gp_start; in rcu_gp_cleanup()
1745 * safe for us to drop the lock in order to mark the grace in rcu_gp_cleanup()
1752 * Propagate new ->gp_seq value to rcu_node structures so that in rcu_gp_cleanup()
1755 * RCU grace-period initialization races by forcing the end of in rcu_gp_cleanup()
1766 WARN_ON_ONCE(rnp->qsmask); in rcu_gp_cleanup()
1767 WRITE_ONCE(rnp->gp_seq, new_gp_seq); in rcu_gp_cleanup()
1768 if (!rnp->parent) in rcu_gp_cleanup()
1771 if (rnp == rdp->mynode) in rcu_gp_cleanup()
1773 /* smp_mb() provided by prior unlock-lock pair. */ in rcu_gp_cleanup()
1777 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) { in rcu_gp_cleanup()
1789 raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */ in rcu_gp_cleanup()
1798 if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) { in rcu_gp_cleanup()
1799 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed, in rcu_gp_cleanup()
1809 // the RCU_GP_FLAG_INIT bit in ->gp_state (which records in rcu_gp_cleanup()
1813 // hold the ->nocb_lock needed to safely access an offloaded in rcu_gp_cleanup()
1814 // ->cblist.  We do not want to acquire that lock because in rcu_gp_cleanup()
1824 // already set the RCU_GP_FLAG_INIT bit in ->gp_flags.  in rcu_gp_cleanup()
1826 // ->gp_flags bits. in rcu_gp_cleanup()
1845 /* Handle grace-period start. */ in rcu_gp_kthread()
1865 /* Handle quiescent-state forcing. */ in rcu_gp_kthread()
1868 /* Handle grace-period end. */ in rcu_gp_kthread()
1877 * Invoke rcu_gp_kthread_wake() to awaken the grace-period kthread if
1878 * another grace period is required. Whether we wake the grace-period
1879 * kthread or it awakens itself for the next round of quiescent-state
1880 * forcing, that kthread will clean up after the just-completed grace
1881 * period. Note that the caller must hold rnp->lock, which is released
1885 __releases(rcu_get_root()->lock) in rcu_report_qs_rsp()
1896 * Similar to rcu_report_qs_rdp(), for which it is a helper function.
1901 * is the grace-period snapshot, which means that the quiescent states
1902 * are valid only if rnp->gp_seq is equal to gps. That structure's lock
1905 * As a special case, if mask is zero, the bit-already-cleared check is
1907 * during grace-period initialization.
1911 __releases(rnp->lock) in rcu_report_qs_rnp()
1920 if ((!(rnp->qsmask & mask) && mask) || rnp->gp_seq != gps) { in rcu_report_qs_rnp()
1932 WRITE_ONCE(rnp->qsmask, rnp->qsmask & ~mask); in rcu_report_qs_rnp()
1933 trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq, in rcu_report_qs_rnp()
1934 mask, rnp->qsmask, rnp->level, in rcu_report_qs_rnp()
1935 rnp->grplo, rnp->grphi, in rcu_report_qs_rnp()
1936 !!rnp->gp_tasks); in rcu_report_qs_rnp()
1937 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { in rcu_report_qs_rnp()
1943 rnp->completedqs = rnp->gp_seq; in rcu_report_qs_rnp()
1944 mask = rnp->grpmask; in rcu_report_qs_rnp()
1945 if (rnp->parent == NULL) { in rcu_report_qs_rnp()
1947 /* No more levels. Exit loop holding root lock. */ in rcu_report_qs_rnp()
1953 rnp = rnp->parent; in rcu_report_qs_rnp()
1955 oldmask = READ_ONCE(rnp_c->qsmask); in rcu_report_qs_rnp()
1963 rcu_report_qs_rsp(flags); /* releases rnp->lock. */ in rcu_report_qs_rnp()
1969 * RCU grace period. The caller must hold the corresponding rnp->lock with
1970 * irqs disabled, and this lock is released upon return, but irqs remain
1975 __releases(rnp->lock) in rcu_report_unblock_qs_rnp()
1984 rnp->qsmask != 0) { in rcu_report_unblock_qs_rnp()
1989 rnp->completedqs = rnp->gp_seq; in rcu_report_unblock_qs_rnp()
1990 rnp_p = rnp->parent; in rcu_report_unblock_qs_rnp()
2000 /* Report up the rest of the hierarchy, tracking current ->gp_seq. */ in rcu_report_unblock_qs_rnp()
2001 gps = rnp->gp_seq; in rcu_report_unblock_qs_rnp()
2002 mask = rnp->grpmask; in rcu_report_unblock_qs_rnp()
2020 WARN_ON_ONCE(rdp->cpu != smp_processor_id()); in rcu_report_qs_rdp()
2021 rnp = rdp->mynode; in rcu_report_qs_rdp()
2023 if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq || in rcu_report_qs_rdp()
2024 rdp->gpwrap) { in rcu_report_qs_rdp()
2032 rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ in rcu_report_qs_rdp()
2036 mask = rdp->grpmask; in rcu_report_qs_rdp()
2037 rdp->core_needs_qs = false; in rcu_report_qs_rdp()
2038 if ((rnp->qsmask & mask) == 0) { in rcu_report_qs_rdp()
2054 } else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) { in rcu_report_qs_rdp()
2057 * if in the middle of a (de-)offloading process. in rcu_report_qs_rdp()
2063 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in rcu_report_qs_rdp()
2064 /* ^^^ Released rnp->lock */ in rcu_report_qs_rdp()
2083 /* Check for grace-period ends and beginnings. */ in rcu_check_quiescent_state()
2090 if (!rdp->core_needs_qs) in rcu_check_quiescent_state()
2097 if (rdp->cpu_no_qs.b.norm) in rcu_check_quiescent_state()
2107 /* Return true if callback-invocation time limit exceeded. */
2121 * period. Throttle as specified by rdp->blimit.
2138 if (!rcu_segcblist_ready_cbs(&rdp->cblist)) { in rcu_do_batch()
2140 rcu_segcblist_n_cbs(&rdp->cblist), 0); in rcu_do_batch()
2142 !rcu_segcblist_empty(&rdp->cblist), in rcu_do_batch()
2155 pending = rcu_segcblist_get_seglen(&rdp->cblist, RCU_DONE_TAIL); in rcu_do_batch()
2157 div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div; in rcu_do_batch()
2158 bl = max(rdp->blimit, pending >> div); in rcu_do_batch()
2159 if ((in_serving_softirq() || rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING) && in rcu_do_batch()
2170 rcu_segcblist_n_cbs(&rdp->cblist), bl); in rcu_do_batch()
2171 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); in rcu_do_batch()
2173 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); in rcu_do_batch()
2175 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued")); in rcu_do_batch()
2191 f = rhp->func; in rcu_do_batch()
2193 WRITE_ONCE(rhp->func, (rcu_callback_t)0L); in rcu_do_batch()
2218 // But rcuc kthreads can delay quiescent-state in rcu_do_batch()
2220 if (rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING && in rcu_do_batch()
2222 rdp->rcu_cpu_has_work = 1; in rcu_do_batch()
2229 rdp->n_cbs_invoked += count; in rcu_do_batch()
2234 rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl); in rcu_do_batch()
2235 rcu_segcblist_add_len(&rdp->cblist, -count); in rcu_do_batch()
2238 count = rcu_segcblist_n_cbs(&rdp->cblist); in rcu_do_batch()
2239 if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark) in rcu_do_batch()
2240 rdp->blimit = blimit; in rcu_do_batch()
2242 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ in rcu_do_batch()
2243 if (count == 0 && rdp->qlen_last_fqs_check != 0) { in rcu_do_batch()
2244 rdp->qlen_last_fqs_check = 0; in rcu_do_batch()
2245 rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); in rcu_do_batch()
2246 } else if (count < rdp->qlen_last_fqs_check - qhimark) in rcu_do_batch()
2247 rdp->qlen_last_fqs_check = count; in rcu_do_batch()
2253 empty = rcu_segcblist_empty(&rdp->cblist); in rcu_do_batch()
2257 WARN_ON_ONCE(count == 0 && rcu_segcblist_n_segment_cbs(&rdp->cblist) != 0); in rcu_do_batch()
2258 WARN_ON_ONCE(!empty && rcu_segcblist_n_segment_cbs(&rdp->cblist) == 0); in rcu_do_batch()
2266 * This function is invoked from each scheduling-clock interrupt,
2267 * and checks to see if this CPU is in a non-context-switch quiescent
2282 trace_rcu_utilization(TPS("Start scheduler-tick")); in rcu_sched_clock_irq()
2285 /* The load-acquire pairs with the store-release setting to true. */ in rcu_sched_clock_irq()
2301 trace_rcu_utilization(TPS("End scheduler-tick")); in rcu_sched_clock_irq()
2308 * Otherwise, invoke the specified function to check dyntick state for
2325 rcu_state.cbovldnext |= !!rnp->cbovldmask; in force_qs_rnp()
2326 if (rnp->qsmask == 0) { in force_qs_rnp()
2331 * priority-boost blocked readers. in force_qs_rnp()
2334 /* rcu_initiate_boost() releases rnp->lock */ in force_qs_rnp()
2340 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) { in force_qs_rnp()
2347 mask |= rdp->grpmask; in force_qs_rnp()
2351 rsmask |= rdp->grpmask; in force_qs_rnp()
2354 /* Idle/offline CPUs, report (releases rnp->lock). */ in force_qs_rnp()
2355 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in force_qs_rnp()
2357 /* Nothing to do here, so just drop the lock. */ in force_qs_rnp()
2367 * Force quiescent states on reluctant CPUs, and also detect which
2368 * CPUs are in dyntick-idle mode.
2379 for (; rnp != NULL; rnp = rnp->parent) { in rcu_force_quiescent_state()
2381 !raw_spin_trylock(&rnp->fqslock); in rcu_force_quiescent_state()
2383 raw_spin_unlock(&rnp_old->fqslock); in rcu_force_quiescent_state()
2390 /* Reached the root of the rcu_node tree, acquire lock. */ in rcu_force_quiescent_state()
2392 raw_spin_unlock(&rnp_old->fqslock); in rcu_force_quiescent_state()
2417 struct rcu_node *rnp = rdp->mynode; in rcu_core()
2420 * Therefore this function can race with concurrent NOCB (de-)offloading in rcu_core()
2435 const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist); in rcu_core()
2440 WARN_ON_ONCE(!rdp->beenonline); in rcu_core()
2455 rcu_segcblist_is_enabled(&rdp->cblist) && do_batch) { in rcu_core()
2457 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) in rcu_core()
2465 if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) && in rcu_core()
2468 /* Re-invoke RCU core processing if there are callbacks remaining. */ in rcu_core()
2469 if (rcu_segcblist_ready_cbs(&rdp->cblist)) in rcu_core()
2479 queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work); in rcu_core()
2534 * Per-CPU kernel thread that invokes RCU callbacks. This replaces
2581 * Spawn per-CPU RCU core processing kthreads.
2592 "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__); in rcu_spawn_core_kthreads()
2597 * Handle any core-RCU processing required by a call_rcu() invocation.
2604 * core in order to force a re-evaluation of RCU's idleness. in __call_rcu_core()
2620 if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) > in __call_rcu_core()
2621 rdp->qlen_last_fqs_check + qhimark)) { in __call_rcu_core()
2628 rcu_accelerate_cbs_unlocked(rdp->mynode, rdp); in __call_rcu_core()
2631 rdp->blimit = DEFAULT_MAX_RCU_BLIMIT; in __call_rcu_core()
2632 if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap && in __call_rcu_core()
2633 rcu_segcblist_first_pend_cb(&rdp->cblist) != head) in __call_rcu_core()
2635 rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); in __call_rcu_core()
2636 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); in __call_rcu_core()
2642 * RCU callback function to leak a callback.
2650 * ->cbovldmask bit corresponding to the current CPU based on that CPU's
2652 * structure's ->lock.
2659 if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) in check_cb_ovld_locked()
2660 WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask); in check_cb_ovld_locked()
2662 WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask); in check_cb_ovld_locked()
2667 * ->cbovldmask bit corresponding to the current CPU based on that CPU's
2671 * Note that this function ignores the possibility that there are a lot
2673 * grace periods. This omission is due to the need for no-CBs CPUs to
2674 * be holding ->nocb_lock to do this check, which is too heavy for a
2675 * common-case operation.
2679 struct rcu_node *const rnp = rdp->mynode; in check_cb_ovld()
2682 ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) == in check_cb_ovld()
2683 !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask))) in check_cb_ovld()
2700 WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); in __call_rcu_common()
2709 pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func); in __call_rcu_common()
2712 WRITE_ONCE(head->func, rcu_leak_callback); in __call_rcu_common()
2715 head->func = func; in __call_rcu_common()
2716 head->next = NULL; in __call_rcu_common()
2723 if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) { in __call_rcu_common()
2729 if (rcu_segcblist_empty(&rdp->cblist)) in __call_rcu_common()
2730 rcu_segcblist_init(&rdp->cblist); in __call_rcu_common()
2736 return; // Enqueued onto ->nocb_bypass, so just leave. in __call_rcu_common()
2738 // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. in __call_rcu_common()
2739 rcu_segcblist_enqueue(&rdp->cblist, head); in __call_rcu_common()
2743 rcu_segcblist_n_cbs(&rdp->cblist)); in __call_rcu_common()
2746 rcu_segcblist_n_cbs(&rdp->cblist)); in __call_rcu_common()
2748 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued")); in __call_rcu_common()
2761 * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
2762 * flush all lazy callbacks (including the new one) to the main ->cblist while
2766 * @func: actual callback function to be invoked after the grace period
2768 * The callback function will be invoked some time after a full grace
2769 * period elapses, in other words after all pre-existing RCU read-side
2775 * This function will cause callbacks to be invoked sooner than later at the
2776 * expense of extra power. Other than that, this function is identical to, and
2788 * call_rcu() - Queue an RCU callback for invocation after a grace period.
2790 * ->cblist to prevent starting of grace periods too soon.
2794 * @func: actual callback function to be invoked after the grace period
2796 * The callback function will be invoked some time after a full grace
2797 * period elapses, in other words after all pre-existing RCU read-side
2798 * critical sections have completed. However, the callback function
2799 * might well execute concurrently with RCU read-side critical sections
2802 * RCU read-side critical sections are delimited by rcu_read_lock()
2805 * or softirqs have been disabled also serve as RCU read-side critical
2810 * all pre-existing RCU read-side critical section. On systems with more
2813 * last RCU read-side critical section whose beginning preceded the call
2814 * to call_rcu(). It also means that each CPU executing an RCU read-side
2817 * of that RCU read-side critical section. Note that these guarantees
2822 * resulting RCU callback function "func()", then both CPU A and CPU B are
2824 * between the call to call_rcu() and the invocation of "func()" -- even
2828 * Implementation of these memory-ordering guarantees is described here:
2829 * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
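
A minimal usage sketch for call_rcu(), with illustrative names not taken from this file: embed a struct rcu_head in the protected object and let the callback perform the deferred free.

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo {
            int data;
            struct rcu_head rcu;            /* storage for the deferred callback */
    };

    static void foo_reclaim(struct rcu_head *rhp)
    {
            struct foo *fp = container_of(rhp, struct foo, rcu);

            kfree(fp);                      /* runs after a full grace period */
    }

    static void foo_release(struct foo *fp)
    {
            /* Unpublish fp first (for example with list_del_rcu()), then: */
            call_rcu(&fp->rcu, foo_reclaim);
    }
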
2843 * struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers
2862 ((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
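
Rough arithmetic for the per-page pointer capacity computed above: with 4 KiB pages and a kvfree_rcu_bulk_data header of a few dozen bytes, (PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *) comes to roughly (4096 - ~40) / 8, on the order of 500 pointers per page on a 64-bit build; the exact figure depends on the architecture and structure layout.
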
2865 * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
2868 * @head_free_gp_snap: Grace-period snapshot to check for attempted premature frees.
2869 * @bulk_head_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
2882 * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
2885 * @bulk_head: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
2887 * @lock: Synchronize access to this structure
2891 * @bulk_count: Number of objects in bulk-list
2894 * In order to save some per-cpu space the list is singular.
2896 * per-cpu lock.
2903 * This is a per-CPU structure. The reason that it is not included in
2915 // Objects queued on a bulk-list.
2920 raw_spinlock_t lock; member
2934 .lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock),
2943 for (i = 0; i < bhead->nr_records; i++) in debug_rcu_bhead_unqueue()
2944 debug_rcu_head_unqueue((struct rcu_head *)(bhead->records[i])); in debug_rcu_bhead_unqueue()
2955 raw_spin_lock(&krcp->lock); in krc_this_cpu_lock()
2963 raw_spin_unlock_irqrestore(&krcp->lock, flags); in krc_this_cpu_unlock()
2969 if (!krcp->nr_bkv_objs) in get_cached_bnode()
2972 WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs - 1); in get_cached_bnode()
2974 llist_del_first(&krcp->bkvcache); in get_cached_bnode()
2982 if (krcp->nr_bkv_objs >= rcu_min_cached_objs) in put_cached_bnode()
2985 llist_add((struct llist_node *) bnode, &krcp->bkvcache); in put_cached_bnode()
2986 WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs + 1); in put_cached_bnode()
2994 struct llist_node *page_list, *pos, *n; in drain_page_cache() local
3000 raw_spin_lock_irqsave(&krcp->lock, flags); in drain_page_cache()
3001 page_list = llist_del_all(&krcp->bkvcache); in drain_page_cache()
3002 WRITE_ONCE(krcp->nr_bkv_objs, 0); in drain_page_cache()
3003 raw_spin_unlock_irqrestore(&krcp->lock, flags); in drain_page_cache()
3005 llist_for_each_safe(pos, n, page_list) { in drain_page_cache()
3020 if (!WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&bnode->gp_snap))) { in kvfree_rcu_bulk()
3025 rcu_state.name, bnode->nr_records, in kvfree_rcu_bulk()
3026 bnode->records); in kvfree_rcu_bulk()
3028 kfree_bulk(bnode->nr_records, bnode->records); in kvfree_rcu_bulk()
3030 for (i = 0; i < bnode->nr_records; i++) { in kvfree_rcu_bulk()
3032 rcu_state.name, bnode->records[i], 0); in kvfree_rcu_bulk()
3034 vfree(bnode->records[i]); in kvfree_rcu_bulk()
3040 raw_spin_lock_irqsave(&krcp->lock, flags); in kvfree_rcu_bulk()
3043 raw_spin_unlock_irqrestore(&krcp->lock, flags); in kvfree_rcu_bulk()
3057 void *ptr = (void *) head->func; in kvfree_rcu_list()
3058 unsigned long offset = (void *) head - ptr; in kvfree_rcu_list()
3060 next = head->next; in kvfree_rcu_list()
3074 * This function is invoked in workqueue context after a grace period.
3075 * It frees all the objects queued on ->bulk_head_free or ->head_free.
3080 struct kvfree_rcu_bulk_data *bnode, *n; in kfree_rcu_work() local
3090 krcp = krwp->krcp; in kfree_rcu_work()
3092 raw_spin_lock_irqsave(&krcp->lock, flags); in kfree_rcu_work()
3095 list_replace_init(&krwp->bulk_head_free[i], &bulk_head[i]); in kfree_rcu_work()
3098 head = krwp->head_free; in kfree_rcu_work()
3099 krwp->head_free = NULL; in kfree_rcu_work()
3100 head_gp_snap = krwp->head_free_gp_snap; in kfree_rcu_work()
3101 raw_spin_unlock_irqrestore(&krcp->lock, flags); in kfree_rcu_work()
3106 list_for_each_entry_safe(bnode, n, &bulk_head[i], list) in kfree_rcu_work()
3112 * double-argument of kvfree_rcu(). This happens when the in kfree_rcu_work()
3113 * page-cache is empty, which means that objects are instead in kfree_rcu_work()
3127 if (!list_empty(&krcp->bulk_head[i])) in need_offload_krc()
3130 return !!READ_ONCE(krcp->head); in need_offload_krc()
3139 if (!list_empty(&krwp->bulk_head_free[i])) in need_wait_for_krwp_work()
3142 return !!krwp->head_free; in need_wait_for_krwp_work()
3147 int sum = atomic_read(&krcp->head_count); in krc_count()
3151 sum += atomic_read(&krcp->bulk_count[i]); in krc_count()
3162 if (delayed_work_pending(&krcp->monitor_work)) { in __schedule_delayed_monitor_work()
3163 delay_left = krcp->monitor_work.timer.expires - jiffies; in __schedule_delayed_monitor_work()
3165 mod_delayed_work(system_wq, &krcp->monitor_work, delay); in __schedule_delayed_monitor_work()
3168 queue_delayed_work(system_wq, &krcp->monitor_work, delay); in __schedule_delayed_monitor_work()
3176 raw_spin_lock_irqsave(&krcp->lock, flags); in schedule_delayed_monitor_work()
3178 raw_spin_unlock_irqrestore(&krcp->lock, flags); in schedule_delayed_monitor_work()
3185 struct kvfree_rcu_bulk_data *bnode, *n; in kvfree_rcu_drain_ready() local
3190 raw_spin_lock_irqsave(&krcp->lock, flags); in kvfree_rcu_drain_ready()
3194 list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) { in kvfree_rcu_drain_ready()
3195 if (!poll_state_synchronize_rcu_full(&bnode->gp_snap)) in kvfree_rcu_drain_ready()
3198 atomic_sub(bnode->nr_records, &krcp->bulk_count[i]); in kvfree_rcu_drain_ready()
3199 list_move(&bnode->list, &bulk_ready[i]); in kvfree_rcu_drain_ready()
3203 if (krcp->head && poll_state_synchronize_rcu(krcp->head_gp_snap)) { in kvfree_rcu_drain_ready()
3204 head_ready = krcp->head; in kvfree_rcu_drain_ready()
3205 atomic_set(&krcp->head_count, 0); in kvfree_rcu_drain_ready()
3206 WRITE_ONCE(krcp->head, NULL); in kvfree_rcu_drain_ready()
3208 raw_spin_unlock_irqrestore(&krcp->lock, flags); in kvfree_rcu_drain_ready()
3211 list_for_each_entry_safe(bnode, n, &bulk_ready[i], list) in kvfree_rcu_drain_ready()
3220 * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
3232 raw_spin_lock_irqsave(&krcp->lock, flags); in kfree_rcu_monitor()
3236 struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]); in kfree_rcu_monitor()
3240 // there is on-going rcu work to handle krwp's free business. in kfree_rcu_monitor()
3246 // Channel 1 corresponds to the SLAB-pointer bulk path. in kfree_rcu_monitor()
3247 // Channel 2 corresponds to vmalloc-pointer bulk path. in kfree_rcu_monitor()
3249 if (list_empty(&krwp->bulk_head_free[j])) { in kfree_rcu_monitor()
3250 atomic_set(&krcp->bulk_count[j], 0); in kfree_rcu_monitor()
3251 list_replace_init(&krcp->bulk_head[j], in kfree_rcu_monitor()
3252 &krwp->bulk_head_free[j]); in kfree_rcu_monitor()
3258 if (!krwp->head_free) { in kfree_rcu_monitor()
3259 krwp->head_free = krcp->head; in kfree_rcu_monitor()
3260 get_state_synchronize_rcu_full(&krwp->head_free_gp_snap); in kfree_rcu_monitor()
3261 atomic_set(&krcp->head_count, 0); in kfree_rcu_monitor()
3262 WRITE_ONCE(krcp->head, NULL); in kfree_rcu_monitor()
3270 queue_rcu_work(system_wq, &krwp->rcu_work); in kfree_rcu_monitor()
3274 raw_spin_unlock_irqrestore(&krcp->lock, flags); in kfree_rcu_monitor()
3291 queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, 0); in schedule_page_work_fn()
3306 nr_pages = atomic_read(&krcp->backoff_page_cache_fill) ? in fill_page_cache_func()
3309 for (i = READ_ONCE(krcp->nr_bkv_objs); i < nr_pages; i++) { in fill_page_cache_func()
3316 raw_spin_lock_irqsave(&krcp->lock, flags); in fill_page_cache_func()
3318 raw_spin_unlock_irqrestore(&krcp->lock, flags); in fill_page_cache_func()
3326 atomic_set(&krcp->work_in_progress, 0); in fill_page_cache_func()
3327 atomic_set(&krcp->backoff_page_cache_fill, 0); in fill_page_cache_func()
3338 !atomic_xchg(&krcp->work_in_progress, 1)) { in run_page_cache_worker()
3339 if (atomic_read(&krcp->backoff_page_cache_fill)) { in run_page_cache_worker()
3341 &krcp->page_cache_work, in run_page_cache_worker()
3344 hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); in run_page_cache_worker()
3345 krcp->hrtimer.function = schedule_page_work_fn; in run_page_cache_worker()
3346 hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL); in run_page_cache_worker()
3351 // Record ptr in a page managed by krcp, with the pre-krc_this_cpu_lock()
3365 if (unlikely(!(*krcp)->initialized)) in add_ptr_to_bulk_krc_lock()
3369 bnode = list_first_entry_or_null(&(*krcp)->bulk_head[idx], in add_ptr_to_bulk_krc_lock()
3373 if (!bnode || bnode->nr_records == KVFREE_BULK_MAX_ENTR) { in add_ptr_to_bulk_krc_lock()
3378 // __GFP_NORETRY - allows a light-weight direct reclaim in add_ptr_to_bulk_krc_lock()
3383 // __GFP_NOMEMALLOC - prevents from consuming of all the in add_ptr_to_bulk_krc_lock()
3386 // __GFP_NOWARN - it is supposed that an allocation can in add_ptr_to_bulk_krc_lock()
3391 raw_spin_lock_irqsave(&(*krcp)->lock, *flags); in add_ptr_to_bulk_krc_lock()
3398 bnode->nr_records = 0; in add_ptr_to_bulk_krc_lock()
3399 list_add(&bnode->list, &(*krcp)->bulk_head[idx]); in add_ptr_to_bulk_krc_lock()
3403 bnode->records[bnode->nr_records++] = ptr; in add_ptr_to_bulk_krc_lock()
3404 get_state_synchronize_rcu_full(&bnode->gp_snap); in add_ptr_to_bulk_krc_lock()
3405 atomic_inc(&(*krcp)->bulk_count[idx]); in add_ptr_to_bulk_krc_lock()
3429 * Please note there is a limitation for the head-less in kvfree_call_rcu()
3441 WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n", in kvfree_call_rcu()
3457 head->func = ptr; in kvfree_call_rcu()
3458 head->next = krcp->head; in kvfree_call_rcu()
3459 WRITE_ONCE(krcp->head, head); in kvfree_call_rcu()
3460 atomic_inc(&krcp->head_count); in kvfree_call_rcu()
3463 krcp->head_gp_snap = get_state_synchronize_rcu(); in kvfree_call_rcu()
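
For orientation, callers normally reach kvfree_call_rcu() through the kfree_rcu()/kvfree_rcu() macros rather than calling it directly; a small illustrative sketch with made-up names:

    #include <linux/rcupdate.h>

    struct bar {
            int data;
            struct rcu_head rcu;
    };

    static void bar_release(struct bar *bp)
    {
            /* Queue bp for freeing after a grace period; freeing is batched per CPU. */
            kfree_rcu(bp, rcu);
    }

The head-less (single-pointer) variant referred to in the comment above has no rcu_head embedded in the object, so when no backing page can be allocated that path can fall back to a synchronous grace period and an inline free, which is why it may only be used from a context that can sleep.
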
3506 count += READ_ONCE(krcp->nr_bkv_objs); in kfree_rcu_shrink_count()
3507 atomic_set(&krcp->backoff_page_cache_fill, 1); in kfree_rcu_shrink_count()
3524 kfree_rcu_monitor(&krcp->monitor_work.work); in kfree_rcu_shrink_scan()
3526 sc->nr_to_scan -= count; in kfree_rcu_shrink_scan()
3529 if (sc->nr_to_scan <= 0) in kfree_rcu_shrink_scan()
3556 * During early boot, any blocking grace-period wait automatically
3563 * grace-period optimization is ignored once the scheduler is running.
3575 * synchronize_rcu - wait until a grace period has elapsed.
3579 * read-side critical sections have completed. Note, however, that
3581 * concurrently with new RCU read-side critical sections that began while
3584 * RCU read-side critical sections are delimited by rcu_read_lock()
3587 * or softirqs have been disabled also serve as RCU read-side critical
3591 * Note that this guarantee implies further memory-ordering guarantees.
3594 * the end of its last RCU read-side critical section whose beginning
3596 * an RCU read-side critical section that extends beyond the return from
3599 * that RCU read-side critical section. Note that these guarantees include
3606 * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
3609 * Implementation of these memory-ordering guarantees is described here:
3610 * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
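
A typical updater-side sketch for synchronize_rcu(), with illustrative names: unlink the element from the RCU-protected list, wait for a grace period, then free it.

    #include <linux/rculist.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct foo_entry {
            struct list_head list;
            int data;
    };

    static LIST_HEAD(foo_list);
    static DEFINE_SPINLOCK(foo_lock);

    static void foo_remove(struct foo_entry *fe)
    {
            spin_lock(&foo_lock);           /* updater-side mutual exclusion */
            list_del_rcu(&fe->list);
            spin_unlock(&foo_lock);

            synchronize_rcu();              /* all pre-existing readers are done */
            kfree(fe);
    }
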
3620 "Illegal synchronize_rcu() in RCU read-side critical section"); in synchronize_rcu()
3634 // reuse of ->gp_seq_polled_snap. in synchronize_rcu()
3638 // Update the normal grace-period counters to record in synchronize_rcu()
3645 for (rnp = this_cpu_ptr(&rcu_data)->mynode; rnp; rnp = rnp->parent) in synchronize_rcu()
3646 rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq; in synchronize_rcu()
3652 * get_completed_synchronize_rcu_full - Return a full pre-completed polled state cookie
3661 rgosp->rgos_norm = RCU_GET_STATE_COMPLETED; in get_completed_synchronize_rcu_full()
3662 rgosp->rgos_exp = RCU_GET_STATE_COMPLETED; in get_completed_synchronize_rcu_full()
3667 * get_state_synchronize_rcu - Snapshot current RCU state
3676 * Any prior manipulation of RCU-protected data must happen in get_state_synchronize_rcu()
3677 * before the load from ->gp_seq. in get_state_synchronize_rcu()
3685 * get_state_synchronize_rcu_full - Snapshot RCU state, both normal and expedited
3686 * @rgosp: location to place combined normal/expedited grace-period state
3688 * Places the normal and expedited grace-period states in @rgosp. This
3705 * Any prior manipulation of RCU-protected data must happen in get_state_synchronize_rcu_full()
3706 * before the loads from ->gp_seq and ->expedited_sequence. in get_state_synchronize_rcu_full()
3709 rgosp->rgos_norm = rcu_seq_snap(&rnp->gp_seq); in get_state_synchronize_rcu_full()
3710 rgosp->rgos_exp = rcu_seq_snap(&rcu_state.expedited_sequence); in get_state_synchronize_rcu_full()
3715 * Helper function for start_poll_synchronize_rcu() and
3728 rnp = rdp->mynode; in start_poll_synchronize_rcu_common()
3735 // from which they are updated at grace-period start, as required. in start_poll_synchronize_rcu_common()
3743 * start_poll_synchronize_rcu - Snapshot and start RCU grace period
3752 * the grace-period kthread.
3764 * start_poll_synchronize_rcu_full - Take a full snapshot and start RCU grace period
3767 * Places the normal and expedited grace-period states in *@rgosp. This
3775 * the grace-period kthread.
3786 * poll_state_synchronize_rcu - Has the specified RCU grace period completed?
3792 * function later on until it does return @true. Alternatively, the caller
3798 * Yes, this function does not take counter wrap into account.
3800 * more than a billion grace periods (and way more on a 64-bit system!).
3802 * (many hours even on 32-bit systems) should check them occasionally and
3805 * to get a guaranteed-completed grace-period state.
3807 * In addition, because oldstate compresses the grace-period state for
3813 * This function provides the same memory-ordering guarantees that
3815 * to the function that provided @oldstate, and that returned at the end
3816 * of this function.
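
A minimal polling sketch, assuming a context that is allowed to sleep: take a cookie with start_poll_synchronize_rcu(), do other work, and poll until the grace period it refers to has ended.

    #include <linux/delay.h>
    #include <linux/rcupdate.h>

    static void example_poll_gp(void)
    {
            unsigned long cookie;

            cookie = start_poll_synchronize_rcu(); /* snapshot and kick off a GP */

            /* ... unrelated work ... */

            while (!poll_state_synchronize_rcu(cookie))
                    msleep(1);              /* poll until that grace period ends */

            /* Every reader that existed at the snapshot has now finished. */
    }
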
3830 * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed?
3836 * function later on until it does return @true. Alternatively, the caller
3840 * Yes, this function does not take counter wrap into account.
3842 * for more than a billion grace periods (and way more on a 64-bit
3844 * long time periods (many hours even on 32-bit systems) should check
3847 * get_completed_synchronize_rcu_full() to get a guaranteed-completed
3848 * grace-period state.
3850 * This function provides the same memory-ordering guarantees that would
3852 * the function that provided @rgosp, and that returned at the end of this
3853 * function. And this guarantee requires that the root rcu_node structure's
3854 * ->gp_seq field be checked instead of that of the rcu_state structure.
3855 * The problem is that the just-ending grace-period's callbacks can be
3856 * invoked between the time that the root rcu_node structure's ->gp_seq
3857 * field is updated and the time that the rcu_state structure's ->gp_seq
3866 smp_mb(); // Order against root rcu_node structure grace-period cleanup. in poll_state_synchronize_rcu_full()
3867 if (rgosp->rgos_norm == RCU_GET_STATE_COMPLETED || in poll_state_synchronize_rcu_full()
3868 rcu_seq_done_exact(&rnp->gp_seq, rgosp->rgos_norm) || in poll_state_synchronize_rcu_full()
3869 rgosp->rgos_exp == RCU_GET_STATE_COMPLETED || in poll_state_synchronize_rcu_full()
3870 rcu_seq_done_exact(&rcu_state.expedited_sequence, rgosp->rgos_exp)) { in poll_state_synchronize_rcu_full()
3879 * cond_synchronize_rcu - Conditionally wait for an RCU grace period
3886 * Yes, this function does not take counter wrap into account.
3888 * more than 2 billion grace periods (and way more on a 64-bit system!),
3891 * This function provides the same memory-ordering guarantees that
3893 * to the function that provided @oldstate and that returned at the end
3894 * of this function.
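
The conditional form suits the case where a cookie was taken earlier and blocking is acceptable only if the grace period has not yet elapsed; a brief sketch:

    #include <linux/rcupdate.h>

    static void example_cond_gp(void)
    {
            unsigned long cookie = get_state_synchronize_rcu();

            /* ... potentially long-running setup work ... */

            cond_synchronize_rcu(cookie);   /* a no-op if a full GP already elapsed */
    }
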
3904 * cond_synchronize_rcu_full - Conditionally wait for an RCU grace period
3913 * Yes, this function does not take counter wrap into account.
3915 * more than 2 billion grace periods (and way more on a 64-bit system!),
3918 * This function provides the same memory-ordering guarantees that
3920 * to the function that provided @rgosp and that returned at the end of
3921 * this function.
3931 * Check to see if there is any immediate RCU-related work to be done by
3934 * CPU-local state are performed first. However, we must check for CPU
3941 struct rcu_node *rnp = rdp->mynode; in rcu_pending()
3958 if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress) in rcu_pending()
3963 rcu_segcblist_ready_cbs(&rdp->cblist)) in rcu_pending()
3967 if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) && in rcu_pending()
3969 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) in rcu_pending()
3973 if (rcu_seq_current(&rnp->gp_seq) != rdp->gp_seq || in rcu_pending()
3974 unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */ in rcu_pending()
3982 * Helper function for rcu_barrier() tracing. If tracing is disabled,
3992 * RCU callback function for rcu_barrier(). If we are last, wake
4006 rcu_barrier_trace(TPS("LastCB"), -1, s); in rcu_barrier_callback()
4009 rcu_barrier_trace(TPS("CB"), -1, s); in rcu_barrier_callback()
4014 * If needed, entrain an rcu_barrier() callback on rdp->cblist.
4019 unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap); in rcu_barrier_entrain()
4026 rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); in rcu_barrier_entrain()
4027 rdp->barrier_head.func = rcu_barrier_callback; in rcu_barrier_entrain()
4028 debug_rcu_head_queue(&rdp->barrier_head); in rcu_barrier_entrain()
4035 was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); in rcu_barrier_entrain()
4037 wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); in rcu_barrier_entrain()
4038 if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { in rcu_barrier_entrain()
4041 debug_rcu_head_unqueue(&rdp->barrier_head); in rcu_barrier_entrain()
4042 rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence); in rcu_barrier_entrain()
4047 smp_store_release(&rdp->barrier_seq_snap, gseq); in rcu_barrier_entrain()
4051 * Called with preemption disabled, and from cross-cpu IRQ context.
4059 WARN_ON_ONCE(cpu != rdp->cpu); in rcu_barrier_handler()
4067 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
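
The classic use of rcu_barrier() is module unload: stop posting new callbacks, then wait for every already-queued callback to run before the callback code and data go away. A hedged sketch; foo_stop_posting_callbacks() is a stand-in for whatever driver-specific step quiesces new call_rcu() invocations.

    #include <linux/module.h>
    #include <linux/rcupdate.h>

    static void __exit foo_module_exit(void)
    {
            foo_stop_posting_callbacks();   /* hypothetical driver-specific step */

            rcu_barrier();                  /* wait for all queued callbacks to finish */

            /* Now it is safe to free state the callbacks might have used. */
    }
    module_exit(foo_module_exit);
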
4082 rcu_barrier_trace(TPS("Begin"), -1, s); in rcu_barrier()
4089 rcu_barrier_trace(TPS("EarlyExit"), -1, rcu_state.barrier_sequence); in rcu_barrier()
4099 rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence); in rcu_barrier()
4103 * to avoid a too-soon return to zero in case of an immediate in rcu_barrier()
4104 * invocation of the just-enqueued callback (or preemption of in rcu_barrier()
4105 * this task). Exclude CPU-hotplug operations to ensure that no in rcu_barrier()
4106 * offline non-offloaded CPU has callbacks queued. in rcu_barrier()
4120 if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq) in rcu_barrier()
4123 if (!rcu_segcblist_n_cbs(&rdp->cblist)) { in rcu_barrier()
4124 WRITE_ONCE(rdp->barrier_seq_snap, gseq); in rcu_barrier()
4131 WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); in rcu_barrier()
4141 WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); in rcu_barrier()
4156 rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence); in rcu_barrier()
4162 WRITE_ONCE(rdp->barrier_seq_snap, gseq); in rcu_barrier()
4172 * This will not be stable unless the rcu_node structure's ->lock is
4178 return READ_ONCE(rnp->qsmaskinitnext); in rcu_rnp_online_cpus()
4184 * ->qsmaskinitnext field rather than by the global cpu_online_mask.
4188 return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode)); in rcu_rdp_cpu_online()
4224 * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask in rcu_lockdep_current_cpu_online()
4253 struct rcu_node *rnp = rdp->mynode; in rcutree_dying_cpu()
4258 blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask); in rcutree_dying_cpu()
4259 trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), in rcutree_dying_cpu()
4260 blkd ? TPS("cpuofl-bgp") : TPS("cpuofl")); in rcutree_dying_cpu()
4266 * and all tasks that were preempted within an RCU read-side critical
4268 * read-side critical section. Some other CPU is reporting this fact with
4269 * the specified rcu_node structure's ->lock held and interrupts disabled.
4270 * This function therefore goes up the tree of rcu_node structures,
4271 * clearing the corresponding bits in the ->qsmaskinit fields. Note that
4272 * the leaf rcu_node structure's ->qsmaskinit field has already been
4275 * This function does check that the specified rcu_node structure has
4278 * a needless lock acquisition. So once it has done its work, don't
4288 WARN_ON_ONCE(rnp_leaf->qsmaskinit) || in rcu_cleanup_dead_rnp()
4292 mask = rnp->grpmask; in rcu_cleanup_dead_rnp()
4293 rnp = rnp->parent; in rcu_cleanup_dead_rnp()
4297 rnp->qsmaskinit &= ~mask; in rcu_cleanup_dead_rnp()
4299 WARN_ON_ONCE(rnp->qsmask); in rcu_cleanup_dead_rnp()
4300 if (rnp->qsmaskinit) { in rcu_cleanup_dead_rnp()
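rcu_cleanup_dead_rnp() walks from the leaf toward the root, clearing this subtree's bit in each ancestor's ->qsmaskinit and stopping as soon as an ancestor still has other bits set, meaning some sibling subtree still has online CPUs. The toy userspace sketch below models only that walk over a two-level tree; the toy_node structure is invented, although its fields mirror the real ->qsmaskinit, ->grpmask and ->parent.

/* Toy model of clearing subtree bits up a two-level tree (not kernel code). */
#include <stdio.h>

struct toy_node {
	unsigned long qsmaskinit;	/* one bit per child subtree */
	unsigned long grpmask;		/* this node's bit in its parent */
	struct toy_node *parent;
};

static void toy_cleanup_dead(struct toy_node *leaf)
{
	unsigned long mask = leaf->grpmask;
	struct toy_node *rnp = leaf->parent;

	while (rnp) {
		rnp->qsmaskinit &= ~mask;	/* subtree now has no online CPUs */
		if (rnp->qsmaskinit)		/* a sibling subtree is still online */
			return;
		mask = rnp->grpmask;		/* otherwise keep climbing */
		rnp = rnp->parent;
	}
}

int main(void)
{
	struct toy_node root = { .qsmaskinit = 0x3 };		/* two leaves online */
	struct toy_node leaf0 = { .grpmask = 0x1, .parent = &root };
	struct toy_node leaf1 = { .grpmask = 0x2, .parent = &root };

	toy_cleanup_dead(&leaf0);	/* root keeps bit 0x2 for leaf1 */
	printf("root qsmaskinit = %#lx\n", root.qsmaskinit);
	toy_cleanup_dead(&leaf1);	/* now the whole tree is empty */
	printf("root qsmaskinit = %#lx\n", root.qsmaskinit);
	return 0;
}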
4320 WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1); in rcutree_dead_cpu()
4321 // Stop-machine done, so allow nohz_full to disable tick. in rcutree_dead_cpu()
4327 * Propagate ->qsmaskinit bits up the rcu_node tree to account for the in rcu_init_new_rnp()
4329 * must hold the corresponding leaf rcu_node ->lock with interrupts
4339 WARN_ON_ONCE(rnp->wait_blkd_tasks); in rcu_init_new_rnp()
4341 mask = rnp->grpmask; in rcu_init_new_rnp()
4342 rnp = rnp->parent; in rcu_init_new_rnp()
4346 oldmask = rnp->qsmaskinit; in rcu_init_new_rnp()
4347 rnp->qsmaskinit |= mask; in rcu_init_new_rnp()
4355 * Do boot-time initialization of a CPU's per-CPU RCU data.
4364 rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); in rcu_boot_init_percpu_data()
4365 INIT_WORK(&rdp->strict_work, strict_work_handler); in rcu_boot_init_percpu_data()
4366 WARN_ON_ONCE(ct->dynticks_nesting != 1); in rcu_boot_init_percpu_data()
4368 rdp->barrier_seq_snap = rcu_state.barrier_sequence; in rcu_boot_init_percpu_data()
4369 rdp->rcu_ofl_gp_seq = rcu_state.gp_seq; in rcu_boot_init_percpu_data()
4370 rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; in rcu_boot_init_percpu_data()
4371 rdp->rcu_onl_gp_seq = rcu_state.gp_seq; in rcu_boot_init_percpu_data()
4372 rdp->rcu_onl_gp_flags = RCU_GP_CLEANED; in rcu_boot_init_percpu_data()
4373 rdp->last_sched_clock = jiffies; in rcu_boot_init_percpu_data()
4374 rdp->cpu = cpu; in rcu_boot_init_percpu_data()
4379 * Invoked early in the CPU-online process, when pretty much all services
4382 * Initializes a CPU's per-CPU RCU data. Note that only one online or
4384 * accept some slop in the rcu_state.gp_seq access due to the fact that this in rcutree_prepare_cpu()
4385 * CPU cannot possibly have any non-offloaded RCU callbacks in flight yet.
4397 rdp->qlen_last_fqs_check = 0; in rcutree_prepare_cpu()
4398 rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); in rcutree_prepare_cpu()
4399 rdp->blimit = blimit; in rcutree_prepare_cpu()
4400 ct->dynticks_nesting = 1; /* CPU not up, no tearing. */ in rcutree_prepare_cpu()
4404 * Only non-NOCB CPUs that didn't have early-boot callbacks need to be in rcutree_prepare_cpu()
4405 * (re-)initialized. in rcutree_prepare_cpu()
4407 if (!rcu_segcblist_is_enabled(&rdp->cblist)) in rcutree_prepare_cpu()
4408 rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ in rcutree_prepare_cpu()
4411 * Add CPU to leaf rcu_node pending-online bitmask. Any needed in rcutree_prepare_cpu()
4415 rnp = rdp->mynode; in rcutree_prepare_cpu()
4417 rdp->gp_seq = READ_ONCE(rnp->gp_seq); in rcutree_prepare_cpu()
4418 rdp->gp_seq_needed = rdp->gp_seq; in rcutree_prepare_cpu()
4419 rdp->cpu_no_qs.b.norm = true; in rcutree_prepare_cpu()
4420 rdp->core_needs_qs = false; in rcutree_prepare_cpu()
4421 rdp->rcu_iw_pending = false; in rcutree_prepare_cpu()
4422 rdp->rcu_iw = IRQ_WORK_INIT_HARD(rcu_iw_handler); in rcutree_prepare_cpu()
4423 rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; in rcutree_prepare_cpu()
4424 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); in rcutree_prepare_cpu()
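rcutree_prepare_cpu() and rcutree_dead_cpu() run from the CPU-hotplug state machine. Other subsystems hook into the same machine through the cpuhp API; the sketch below is a hedged illustration of that registration pattern for a hypothetical driver (all drv_* names invented), not of how RCU itself registers its states.

/*
 * Hedged sketch of CPU-hotplug callback registration.  The drv_* names
 * are hypothetical; cpuhp_setup_state()/cpuhp_remove_state() are the
 * real API.
 */
#include <linux/module.h>
#include <linux/cpuhotplug.h>

static int drv_state;

static int drv_online_cpu(unsigned int cpu)
{
	pr_info("drv: CPU %u is coming online\n", cpu);
	return 0;		/* nonzero would abort the hotplug operation */
}

static int drv_offline_cpu(unsigned int cpu)
{
	pr_info("drv: CPU %u is going offline\n", cpu);
	return 0;
}

static int __init drv_init(void)
{
	int ret;

	/* Callbacks run for already-online CPUs and for future hotplug. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "drv:online",
				drv_online_cpu, drv_offline_cpu);
	if (ret < 0)
		return ret;
	drv_state = ret;	/* dynamically allocated state number */
	return 0;
}

static void __exit drv_exit(void)
{
	cpuhp_remove_state(drv_state);
}

module_init(drv_init);
module_exit(drv_exit);
MODULE_LICENSE("GPL");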
4434 * Update RCU priority boost kthread affinity for CPU-hotplug changes. in rcutree_affinity_setting()
4440 rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); in rcutree_affinity_setting()
4450 return smp_load_acquire(&rdp->beenonline); in rcu_cpu_beenfullyonline()
4454 * Near the end of the CPU-online process. Pretty much all services
4464 rnp = rdp->mynode; in rcutree_online_cpu()
4466 rnp->ffmask |= rdp->grpmask; in rcutree_online_cpu()
4471 rcutree_affinity_setting(cpu, -1); in rcutree_online_cpu()
4473 // Stop-machine done, so allow nohz_full to disable tick. in rcutree_online_cpu()
4489 rnp = rdp->mynode; in rcutree_offline_cpu()
4491 rnp->ffmask &= ~rdp->grpmask; in rcutree_offline_cpu()
4496 // nohz_full CPUs need the tick for stop-machine to work quickly in rcutree_offline_cpu()
4504 * incoming CPUs are not allowed to use RCU read-side critical sections
4505 * until this function is called. Failing to observe this restriction
4508 * Note that this function is special in that it is invoked directly
4510 * This is because this function must be invoked at a precise location.
4522 if (rdp->cpu_started) in rcu_cpu_starting()
4524 rdp->cpu_started = true; in rcu_cpu_starting()
4526 rnp = rdp->mynode; in rcu_cpu_starting()
4527 mask = rdp->grpmask; in rcu_cpu_starting()
4532 WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); in rcu_cpu_starting()
4534 newcpu = !(rnp->expmaskinitnext & mask); in rcu_cpu_starting()
4535 rnp->expmaskinitnext |= mask; in rcu_cpu_starting()
4539 rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */ in rcu_cpu_starting()
4540 rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq); in rcu_cpu_starting()
4541 rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags); in rcu_cpu_starting()
4544 if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */ in rcu_cpu_starting()
4550 /* Report QS -after- changing ->qsmaskinitnext! */ in rcu_cpu_starting()
4551 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in rcu_cpu_starting()
4556 smp_store_release(&rdp->beenonline, true); in rcu_cpu_starting()
4557 smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ in rcu_cpu_starting()
4561 * The outgoing CPU has no further need of RCU, so remove it from in rcu_report_dead()
4562 * the rcu_node tree's ->qsmaskinitnext bit masks.
4564 * Note that this function is special in that it is invoked directly
4566 * This is because this function must be invoked at a precise location.
4573 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ in rcu_report_dead()
4581 mask = rdp->grpmask; in rcu_report_dead()
4584 raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ in rcu_report_dead()
4585 rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq); in rcu_report_dead()
4586 rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags); in rcu_report_dead()
4587 if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */ in rcu_report_dead()
4588 /* Report quiescent state -before- changing ->qsmaskinitnext! */ in rcu_report_dead()
4590 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in rcu_report_dead()
4593 WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask); in rcu_report_dead()
4598 rdp->cpu_started = false; in rcu_report_dead()
4603 * The outgoing CPU has just passed through the dying-idle state, and we
4619 if (rcu_segcblist_empty(&rdp->cblist)) { in rcutree_migrate_callbacks()
4627 my_rnp = my_rdp->mynode; in rcutree_migrate_callbacks()
4634 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); in rcutree_migrate_callbacks()
4637 rcu_segcblist_disable(&rdp->cblist); in rcutree_migrate_callbacks()
4638 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist)); in rcutree_migrate_callbacks()
4651 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || in rcutree_migrate_callbacks()
4652 !rcu_segcblist_empty(&rdp->cblist), in rcutree_migrate_callbacks()
4653 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", in rcutree_migrate_callbacks()
4654 cpu, rcu_segcblist_n_cbs(&rdp->cblist), in rcutree_migrate_callbacks()
4655 rcu_segcblist_first_cb(&rdp->cblist)); in rcutree_migrate_callbacks()
4660 * On non-huge systems, use expedited RCU grace periods to make suspend
4695 pr_err("Failed to create %s!\n", gp_kworker_name); in rcu_start_exp_gp_kworkers()
4702 pr_err("Failed to create %s!\n", par_gp_kworker_name); in rcu_start_exp_gp_kworkers()
4709 sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param); in rcu_start_exp_gp_kworkers()
4710 sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO, in rcu_start_exp_gp_kworkers()
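rcu_start_exp_gp_kworkers() creates dedicated kthread workers for expedited grace-period handling and, when RCU kthreads run at real-time priority, switches their tasks to SCHED_FIFO. Below is a hedged sketch of that pattern written as built-in initialization code; the demo_* names are invented, while kthread_create_worker(), kthread_queue_work() and sched_setscheduler_nocheck() are the real APIs used above.

/*
 * Hedged sketch of the "dedicated kworker at real-time priority" pattern.
 * All demo_* names are invented.
 */
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>

static struct kthread_worker *demo_worker;
static struct kthread_work demo_work;

static void demo_work_fn(struct kthread_work *work)
{
	pr_info("demo: work executed in dedicated kworker\n");
}

static int __init demo_start_kworker(void)
{
	struct sched_param param = { .sched_priority = 1 };	/* lowest RT priority */

	demo_worker = kthread_create_worker(0, "demo_kworker");
	if (IS_ERR(demo_worker)) {
		pr_err("Failed to create demo_kworker!\n");
		return PTR_ERR(demo_worker);
	}

	/* Promote the worker's task to SCHED_FIFO, as done for the GP kworkers. */
	sched_setscheduler_nocheck(demo_worker->task, SCHED_FIFO, &param);

	kthread_init_work(&demo_work, demo_work_fn);
	kthread_queue_work(demo_worker, &demo_work);
	return 0;
}
core_initcall(demo_start_kworker);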
4744 …if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n in rcu_spawn_gp_kthread()
4758 /* This is a pre-SMP initcall, we expect a single CPU */ in rcu_spawn_gp_kthread()
4761 * Those kthreads couldn't be created on rcu_init() -> rcutree_prepare_cpu() in rcu_spawn_gp_kthread()
4765 rcu_spawn_one_boost_kthread(rdp->mynode); in rcu_spawn_gp_kthread()
4774 * This function is invoked towards the end of the scheduler's
4776 * contain synchronous grace-period primitives (during which time, this idle
4777 * task is booting the system, and such primitives are no-ops). After this
4778 * function is called, any synchronous grace-period primitives are run as
4792 // Fix up the ->gp_seq counters. in rcu_scheduler_starting()
4795 rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq; in rcu_scheduler_starting()
4804 * Helper function for rcu_init() that initializes the rcu_state structure.
4825 /* Initialize the level-tracking arrays. */ in rcu_init_one()
4829 rcu_state.level[i - 1] + num_rcu_lvl[i - 1]; in rcu_init_one()
4834 for (i = rcu_num_lvls - 1; i >= 0; i--) { in rcu_init_one()
4838 raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock)); in rcu_init_one()
4839 lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock), in rcu_init_one()
4841 raw_spin_lock_init(&rnp->fqslock); in rcu_init_one()
4842 lockdep_set_class_and_name(&rnp->fqslock, in rcu_init_one()
4844 rnp->gp_seq = rcu_state.gp_seq; in rcu_init_one()
4845 rnp->gp_seq_needed = rcu_state.gp_seq; in rcu_init_one()
4846 rnp->completedqs = rcu_state.gp_seq; in rcu_init_one()
4847 rnp->qsmask = 0; in rcu_init_one()
4848 rnp->qsmaskinit = 0; in rcu_init_one()
4849 rnp->grplo = j * cpustride; in rcu_init_one()
4850 rnp->grphi = (j + 1) * cpustride - 1; in rcu_init_one()
4851 if (rnp->grphi >= nr_cpu_ids) in rcu_init_one()
4852 rnp->grphi = nr_cpu_ids - 1; in rcu_init_one()
4854 rnp->grpnum = 0; in rcu_init_one()
4855 rnp->grpmask = 0; in rcu_init_one()
4856 rnp->parent = NULL; in rcu_init_one()
4858 rnp->grpnum = j % levelspread[i - 1]; in rcu_init_one()
4859 rnp->grpmask = BIT(rnp->grpnum); in rcu_init_one()
4860 rnp->parent = rcu_state.level[i - 1] + in rcu_init_one()
4861 j / levelspread[i - 1]; in rcu_init_one()
4863 rnp->level = i; in rcu_init_one()
4864 INIT_LIST_HEAD(&rnp->blkd_tasks); in rcu_init_one()
4866 init_waitqueue_head(&rnp->exp_wq[0]); in rcu_init_one()
4867 init_waitqueue_head(&rnp->exp_wq[1]); in rcu_init_one()
4868 init_waitqueue_head(&rnp->exp_wq[2]); in rcu_init_one()
4869 init_waitqueue_head(&rnp->exp_wq[3]); in rcu_init_one()
4870 spin_lock_init(&rnp->exp_lock); in rcu_init_one()
4871 mutex_init(&rnp->boost_kthread_mutex); in rcu_init_one()
4872 raw_spin_lock_init(&rnp->exp_poll_lock); in rcu_init_one()
4873 rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED; in rcu_init_one()
4874 INIT_WORK(&rnp->exp_poll_wq, sync_rcu_do_polled_gp); in rcu_init_one()
4882 while (i > rnp->grphi) in rcu_init_one()
4884 per_cpu_ptr(&rcu_data, i)->mynode = rnp; in rcu_init_one()
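In the initialization loop above, each rcu_node at a given level spans cpustride CPUs, ->grplo/->grphi record its inclusive CPU range (clamped to nr_cpu_ids), and ->grpnum/->grpmask locate the node within its parent. The standalone sketch below reproduces that arithmetic for an assumed two-level geometry of one root fanning out to four leaves of 16 CPUs each, with a hypothetical 50 CPUs; the numbers are illustrative, not the kernel's compile-time values.

/* Userspace illustration of the grplo/grphi/grpnum arithmetic above. */
#include <stdio.h>

#define NUM_LVLS 2

int main(void)
{
	int num_rcu_lvl[NUM_LVLS] = { 1, 4 };	/* nodes per level: root, leaves */
	int levelspread[NUM_LVLS] = { 4, 16 };	/* children per node at each level */
	int nr_cpu_ids = 50;			/* hypothetical CPU count */
	int cpustride = 1;

	for (int i = NUM_LVLS - 1; i >= 0; i--) {	/* leaves first, root last */
		cpustride *= levelspread[i];
		for (int j = 0; j < num_rcu_lvl[i]; j++) {
			int grplo = j * cpustride;
			int grphi = (j + 1) * cpustride - 1;

			if (grphi >= nr_cpu_ids)	/* clamp the last node */
				grphi = nr_cpu_ids - 1;
			if (i == 0)
				printf("level %d (root): CPUs %d-%d\n",
				       i, grplo, grphi);
			else
				printf("level %d leaf %d: CPUs %d-%d, bit %d in parent\n",
				       i, j, grplo, grphi, j % levelspread[i - 1]);
		}
	}
	return 0;
}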
4890 * Force priority from the kernel command-line into range.
4907 pr_alert("%s: Limited prio to %d from %d\n", in sanitize_kthread_prio()
4914 * the ->node array in the rcu_state structure.
4940 * value, which is a function of HZ, then adding one for each in rcu_init_geometry()
4950 /* If the compile-time values are accurate, just leave. */ in rcu_init_geometry()
4954 pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n", in rcu_init_geometry()
4958 * The boot-time rcu_fanout_leaf parameter must be at least two in rcu_init_geometry()
4960 * Complain and fall back to the compile-time values if this in rcu_init_geometry()
4976 rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT; in rcu_init_geometry()
4980 * If this limit is exceeded, fall back to the compile-time values. in rcu_init_geometry()
4982 if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) { in rcu_init_geometry()
4995 int cap = rcu_capacity[(rcu_num_lvls - 1) - i]; in rcu_init_geometry()
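The preceding capacity calculation grows geometrically: the lowest entry is the leaf fanout and each further level multiplies it by RCU_FANOUT, so the code picks the smallest number of levels whose capacity covers nr_cpu_ids and then, roughly, sizes each level by dividing nr_cpu_ids by that level's capacity, rounding up. The userspace sketch below walks through that computation with assumed fanouts of 16 (leaf) and 64 (interior) and a hypothetical 300 CPUs.

/* Userspace sketch of the rcu_node geometry computation, with assumed values. */
#include <stdio.h>

#define MAX_LVLS		4
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int fanout_leaf = 16, fanout = 64;	/* assumed fanout values */
	int nr_cpu_ids = 300;			/* hypothetical CPU count */
	long capacity[MAX_LVLS];
	int levels, i;

	/* Each extra level multiplies reachable CPUs by the interior fanout. */
	capacity[0] = fanout_leaf;
	for (i = 1; i < MAX_LVLS; i++)
		capacity[i] = capacity[i - 1] * fanout;

	/* Smallest tree deep enough to cover nr_cpu_ids CPUs. */
	for (i = 0; i < MAX_LVLS - 1 && nr_cpu_ids > capacity[i]; i++)
		;
	levels = i + 1;

	/* Nodes per level, root (level 0) first. */
	for (i = 0; i < levels; i++) {
		long cap = capacity[(levels - 1) - i];

		printf("level %d: %ld node(s), each covering up to %ld CPUs\n",
		       i, (long)DIV_ROUND_UP(nr_cpu_ids, cap), cap);
	}
	return 0;
}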
5014 pr_info("rcu_node tree layout dump\n"); in rcu_dump_rcu_node_tree()
5017 if (rnp->level != level) { in rcu_dump_rcu_node_tree()
5018 pr_cont("\n"); in rcu_dump_rcu_node_tree()
5020 level = rnp->level; in rcu_dump_rcu_node_tree()
5022 pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum); in rcu_dump_rcu_node_tree()
5024 pr_cont("\n"); in rcu_dump_rcu_node_tree()
5042 pr_info("Adjusting rcutree.rcu_delay_page_cache_fill_msec to %d ms.\n", in kfree_rcu_batch_init()
5050 INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work); in kfree_rcu_batch_init()
5051 krcp->krw_arr[i].krcp = krcp; in kfree_rcu_batch_init()
5054 INIT_LIST_HEAD(&krcp->krw_arr[i].bulk_head_free[j]); in kfree_rcu_batch_init()
5058 INIT_LIST_HEAD(&krcp->bulk_head[i]); in kfree_rcu_batch_init()
5060 INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor); in kfree_rcu_batch_init()
5061 INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func); in kfree_rcu_batch_init()
5062 krcp->initialized = true; in kfree_rcu_batch_init()
5064 if (register_shrinker(&kfree_rcu_shrinker, "rcu-kfree")) in kfree_rcu_batch_init()
5065 pr_err("Failed to register kfree_rcu() shrinker!\n"); in kfree_rcu_batch_init()
5085 * We don't need protection against CPU-hotplug here because in rcu_init()
5101 /* -After- the rcu_node ->lock fields are initialized! */ in rcu_init()
5107 // Kick-start in case any polled grace periods started early. in rcu_init()