Lines matching: lock, detect, function, integer, n, enable (kernel/rcu/tree.c)

1 // SPDX-License-Identifier: GPL-2.0+
3 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
14 * For detailed explanation of Read-Copy Update mechanism see -
67 #include "../time/tick-internal.h"
88 .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
106 /* Control rcu_node-tree auto-balancing at boot time. */
124 * to detect real grace periods. This variable is also used to suppress
125 * boot-time false positives from lockdep-RCU error checking. Finally, it
161 * real-time priority (enabling/disabling) is controlled by
167 /* Delay in jiffies for grace-period initialization delays, debug only. */
183 * This rcu parameter is runtime-read-only. It reflects
185 * per-CPU. Object size is equal to one page. This value
194 // defer page-cache filling for a short time period.
213 * for non-zero delays, the overall slowdown of grace periods is constant
222 * permit this function to be invoked without holding the root rcu_node
223 * structure's ->lock, but of course results can be subject to change.
238 if (rcu_segcblist_is_enabled(&rdp->cblist)) in rcu_get_n_cbs_cpu()
239 return rcu_segcblist_n_cbs(&rdp->cblist); in rcu_get_n_cbs_cpu()
251 * Reset the current CPU's ->dynticks counter to indicate that the
254 * to the next non-quiescent value.
256 * The non-atomic test/increment sequence works because the upper bits
257 * of the ->dynticks counter are manipulated only by the corresponding CPU,
268 * Snapshot the ->dynticks counter with full ordering so as to allow
293 return snap != rcu_dynticks_snap(rdp->cpu); in rcu_dynticks_in_eqs_since()
297 * Return true if the referenced integer is zero while the specified
306 smp_rmb(); // Order ->dynticks and *vp reads. in rcu_dynticks_zero_in_eqs()
308 return false; // Non-zero, so report failure; in rcu_dynticks_zero_in_eqs()
309 smp_rmb(); // Order *vp read and ->dynticks re-read. in rcu_dynticks_zero_in_eqs()
322 * We inform the RCU core by emulating a zero-duration dyntick-idle period.
339 * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle
341 * If the current CPU is idle and running at a first-level (not nested)
388 static long qovld_calc = -1; // No pre-initialization lock acquisitions!
407 * quiescent-state help from rcu_note_context_switch().
415 * Make sure that we give the grace-period kthread time to detect any
434 pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j); in adjust_jiffies_till_sched_qs()
444 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j); in param_set_first_fqs_jiffies()
456 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1)); in param_set_next_fqs_jiffies()
509 * Send along grace-period-related data for rcutorture diagnostics.
527 * An empty function that will trigger a reschedule on
528 * IRQ tail once IRQs get re-enabled on userspace/guest resume.
545 * get re-enabled again.
551 if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU)) in rcu_irq_work_resched()
554 if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) in rcu_irq_work_resched()
567 * rcu_irq_exit_check_preempt - Validate that scheduling is possible
577 "Bad RCU dynticks_nmi_nesting counter\n"); in rcu_irq_exit_check_preempt()
585 * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
596 * in a timely manner, the RCU grace-period kthread sets that CPU's
597 * ->rcu_urgent_qs flag with the expectation that the next interrupt or
598 * exception will invoke this function, which will turn on the scheduler
599 * tick, which will enable RCU to detect that CPU's quiescent states,
600 * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.
605 * interrupt or exception. In that case, the RCU grace-period kthread
607 * controlled environments, this function allows RCU to get what it
621 if (!tick_nohz_full_cpu(rdp->cpu) || in __rcu_irq_enter_check_tick()
622 !READ_ONCE(rdp->rcu_urgent_qs) || in __rcu_irq_enter_check_tick()
623 READ_ONCE(rdp->rcu_forced_tick)) { in __rcu_irq_enter_check_tick()
632 // handler and that the rcu_node lock is an irq-disabled lock in __rcu_irq_enter_check_tick()
633 // prevents self-deadlock. So we can safely recheck under the lock. in __rcu_irq_enter_check_tick()
635 raw_spin_lock_rcu_node(rdp->mynode); in __rcu_irq_enter_check_tick()
636 if (READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) { in __rcu_irq_enter_check_tick()
639 WRITE_ONCE(rdp->rcu_forced_tick, true); in __rcu_irq_enter_check_tick()
640 tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU); in __rcu_irq_enter_check_tick()
642 raw_spin_unlock_rcu_node(rdp->mynode); in __rcu_irq_enter_check_tick()
648 * Check to see if any future non-offloaded RCU-related work will need
650 * returning 1 if so. This function is part of the RCU implementation;
651 * it is -not- an exported member of the RCU API. This is used by
652 * the idle-entry code to figure out whether it is safe to disable the
653 * scheduler-clock interrupt.
655 * Just check whether or not this CPU has non-offloaded RCU callbacks
660 return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && in rcu_needs_cpu()
666 * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
671 raw_lockdep_assert_held_rcu_node(rdp->mynode); in rcu_disable_urgency_upon_qs()
672 WRITE_ONCE(rdp->rcu_urgent_qs, false); in rcu_disable_urgency_upon_qs()
673 WRITE_ONCE(rdp->rcu_need_heavy_qs, false); in rcu_disable_urgency_upon_qs()
674 if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) { in rcu_disable_urgency_upon_qs()
675 tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU); in rcu_disable_urgency_upon_qs()
676 WRITE_ONCE(rdp->rcu_forced_tick, false); in rcu_disable_urgency_upon_qs()
681 * rcu_is_watching - RCU read-side critical sections permitted on current CPU?
684 * An @true return means that this CPU can safely enter RCU read-side
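
For reference, a minimal sketch of how callers typically consume rcu_is_watching(): the lockdep hooks behind rcu_read_lock() use it to flag read-side critical sections entered while RCU is not watching, roughly like this:

    /* Debug-only check before entering an RCU read-side critical section. */
    RCU_LOCKDEP_WARN(!rcu_is_watching(),
                     "rcu_read_lock() used illegally while idle");
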
727 * of the rcu_node ->gp_seq counter with respect to the rcu_data counters.
734 if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4, in rcu_gpnum_ovf()
735 rnp->gp_seq)) in rcu_gpnum_ovf()
736 WRITE_ONCE(rdp->gpwrap, true); in rcu_gpnum_ovf()
737 if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq)) in rcu_gpnum_ovf()
738 rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4; in rcu_gpnum_ovf()
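
For reference, the ULONG_CMP_LT() checks above rely on the wrap-tolerant comparison helpers; a short sketch of the idea (see include/linux/rcupdate.h for the authoritative definitions):

    /* Wrap-tolerant comparisons for unsigned long sequence counters. */
    #define ULONG_CMP_GE(a, b)  (ULONG_MAX / 2 >= (a) - (b))
    #define ULONG_CMP_LT(a, b)  (ULONG_MAX / 2 < (a) - (b))

    /*
     * Worked example: if the per-CPU snapshot (a) sits near ULONG_MAX and
     * rnp->gp_seq (b) has since wrapped around to a small value, (a) - (b)
     * is still larger than ULONG_MAX / 2, so the snapshot is correctly
     * treated as older than the current counter.  rcu_gpnum_ovf() offsets
     * the snapshot by ULONG_MAX / 4 so that ->gpwrap is set once the
     * rcu_node counter has run a quarter of the counter space ahead of it.
     */
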
748 rdp->dynticks_snap = rcu_dynticks_snap(rdp->cpu); in dyntick_save_progress_counter()
749 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { in dyntick_save_progress_counter()
750 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti")); in dyntick_save_progress_counter()
751 rcu_gpnum_ovf(rdp->mynode, rdp); in dyntick_save_progress_counter()
775 struct rcu_node *rnp = rdp->mynode; in rcu_implicit_dynticks_qs()
782 * read-side critical section that started before the beginning in rcu_implicit_dynticks_qs()
785 if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap)) { in rcu_implicit_dynticks_qs()
786 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti")); in rcu_implicit_dynticks_qs()
795 * the CPU-offline process, or, failing that, by rcu_gp_init() in rcu_implicit_dynticks_qs()
797 * last task on a leaf rcu_node structure exiting its RCU read-side in rcu_implicit_dynticks_qs()
802 * The rcu_node structure's ->lock is held here, which excludes in rcu_implicit_dynticks_qs()
803 * the relevant portions of the CPU-hotplug code, the grace-period in rcu_implicit_dynticks_qs()
812 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", in rcu_implicit_dynticks_qs()
813 __func__, rnp->grplo, rnp->grphi, rnp->level, in rcu_implicit_dynticks_qs()
814 (long)rnp->gp_seq, (long)rnp->completedqs); in rcu_implicit_dynticks_qs()
815 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent) in rcu_implicit_dynticks_qs()
816 …pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n in rcu_implicit_dynticks_qs()
817 …__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rc… in rcu_implicit_dynticks_qs()
818 pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n", in rcu_implicit_dynticks_qs()
819 __func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)], in rcu_implicit_dynticks_qs()
820 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags, in rcu_implicit_dynticks_qs()
821 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags); in rcu_implicit_dynticks_qs()
830 * unsynchronized assignments to the per-CPU rcu_need_heavy_qs in rcu_implicit_dynticks_qs()
837 if (!READ_ONCE(rdp->rcu_need_heavy_qs) && in rcu_implicit_dynticks_qs()
841 WRITE_ONCE(rdp->rcu_need_heavy_qs, true); in rcu_implicit_dynticks_qs()
843 smp_store_release(&rdp->rcu_urgent_qs, true); in rcu_implicit_dynticks_qs()
845 WRITE_ONCE(rdp->rcu_urgent_qs, true); in rcu_implicit_dynticks_qs()
849 * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq! in rcu_implicit_dynticks_qs()
851 * And some in-kernel loops check need_resched() before calling in rcu_implicit_dynticks_qs()
853 * running in-kernel with scheduling-clock interrupts disabled. in rcu_implicit_dynticks_qs()
856 if (tick_nohz_full_cpu(rdp->cpu) && in rcu_implicit_dynticks_qs()
857 (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || in rcu_implicit_dynticks_qs()
859 WRITE_ONCE(rdp->rcu_urgent_qs, true); in rcu_implicit_dynticks_qs()
860 WRITE_ONCE(rdp->last_fqs_resched, jiffies); in rcu_implicit_dynticks_qs()
861 ret = -1; in rcu_implicit_dynticks_qs()
865 * If more than halfway to RCU CPU stall-warning time, invoke in rcu_implicit_dynticks_qs()
873 READ_ONCE(rdp->last_fqs_resched) + jtsq)) { in rcu_implicit_dynticks_qs()
874 WRITE_ONCE(rdp->last_fqs_resched, jiffies); in rcu_implicit_dynticks_qs()
875 ret = -1; in rcu_implicit_dynticks_qs()
878 !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && in rcu_implicit_dynticks_qs()
879 (rnp->ffmask & rdp->grpmask)) { in rcu_implicit_dynticks_qs()
880 rdp->rcu_iw_pending = true; in rcu_implicit_dynticks_qs()
881 rdp->rcu_iw_gp_seq = rnp->gp_seq; in rcu_implicit_dynticks_qs()
882 irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); in rcu_implicit_dynticks_qs()
885 if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) { in rcu_implicit_dynticks_qs()
886 int cpu = rdp->cpu; in rcu_implicit_dynticks_qs()
892 rsrp = &rdp->snap_record; in rcu_implicit_dynticks_qs()
893 rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); in rcu_implicit_dynticks_qs()
894 rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); in rcu_implicit_dynticks_qs()
895 rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); in rcu_implicit_dynticks_qs()
896 rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu); in rcu_implicit_dynticks_qs()
897 rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu); in rcu_implicit_dynticks_qs()
898 rsrp->nr_csw = nr_context_switches_cpu(cpu); in rcu_implicit_dynticks_qs()
899 rsrp->jiffies = jiffies; in rcu_implicit_dynticks_qs()
900 rsrp->gp_seq = rdp->gp_seq; in rcu_implicit_dynticks_qs()
907 /* Trace-event wrapper function for trace_rcu_future_grace_period. */
911 trace_rcu_future_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), in trace_rcu_this_gp()
912 gp_seq_req, rnp->level, in trace_rcu_this_gp()
913 rnp->grplo, rnp->grphi, s); in trace_rcu_this_gp()
917 * rcu_start_this_gp - Request the start of a particular grace period
924 * rcu_node structure's ->gp_seq_needed field. Returns true if there
925 * is reason to awaken the grace-period kthread.
927 * The caller must hold the specified rcu_node structure's ->lock, which
928 * is why the caller is responsible for waking the grace-period kthread.
940 * structure's lock or bail out if the need for this grace period in rcu_start_this_gp()
941 * has already been recorded -- or if that grace period has in in rcu_start_this_gp()
943 * progress in a non-leaf node, no recording is needed because the in rcu_start_this_gp()
945 * Note that rnp_start->lock must not be released. in rcu_start_this_gp()
949 for (rnp = rnp_start; 1; rnp = rnp->parent) { in rcu_start_this_gp()
952 if (ULONG_CMP_GE(rnp->gp_seq_needed, gp_seq_req) || in rcu_start_this_gp()
953 rcu_seq_started(&rnp->gp_seq, gp_seq_req) || in rcu_start_this_gp()
955 rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))) { in rcu_start_this_gp()
960 WRITE_ONCE(rnp->gp_seq_needed, gp_seq_req); in rcu_start_this_gp()
961 if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) { in rcu_start_this_gp()
972 if (rnp != rnp_start && rnp->parent != NULL) in rcu_start_this_gp()
974 if (!rnp->parent) in rcu_start_this_gp()
994 if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) { in rcu_start_this_gp()
995 WRITE_ONCE(rnp_start->gp_seq_needed, rnp->gp_seq_needed); in rcu_start_this_gp()
996 WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); in rcu_start_this_gp()
1004 * Clean up any old requests for the just-ended grace period. Also return
1012 needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed); in rcu_future_gp_cleanup()
1014 rnp->gp_seq_needed = rnp->gp_seq; /* Avoid counter wrap. */ in rcu_future_gp_cleanup()
1015 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq, in rcu_future_gp_cleanup()
1033 * is dangerous that late in the CPU-down hotplug process. The in swake_up_one_online()
1053 * Awaken the grace-period kthread. Don't do a self-awaken (unless in an
1055 * sleep upon return, resulting in a grace-period hang), and don't bother
1056 * awakening when there is nothing for the grace-period kthread to do
1061 * So why do the self-wakeup when in an interrupt or softirq handler
1062 * in the grace-period kthread's context? Because the kthread might have
1064 * pre-sleep check of the awaken condition. In this case, a wakeup really
1080 * If there is room, assign a ->gp_seq number to any callbacks on this
1082 * that were previously assigned a ->gp_seq number that has since proven
1084 * ->gp_seq number while RCU is idle, but with reference to a non-root
1085 * rcu_node structure. This function is idempotent, so it does not hurt
1087 * the RCU grace-period kthread.
1089 * The caller must hold rnp->lock with interrupts disabled.
1100 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) in rcu_accelerate_cbs()
1103 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPreAcc")); in rcu_accelerate_cbs()
1106 * Callbacks are often registered with incomplete grace-period in rcu_accelerate_cbs()
1108 * information requires acquiring a global lock... RCU therefore in rcu_accelerate_cbs()
1112 * accelerating callback invocation to an earlier grace-period in rcu_accelerate_cbs()
1116 if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req)) in rcu_accelerate_cbs()
1120 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) in rcu_accelerate_cbs()
1125 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPostAcc")); in rcu_accelerate_cbs()
1132 * rcu_node structure's ->lock be held. It consults the cached value
1133 * of ->gp_seq_needed in the rcu_data structure, and if that indicates
1134 * that a new grace-period request be made, invokes rcu_accelerate_cbs()
1135 * while holding the leaf rcu_node structure's ->lock.
1145 if (!READ_ONCE(rdp->gpwrap) && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { in rcu_accelerate_cbs_unlocked()
1147 (void)rcu_segcblist_accelerate(&rdp->cblist, c); in rcu_accelerate_cbs_unlocked()
1160 * assign ->gp_seq numbers to any callbacks in the RCU_NEXT_TAIL
1161 * sublist. This function is idempotent, so it does not hurt to
1162 * invoke it repeatedly. As long as it is not invoked -too- often...
1163 * Returns true if the RCU grace-period kthread needs to be awakened.
1165 * The caller must hold rnp->lock with interrupts disabled.
1173 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) in rcu_advance_cbs()
1177 * Find all callbacks whose ->gp_seq numbers indicate that they in rcu_advance_cbs()
1180 rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq); in rcu_advance_cbs()
1188 * that the RCU grace-period kthread be awakened.
1194 if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp)) in rcu_advance_cbs_nowake()
1196 // The grace period cannot end while we hold the rcu_node lock. in rcu_advance_cbs_nowake()
1197 if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) in rcu_advance_cbs_nowake()
1216 * Update CPU-local rcu_data state to record the beginnings and ends of
1217 * grace periods. The caller must hold the ->lock of the leaf rcu_node
1219 * Returns true if the grace-period kthread needs to be awakened.
1229 if (rdp->gp_seq == rnp->gp_seq) in __note_gp_changes()
1233 if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) || in __note_gp_changes()
1234 unlikely(READ_ONCE(rdp->gpwrap))) { in __note_gp_changes()
1237 rdp->core_needs_qs = false; in __note_gp_changes()
1238 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend")); in __note_gp_changes()
1242 if (rdp->core_needs_qs) in __note_gp_changes()
1243 rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask); in __note_gp_changes()
1246 /* Now handle the beginnings of any new-to-this-CPU grace periods. */ in __note_gp_changes()
1247 if (rcu_seq_new_gp(rdp->gp_seq, rnp->gp_seq) || in __note_gp_changes()
1248 unlikely(READ_ONCE(rdp->gpwrap))) { in __note_gp_changes()
1251 * set up to detect a quiescent state, otherwise don't in __note_gp_changes()
1254 trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart")); in __note_gp_changes()
1255 need_qs = !!(rnp->qsmask & rdp->grpmask); in __note_gp_changes()
1256 rdp->cpu_no_qs.b.norm = need_qs; in __note_gp_changes()
1257 rdp->core_needs_qs = need_qs; in __note_gp_changes()
1260 rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */ in __note_gp_changes()
1261 if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap) in __note_gp_changes()
1262 WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); in __note_gp_changes()
1263 if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap)) in __note_gp_changes()
1264 WRITE_ONCE(rdp->last_sched_clock, jiffies); in __note_gp_changes()
1265 WRITE_ONCE(rdp->gpwrap, false); in __note_gp_changes()
1277 rnp = rdp->mynode; in note_gp_changes()
1278 if ((rdp->gp_seq == rcu_seq_current(&rnp->gp_seq) && in note_gp_changes()
1279 !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */ in note_gp_changes()
1293 /* Register a counter to suppress debugging grace-period delays. */
1327 /* Allow rcutorture to stall the grace-period kthread. */
1344 pr_alert("%s: Waiting %lu jiffies\n", __func__, duration); in rcu_gp_torture_wait()
1346 pr_alert("%s: Wait complete\n", __func__); in rcu_gp_torture_wait()
1384 // end of that GP. Either way, zero counter to avoid counter-wrap in rcu_poll_gp_seq_end()
1396 // where caller does not hold the root rcu_node structure's lock.
1413 // caller does not hold the root rcu_node structure's lock.
1468 * Apply per-leaf buffered online and offline operations to in rcu_gp_init()
1471 * offlining path, when combined with checks in this function, in rcu_gp_init()
1482 if (rnp->qsmaskinit == rnp->qsmaskinitnext && in rcu_gp_init()
1483 !rnp->wait_blkd_tasks) { in rcu_gp_init()
1491 /* Record old state, apply changes to ->qsmaskinit field. */ in rcu_gp_init()
1492 oldmask = rnp->qsmaskinit; in rcu_gp_init()
1493 rnp->qsmaskinit = rnp->qsmaskinitnext; in rcu_gp_init()
1495 /* If zero-ness of ->qsmaskinit changed, propagate up tree. */ in rcu_gp_init()
1496 if (!oldmask != !rnp->qsmaskinit) { in rcu_gp_init()
1498 if (!rnp->wait_blkd_tasks) /* Ever offline? */ in rcu_gp_init()
1501 rnp->wait_blkd_tasks = true; /* blocked tasks */ in rcu_gp_init()
1508 * If all waited-on tasks from prior grace period are in rcu_gp_init()
1511 * clear ->wait_blkd_tasks. Otherwise, if one of this in rcu_gp_init()
1513 * simply clear ->wait_blkd_tasks. in rcu_gp_init()
1515 if (rnp->wait_blkd_tasks && in rcu_gp_init()
1516 (!rcu_preempt_has_tasks(rnp) || rnp->qsmaskinit)) { in rcu_gp_init()
1517 rnp->wait_blkd_tasks = false; in rcu_gp_init()
1518 if (!rnp->qsmaskinit) in rcu_gp_init()
1529 * Set the quiescent-state-needed bits in all the rcu_node in rcu_gp_init()
1530 * structures for all currently online CPUs in breadth-first in rcu_gp_init()
1546 rnp->qsmask = rnp->qsmaskinit; in rcu_gp_init()
1547 WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq); in rcu_gp_init()
1548 if (rnp == rdp->mynode) in rcu_gp_init()
1551 trace_rcu_grace_period_init(rcu_state.name, rnp->gp_seq, in rcu_gp_init()
1552 rnp->level, rnp->grplo, in rcu_gp_init()
1553 rnp->grphi, rnp->qsmask); in rcu_gp_init()
1554 /* Quiescent states for tasks on any now-offline CPUs. */ in rcu_gp_init()
1555 mask = rnp->qsmask & ~rnp->qsmaskinitnext; in rcu_gp_init()
1556 rnp->rcu_gp_init_mask = mask; in rcu_gp_init()
1557 if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp)) in rcu_gp_init()
1558 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in rcu_gp_init()
1573 * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
1584 // Someone like call_rcu() requested a force-quiescent-state scan. in rcu_gp_fqs_check_wake()
1590 if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)) in rcu_gp_fqs_check_wake()
1597 * Do one round of quiescent-state forcing.
1614 WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs); in rcu_gp_fqs()
1618 /* Collect dyntick-idle snapshots. */ in rcu_gp_fqs()
1621 /* Handle dyntick-idle and offline CPUs. */ in rcu_gp_fqs()
1624 /* Clear flag to prevent immediate re-entry. */ in rcu_gp_fqs()
1634 * Loop doing repeated quiescent-state forcing until the grace period ends.
1675 * is required only for single-node rcu_node trees because readers blocking in rcu_gp_fqs_loop()
1677 * For multi-node trees, checking the root node's ->qsmask suffices, because a in rcu_gp_fqs_loop()
1678 * given root node's ->qsmask bit is cleared only when all CPUs and tasks from in rcu_gp_fqs_loop()
1681 if (!READ_ONCE(rnp->qsmask) && in rcu_gp_fqs_loop()
1684 /* If time for quiescent-state forcing, do it. */ in rcu_gp_fqs_loop()
1713 j = rcu_state.jiffies_force_qs - j; in rcu_gp_fqs_loop()
1736 gp_duration = rcu_state.gp_end - rcu_state.gp_start; in rcu_gp_cleanup()
1745 * safe for us to drop the lock in order to mark the grace in rcu_gp_cleanup()
1752 * Propagate new ->gp_seq value to rcu_node structures so that in rcu_gp_cleanup()
1755 * RCU grace-period initialization races by forcing the end of in rcu_gp_cleanup()
1766 WARN_ON_ONCE(rnp->qsmask); in rcu_gp_cleanup()
1767 WRITE_ONCE(rnp->gp_seq, new_gp_seq); in rcu_gp_cleanup()
1768 if (!rnp->parent) in rcu_gp_cleanup()
1771 if (rnp == rdp->mynode) in rcu_gp_cleanup()
1773 /* smp_mb() provided by prior unlock-lock pair. */ in rcu_gp_cleanup()
1777 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) { in rcu_gp_cleanup()
1789 raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */ in rcu_gp_cleanup()
1798 if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) { in rcu_gp_cleanup()
1799 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed, in rcu_gp_cleanup()
1809 // the RCU_GP_FLAG_INIT bit in ->gp_state (which records in rcu_gp_cleanup()
1813 // hold the ->nocb_lock needed to safely access an offloaded in rcu_gp_cleanup()
1814 // ->cblist.  We do not want to acquire that lock because in rcu_gp_cleanup()
1824 // already set the RCU_GP_FLAG_INIT bit in ->gp_flags.  in rcu_gp_cleanup()
1826 // ->gp_flags bits. in rcu_gp_cleanup()
1845 /* Handle grace-period start. */ in rcu_gp_kthread()
1865 /* Handle quiescent-state forcing. */ in rcu_gp_kthread()
1868 /* Handle grace-period end. */ in rcu_gp_kthread()
1877 * Invoke rcu_gp_kthread_wake() to awaken the grace-period kthread if
1878 * another grace period is required. Whether we wake the grace-period
1879 * kthread or it awakens itself for the next round of quiescent-state
1880 * forcing, that kthread will clean up after the just-completed grace
1881 * period. Note that the caller must hold rnp->lock, which is released
1885 __releases(rcu_get_root()->lock) in rcu_report_qs_rsp()
1896 * Similar to rcu_report_qs_rdp(), for which it is a helper function.
1901 * is the grace-period snapshot, which means that the quiescent states
1902 * are valid only if rnp->gp_seq is equal to gps. That structure's lock
1905 * As a special case, if mask is zero, the bit-already-cleared check is
1907 * during grace-period initialization.
1911 __releases(rnp->lock) in rcu_report_qs_rnp()
1920 if ((!(rnp->qsmask & mask) && mask) || rnp->gp_seq != gps) { in rcu_report_qs_rnp()
1932 WRITE_ONCE(rnp->qsmask, rnp->qsmask & ~mask); in rcu_report_qs_rnp()
1933 trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq, in rcu_report_qs_rnp()
1934 mask, rnp->qsmask, rnp->level, in rcu_report_qs_rnp()
1935 rnp->grplo, rnp->grphi, in rcu_report_qs_rnp()
1936 !!rnp->gp_tasks); in rcu_report_qs_rnp()
1937 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { in rcu_report_qs_rnp()
1943 rnp->completedqs = rnp->gp_seq; in rcu_report_qs_rnp()
1944 mask = rnp->grpmask; in rcu_report_qs_rnp()
1945 if (rnp->parent == NULL) { in rcu_report_qs_rnp()
1947 /* No more levels. Exit loop holding root lock. */ in rcu_report_qs_rnp()
1953 rnp = rnp->parent; in rcu_report_qs_rnp()
1955 oldmask = READ_ONCE(rnp_c->qsmask); in rcu_report_qs_rnp()
1963 rcu_report_qs_rsp(flags); /* releases rnp->lock. */ in rcu_report_qs_rnp()
1969 * RCU grace period. The caller must hold the corresponding rnp->lock with
1970 * irqs disabled, and this lock is released upon return, but irqs remain
1975 __releases(rnp->lock) in rcu_report_unblock_qs_rnp()
1984 rnp->qsmask != 0) { in rcu_report_unblock_qs_rnp()
1989 rnp->completedqs = rnp->gp_seq; in rcu_report_unblock_qs_rnp()
1990 rnp_p = rnp->parent; in rcu_report_unblock_qs_rnp()
2000 /* Report up the rest of the hierarchy, tracking current ->gp_seq. */ in rcu_report_unblock_qs_rnp()
2001 gps = rnp->gp_seq; in rcu_report_unblock_qs_rnp()
2002 mask = rnp->grpmask; in rcu_report_unblock_qs_rnp()
2020 WARN_ON_ONCE(rdp->cpu != smp_processor_id()); in rcu_report_qs_rdp()
2021 rnp = rdp->mynode; in rcu_report_qs_rdp()
2023 if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq || in rcu_report_qs_rdp()
2024 rdp->gpwrap) { in rcu_report_qs_rdp()
2032 rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ in rcu_report_qs_rdp()
2036 mask = rdp->grpmask; in rcu_report_qs_rdp()
2037 rdp->core_needs_qs = false; in rcu_report_qs_rdp()
2038 if ((rnp->qsmask & mask) == 0) { in rcu_report_qs_rdp()
2054 } else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) { in rcu_report_qs_rdp()
2057 * if in the middle of a (de-)offloading process. in rcu_report_qs_rdp()
2063 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in rcu_report_qs_rdp()
2064 /* ^^^ Released rnp->lock */ in rcu_report_qs_rdp()
2083 /* Check for grace-period ends and beginnings. */ in rcu_check_quiescent_state()
2090 if (!rdp->core_needs_qs) in rcu_check_quiescent_state()
2097 if (rdp->cpu_no_qs.b.norm) in rcu_check_quiescent_state()
2107 /* Return true if callback-invocation time limit exceeded. */
2121 * period. Throttle as specified by rdp->blimit.
2138 if (!rcu_segcblist_ready_cbs(&rdp->cblist)) { in rcu_do_batch()
2140 rcu_segcblist_n_cbs(&rdp->cblist), 0); in rcu_do_batch()
2142 !rcu_segcblist_empty(&rdp->cblist), in rcu_do_batch()
2155 pending = rcu_segcblist_get_seglen(&rdp->cblist, RCU_DONE_TAIL); in rcu_do_batch()
2157 div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div; in rcu_do_batch()
2158 bl = max(rdp->blimit, pending >> div); in rcu_do_batch()
2159 if ((in_serving_softirq() || rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING) && in rcu_do_batch()
2170 rcu_segcblist_n_cbs(&rdp->cblist), bl); in rcu_do_batch()
2171 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); in rcu_do_batch()
2173 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); in rcu_do_batch()
2175 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued")); in rcu_do_batch()
2191 f = rhp->func; in rcu_do_batch()
2193 WRITE_ONCE(rhp->func, (rcu_callback_t)0L); in rcu_do_batch()
2218 // But rcuc kthreads can delay quiescent-state in rcu_do_batch()
2220 if (rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING && in rcu_do_batch()
2222 rdp->rcu_cpu_has_work = 1; in rcu_do_batch()
2229 rdp->n_cbs_invoked += count; in rcu_do_batch()
2234 rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl); in rcu_do_batch()
2235 rcu_segcblist_add_len(&rdp->cblist, -count); in rcu_do_batch()
2238 count = rcu_segcblist_n_cbs(&rdp->cblist); in rcu_do_batch()
2239 if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark) in rcu_do_batch()
2240 rdp->blimit = blimit; in rcu_do_batch()
2242 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ in rcu_do_batch()
2243 if (count == 0 && rdp->qlen_last_fqs_check != 0) { in rcu_do_batch()
2244 rdp->qlen_last_fqs_check = 0; in rcu_do_batch()
2245 rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); in rcu_do_batch()
2246 } else if (count < rdp->qlen_last_fqs_check - qhimark) in rcu_do_batch()
2247 rdp->qlen_last_fqs_check = count; in rcu_do_batch()
2253 empty = rcu_segcblist_empty(&rdp->cblist); in rcu_do_batch()
2257 WARN_ON_ONCE(count == 0 && rcu_segcblist_n_segment_cbs(&rdp->cblist) != 0); in rcu_do_batch()
2258 WARN_ON_ONCE(!empty && rcu_segcblist_n_segment_cbs(&rdp->cblist) == 0); in rcu_do_batch()
2266 * This function is invoked from each scheduling-clock interrupt,
2267 * and checks to see if this CPU is in a non-context-switch quiescent
2282 trace_rcu_utilization(TPS("Start scheduler-tick")); in rcu_sched_clock_irq()
2285 /* The load-acquire pairs with the store-release setting to true. */ in rcu_sched_clock_irq()
2301 trace_rcu_utilization(TPS("End scheduler-tick")); in rcu_sched_clock_irq()
2308 * Otherwise, invoke the specified function to check dyntick state for
2325 rcu_state.cbovldnext |= !!rnp->cbovldmask; in force_qs_rnp()
2326 if (rnp->qsmask == 0) { in force_qs_rnp()
2331 * priority-boost blocked readers. in force_qs_rnp()
2334 /* rcu_initiate_boost() releases rnp->lock */ in force_qs_rnp()
2340 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) { in force_qs_rnp()
2347 mask |= rdp->grpmask; in force_qs_rnp()
2351 rsmask |= rdp->grpmask; in force_qs_rnp()
2354 /* Idle/offline CPUs, report (releases rnp->lock). */ in force_qs_rnp()
2355 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in force_qs_rnp()
2357 /* Nothing to do here, so just drop the lock. */ in force_qs_rnp()
2367 * Force quiescent states on reluctant CPUs, and also detect which
2368 * CPUs are in dyntick-idle mode.
2379 for (; rnp != NULL; rnp = rnp->parent) { in rcu_force_quiescent_state()
2381 !raw_spin_trylock(&rnp->fqslock); in rcu_force_quiescent_state()
2383 raw_spin_unlock(&rnp_old->fqslock); in rcu_force_quiescent_state()
2390 /* Reached the root of the rcu_node tree, acquire lock. */ in rcu_force_quiescent_state()
2392 raw_spin_unlock(&rnp_old->fqslock); in rcu_force_quiescent_state()
2417 struct rcu_node *rnp = rdp->mynode; in rcu_core()
2420 * Therefore this function can race with concurrent NOCB (de-)offloading in rcu_core()
2435 const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist); in rcu_core()
2440 WARN_ON_ONCE(!rdp->beenonline); in rcu_core()
2455 rcu_segcblist_is_enabled(&rdp->cblist) && do_batch) { in rcu_core()
2457 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) in rcu_core()
2465 if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) && in rcu_core()
2468 /* Re-invoke RCU core processing if there are callbacks remaining. */ in rcu_core()
2469 if (rcu_segcblist_ready_cbs(&rdp->cblist)) in rcu_core()
2479 queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work); in rcu_core()
2534 * Per-CPU kernel thread that invokes RCU callbacks. This replaces
2581 * Spawn per-CPU RCU core processing kthreads.
2592 "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__); in rcu_spawn_core_kthreads()
2597 * Handle any core-RCU processing required by a call_rcu() invocation.
2604 * core in order to force a re-evaluation of RCU's idleness. in __call_rcu_core()
2620 if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) > in __call_rcu_core()
2621 rdp->qlen_last_fqs_check + qhimark)) { in __call_rcu_core()
2628 rcu_accelerate_cbs_unlocked(rdp->mynode, rdp); in __call_rcu_core()
2631 rdp->blimit = DEFAULT_MAX_RCU_BLIMIT; in __call_rcu_core()
2632 if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap && in __call_rcu_core()
2633 rcu_segcblist_first_pend_cb(&rdp->cblist) != head) in __call_rcu_core()
2635 rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); in __call_rcu_core()
2636 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); in __call_rcu_core()
2642 * RCU callback function to leak a callback.
2650 * ->cbovldmask bit corresponding to the current CPU based on that CPU's
2652 * structure's ->lock.
2659 if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) in check_cb_ovld_locked()
2660 WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask); in check_cb_ovld_locked()
2662 WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask); in check_cb_ovld_locked()
2667 * ->cbovldmask bit corresponding to the current CPU based on that CPU's
2671 * Note that this function ignores the possibility that there are a lot
2673 * grace periods. This omission is due to the need for no-CBs CPUs to
2674 * be holding ->nocb_lock to do this check, which is too heavy for a
2675 * common-case operation.
2679 struct rcu_node *const rnp = rdp->mynode; in check_cb_ovld()
2682 ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) == in check_cb_ovld()
2683 !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask))) in check_cb_ovld()
2700 WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); in __call_rcu_common()
2709 pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func); in __call_rcu_common()
2712 WRITE_ONCE(head->func, rcu_leak_callback); in __call_rcu_common()
2715 head->func = func; in __call_rcu_common()
2716 head->next = NULL; in __call_rcu_common()
2723 if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) { in __call_rcu_common()
2729 if (rcu_segcblist_empty(&rdp->cblist)) in __call_rcu_common()
2730 rcu_segcblist_init(&rdp->cblist); in __call_rcu_common()
2736 return; // Enqueued onto ->nocb_bypass, so just leave. in __call_rcu_common()
2738 // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. in __call_rcu_common()
2739 rcu_segcblist_enqueue(&rdp->cblist, head); in __call_rcu_common()
2743 rcu_segcblist_n_cbs(&rdp->cblist)); in __call_rcu_common()
2746 rcu_segcblist_n_cbs(&rdp->cblist)); in __call_rcu_common()
2748 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued")); in __call_rcu_common()
2761 * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
2762 * flush all lazy callbacks (including the new one) to the main ->cblist while
2766 * @func: actual callback function to be invoked after the grace period
2768 * The callback function will be invoked some time after a full grace
2769 * period elapses, in other words after all pre-existing RCU read-side
2775 * This function will cause callbacks to be invoked sooner than later at the
2776 * expense of extra power. Other than that, this function is identical to, and
2788 * call_rcu() - Queue an RCU callback for invocation after a grace period.
2790 * ->cblist to prevent starting of grace periods too soon.
2794 * @func: actual callback function to be invoked after the grace period
2796 * The callback function will be invoked some time after a full grace
2797 * period elapses, in other words after all pre-existing RCU read-side
2798 * critical sections have completed. However, the callback function
2799 * might well execute concurrently with RCU read-side critical sections
2802 * RCU read-side critical sections are delimited by rcu_read_lock()
2805 * or softirqs have been disabled also serve as RCU read-side critical
2810 * all pre-existing RCU read-side critical section. On systems with more
2813 * last RCU read-side critical section whose beginning preceded the call
2814 * to call_rcu(). It also means that each CPU executing an RCU read-side
2817 * of that RCU read-side critical section. Note that these guarantees
2822 * resulting RCU callback function "func()", then both CPU A and CPU B are
2824 * between the call to call_rcu() and the invocation of "func()" -- even
2828 * Implementation of these memory-ordering guarantees is described here:
2829 * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
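
A minimal usage sketch for call_rcu(), with illustrative names not taken from this file: embed a struct rcu_head in the protected object and let the callback perform the deferred free.

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo {
            int data;
            struct rcu_head rcu;            /* storage for the deferred callback */
    };

    static void foo_reclaim(struct rcu_head *rhp)
    {
            struct foo *fp = container_of(rhp, struct foo, rcu);

            kfree(fp);                      /* runs after a full grace period */
    }

    static void foo_release(struct foo *fp)
    {
            /* Unpublish fp first (for example with list_del_rcu()), then: */
            call_rcu(&fp->rcu, foo_reclaim);
    }
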
2843 * struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers
2862 ((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
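
Rough arithmetic for the per-page pointer capacity computed above: with 4 KiB pages and a kvfree_rcu_bulk_data header of a few dozen bytes, (PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *) comes to roughly (4096 - ~40) / 8, on the order of 500 pointers per page on a 64-bit build; the exact figure depends on the architecture and structure layout.
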
2865 * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
2868 * @head_free_gp_snap: Grace-period snapshot to check for attempted premature frees.
2869 * @bulk_head_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
2882 * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
2885 * @bulk_head: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
2887 * @lock: Synchronize access to this structure
2891 * @bulk_count: Number of objects in bulk-list
2894 * In order to save some per-cpu space the list is singular.
2896 * per-cpu lock.
2903 * This is a per-CPU structure. The reason that it is not included in
2915 // Objects queued on a bulk-list.
2920 raw_spinlock_t lock; member
2934 .lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock),
2943 for (i = 0; i < bhead->nr_records; i++) in debug_rcu_bhead_unqueue()
2944 debug_rcu_head_unqueue((struct rcu_head *)(bhead->records[i])); in debug_rcu_bhead_unqueue()
2955 raw_spin_lock(&krcp->lock); in krc_this_cpu_lock()
2963 raw_spin_unlock_irqrestore(&krcp->lock, flags); in krc_this_cpu_unlock()
2969 if (!krcp->nr_bkv_objs) in get_cached_bnode()
2972 WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs - 1); in get_cached_bnode()
2974 llist_del_first(&krcp->bkvcache); in get_cached_bnode()
2982 if (krcp->nr_bkv_objs >= rcu_min_cached_objs) in put_cached_bnode()
2985 llist_add((struct llist_node *) bnode, &krcp->bkvcache); in put_cached_bnode()
2986 WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs + 1); in put_cached_bnode()
2994 struct llist_node *page_list, *pos, *n; in drain_page_cache() local
3000 raw_spin_lock_irqsave(&krcp->lock, flags); in drain_page_cache()
3001 page_list = llist_del_all(&krcp->bkvcache); in drain_page_cache()
3002 WRITE_ONCE(krcp->nr_bkv_objs, 0); in drain_page_cache()
3003 raw_spin_unlock_irqrestore(&krcp->lock, flags); in drain_page_cache()
3005 llist_for_each_safe(pos, n, page_list) { in drain_page_cache()
3020 if (!WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&bnode->gp_snap))) { in kvfree_rcu_bulk()
3025 rcu_state.name, bnode->nr_records, in kvfree_rcu_bulk()
3026 bnode->records); in kvfree_rcu_bulk()
3028 kfree_bulk(bnode->nr_records, bnode->records); in kvfree_rcu_bulk()
3030 for (i = 0; i < bnode->nr_records; i++) { in kvfree_rcu_bulk()
3032 rcu_state.name, bnode->records[i], 0); in kvfree_rcu_bulk()
3034 vfree(bnode->records[i]); in kvfree_rcu_bulk()
3040 raw_spin_lock_irqsave(&krcp->lock, flags); in kvfree_rcu_bulk()
3043 raw_spin_unlock_irqrestore(&krcp->lock, flags); in kvfree_rcu_bulk()
3057 void *ptr = (void *) head->func; in kvfree_rcu_list()
3058 unsigned long offset = (void *) head - ptr; in kvfree_rcu_list()
3060 next = head->next; in kvfree_rcu_list()
3074 * This function is invoked in workqueue context after a grace period.
3075 * It frees all the objects queued on ->bulk_head_free or ->head_free.
3080 struct kvfree_rcu_bulk_data *bnode, *n; in kfree_rcu_work() local
3090 krcp = krwp->krcp; in kfree_rcu_work()
3092 raw_spin_lock_irqsave(&krcp->lock, flags); in kfree_rcu_work()
3095 list_replace_init(&krwp->bulk_head_free[i], &bulk_head[i]); in kfree_rcu_work()
3098 head = krwp->head_free; in kfree_rcu_work()
3099 krwp->head_free = NULL; in kfree_rcu_work()
3100 head_gp_snap = krwp->head_free_gp_snap; in kfree_rcu_work()
3101 raw_spin_unlock_irqrestore(&krcp->lock, flags); in kfree_rcu_work()
3106 list_for_each_entry_safe(bnode, n, &bulk_head[i], list) in kfree_rcu_work()
3112 * double-argument of kvfree_rcu(). This happens when the in kfree_rcu_work()
3113 * page-cache is empty, which means that objects are instead in kfree_rcu_work()
3127 if (!list_empty(&krcp->bulk_head[i])) in need_offload_krc()
3130 return !!READ_ONCE(krcp->head); in need_offload_krc()
3139 if (!list_empty(&krwp->bulk_head_free[i])) in need_wait_for_krwp_work()
3142 return !!krwp->head_free; in need_wait_for_krwp_work()
3147 int sum = atomic_read(&krcp->head_count); in krc_count()
3151 sum += atomic_read(&krcp->bulk_count[i]); in krc_count()
3162 if (delayed_work_pending(&krcp->monitor_work)) { in __schedule_delayed_monitor_work()
3163 delay_left = krcp->monitor_work.timer.expires - jiffies; in __schedule_delayed_monitor_work()
3165 mod_delayed_work(system_wq, &krcp->monitor_work, delay); in __schedule_delayed_monitor_work()
3168 queue_delayed_work(system_wq, &krcp->monitor_work, delay); in __schedule_delayed_monitor_work()
3176 raw_spin_lock_irqsave(&krcp->lock, flags); in schedule_delayed_monitor_work()
3178 raw_spin_unlock_irqrestore(&krcp->lock, flags); in schedule_delayed_monitor_work()
3185 struct kvfree_rcu_bulk_data *bnode, *n; in kvfree_rcu_drain_ready() local
3190 raw_spin_lock_irqsave(&krcp->lock, flags); in kvfree_rcu_drain_ready()
3194 list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) { in kvfree_rcu_drain_ready()
3195 if (!poll_state_synchronize_rcu_full(&bnode->gp_snap)) in kvfree_rcu_drain_ready()
3198 atomic_sub(bnode->nr_records, &krcp->bulk_count[i]); in kvfree_rcu_drain_ready()
3199 list_move(&bnode->list, &bulk_ready[i]); in kvfree_rcu_drain_ready()
3203 if (krcp->head && poll_state_synchronize_rcu(krcp->head_gp_snap)) { in kvfree_rcu_drain_ready()
3204 head_ready = krcp->head; in kvfree_rcu_drain_ready()
3205 atomic_set(&krcp->head_count, 0); in kvfree_rcu_drain_ready()
3206 WRITE_ONCE(krcp->head, NULL); in kvfree_rcu_drain_ready()
3208 raw_spin_unlock_irqrestore(&krcp->lock, flags); in kvfree_rcu_drain_ready()
3211 list_for_each_entry_safe(bnode, n, &bulk_ready[i], list) in kvfree_rcu_drain_ready()
3220 * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
3232 raw_spin_lock_irqsave(&krcp->lock, flags); in kfree_rcu_monitor()
3236 struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]); in kfree_rcu_monitor()
3240 // there is on-going rcu work to handle krwp's free business. in kfree_rcu_monitor()
3246 // Channel 1 corresponds to the SLAB-pointer bulk path. in kfree_rcu_monitor()
3247 // Channel 2 corresponds to vmalloc-pointer bulk path. in kfree_rcu_monitor()
3249 if (list_empty(&krwp->bulk_head_free[j])) { in kfree_rcu_monitor()
3250 atomic_set(&krcp->bulk_count[j], 0); in kfree_rcu_monitor()
3251 list_replace_init(&krcp->bulk_head[j], in kfree_rcu_monitor()
3252 &krwp->bulk_head_free[j]); in kfree_rcu_monitor()
3258 if (!krwp->head_free) { in kfree_rcu_monitor()
3259 krwp->head_free = krcp->head; in kfree_rcu_monitor()
3260 get_state_synchronize_rcu_full(&krwp->head_free_gp_snap); in kfree_rcu_monitor()
3261 atomic_set(&krcp->head_count, 0); in kfree_rcu_monitor()
3262 WRITE_ONCE(krcp->head, NULL); in kfree_rcu_monitor()
3270 queue_rcu_work(system_wq, &krwp->rcu_work); in kfree_rcu_monitor()
3274 raw_spin_unlock_irqrestore(&krcp->lock, flags); in kfree_rcu_monitor()
3291 queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, 0); in schedule_page_work_fn()
3306 nr_pages = atomic_read(&krcp->backoff_page_cache_fill) ? in fill_page_cache_func()
3309 for (i = READ_ONCE(krcp->nr_bkv_objs); i < nr_pages; i++) { in fill_page_cache_func()
3316 raw_spin_lock_irqsave(&krcp->lock, flags); in fill_page_cache_func()
3318 raw_spin_unlock_irqrestore(&krcp->lock, flags); in fill_page_cache_func()
3326 atomic_set(&krcp->work_in_progress, 0); in fill_page_cache_func()
3327 atomic_set(&krcp->backoff_page_cache_fill, 0); in fill_page_cache_func()
3338 !atomic_xchg(&krcp->work_in_progress, 1)) { in run_page_cache_worker()
3339 if (atomic_read(&krcp->backoff_page_cache_fill)) { in run_page_cache_worker()
3341 &krcp->page_cache_work, in run_page_cache_worker()
3344 hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); in run_page_cache_worker()
3345 krcp->hrtimer.function = schedule_page_work_fn; in run_page_cache_worker()
3346 hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL); in run_page_cache_worker()
3351 // Record ptr in a page managed by krcp, with the pre-krc_this_cpu_lock()
3365 if (unlikely(!(*krcp)->initialized)) in add_ptr_to_bulk_krc_lock()
3369 bnode = list_first_entry_or_null(&(*krcp)->bulk_head[idx], in add_ptr_to_bulk_krc_lock()
3373 if (!bnode || bnode->nr_records == KVFREE_BULK_MAX_ENTR) { in add_ptr_to_bulk_krc_lock()
3378 // __GFP_NORETRY - allows a light-weight direct reclaim in add_ptr_to_bulk_krc_lock()
3383 // __GFP_NOMEMALLOC - prevents from consuming of all the in add_ptr_to_bulk_krc_lock()
3386 // __GFP_NOWARN - it is supposed that an allocation can in add_ptr_to_bulk_krc_lock()
3391 raw_spin_lock_irqsave(&(*krcp)->lock, *flags); in add_ptr_to_bulk_krc_lock()
3398 bnode->nr_records = 0; in add_ptr_to_bulk_krc_lock()
3399 list_add(&bnode->list, &(*krcp)->bulk_head[idx]); in add_ptr_to_bulk_krc_lock()
3403 bnode->records[bnode->nr_records++] = ptr; in add_ptr_to_bulk_krc_lock()
3404 get_state_synchronize_rcu_full(&bnode->gp_snap); in add_ptr_to_bulk_krc_lock()
3405 atomic_inc(&(*krcp)->bulk_count[idx]); in add_ptr_to_bulk_krc_lock()
3429 * Please note there is a limitation for the head-less in kvfree_call_rcu()
3441 WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n", in kvfree_call_rcu()
3457 head->func = ptr; in kvfree_call_rcu()
3458 head->next = krcp->head; in kvfree_call_rcu()
3459 WRITE_ONCE(krcp->head, head); in kvfree_call_rcu()
3460 atomic_inc(&krcp->head_count); in kvfree_call_rcu()
3463 krcp->head_gp_snap = get_state_synchronize_rcu(); in kvfree_call_rcu()
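
For orientation, callers normally reach kvfree_call_rcu() through the kfree_rcu()/kvfree_rcu() macros rather than calling it directly; a small illustrative sketch with made-up names:

    #include <linux/rcupdate.h>

    struct bar {
            int data;
            struct rcu_head rcu;
    };

    static void bar_release(struct bar *bp)
    {
            /* Queue bp for freeing after a grace period; freeing is batched per CPU. */
            kfree_rcu(bp, rcu);
    }

The head-less (single-pointer) variant referred to in the comment above has no rcu_head embedded in the object, so when no backing page can be allocated that path can fall back to a synchronous grace period and an inline free, which is why it may only be used from a context that can sleep.
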
3506 count += READ_ONCE(krcp->nr_bkv_objs); in kfree_rcu_shrink_count()
3507 atomic_set(&krcp->backoff_page_cache_fill, 1); in kfree_rcu_shrink_count()
3524 kfree_rcu_monitor(&krcp->monitor_work.work); in kfree_rcu_shrink_scan()
3526 sc->nr_to_scan -= count; in kfree_rcu_shrink_scan()
3529 if (sc->nr_to_scan <= 0) in kfree_rcu_shrink_scan()
3556 * During early boot, any blocking grace-period wait automatically
3563 * grace-period optimization is ignored once the scheduler is running.
3575 * synchronize_rcu - wait until a grace period has elapsed.
3579 * read-side critical sections have completed. Note, however, that
3581 * concurrently with new RCU read-side critical sections that began while
3584 * RCU read-side critical sections are delimited by rcu_read_lock()
3587 * or softirqs have been disabled also serve as RCU read-side critical
3591 * Note that this guarantee implies further memory-ordering guarantees.
3594 * the end of its last RCU read-side critical section whose beginning
3596 * an RCU read-side critical section that extends beyond the return from
3599 * that RCU read-side critical section. Note that these guarantees include
3606 * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
3609 * Implementation of these memory-ordering guarantees is described here:
3610 * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
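
A typical updater-side sketch for synchronize_rcu(), with illustrative names: unlink the element from the RCU-protected list, wait for a grace period, then free it.

    #include <linux/rculist.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct foo_entry {
            struct list_head list;
            int data;
    };

    static LIST_HEAD(foo_list);
    static DEFINE_SPINLOCK(foo_lock);

    static void foo_remove(struct foo_entry *fe)
    {
            spin_lock(&foo_lock);           /* updater-side mutual exclusion */
            list_del_rcu(&fe->list);
            spin_unlock(&foo_lock);

            synchronize_rcu();              /* all pre-existing readers are done */
            kfree(fe);
    }
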
3620 "Illegal synchronize_rcu() in RCU read-side critical section"); in synchronize_rcu()
3634 // reuse of ->gp_seq_polled_snap. in synchronize_rcu()
3638 // Update the normal grace-period counters to record in synchronize_rcu()
3645 for (rnp = this_cpu_ptr(&rcu_data)->mynode; rnp; rnp = rnp->parent) in synchronize_rcu()
3646 rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq; in synchronize_rcu()
3652 * get_completed_synchronize_rcu_full - Return a full pre-completed polled state cookie
3661 rgosp->rgos_norm = RCU_GET_STATE_COMPLETED; in get_completed_synchronize_rcu_full()
3662 rgosp->rgos_exp = RCU_GET_STATE_COMPLETED; in get_completed_synchronize_rcu_full()
3667 * get_state_synchronize_rcu - Snapshot current RCU state
3676 * Any prior manipulation of RCU-protected data must happen in get_state_synchronize_rcu()
3677 * before the load from ->gp_seq. in get_state_synchronize_rcu()
3685 * get_state_synchronize_rcu_full - Snapshot RCU state, both normal and expedited
3686 * @rgosp: location to place combined normal/expedited grace-period state
3688 * Places the normal and expedited grace-period states in @rgosp. This
3705 * Any prior manipulation of RCU-protected data must happen in get_state_synchronize_rcu_full()
3706 * before the loads from ->gp_seq and ->expedited_sequence. in get_state_synchronize_rcu_full()
3709 rgosp->rgos_norm = rcu_seq_snap(&rnp->gp_seq); in get_state_synchronize_rcu_full()
3710 rgosp->rgos_exp = rcu_seq_snap(&rcu_state.expedited_sequence); in get_state_synchronize_rcu_full()
3715 * Helper function for start_poll_synchronize_rcu() and
3728 rnp = rdp->mynode; in start_poll_synchronize_rcu_common()
3735 // from which they are updated at grace-period start, as required. in start_poll_synchronize_rcu_common()
3743 * start_poll_synchronize_rcu - Snapshot and start RCU grace period
3752 * the grace-period kthread.
3764 * start_poll_synchronize_rcu_full - Take a full snapshot and start RCU grace period
3767 * Places the normal and expedited grace-period states in *@rgosp. This
3775 * the grace-period kthread.
3786 * poll_state_synchronize_rcu - Has the specified RCU grace period completed?
3792 * function later on until it does return @true. Alternatively, the caller
3798 * Yes, this function does not take counter wrap into account.
3800 * more than a billion grace periods (and way more on a 64-bit system!).
3802 * (many hours even on 32-bit systems) should check them occasionally and
3805 * to get a guaranteed-completed grace-period state.
3807 * In addition, because oldstate compresses the grace-period state for
3813 * This function provides the same memory-ordering guarantees that
3815 * to the function that provided @oldstate, and that returned at the end
3816 * of this function.
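
A minimal polling sketch, assuming a context that is allowed to sleep: take a cookie with start_poll_synchronize_rcu(), do other work, and poll until the grace period it refers to has ended.

    #include <linux/delay.h>
    #include <linux/rcupdate.h>

    static void example_poll_gp(void)
    {
            unsigned long cookie;

            cookie = start_poll_synchronize_rcu(); /* snapshot and kick off a GP */

            /* ... unrelated work ... */

            while (!poll_state_synchronize_rcu(cookie))
                    msleep(1);              /* poll until that grace period ends */

            /* Every reader that existed at the snapshot has now finished. */
    }
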
3830 * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed?
3836 * function later on until it does return @true. Alternatively, the caller
3840 * Yes, this function does not take counter wrap into account.
3842 * for more than a billion grace periods (and way more on a 64-bit
3844 * long time periods (many hours even on 32-bit systems) should check
3847 * get_completed_synchronize_rcu_full() to get a guaranteed-completed
3848 * grace-period state.
3850 * This function provides the same memory-ordering guarantees that would
3852 * the function that provided @rgosp, and that returned at the end of this
3853 * function. And this guarantee requires that the root rcu_node structure's
3854 * ->gp_seq field be checked instead of that of the rcu_state structure.
3855 * The problem is that the just-ending grace-period's callbacks can be
3856 * invoked between the time that the root rcu_node structure's ->gp_seq
3857 * field is updated and the time that the rcu_state structure's ->gp_seq
3866 smp_mb(); // Order against root rcu_node structure grace-period cleanup. in poll_state_synchronize_rcu_full()
3867 if (rgosp->rgos_norm == RCU_GET_STATE_COMPLETED || in poll_state_synchronize_rcu_full()
3868 rcu_seq_done_exact(&rnp->gp_seq, rgosp->rgos_norm) || in poll_state_synchronize_rcu_full()
3869 rgosp->rgos_exp == RCU_GET_STATE_COMPLETED || in poll_state_synchronize_rcu_full()
3870 rcu_seq_done_exact(&rcu_state.expedited_sequence, rgosp->rgos_exp)) { in poll_state_synchronize_rcu_full()
3879 * cond_synchronize_rcu - Conditionally wait for an RCU grace period
3886 * Yes, this function does not take counter wrap into account.
3888 * more than 2 billion grace periods (and way more on a 64-bit system!),
3891 * This function provides the same memory-ordering guarantees that
3893 * to the function that provided @oldstate and that returned at the end
3894 * of this function.
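
The conditional form suits the case where a cookie was taken earlier and blocking is acceptable only if the grace period has not yet elapsed; a brief sketch:

    #include <linux/rcupdate.h>

    static void example_cond_gp(void)
    {
            unsigned long cookie = get_state_synchronize_rcu();

            /* ... potentially long-running setup work ... */

            cond_synchronize_rcu(cookie);   /* a no-op if a full GP already elapsed */
    }
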
3904 * cond_synchronize_rcu_full - Conditionally wait for an RCU grace period
3913 * Yes, this function does not take counter wrap into account.
3915 * more than 2 billion grace periods (and way more on a 64-bit system!),
3918 * This function provides the same memory-ordering guarantees that
3920 * to the function that provided @rgosp and that returned at the end of
3921 * this function.
3931 * Check to see if there is any immediate RCU-related work to be done by
3934 * CPU-local state are performed first. However, we must check for CPU
3941 struct rcu_node *rnp = rdp->mynode; in rcu_pending()
3958 if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress) in rcu_pending()
3963 rcu_segcblist_ready_cbs(&rdp->cblist)) in rcu_pending()
3967 if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) && in rcu_pending()
3969 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) in rcu_pending()
3973 if (rcu_seq_current(&rnp->gp_seq) != rdp->gp_seq || in rcu_pending()
3974 unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */ in rcu_pending()
3982 * Helper function for rcu_barrier() tracing. If tracing is disabled,
3992 * RCU callback function for rcu_barrier(). If we are last, wake
4006 rcu_barrier_trace(TPS("LastCB"), -1, s); in rcu_barrier_callback()
4009 rcu_barrier_trace(TPS("CB"), -1, s); in rcu_barrier_callback()
4014 * If needed, entrain an rcu_barrier() callback on rdp->cblist.
4019 unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap); in rcu_barrier_entrain()
4026 rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); in rcu_barrier_entrain()
4027 rdp->barrier_head.func = rcu_barrier_callback; in rcu_barrier_entrain()
4028 debug_rcu_head_queue(&rdp->barrier_head); in rcu_barrier_entrain()
4035 was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); in rcu_barrier_entrain()
4037 wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); in rcu_barrier_entrain()
4038 if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { in rcu_barrier_entrain()
4041 debug_rcu_head_unqueue(&rdp->barrier_head); in rcu_barrier_entrain()
4042 rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence); in rcu_barrier_entrain()
4047 smp_store_release(&rdp->barrier_seq_snap, gseq); in rcu_barrier_entrain()
4051 * Called with preemption disabled, and from cross-cpu IRQ context.
4059 WARN_ON_ONCE(cpu != rdp->cpu); in rcu_barrier_handler()
4067 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
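
The classic use of rcu_barrier() is module unload: stop posting new callbacks, then wait for every already-queued callback to run before the callback code and data go away. A hedged sketch; foo_stop_posting_callbacks() is a stand-in for whatever driver-specific step quiesces new call_rcu() invocations.

    #include <linux/module.h>
    #include <linux/rcupdate.h>

    static void __exit foo_module_exit(void)
    {
            foo_stop_posting_callbacks();   /* hypothetical driver-specific step */

            rcu_barrier();                  /* wait for all queued callbacks to finish */

            /* Now it is safe to free state the callbacks might have used. */
    }
    module_exit(foo_module_exit);
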
4082 rcu_barrier_trace(TPS("Begin"), -1, s); in rcu_barrier()
4089 rcu_barrier_trace(TPS("EarlyExit"), -1, rcu_state.barrier_sequence); in rcu_barrier()
4099 rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence); in rcu_barrier()
4103 * to avoid a too-soon return to zero in case of an immediate in rcu_barrier()
4104 * invocation of the just-enqueued callback (or preemption of in rcu_barrier()
4105 * this task). Exclude CPU-hotplug operations to ensure that no in rcu_barrier()
4106 * offline non-offloaded CPU has callbacks queued. in rcu_barrier()
4120 if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq) in rcu_barrier()
4123 if (!rcu_segcblist_n_cbs(&rdp->cblist)) { in rcu_barrier()
4124 WRITE_ONCE(rdp->barrier_seq_snap, gseq); in rcu_barrier()
4131 WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); in rcu_barrier()
4141 WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); in rcu_barrier()
4156 rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence); in rcu_barrier()
4162 WRITE_ONCE(rdp->barrier_seq_snap, gseq); in rcu_barrier()
4172 * This will not be stable unless the rcu_node structure's ->lock is
4178 return READ_ONCE(rnp->qsmaskinitnext); in rcu_rnp_online_cpus()
4184 * ->qsmaskinitnext field rather than by the global cpu_online_mask.
4188 return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode)); in rcu_rdp_cpu_online()
4224 * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask in rcu_lockdep_current_cpu_online()
4253 struct rcu_node *rnp = rdp->mynode; in rcutree_dying_cpu()
4258 blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask); in rcutree_dying_cpu()
4259 trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), in rcutree_dying_cpu()
4260 blkd ? TPS("cpuofl-bgp") : TPS("cpuofl")); in rcutree_dying_cpu()
4266 * and all tasks that were preempted within an RCU read-side critical
4268 * read-side critical section. Some other CPU is reporting this fact with
4269 * the specified rcu_node structure's ->lock held and interrupts disabled.
4270 * This function therefore goes up the tree of rcu_node structures,
4271 * clearing the corresponding bits in the ->qsmaskinit fields. Note that
4272 * the leaf rcu_node structure's ->qsmaskinit field has already been
4275 * This function does check that the specified rcu_node structure has
4278 * a needless lock acquisition. So once it has done its work, don't
4288 WARN_ON_ONCE(rnp_leaf->qsmaskinit) || in rcu_cleanup_dead_rnp()
4292 mask = rnp->grpmask; in rcu_cleanup_dead_rnp()
4293 rnp = rnp->parent; in rcu_cleanup_dead_rnp()
4297 rnp->qsmaskinit &= ~mask; in rcu_cleanup_dead_rnp()
4299 WARN_ON_ONCE(rnp->qsmask); in rcu_cleanup_dead_rnp()
4300 if (rnp->qsmaskinit) { in rcu_cleanup_dead_rnp()
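rcu_cleanup_dead_rnp() walks from the leaf toward the root, clearing this subtree's bit in each ancestor's ->qsmaskinit and stopping as soon as an ancestor still has other bits set, meaning some sibling subtree still has online CPUs. The toy userspace sketch below models only that walk over a two-level tree; the toy_node structure is invented, although its fields mirror the real ->qsmaskinit, ->grpmask and ->parent.

/* Toy model of clearing subtree bits up a two-level tree (not kernel code). */
#include <stdio.h>

struct toy_node {
	unsigned long qsmaskinit;	/* one bit per child subtree */
	unsigned long grpmask;		/* this node's bit in its parent */
	struct toy_node *parent;
};

static void toy_cleanup_dead(struct toy_node *leaf)
{
	unsigned long mask = leaf->grpmask;
	struct toy_node *rnp = leaf->parent;

	while (rnp) {
		rnp->qsmaskinit &= ~mask;	/* subtree now has no online CPUs */
		if (rnp->qsmaskinit)		/* a sibling subtree is still online */
			return;
		mask = rnp->grpmask;		/* otherwise keep climbing */
		rnp = rnp->parent;
	}
}

int main(void)
{
	struct toy_node root = { .qsmaskinit = 0x3 };		/* two leaves online */
	struct toy_node leaf0 = { .grpmask = 0x1, .parent = &root };
	struct toy_node leaf1 = { .grpmask = 0x2, .parent = &root };

	toy_cleanup_dead(&leaf0);	/* root keeps bit 0x2 for leaf1 */
	printf("root qsmaskinit = %#lx\n", root.qsmaskinit);
	toy_cleanup_dead(&leaf1);	/* now the whole tree is empty */
	printf("root qsmaskinit = %#lx\n", root.qsmaskinit);
	return 0;
}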
4320 WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1); in rcutree_dead_cpu()
4321 // Stop-machine done, so allow nohz_full to disable tick. in rcutree_dead_cpu()
4327 * Propagate ->qsmaskinit bits up the rcu_node tree to account for the in rcu_init_new_rnp()
4329 * must hold the corresponding leaf rcu_node ->lock with interrupts
4339 WARN_ON_ONCE(rnp->wait_blkd_tasks); in rcu_init_new_rnp()
4341 mask = rnp->grpmask; in rcu_init_new_rnp()
4342 rnp = rnp->parent; in rcu_init_new_rnp()
4346 oldmask = rnp->qsmaskinit; in rcu_init_new_rnp()
4347 rnp->qsmaskinit |= mask; in rcu_init_new_rnp()
4355 * Do boot-time initialization of a CPU's per-CPU RCU data.
4364 rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); in rcu_boot_init_percpu_data()
4365 INIT_WORK(&rdp->strict_work, strict_work_handler); in rcu_boot_init_percpu_data()
4366 WARN_ON_ONCE(ct->dynticks_nesting != 1); in rcu_boot_init_percpu_data()
4368 rdp->barrier_seq_snap = rcu_state.barrier_sequence; in rcu_boot_init_percpu_data()
4369 rdp->rcu_ofl_gp_seq = rcu_state.gp_seq; in rcu_boot_init_percpu_data()
4370 rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; in rcu_boot_init_percpu_data()
4371 rdp->rcu_onl_gp_seq = rcu_state.gp_seq; in rcu_boot_init_percpu_data()
4372 rdp->rcu_onl_gp_flags = RCU_GP_CLEANED; in rcu_boot_init_percpu_data()
4373 rdp->last_sched_clock = jiffies; in rcu_boot_init_percpu_data()
4374 rdp->cpu = cpu; in rcu_boot_init_percpu_data()
4379 * Invoked early in the CPU-online process, when pretty much all services
4382 * Initializes a CPU's per-CPU RCU data. Note that only one online or
4384 * accept some slop in the rcu_state.gp_seq access due to the fact that this in rcutree_prepare_cpu()
4385 * CPU cannot possibly have any non-offloaded RCU callbacks in flight yet.
4397 rdp->qlen_last_fqs_check = 0; in rcutree_prepare_cpu()
4398 rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); in rcutree_prepare_cpu()
4399 rdp->blimit = blimit; in rcutree_prepare_cpu()
4400 ct->dynticks_nesting = 1; /* CPU not up, no tearing. */ in rcutree_prepare_cpu()
4404 * Only non-NOCB CPUs that didn't have early-boot callbacks need to be in rcutree_prepare_cpu()
4405 * (re-)initialized. in rcutree_prepare_cpu()
4407 if (!rcu_segcblist_is_enabled(&rdp->cblist)) in rcutree_prepare_cpu()
4408 rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ in rcutree_prepare_cpu()
4411 * Add CPU to leaf rcu_node pending-online bitmask. Any needed in rcutree_prepare_cpu()
4415 rnp = rdp->mynode; in rcutree_prepare_cpu()
4417 rdp->gp_seq = READ_ONCE(rnp->gp_seq); in rcutree_prepare_cpu()
4418 rdp->gp_seq_needed = rdp->gp_seq; in rcutree_prepare_cpu()
4419 rdp->cpu_no_qs.b.norm = true; in rcutree_prepare_cpu()
4420 rdp->core_needs_qs = false; in rcutree_prepare_cpu()
4421 rdp->rcu_iw_pending = false; in rcutree_prepare_cpu()
4422 rdp->rcu_iw = IRQ_WORK_INIT_HARD(rcu_iw_handler); in rcutree_prepare_cpu()
4423 rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; in rcutree_prepare_cpu()
4424 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); in rcutree_prepare_cpu()
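rcutree_prepare_cpu() and rcutree_dead_cpu() run from the CPU-hotplug state machine. Other subsystems hook into the same machine through the cpuhp API; the sketch below is a hedged illustration of that registration pattern for a hypothetical driver (all drv_* names invented), not of how RCU itself registers its states.

/*
 * Hedged sketch of CPU-hotplug callback registration.  The drv_* names
 * are hypothetical; cpuhp_setup_state()/cpuhp_remove_state() are the
 * real API.
 */
#include <linux/module.h>
#include <linux/cpuhotplug.h>

static int drv_state;

static int drv_online_cpu(unsigned int cpu)
{
	pr_info("drv: CPU %u is coming online\n", cpu);
	return 0;		/* nonzero would abort the hotplug operation */
}

static int drv_offline_cpu(unsigned int cpu)
{
	pr_info("drv: CPU %u is going offline\n", cpu);
	return 0;
}

static int __init drv_init(void)
{
	int ret;

	/* Callbacks run for already-online CPUs and for future hotplug. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "drv:online",
				drv_online_cpu, drv_offline_cpu);
	if (ret < 0)
		return ret;
	drv_state = ret;	/* dynamically allocated state number */
	return 0;
}

static void __exit drv_exit(void)
{
	cpuhp_remove_state(drv_state);
}

module_init(drv_init);
module_exit(drv_exit);
MODULE_LICENSE("GPL");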
4434 * Update RCU priority boost kthread affinity for CPU-hotplug changes. in rcutree_affinity_setting()
4440 rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); in rcutree_affinity_setting()
4450 return smp_load_acquire(&rdp->beenonline); in rcu_cpu_beenfullyonline()
4454 * Near the end of the CPU-online process. Pretty much all services
4464 rnp = rdp->mynode; in rcutree_online_cpu()
4466 rnp->ffmask |= rdp->grpmask; in rcutree_online_cpu()
4471 rcutree_affinity_setting(cpu, -1); in rcutree_online_cpu()
4473 // Stop-machine done, so allow nohz_full to disable tick. in rcutree_online_cpu()
4489 rnp = rdp->mynode; in rcutree_offline_cpu()
4491 rnp->ffmask &= ~rdp->grpmask; in rcutree_offline_cpu()
4496 // nohz_full CPUs need the tick for stop-machine to work quickly in rcutree_offline_cpu()
4504 * incoming CPUs are not allowed to use RCU read-side critical sections
4505 * until this function is called. Failing to observe this restriction
4508 * Note that this function is special in that it is invoked directly
4510 * This is because this function must be invoked at a precise location.
4522 if (rdp->cpu_started) in rcu_cpu_starting()
4524 rdp->cpu_started = true; in rcu_cpu_starting()
4526 rnp = rdp->mynode; in rcu_cpu_starting()
4527 mask = rdp->grpmask; in rcu_cpu_starting()
4532 WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); in rcu_cpu_starting()
4534 newcpu = !(rnp->expmaskinitnext & mask); in rcu_cpu_starting()
4535 rnp->expmaskinitnext |= mask; in rcu_cpu_starting()
4539 rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */ in rcu_cpu_starting()
4540 rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq); in rcu_cpu_starting()
4541 rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags); in rcu_cpu_starting()
4544 if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */ in rcu_cpu_starting()
4550 /* Report QS -after- changing ->qsmaskinitnext! */ in rcu_cpu_starting()
4551 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in rcu_cpu_starting()
4556 smp_store_release(&rdp->beenonline, true); in rcu_cpu_starting()
4557 smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ in rcu_cpu_starting()
4561 * The outgoing CPU has no further need of RCU, so remove it from in rcu_report_dead()
4562 * the rcu_node tree's ->qsmaskinitnext bit masks.
4564 * Note that this function is special in that it is invoked directly
4566 * This is because this function must be invoked at a precise location.
4573 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ in rcu_report_dead()
4581 mask = rdp->grpmask; in rcu_report_dead()
4584 raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ in rcu_report_dead()
4585 rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq); in rcu_report_dead()
4586 rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags); in rcu_report_dead()
4587 if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */ in rcu_report_dead()
4588 /* Report quiescent state -before- changing ->qsmaskinitnext! */ in rcu_report_dead()
4590 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); in rcu_report_dead()
4593 WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask); in rcu_report_dead()
4598 rdp->cpu_started = false; in rcu_report_dead()
4603 * The outgoing CPU has just passed through the dying-idle state, and we
4619 if (rcu_segcblist_empty(&rdp->cblist)) { in rcutree_migrate_callbacks()
4627 my_rnp = my_rdp->mynode; in rcutree_migrate_callbacks()
4634 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); in rcutree_migrate_callbacks()
4637 rcu_segcblist_disable(&rdp->cblist); in rcutree_migrate_callbacks()
4638 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist)); in rcutree_migrate_callbacks()
4651 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || in rcutree_migrate_callbacks()
4652 !rcu_segcblist_empty(&rdp->cblist), in rcutree_migrate_callbacks()
4653 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", in rcutree_migrate_callbacks()
4654 cpu, rcu_segcblist_n_cbs(&rdp->cblist), in rcutree_migrate_callbacks()
4655 rcu_segcblist_first_cb(&rdp->cblist)); in rcutree_migrate_callbacks()
4660 * On non-huge systems, use expedited RCU grace periods to make suspend
4695 pr_err("Failed to create %s!\n", gp_kworker_name); in rcu_start_exp_gp_kworkers()
4702 pr_err("Failed to create %s!\n", par_gp_kworker_name); in rcu_start_exp_gp_kworkers()
4709 sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param); in rcu_start_exp_gp_kworkers()
4710 sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO, in rcu_start_exp_gp_kworkers()
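rcu_start_exp_gp_kworkers() creates dedicated kthread workers for expedited grace-period handling and, when RCU kthreads run at real-time priority, switches their tasks to SCHED_FIFO. Below is a hedged sketch of that pattern written as built-in initialization code; the demo_* names are invented, while kthread_create_worker(), kthread_queue_work() and sched_setscheduler_nocheck() are the real APIs used above.

/*
 * Hedged sketch of the "dedicated kworker at real-time priority" pattern.
 * All demo_* names are invented.
 */
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>

static struct kthread_worker *demo_worker;
static struct kthread_work demo_work;

static void demo_work_fn(struct kthread_work *work)
{
	pr_info("demo: work executed in dedicated kworker\n");
}

static int __init demo_start_kworker(void)
{
	struct sched_param param = { .sched_priority = 1 };	/* lowest RT priority */

	demo_worker = kthread_create_worker(0, "demo_kworker");
	if (IS_ERR(demo_worker)) {
		pr_err("Failed to create demo_kworker!\n");
		return PTR_ERR(demo_worker);
	}

	/* Promote the worker's task to SCHED_FIFO, as done for the GP kworkers. */
	sched_setscheduler_nocheck(demo_worker->task, SCHED_FIFO, &param);

	kthread_init_work(&demo_work, demo_work_fn);
	kthread_queue_work(demo_worker, &demo_work);
	return 0;
}
core_initcall(demo_start_kworker);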
4744 …if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n in rcu_spawn_gp_kthread()
4758 /* This is a pre-SMP initcall, we expect a single CPU */ in rcu_spawn_gp_kthread()
4761 * Those kthreads couldn't be created on rcu_init() -> rcutree_prepare_cpu() in rcu_spawn_gp_kthread()
4765 rcu_spawn_one_boost_kthread(rdp->mynode); in rcu_spawn_gp_kthread()
4774 * This function is invoked towards the end of the scheduler's
4776 * contain synchronous grace-period primitives (during which time, this idle
4777 * task is booting the system, and such primitives are no-ops). After this
4778 * function is called, any synchronous grace-period primitives are run as
4792 // Fix up the ->gp_seq counters. in rcu_scheduler_starting()
4795 rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq; in rcu_scheduler_starting()
4804 * Helper function for rcu_init() that initializes the rcu_state structure.
4825 /* Initialize the level-tracking arrays. */ in rcu_init_one()
4829 rcu_state.level[i - 1] + num_rcu_lvl[i - 1]; in rcu_init_one()
4834 for (i = rcu_num_lvls - 1; i >= 0; i--) { in rcu_init_one()
4838 raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock)); in rcu_init_one()
4839 lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock), in rcu_init_one()
4841 raw_spin_lock_init(&rnp->fqslock); in rcu_init_one()
4842 lockdep_set_class_and_name(&rnp->fqslock, in rcu_init_one()
4844 rnp->gp_seq = rcu_state.gp_seq; in rcu_init_one()
4845 rnp->gp_seq_needed = rcu_state.gp_seq; in rcu_init_one()
4846 rnp->completedqs = rcu_state.gp_seq; in rcu_init_one()
4847 rnp->qsmask = 0; in rcu_init_one()
4848 rnp->qsmaskinit = 0; in rcu_init_one()
4849 rnp->grplo = j * cpustride; in rcu_init_one()
4850 rnp->grphi = (j + 1) * cpustride - 1; in rcu_init_one()
4851 if (rnp->grphi >= nr_cpu_ids) in rcu_init_one()
4852 rnp->grphi = nr_cpu_ids - 1; in rcu_init_one()
4854 rnp->grpnum = 0; in rcu_init_one()
4855 rnp->grpmask = 0; in rcu_init_one()
4856 rnp->parent = NULL; in rcu_init_one()
4858 rnp->grpnum = j % levelspread[i - 1]; in rcu_init_one()
4859 rnp->grpmask = BIT(rnp->grpnum); in rcu_init_one()
4860 rnp->parent = rcu_state.level[i - 1] + in rcu_init_one()
4861 j / levelspread[i - 1]; in rcu_init_one()
4863 rnp->level = i; in rcu_init_one()
4864 INIT_LIST_HEAD(&rnp->blkd_tasks); in rcu_init_one()
4866 init_waitqueue_head(&rnp->exp_wq[0]); in rcu_init_one()
4867 init_waitqueue_head(&rnp->exp_wq[1]); in rcu_init_one()
4868 init_waitqueue_head(&rnp->exp_wq[2]); in rcu_init_one()
4869 init_waitqueue_head(&rnp->exp_wq[3]); in rcu_init_one()
4870 spin_lock_init(&rnp->exp_lock); in rcu_init_one()
4871 mutex_init(&rnp->boost_kthread_mutex); in rcu_init_one()
4872 raw_spin_lock_init(&rnp->exp_poll_lock); in rcu_init_one()
4873 rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED; in rcu_init_one()
4874 INIT_WORK(&rnp->exp_poll_wq, sync_rcu_do_polled_gp); in rcu_init_one()
4882 while (i > rnp->grphi) in rcu_init_one()
4884 per_cpu_ptr(&rcu_data, i)->mynode = rnp; in rcu_init_one()
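In the initialization loop above, each rcu_node at a given level spans cpustride CPUs, ->grplo/->grphi record its inclusive CPU range (clamped to nr_cpu_ids), and ->grpnum/->grpmask locate the node within its parent. The standalone sketch below reproduces that arithmetic for an assumed two-level geometry of one root fanning out to four leaves of 16 CPUs each, with a hypothetical 50 CPUs; the numbers are illustrative, not the kernel's compile-time values.

/* Userspace illustration of the grplo/grphi/grpnum arithmetic above. */
#include <stdio.h>

#define NUM_LVLS 2

int main(void)
{
	int num_rcu_lvl[NUM_LVLS] = { 1, 4 };	/* nodes per level: root, leaves */
	int levelspread[NUM_LVLS] = { 4, 16 };	/* children per node at each level */
	int nr_cpu_ids = 50;			/* hypothetical CPU count */
	int cpustride = 1;

	for (int i = NUM_LVLS - 1; i >= 0; i--) {	/* leaves first, root last */
		cpustride *= levelspread[i];
		for (int j = 0; j < num_rcu_lvl[i]; j++) {
			int grplo = j * cpustride;
			int grphi = (j + 1) * cpustride - 1;

			if (grphi >= nr_cpu_ids)	/* clamp the last node */
				grphi = nr_cpu_ids - 1;
			if (i == 0)
				printf("level %d (root): CPUs %d-%d\n",
				       i, grplo, grphi);
			else
				printf("level %d leaf %d: CPUs %d-%d, bit %d in parent\n",
				       i, j, grplo, grphi, j % levelspread[i - 1]);
		}
	}
	return 0;
}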
4890 * Force priority from the kernel command-line into range.
4907 pr_alert("%s: Limited prio to %d from %d\n", in sanitize_kthread_prio()
4914 * the ->node array in the rcu_state structure.
4940 * value, which is a function of HZ, then adding one for each in rcu_init_geometry()
4950 /* If the compile-time values are accurate, just leave. */ in rcu_init_geometry()
4954 pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n", in rcu_init_geometry()
4958 * The boot-time rcu_fanout_leaf parameter must be at least two in rcu_init_geometry()
4960 * Complain and fall back to the compile-time values if this in rcu_init_geometry()
4976 rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT; in rcu_init_geometry()
4980 * If this limit is exceeded, fall back to the compile-time values. in rcu_init_geometry()
4982 if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) { in rcu_init_geometry()
4995 int cap = rcu_capacity[(rcu_num_lvls - 1) - i]; in rcu_init_geometry()
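The preceding capacity calculation grows geometrically: the lowest entry is the leaf fanout and each further level multiplies it by RCU_FANOUT, so the code picks the smallest number of levels whose capacity covers nr_cpu_ids and then, roughly, sizes each level by dividing nr_cpu_ids by that level's capacity, rounding up. The userspace sketch below walks through that computation with assumed fanouts of 16 (leaf) and 64 (interior) and a hypothetical 300 CPUs.

/* Userspace sketch of the rcu_node geometry computation, with assumed values. */
#include <stdio.h>

#define MAX_LVLS		4
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int fanout_leaf = 16, fanout = 64;	/* assumed fanout values */
	int nr_cpu_ids = 300;			/* hypothetical CPU count */
	long capacity[MAX_LVLS];
	int levels, i;

	/* Each extra level multiplies reachable CPUs by the interior fanout. */
	capacity[0] = fanout_leaf;
	for (i = 1; i < MAX_LVLS; i++)
		capacity[i] = capacity[i - 1] * fanout;

	/* Smallest tree deep enough to cover nr_cpu_ids CPUs. */
	for (i = 0; i < MAX_LVLS - 1 && nr_cpu_ids > capacity[i]; i++)
		;
	levels = i + 1;

	/* Nodes per level, root (level 0) first. */
	for (i = 0; i < levels; i++) {
		long cap = capacity[(levels - 1) - i];

		printf("level %d: %ld node(s), each covering up to %ld CPUs\n",
		       i, (long)DIV_ROUND_UP(nr_cpu_ids, cap), cap);
	}
	return 0;
}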
5014 pr_info("rcu_node tree layout dump\n"); in rcu_dump_rcu_node_tree()
5017 if (rnp->level != level) { in rcu_dump_rcu_node_tree()
5018 pr_cont("\n"); in rcu_dump_rcu_node_tree()
5020 level = rnp->level; in rcu_dump_rcu_node_tree()
5022 pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum); in rcu_dump_rcu_node_tree()
5024 pr_cont("\n"); in rcu_dump_rcu_node_tree()
5042 pr_info("Adjusting rcutree.rcu_delay_page_cache_fill_msec to %d ms.\n", in kfree_rcu_batch_init()
5050 INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work); in kfree_rcu_batch_init()
5051 krcp->krw_arr[i].krcp = krcp; in kfree_rcu_batch_init()
5054 INIT_LIST_HEAD(&krcp->krw_arr[i].bulk_head_free[j]); in kfree_rcu_batch_init()
5058 INIT_LIST_HEAD(&krcp->bulk_head[i]); in kfree_rcu_batch_init()
5060 INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor); in kfree_rcu_batch_init()
5061 INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func); in kfree_rcu_batch_init()
5062 krcp->initialized = true; in kfree_rcu_batch_init()
5064 if (register_shrinker(&kfree_rcu_shrinker, "rcu-kfree")) in kfree_rcu_batch_init()
5065 pr_err("Failed to register kfree_rcu() shrinker!\n"); in kfree_rcu_batch_init()
5085 * We don't need protection against CPU-hotplug here because in rcu_init()
5101 /* -After- the rcu_node ->lock fields are initialized! */ in rcu_init()
5107 // Kick-start in case any polled grace periods started early. in rcu_init()