/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright IBM Corporation, 2008
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *	    Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *	Documentation/RCU
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <linux/prefetch.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <linux/random.h>
#include <linux/ftrace_event.h>
#include <linux/suspend.h>

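/*
 * Illustrative sketch only: the classic reader/updater pattern served by
 * the machinery in this file, using the core RCU API from rcupdate.h.
 * See Documentation/RCU for the authoritative discussion; the pointer
 * "gp" and the reader's use of *p below are purely hypothetical.
 *
 *	Reader:				Updater:
 *	rcu_read_lock();		old = gp;
 *	p = rcu_dereference(gp);	rcu_assign_pointer(gp, new);
 *	... use *p ...			synchronize_rcu();
 *	rcu_read_unlock();		kfree(old);
 */
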
#include "tree.h"
#include "rcu.h"

MODULE_ALIAS("rcutree");
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcutree."

/* Data structures. */

static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];

/*
 * In order to export the rcu_state name to the tracing tools, it
 * needs to be added in the __tracepoint_string section.
 * This requires defining a separate variable tp_<sname>_varname
 * that points to the string being used, and this will allow
 * the tracing userspace tools to be able to decipher the string
 * address to the matching string.
 */
#ifdef CONFIG_TRACING
# define DEFINE_RCU_TPS(sname) \
static char sname##_varname[] = #sname; \
static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname;
# define RCU_STATE_NAME(sname) sname##_varname
#else
# define DEFINE_RCU_TPS(sname)
# define RCU_STATE_NAME(sname) __stringify(sname)
#endif

#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
DEFINE_RCU_TPS(sname) \
DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
struct rcu_state sname##_state = { \
	.level = { &sname##_state.node[0] }, \
	.rda = &sname##_data, \
	.call = cr, \
	.fqs_state = RCU_GP_IDLE, \
	.gpnum = 0UL - 300UL, \
	.completed = 0UL - 300UL, \
	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
	.orphan_donetail = &sname##_state.orphan_donelist, \
	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
	.name = RCU_STATE_NAME(sname), \
	.abbr = sabbr, \
}

RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);

static struct rcu_state *rcu_state_p;
LIST_HEAD(rcu_struct_flavors);

/* Dump rcu_node combining tree at boot to verify correct setup. */
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
module_param(rcu_fanout_exact, bool, 0444);
/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */
static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0444);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
	NUM_RCU_LVL_0,
	NUM_RCU_LVL_1,
	NUM_RCU_LVL_2,
	NUM_RCU_LVL_3,
	NUM_RCU_LVL_4,
};
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */

/*
 * The rcu_scheduler_active variable transitions from zero to one just
 * before the first task is spawned.  So when this variable is zero, RCU
 * can assume that there is but one task, allowing RCU to (for example)
 * optimize synchronize_sched() to a simple barrier().  When this variable
 * is one, RCU must actually do all the hard work required to detect real
 * grace periods.  This variable is also used to suppress boot-time false
 * positives from lockdep-RCU error checking.
 */
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);

/*
 * The rcu_scheduler_fully_active variable transitions from zero to one
 * during the early_initcall() processing, which is after the scheduler
 * is capable of creating new tasks.  So RCU processing (for example,
 * creating tasks for RCU priority boosting) must be delayed until after
 * rcu_scheduler_fully_active transitions from zero to one.  We also
 * currently delay invocation of any RCU callbacks until after this point.
 *
 * It might later prove better for people registering RCU callbacks during
 * early boot to take responsibility for these callbacks, but one step at
 * a time.
 */
static int rcu_scheduler_fully_active __read_mostly;

static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);

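/*
 * Illustrative boot-time timeline for the two flags above, restating the
 * comments rather than adding new behavior:
 *
 *	early boot			rcu_scheduler_active == 0
 *	  (one task only; synchronize_sched() can reduce to barrier())
 *	first task spawned		rcu_scheduler_active -> 1
 *	early_initcall() processing	rcu_scheduler_fully_active -> 1
 *	  (RCU kthreads may now be created; callback invocation permitted)
 */
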
/* rcuc/rcub kthread realtime priority */
static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
module_param(kthread_prio, int, 0644);

/* Delay in jiffies for grace-period initialization delays, debug only. */

#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT
static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY;
module_param(gp_preinit_delay, int, 0644);
#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
static const int gp_preinit_delay;
#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */

#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
module_param(gp_init_delay, int, 0644);
#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
static const int gp_init_delay;
#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */

#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP
static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY;
module_param(gp_cleanup_delay, int, 0644);
#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
static const int gp_cleanup_delay;
#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */

/*
 * Number of grace periods between delays, normalized by the duration of
 * the delay.  The longer the delay, the more grace periods between
 * each delay.  The reason for this normalization is that it means that,
 * for non-zero delays, the overall slowdown of grace periods is constant
 * regardless of the duration of the delay.  This arrangement balances
 * the need for long delays to increase some race probabilities with the
 * need for fast grace periods to increase other race probabilities.
 */
#define PER_RCU_NODE_PERIOD 3	/* Number of grace periods between delays. */

/*
 * Track the rcutorture test sequence number and the update version
 * number within a given test.  The rcutorture_testseq is incremented
 * on every rcutorture module load and unload, so has an odd value
 * when a test is running.  The rcutorture_vernum is set to zero
 * when rcutorture starts and is incremented on each rcutorture update.
 * These variables enable correlating rcutorture output with the
 * RCU tracing information.
 */
unsigned long rcutorture_testseq;
unsigned long rcutorture_vernum;

/*
 * Compute the mask of online CPUs for the specified rcu_node structure.
 * This will not be stable unless the rcu_node structure's ->lock is
 * held, but the bit corresponding to the current CPU will be stable
 * in most contexts.
 */
unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
{
	return READ_ONCE(rnp->qsmaskinitnext);
}

/*
 * Return true if an RCU grace period is in progress.  The READ_ONCE()s
 * permit this function to be invoked without holding the root rcu_node
 * structure's ->lock, but of course results can be subject to change.
 */
static int rcu_gp_in_progress(struct rcu_state *rsp)
{
	return READ_ONCE(rsp->completed) != READ_ONCE(rsp->gpnum);
}

/*
 * Note a quiescent state.  Because we do not need to know
 * how many quiescent states passed, just if there was at least
 * one since the start of the grace period, this just sets a flag.
 * The caller must have disabled preemption.
 */
void rcu_sched_qs(void)
{
	if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
		trace_rcu_grace_period(TPS("rcu_sched"),
				       __this_cpu_read(rcu_sched_data.gpnum),
				       TPS("cpuqs"));
		__this_cpu_write(rcu_sched_data.passed_quiesce, 1);
	}
}

void rcu_bh_qs(void)
{
	if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
		trace_rcu_grace_period(TPS("rcu_bh"),
				       __this_cpu_read(rcu_bh_data.gpnum),
				       TPS("cpuqs"));
		__this_cpu_write(rcu_bh_data.passed_quiesce, 1);
	}
}

static DEFINE_PER_CPU(int, rcu_sched_qs_mask);

static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
	.dynticks = ATOMIC_INIT(1),
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
	.dynticks_idle = ATOMIC_INIT(1),
#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
};

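/*
 * Illustrative sketch (not used by the code): the low-order bit of the
 * ->dynticks counter above records whether RCU is watching this CPU.
 * The counter starts at 1 (odd, RCU watching); entering an extended
 * quiescent state increments it to an even value, and exiting increments
 * it back to an odd value, which is what the WARN_ON_ONCE(... & 0x1)
 * checks and __rcu_is_watching() later in this file rely on.  Roughly:
 *
 *	running:	->dynticks == 1  (odd,  RCU watching)
 *	idle entry:	->dynticks == 2  (even, extended quiescent state)
 *	idle exit:	->dynticks == 3  (odd,  RCU watching again)
 */
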
DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);

/*
 * Let the RCU core know that this CPU has gone through the scheduler,
 * which is a quiescent state.  This is called when the need for a
 * quiescent state is urgent, so we burn an atomic operation and full
 * memory barriers to let the RCU core know about it, regardless of what
 * this CPU might (or might not) do in the near future.
 *
 * We inform the RCU core by emulating a zero-duration dyntick-idle
 * period, which we in turn do by incrementing the ->dynticks counter
 * by two.
 */
static void rcu_momentary_dyntick_idle(void)
{
	unsigned long flags;
	struct rcu_data *rdp;
	struct rcu_dynticks *rdtp;
	int resched_mask;
	struct rcu_state *rsp;

	local_irq_save(flags);

	/*
	 * Yes, we can lose flag-setting operations.  This is OK, because
	 * the flag will be set again after some delay.
	 */
	resched_mask = raw_cpu_read(rcu_sched_qs_mask);
	raw_cpu_write(rcu_sched_qs_mask, 0);

	/* Find the flavor that needs a quiescent state. */
	for_each_rcu_flavor(rsp) {
		rdp = raw_cpu_ptr(rsp->rda);
		if (!(resched_mask & rsp->flavor_mask))
			continue;
		smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
		if (READ_ONCE(rdp->mynode->completed) !=
		    READ_ONCE(rdp->cond_resched_completed))
			continue;

		/*
		 * Pretend to be momentarily idle for the quiescent state.
		 * This allows the grace-period kthread to record the
		 * quiescent state, with no need for this CPU to do anything
		 * further.
		 */
		rdtp = this_cpu_ptr(&rcu_dynticks);
		smp_mb__before_atomic(); /* Earlier stuff before QS. */
		atomic_add(2, &rdtp->dynticks); /* QS. */
		smp_mb__after_atomic(); /* Later stuff after QS. */
		break;
	}
	local_irq_restore(flags);
}

/*
 * Note a context switch.  This is a quiescent state for RCU-sched,
 * and requires special handling for preemptible RCU.
 * The caller must have disabled preemption.
 */
void rcu_note_context_switch(void)
{
	trace_rcu_utilization(TPS("Start context switch"));
	rcu_sched_qs();
	rcu_preempt_note_context_switch();
	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
		rcu_momentary_dyntick_idle();
	trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

/*
 * Register a quiescent state for all RCU flavors.  If there is an
 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
 * dyntick-idle quiescent state visible to other CPUs (but only for those
 * RCU flavors in desperate need of a quiescent state, which will normally
 * be none of them).  Either way, do a lightweight quiescent state for
 * all RCU flavors.
 */
void rcu_all_qs(void)
{
	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
		rcu_momentary_dyntick_idle();
	this_cpu_inc(rcu_qs_ctr);
}
EXPORT_SYMBOL_GPL(rcu_all_qs);

static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
static long qhimark = 10000;	/* If this many pending, ignore blimit. */
static long qlowmark = 100;	/* Once only this many pending, use blimit. */

module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);

static ulong jiffies_till_first_fqs = ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;

module_param(jiffies_till_first_fqs, ulong, 0644);
module_param(jiffies_till_next_fqs, ulong, 0644);

/*
 * How long the grace period must be before we start recruiting
 * quiescent-state help from rcu_note_context_switch().
 */
static ulong jiffies_till_sched_qs = HZ / 20;
module_param(jiffies_till_sched_qs, ulong, 0644);

static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
				  struct rcu_data *rdp);
static void force_qs_rnp(struct rcu_state *rsp,
			 int (*f)(struct rcu_data *rsp, bool *isidle,
				  unsigned long *maxj),
			 bool *isidle, unsigned long *maxj);
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(void);

/*
 * Return the number of RCU batches started thus far for debug & stats.
 */
unsigned long rcu_batches_started(void)
{
	return rcu_state_p->gpnum;
}
EXPORT_SYMBOL_GPL(rcu_batches_started);

/*
 * Return the number of RCU-sched batches started thus far for debug & stats.
 */
unsigned long rcu_batches_started_sched(void)
{
	return rcu_sched_state.gpnum;
}
EXPORT_SYMBOL_GPL(rcu_batches_started_sched);

/*
 * Return the number of RCU BH batches started thus far for debug & stats.
 */
unsigned long rcu_batches_started_bh(void)
{
	return rcu_bh_state.gpnum;
}
EXPORT_SYMBOL_GPL(rcu_batches_started_bh);

/*
 * Return the number of RCU batches completed thus far for debug & stats.
 */
unsigned long rcu_batches_completed(void)
{
	return rcu_state_p->completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);

/*
 * Return the number of RCU-sched batches completed thus far for debug & stats.
 */
unsigned long rcu_batches_completed_sched(void)
{
	return rcu_sched_state.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);

/*
 * Return the number of RCU BH batches completed thus far for debug & stats.
 */
unsigned long rcu_batches_completed_bh(void)
{
	return rcu_bh_state.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);

/*
 * Force a quiescent state.
 */
void rcu_force_quiescent_state(void)
{
	force_quiescent_state(rcu_state_p);
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);

/*
 * Force a quiescent state for RCU BH.
 */
void rcu_bh_force_quiescent_state(void)
{
	force_quiescent_state(&rcu_bh_state);
}
EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);

/*
 * Force a quiescent state for RCU-sched.
 */
void rcu_sched_force_quiescent_state(void)
{
	force_quiescent_state(&rcu_sched_state);
}
EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);

/*
 * Show the state of the grace-period kthreads.
 */
void show_rcu_gp_kthreads(void)
{
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp) {
		pr_info("%s: wait state: %d ->state: %#lx\n",
			rsp->name, rsp->gp_state, rsp->gp_kthread->state);
		/* sched_show_task(rsp->gp_kthread); */
	}
}
EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);

/*
 * Record the number of times rcutorture tests have been initiated and
 * terminated.  This information allows the debugfs tracing stats to be
 * correlated to the rcutorture messages, even when the rcutorture module
 * is being repeatedly loaded and unloaded.  In other words, we cannot
 * store this state in rcutorture itself.
 */
void rcutorture_record_test_transition(void)
{
	rcutorture_testseq++;
	rcutorture_vernum = 0;
}
EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);

/*
 * Send along grace-period-related data for rcutorture diagnostics.
 */
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
			    unsigned long *gpnum, unsigned long *completed)
{
	struct rcu_state *rsp = NULL;

	switch (test_type) {
	case RCU_FLAVOR:
		rsp = rcu_state_p;
		break;
	case RCU_BH_FLAVOR:
		rsp = &rcu_bh_state;
		break;
	case RCU_SCHED_FLAVOR:
		rsp = &rcu_sched_state;
		break;
	default:
		break;
	}
	if (rsp != NULL) {
		*flags = READ_ONCE(rsp->gp_flags);
		*gpnum = READ_ONCE(rsp->gpnum);
		*completed = READ_ONCE(rsp->completed);
		return;
	}
	*flags = 0;
	*gpnum = 0;
	*completed = 0;
}
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);

/*
 * Record the number of writer passes through the current rcutorture test.
 * This is also used to correlate debugfs tracing stats with the rcutorture
 * messages.
 */
void rcutorture_record_progress(unsigned long vernum)
{
	rcutorture_vernum++;
}
EXPORT_SYMBOL_GPL(rcutorture_record_progress);

/*
 * Does the CPU have callbacks ready to be invoked?
 */
static int
cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
{
	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
	       rdp->nxttail[RCU_DONE_TAIL] != NULL;
}

/*
 * Return the root node of the specified rcu_state structure.
 */
static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
{
	return &rsp->node[0];
}

/*
 * Is there any need for future grace periods?
 * Interrupts must be disabled.  If the caller does not hold the root
 * rcu_node structure's ->lock, the results are advisory only.
 */
static int rcu_future_needs_gp(struct rcu_state *rsp)
{
	struct rcu_node *rnp = rcu_get_root(rsp);
	int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
	int *fp = &rnp->need_future_gp[idx];

	return READ_ONCE(*fp);
}

/*
 * Does the current CPU require a not-yet-started grace period?
 * The caller must have disabled interrupts to prevent races with
 * normal callback registry.
 */
static int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
	int i;

	if (rcu_gp_in_progress(rsp))
		return 0;  /* No, a grace period is already in progress. */
	if (rcu_future_needs_gp(rsp))
		return 1;  /* Yes, a no-CBs CPU needs one. */
	if (!rdp->nxttail[RCU_NEXT_TAIL])
		return 0;  /* No, this is a no-CBs (or offline) CPU. */
	if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
		return 1;  /* Yes, this CPU has newly registered callbacks. */
	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
		if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
		    ULONG_CMP_LT(READ_ONCE(rsp->completed),
				 rdp->nxtcompleted[i]))
			return 1;  /* Yes, CBs for future grace period. */
	return 0; /* No grace period needed. */
}

/*
 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
 *
 * If the new value of the ->dynticks_nesting counter now is zero,
 * we really have entered idle, and must do the appropriate accounting.
 * The caller must have disabled interrupts.
 */
static void rcu_eqs_enter_common(long long oldval, bool user)
{
	struct rcu_state *rsp;
	struct rcu_data *rdp;
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);

	trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
	if (!user && !is_idle_task(current)) {
		struct task_struct *idle __maybe_unused =
			idle_task(smp_processor_id());

		trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
		ftrace_dump(DUMP_ORIG);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
	}
	for_each_rcu_flavor(rsp) {
		rdp = this_cpu_ptr(rsp->rda);
		do_nocb_deferred_wakeup(rdp);
	}
	rcu_prepare_for_idle();
	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
	smp_mb__before_atomic();  /* See above. */
	atomic_inc(&rdtp->dynticks);
	smp_mb__after_atomic();  /* Force ordering with next sojourn. */
	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
	rcu_dynticks_task_enter();

	/*
	 * It is illegal to enter an extended quiescent state while
	 * in an RCU read-side critical section.
	 */
	rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
			   "Illegal idle entry in RCU read-side critical section.");
	rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
			   "Illegal idle entry in RCU-bh read-side critical section.");
	rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
			   "Illegal idle entry in RCU-sched read-side critical section.");
}

/*
 * Enter an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 */
static void rcu_eqs_enter(bool user)
{
	long long oldval;
	struct rcu_dynticks *rdtp;

	rdtp = this_cpu_ptr(&rcu_dynticks);
	oldval = rdtp->dynticks_nesting;
	WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
		rdtp->dynticks_nesting = 0;
		rcu_eqs_enter_common(oldval, user);
	} else {
		rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
	}
}

/**
 * rcu_idle_enter - inform RCU that current CPU is entering idle
 *
 * Enter idle mode, in other words, -leave- the mode in which RCU
 * read-side critical sections can occur.  (Though RCU read-side
 * critical sections can occur in irq handlers in idle, a possibility
 * handled by irq_enter() and irq_exit().)
 *
 * We crowbar the ->dynticks_nesting field to zero to allow for
 * the possibility of usermode upcalls having messed up our count
 * of interrupt nesting level during the prior busy period.
 */
void rcu_idle_enter(void)
{
	unsigned long flags;

	local_irq_save(flags);
	rcu_eqs_enter(false);
	rcu_sysidle_enter(0);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);

#ifdef CONFIG_RCU_USER_QS
/**
 * rcu_user_enter - inform RCU that we are resuming userspace.
 *
 * Enter RCU idle mode right before resuming userspace.  No use of RCU
 * is permitted between this call and rcu_user_exit(). This way the
 * CPU doesn't need to maintain the tick for RCU maintenance purposes
 * when the CPU runs in userspace.
 */
void rcu_user_enter(void)
{
	rcu_eqs_enter(1);
}
#endif /* CONFIG_RCU_USER_QS */

/**
 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
 *
 * Exit from an interrupt handler, which might possibly result in entering
 * idle mode, in other words, leaving the mode in which read-side critical
 * sections can occur.
 *
 * This code assumes that the idle loop never does anything that might
 * result in unbalanced calls to irq_enter() and irq_exit().  If your
 * architecture violates this assumption, RCU will give you what you
 * deserve, good and hard.  But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 */
void rcu_irq_exit(void)
{
	unsigned long flags;
	long long oldval;
	struct rcu_dynticks *rdtp;

	local_irq_save(flags);
	rdtp = this_cpu_ptr(&rcu_dynticks);
	oldval = rdtp->dynticks_nesting;
	rdtp->dynticks_nesting--;
	WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
	if (rdtp->dynticks_nesting)
		trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
	else
		rcu_eqs_enter_common(oldval, true);
	rcu_sysidle_enter(1);
	local_irq_restore(flags);
}

/*
 * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
 *
 * If the new value of the ->dynticks_nesting counter was previously zero,
 * we really have exited idle, and must do the appropriate accounting.
 * The caller must have disabled interrupts.
 */
static void rcu_eqs_exit_common(long long oldval, int user)
{
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);

	rcu_dynticks_task_exit();
	smp_mb__before_atomic();  /* Force ordering w/previous sojourn. */
	atomic_inc(&rdtp->dynticks);
	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
	smp_mb__after_atomic();  /* See above. */
	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
	rcu_cleanup_after_idle();
	trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
	if (!user && !is_idle_task(current)) {
		struct task_struct *idle __maybe_unused =
			idle_task(smp_processor_id());

		trace_rcu_dyntick(TPS("Error on exit: not idle task"),
				  oldval, rdtp->dynticks_nesting);
		ftrace_dump(DUMP_ORIG);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
	}
}

/*
 * Exit an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 */
static void rcu_eqs_exit(bool user)
{
	struct rcu_dynticks *rdtp;
	long long oldval;

	rdtp = this_cpu_ptr(&rcu_dynticks);
	oldval = rdtp->dynticks_nesting;
	WARN_ON_ONCE(oldval < 0);
	if (oldval & DYNTICK_TASK_NEST_MASK) {
		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
	} else {
		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
		rcu_eqs_exit_common(oldval, user);
	}
}

/**
 * rcu_idle_exit - inform RCU that current CPU is leaving idle
 *
 * Exit idle mode, in other words, -enter- the mode in which RCU
 * read-side critical sections can occur.
 *
 * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
 * allow for the possibility of usermode upcalls messing up our count
 * of interrupt nesting level during the busy period that is just
 * now starting.
 */
void rcu_idle_exit(void)
{
	unsigned long flags;

	local_irq_save(flags);
	rcu_eqs_exit(false);
	rcu_sysidle_exit(0);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);

#ifdef CONFIG_RCU_USER_QS
/**
 * rcu_user_exit - inform RCU that we are exiting userspace.
 *
 * Exit RCU idle mode while entering the kernel because it can
 * run an RCU read-side critical section anytime.
 */
void rcu_user_exit(void)
{
	rcu_eqs_exit(1);
}
#endif /* CONFIG_RCU_USER_QS */

/**
 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
 *
 * Enter an interrupt handler, which might possibly result in exiting
 * idle mode, in other words, entering the mode in which read-side critical
 * sections can occur.
 *
 * Note that the Linux kernel is fully capable of entering an interrupt
 * handler that it never exits, for example when doing upcalls to
 * user mode!  This code assumes that the idle loop never does upcalls to
 * user mode.  If your architecture does do upcalls from the idle loop (or
 * does anything else that results in unbalanced calls to the irq_enter()
 * and irq_exit() functions), RCU will give you what you deserve, good
 * and hard.  But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 */
void rcu_irq_enter(void)
{
	unsigned long flags;
	struct rcu_dynticks *rdtp;
	long long oldval;

	local_irq_save(flags);
	rdtp = this_cpu_ptr(&rcu_dynticks);
	oldval = rdtp->dynticks_nesting;
	rdtp->dynticks_nesting++;
	WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
	if (oldval)
		trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
	else
		rcu_eqs_exit_common(oldval, true);
	rcu_sysidle_exit(1);
	local_irq_restore(flags);
}

/**
 * rcu_nmi_enter - inform RCU of entry to NMI context
 *
 * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and
 * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know
 * that the CPU is active.  This implementation permits nested NMIs, as
 * long as the nesting level does not overflow an int.  (You will probably
 * run out of stack space first.)
 */
void rcu_nmi_enter(void)
{
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
	int incby = 2;

	/* Complain about underflow. */
	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);

	/*
	 * If idle from RCU viewpoint, atomically increment ->dynticks
	 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
	 * Otherwise, increment ->dynticks_nmi_nesting by two.  This means
	 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
	 * to be in the outermost NMI handler that interrupted an RCU-idle
	 * period (observation due to Andy Lutomirski).
	 */
	if (!(atomic_read(&rdtp->dynticks) & 0x1)) {
		smp_mb__before_atomic();  /* Force delay from prior write. */
		atomic_inc(&rdtp->dynticks);
		/* atomic_inc() before later RCU read-side crit sects */
		smp_mb__after_atomic();  /* See above. */
		WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
		incby = 1;
	}
	rdtp->dynticks_nmi_nesting += incby;
	barrier();
}

/**
 * rcu_nmi_exit - inform RCU of exit from NMI context
 *
 * If we are returning from the outermost NMI handler that interrupted an
 * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
 * to let the RCU grace-period handling know that the CPU is back to
 * being RCU-idle.
 */
void rcu_nmi_exit(void)
{
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);

	/*
	 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
	 * (We are exiting an NMI handler, so RCU better be paying attention
	 * to us!)
	 */
	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));

	/*
	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
	 * leave it in non-RCU-idle state.
	 */
	if (rdtp->dynticks_nmi_nesting != 1) {
		rdtp->dynticks_nmi_nesting -= 2;
		return;
	}

	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
	rdtp->dynticks_nmi_nesting = 0;
	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
	smp_mb__before_atomic();  /* See above. */
	atomic_inc(&rdtp->dynticks);
	smp_mb__after_atomic();  /* Force delay to next write. */
	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
}

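/*
 * Worked example of the ->dynticks_nmi_nesting arithmetic used by
 * rcu_nmi_enter() and rcu_nmi_exit() above (illustrative only):
 *
 *	CPU RCU-idle: ->dynticks even, ->dynticks_nmi_nesting == 0
 *	rcu_nmi_enter()		-> atomic_inc(->dynticks), nesting += 1 (== 1)
 *	  nested rcu_nmi_enter()	-> nesting += 2 (== 3)
 *	  nested rcu_nmi_exit()		-> nesting -= 2 (== 1)
 *	rcu_nmi_exit()		-> nesting == 1, so reset it to 0 and
 *				   atomic_inc(->dynticks) back to even.
 *
 * A nesting value of exactly one therefore identifies the outermost NMI
 * that interrupted an RCU-idle period, as noted in the comments above.
 */
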
/**
 * __rcu_is_watching - are RCU read-side critical sections safe?
 *
 * Return true if RCU is watching the running CPU, which means that
 * this CPU can safely enter RCU read-side critical sections.  Unlike
 * rcu_is_watching(), the caller of __rcu_is_watching() must have at
 * least disabled preemption.
 */
bool notrace __rcu_is_watching(void)
{
	return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
}

/**
 * rcu_is_watching - see if RCU thinks that the current CPU is not idle
 *
 * Return true if RCU is watching the running CPU, that is, if the CPU
 * is neither in its idle loop nor in an NMI handler that interrupted it.
 */
bool notrace rcu_is_watching(void)
{
	bool ret;

	preempt_disable();
	ret = __rcu_is_watching();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(rcu_is_watching);

#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)

/*
 * Is the current CPU online?  Disable preemption to avoid false positives
 * that could otherwise happen due to the current CPU number being sampled,
 * this task being preempted, its old CPU being taken offline, resuming
 * on some other CPU, then determining that its old CPU is now offline.
 * It is OK to use RCU on an offline processor during initial boot, hence
 * the check for rcu_scheduler_fully_active.  Note also that it is OK
 * for a CPU coming online to use RCU for one jiffy prior to marking itself
 * online in the cpu_online_mask.  Similarly, it is OK for a CPU going
 * offline to continue to use RCU for one jiffy after marking itself
 * offline in the cpu_online_mask.  This leniency is necessary given the
 * non-atomic nature of the online and offline processing, for example,
 * the fact that a CPU enters the scheduler after completing the CPU_DYING
 * notifiers.
 *
 * This is also why RCU internally marks CPUs online during the
 * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
 *
 * Disable checking if in an NMI handler because we cannot safely report
 * errors from NMI handlers anyway.
 */
bool rcu_lockdep_current_cpu_online(void)
{
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	bool ret;

	if (in_nmi())
		return true;
	preempt_disable();
	rdp = this_cpu_ptr(&rcu_sched_data);
	rnp = rdp->mynode;
	ret = (rdp->grpmask & rcu_rnp_online_cpus(rnp)) ||
	      !rcu_scheduler_fully_active;
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);

#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */

/**
 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
 *
 * If the current CPU is idle or running at a first-level (not nested)
 * interrupt from idle, return true.  The caller must have at least
 * disabled preemption.
 */
static int rcu_is_cpu_rrupt_from_idle(void)
{
	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
}

/*
 * Snapshot the specified CPU's dynticks counter so that we can later
 * credit it with an implicit quiescent state.  Return 1 if this CPU
 * is in dynticks idle mode, which is an extended quiescent state.
 */
static int dyntick_save_progress_counter(struct rcu_data *rdp,
					 bool *isidle, unsigned long *maxj)
{
	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
	rcu_sysidle_check_cpu(rdp, isidle, maxj);
	if ((rdp->dynticks_snap & 0x1) == 0) {
		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
		return 1;
	} else {
		if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
				 rdp->mynode->gpnum))
			WRITE_ONCE(rdp->gpwrap, true);
		return 0;
	}
}

/*
 * Return true if the specified CPU has passed through a quiescent
 * state by virtue of being in or having passed through a dynticks
 * idle state since the last call to dyntick_save_progress_counter()
 * for this same CPU, or by virtue of having been offline.
 */
McKenney static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, 10594102adabSPaul E. McKenney bool *isidle, unsigned long *maxj) 10604102adabSPaul E. McKenney { 10614102adabSPaul E. McKenney unsigned int curr; 10624a81e832SPaul E. McKenney int *rcrmp; 10634102adabSPaul E. McKenney unsigned int snap; 10644102adabSPaul E. McKenney 10654102adabSPaul E. McKenney curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); 10664102adabSPaul E. McKenney snap = (unsigned int)rdp->dynticks_snap; 10674102adabSPaul E. McKenney 10684102adabSPaul E. McKenney /* 10694102adabSPaul E. McKenney * If the CPU passed through or entered a dynticks idle phase with 10704102adabSPaul E. McKenney * no active irq/NMI handlers, then we can safely pretend that the CPU 10714102adabSPaul E. McKenney * already acknowledged the request to pass through a quiescent 10724102adabSPaul E. McKenney * state. Either way, that CPU cannot possibly be in an RCU 10734102adabSPaul E. McKenney * read-side critical section that started before the beginning 10744102adabSPaul E. McKenney * of the current RCU grace period. 10754102adabSPaul E. McKenney */ 10764102adabSPaul E. McKenney if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { 10774102adabSPaul E. McKenney trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); 10784102adabSPaul E. McKenney rdp->dynticks_fqs++; 10794102adabSPaul E. McKenney return 1; 10804102adabSPaul E. McKenney } 10814102adabSPaul E. McKenney 10824102adabSPaul E. McKenney /* 10834102adabSPaul E. McKenney * Check for the CPU being offline, but only if the grace period 10844102adabSPaul E. McKenney * is old enough. We don't need to worry about the CPU changing 10854102adabSPaul E. McKenney * state: If we see it offline even once, it has been through a 10864102adabSPaul E. McKenney * quiescent state. 10874102adabSPaul E. McKenney * 10884102adabSPaul E. McKenney * The reason for insisting that the grace period be at least 10894102adabSPaul E. McKenney * one jiffy old is that CPUs that are not quite online and that 10904102adabSPaul E. McKenney * have just gone offline can still execute RCU read-side critical 10914102adabSPaul E. McKenney * sections. 10924102adabSPaul E. McKenney */ 10934102adabSPaul E. McKenney if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies)) 10944102adabSPaul E. McKenney return 0; /* Grace period is not old enough. */ 10954102adabSPaul E. McKenney barrier(); 10964102adabSPaul E. McKenney if (cpu_is_offline(rdp->cpu)) { 10974102adabSPaul E. McKenney trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl")); 10984102adabSPaul E. McKenney rdp->offline_fqs++; 10994102adabSPaul E. McKenney return 1; 11004102adabSPaul E. McKenney } 11014102adabSPaul E. McKenney 11024102adabSPaul E. McKenney /* 11034a81e832SPaul E. McKenney * A CPU running for an extended time within the kernel can 11044a81e832SPaul E. McKenney * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode, 11054a81e832SPaul E. McKenney * even context-switching back and forth between a pair of 11064a81e832SPaul E. McKenney * in-kernel CPU-bound tasks cannot advance grace periods. 11074a81e832SPaul E. McKenney * So if the grace period is old enough, make the CPU pay attention. 11084a81e832SPaul E. McKenney * Note that the unsynchronized assignments to the per-CPU 11094a81e832SPaul E. McKenney * rcu_sched_qs_mask variable are safe. Yes, setting of 11104a81e832SPaul E. McKenney * bits can be lost, but they will be set again on the next 11114a81e832SPaul E. McKenney * force-quiescent-state pass. 
So lost bit sets do not result 11124a81e832SPaul E. McKenney * in incorrect behavior, merely in a grace period lasting 11134a81e832SPaul E. McKenney * a few jiffies longer than it might otherwise. Because 11144a81e832SPaul E. McKenney * there are at most four threads involved, and because the 11154a81e832SPaul E. McKenney * updates are only once every few jiffies, the probability of 11164a81e832SPaul E. McKenney * lossage (and thus of slight grace-period extension) is 11174a81e832SPaul E. McKenney * quite low. 11184a81e832SPaul E. McKenney * 11194a81e832SPaul E. McKenney * Note that if the jiffies_till_sched_qs boot/sysfs parameter 11204a81e832SPaul E. McKenney * is set too high, we override with half of the RCU CPU stall 11214a81e832SPaul E. McKenney * warning delay. 11224102adabSPaul E. McKenney */ 11234a81e832SPaul E. McKenney rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu); 11244a81e832SPaul E. McKenney if (ULONG_CMP_GE(jiffies, 11254a81e832SPaul E. McKenney rdp->rsp->gp_start + jiffies_till_sched_qs) || 1126cb1e78cfSPaul E. McKenney ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { 11277d0ae808SPaul E. McKenney if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { 11287d0ae808SPaul E. McKenney WRITE_ONCE(rdp->cond_resched_completed, 11297d0ae808SPaul E. McKenney READ_ONCE(rdp->mynode->completed)); 11304a81e832SPaul E. McKenney smp_mb(); /* ->cond_resched_completed before *rcrmp. */ 11317d0ae808SPaul E. McKenney WRITE_ONCE(*rcrmp, 11327d0ae808SPaul E. McKenney READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask); 11334a81e832SPaul E. McKenney resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ 11344a81e832SPaul E. McKenney rdp->rsp->jiffies_resched += 5; /* Enable beating. */ 11354a81e832SPaul E. McKenney } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { 11364a81e832SPaul E. McKenney /* Time to beat on that CPU again! */ 11374a81e832SPaul E. McKenney resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ 11384a81e832SPaul E. McKenney rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ 11394a81e832SPaul E. McKenney } 11406193c76aSPaul E. McKenney } 11416193c76aSPaul E. McKenney 11424102adabSPaul E. McKenney return 0; 11434102adabSPaul E. McKenney } 11444102adabSPaul E. McKenney 11454102adabSPaul E. McKenney static void record_gp_stall_check_time(struct rcu_state *rsp) 11464102adabSPaul E. McKenney { 1147cb1e78cfSPaul E. McKenney unsigned long j = jiffies; 11486193c76aSPaul E. McKenney unsigned long j1; 11494102adabSPaul E. McKenney 11504102adabSPaul E. McKenney rsp->gp_start = j; 11514102adabSPaul E. McKenney smp_wmb(); /* Record start time before stall time. */ 11526193c76aSPaul E. McKenney j1 = rcu_jiffies_till_stall_check(); 11537d0ae808SPaul E. McKenney WRITE_ONCE(rsp->jiffies_stall, j + j1); 11546193c76aSPaul E. McKenney rsp->jiffies_resched = j + j1 / 2; 11557d0ae808SPaul E. McKenney rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs); 11564102adabSPaul E. McKenney } 11574102adabSPaul E. McKenney 11584102adabSPaul E. McKenney /* 1159fb81a44bSPaul E. McKenney * Complain about starvation of grace-period kthread. 1160fb81a44bSPaul E. McKenney */ 1161fb81a44bSPaul E. McKenney static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) 1162fb81a44bSPaul E. McKenney { 1163fb81a44bSPaul E. McKenney unsigned long gpa; 1164fb81a44bSPaul E. McKenney unsigned long j; 1165fb81a44bSPaul E. McKenney 1166fb81a44bSPaul E. McKenney j = jiffies; 11677d0ae808SPaul E. McKenney gpa = READ_ONCE(rsp->gp_activity); 1168fb81a44bSPaul E. McKenney if (j - gpa > 2 * HZ) 1169fb81a44bSPaul E. 
McKenney pr_err("%s kthread starved for %ld jiffies!\n", 1170fb81a44bSPaul E. McKenney rsp->name, j - gpa); 11714102adabSPaul E. McKenney } 11724102adabSPaul E. McKenney 11734102adabSPaul E. McKenney /* 1174bc1dce51SPaul E. McKenney * Dump stacks of all tasks running on stalled CPUs. 11754102adabSPaul E. McKenney */ 11764102adabSPaul E. McKenney static void rcu_dump_cpu_stacks(struct rcu_state *rsp) 11774102adabSPaul E. McKenney { 11784102adabSPaul E. McKenney int cpu; 11794102adabSPaul E. McKenney unsigned long flags; 11804102adabSPaul E. McKenney struct rcu_node *rnp; 11814102adabSPaul E. McKenney 11824102adabSPaul E. McKenney rcu_for_each_leaf_node(rsp, rnp) { 11834102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 11844102adabSPaul E. McKenney if (rnp->qsmask != 0) { 11854102adabSPaul E. McKenney for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 11864102adabSPaul E. McKenney if (rnp->qsmask & (1UL << cpu)) 11874102adabSPaul E. McKenney dump_cpu_task(rnp->grplo + cpu); 11884102adabSPaul E. McKenney } 11894102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 11904102adabSPaul E. McKenney } 11914102adabSPaul E. McKenney } 11924102adabSPaul E. McKenney 11936ccd2ecdSPaul E. McKenney static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) 11944102adabSPaul E. McKenney { 11954102adabSPaul E. McKenney int cpu; 11964102adabSPaul E. McKenney long delta; 11974102adabSPaul E. McKenney unsigned long flags; 11986ccd2ecdSPaul E. McKenney unsigned long gpa; 11996ccd2ecdSPaul E. McKenney unsigned long j; 12004102adabSPaul E. McKenney int ndetected = 0; 12014102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 12024102adabSPaul E. McKenney long totqlen = 0; 12034102adabSPaul E. McKenney 12044102adabSPaul E. McKenney /* Only let one CPU complain about others per time interval. */ 12054102adabSPaul E. McKenney 12064102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 12077d0ae808SPaul E. McKenney delta = jiffies - READ_ONCE(rsp->jiffies_stall); 12084102adabSPaul E. McKenney if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 12094102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 12104102adabSPaul E. McKenney return; 12114102adabSPaul E. McKenney } 12127d0ae808SPaul E. McKenney WRITE_ONCE(rsp->jiffies_stall, 12137d0ae808SPaul E. McKenney jiffies + 3 * rcu_jiffies_till_stall_check() + 3); 12144102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 12154102adabSPaul E. McKenney 12164102adabSPaul E. McKenney /* 12174102adabSPaul E. McKenney * OK, time to rat on our buddy... 12184102adabSPaul E. McKenney * See Documentation/RCU/stallwarn.txt for info on how to debug 12194102adabSPaul E. McKenney * RCU CPU stall warnings. 12204102adabSPaul E. McKenney */ 12214102adabSPaul E. McKenney pr_err("INFO: %s detected stalls on CPUs/tasks:", 12224102adabSPaul E. McKenney rsp->name); 12234102adabSPaul E. McKenney print_cpu_stall_info_begin(); 12244102adabSPaul E. McKenney rcu_for_each_leaf_node(rsp, rnp) { 12254102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 12264102adabSPaul E. McKenney ndetected += rcu_print_task_stall(rnp); 12274102adabSPaul E. McKenney if (rnp->qsmask != 0) { 12284102adabSPaul E. McKenney for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 12294102adabSPaul E. McKenney if (rnp->qsmask & (1UL << cpu)) { 12304102adabSPaul E. McKenney print_cpu_stall_info(rsp, 12314102adabSPaul E. McKenney rnp->grplo + cpu); 12324102adabSPaul E. 
McKenney ndetected++; 12334102adabSPaul E. McKenney } 12344102adabSPaul E. McKenney } 12354102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 12364102adabSPaul E. McKenney } 12374102adabSPaul E. McKenney 12384102adabSPaul E. McKenney print_cpu_stall_info_end(); 12394102adabSPaul E. McKenney for_each_possible_cpu(cpu) 12404102adabSPaul E. McKenney totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 124183ebe63eSPaul E. McKenney pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", 12424102adabSPaul E. McKenney smp_processor_id(), (long)(jiffies - rsp->gp_start), 124383ebe63eSPaul E. McKenney (long)rsp->gpnum, (long)rsp->completed, totqlen); 12446ccd2ecdSPaul E. McKenney if (ndetected) { 12454102adabSPaul E. McKenney rcu_dump_cpu_stacks(rsp); 12466ccd2ecdSPaul E. McKenney } else { 12477d0ae808SPaul E. McKenney if (READ_ONCE(rsp->gpnum) != gpnum || 12487d0ae808SPaul E. McKenney READ_ONCE(rsp->completed) == gpnum) { 12496ccd2ecdSPaul E. McKenney pr_err("INFO: Stall ended before state dump start\n"); 12506ccd2ecdSPaul E. McKenney } else { 12516ccd2ecdSPaul E. McKenney j = jiffies; 12527d0ae808SPaul E. McKenney gpa = READ_ONCE(rsp->gp_activity); 1253237a0f21SPaul E. McKenney pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", 12546ccd2ecdSPaul E. McKenney rsp->name, j - gpa, j, gpa, 1255237a0f21SPaul E. McKenney jiffies_till_next_fqs, 1256237a0f21SPaul E. McKenney rcu_get_root(rsp)->qsmask); 12576ccd2ecdSPaul E. McKenney /* In this case, the current CPU might be at fault. */ 12586ccd2ecdSPaul E. McKenney sched_show_task(current); 12596ccd2ecdSPaul E. McKenney } 12606ccd2ecdSPaul E. McKenney } 12614102adabSPaul E. McKenney 12624102adabSPaul E. McKenney /* Complain about tasks blocking the grace period. */ 12634102adabSPaul E. McKenney rcu_print_detail_task_stall(rsp); 12644102adabSPaul E. McKenney 1265fb81a44bSPaul E. McKenney rcu_check_gp_kthread_starvation(rsp); 1266fb81a44bSPaul E. McKenney 12674102adabSPaul E. McKenney force_quiescent_state(rsp); /* Kick them all. */ 12684102adabSPaul E. McKenney } 12694102adabSPaul E. McKenney 12704102adabSPaul E. McKenney static void print_cpu_stall(struct rcu_state *rsp) 12714102adabSPaul E. McKenney { 12724102adabSPaul E. McKenney int cpu; 12734102adabSPaul E. McKenney unsigned long flags; 12744102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 12754102adabSPaul E. McKenney long totqlen = 0; 12764102adabSPaul E. McKenney 12774102adabSPaul E. McKenney /* 12784102adabSPaul E. McKenney * OK, time to rat on ourselves... 12794102adabSPaul E. McKenney * See Documentation/RCU/stallwarn.txt for info on how to debug 12804102adabSPaul E. McKenney * RCU CPU stall warnings. 12814102adabSPaul E. McKenney */ 12824102adabSPaul E. McKenney pr_err("INFO: %s self-detected stall on CPU", rsp->name); 12834102adabSPaul E. McKenney print_cpu_stall_info_begin(); 12844102adabSPaul E. McKenney print_cpu_stall_info(rsp, smp_processor_id()); 12854102adabSPaul E. McKenney print_cpu_stall_info_end(); 12864102adabSPaul E. McKenney for_each_possible_cpu(cpu) 12874102adabSPaul E. McKenney totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 128883ebe63eSPaul E. McKenney pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", 128983ebe63eSPaul E. McKenney jiffies - rsp->gp_start, 129083ebe63eSPaul E. McKenney (long)rsp->gpnum, (long)rsp->completed, totqlen); 1291fb81a44bSPaul E. McKenney 1292fb81a44bSPaul E. McKenney rcu_check_gp_kthread_starvation(rsp); 1293fb81a44bSPaul E. 
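	/*
	 * The starvation check above complains only when ->gp_activity
	 * has not advanced for more than 2 * HZ jiffies, that is,
	 * roughly two seconds of wall-clock time at the default tick rate.
	 */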
	rcu_dump_cpu_stacks(rsp);

	raw_spin_lock_irqsave(&rnp->lock, flags);
	if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
		WRITE_ONCE(rsp->jiffies_stall,
			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
	raw_spin_unlock_irqrestore(&rnp->lock, flags);

	/*
	 * Attempt to revive the RCU machinery by forcing a context switch.
	 *
	 * A context switch would normally allow the RCU state machine to make
	 * progress and it could be we're stuck in kernel space without context
	 * switches for an entirely unreasonable amount of time.
	 */
	resched_cpu(smp_processor_id());
}

static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
{
	unsigned long completed;
	unsigned long gpnum;
	unsigned long gps;
	unsigned long j;
	unsigned long js;
	struct rcu_node *rnp;

	if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
		return;
	j = jiffies;

	/*
	 * Lots of memory barriers to reject false positives.
	 *
	 * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall,
	 * then rsp->gp_start, and finally rsp->completed.  These values
	 * are updated in the opposite order with memory barriers (or
	 * equivalent) during grace-period initialization and cleanup.
	 * Now, a false positive can occur if we get a new value of
	 * rsp->gp_start and an old value of rsp->jiffies_stall.  But given
	 * the memory barriers, the only way that this can happen is if one
	 * grace period ends and another starts between these two fetches.
	 * Detect this by comparing rsp->completed with the previous fetch
	 * from rsp->gpnum.
	 *
	 * Given this check, comparisons of jiffies, rsp->jiffies_stall,
	 * and rsp->gp_start suffice to forestall false positives.
	 */
	gpnum = READ_ONCE(rsp->gpnum);
	smp_rmb(); /* Pick up ->gpnum first... */
	js = READ_ONCE(rsp->jiffies_stall);
	smp_rmb(); /* ...then ->jiffies_stall before the rest... */
	gps = READ_ONCE(rsp->gp_start);
	smp_rmb(); /* ...and finally ->gp_start before ->completed. */
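	/*
	 * For example (illustrative numbers only): if a grace period
	 * completes concurrently, leaving rsp->completed caught up with
	 * the gpnum sampled above, the ULONG_CMP_GE(completed, gpnum)
	 * test below rejects the sample rather than reporting a stall;
	 * ULONG_CMP_LT(j, js) likewise rejects samples taken before the
	 * recorded stall deadline.
	 */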
McKenney completed = READ_ONCE(rsp->completed); 13494102adabSPaul E. McKenney if (ULONG_CMP_GE(completed, gpnum) || 13504102adabSPaul E. McKenney ULONG_CMP_LT(j, js) || 13514102adabSPaul E. McKenney ULONG_CMP_GE(gps, js)) 13524102adabSPaul E. McKenney return; /* No stall or GP completed since entering function. */ 13534102adabSPaul E. McKenney rnp = rdp->mynode; 13544102adabSPaul E. McKenney if (rcu_gp_in_progress(rsp) && 13557d0ae808SPaul E. McKenney (READ_ONCE(rnp->qsmask) & rdp->grpmask)) { 13564102adabSPaul E. McKenney 13574102adabSPaul E. McKenney /* We haven't checked in, so go dump stack. */ 13584102adabSPaul E. McKenney print_cpu_stall(rsp); 13594102adabSPaul E. McKenney 13604102adabSPaul E. McKenney } else if (rcu_gp_in_progress(rsp) && 13614102adabSPaul E. McKenney ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { 13624102adabSPaul E. McKenney 13634102adabSPaul E. McKenney /* They had a few time units to dump stack, so complain. */ 13646ccd2ecdSPaul E. McKenney print_other_cpu_stall(rsp, gpnum); 13654102adabSPaul E. McKenney } 13664102adabSPaul E. McKenney } 13674102adabSPaul E. McKenney 13684102adabSPaul E. McKenney /** 13694102adabSPaul E. McKenney * rcu_cpu_stall_reset - prevent further stall warnings in current grace period 13704102adabSPaul E. McKenney * 13714102adabSPaul E. McKenney * Set the stall-warning timeout way off into the future, thus preventing 13724102adabSPaul E. McKenney * any RCU CPU stall-warning messages from appearing in the current set of 13734102adabSPaul E. McKenney * RCU grace periods. 13744102adabSPaul E. McKenney * 13754102adabSPaul E. McKenney * The caller must disable hard irqs. 13764102adabSPaul E. McKenney */ 13774102adabSPaul E. McKenney void rcu_cpu_stall_reset(void) 13784102adabSPaul E. McKenney { 13794102adabSPaul E. McKenney struct rcu_state *rsp; 13804102adabSPaul E. McKenney 13814102adabSPaul E. McKenney for_each_rcu_flavor(rsp) 13827d0ae808SPaul E. McKenney WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2); 13834102adabSPaul E. McKenney } 13844102adabSPaul E. McKenney 13854102adabSPaul E. McKenney /* 1386d3f3f3f2SPaul E. McKenney * Initialize the specified rcu_data structure's default callback list 1387d3f3f3f2SPaul E. McKenney * to empty. The default callback list is the one that is not used by 1388d3f3f3f2SPaul E. McKenney * no-callbacks CPUs. 1389d3f3f3f2SPaul E. McKenney */ 1390d3f3f3f2SPaul E. McKenney static void init_default_callback_list(struct rcu_data *rdp) 1391d3f3f3f2SPaul E. McKenney { 1392d3f3f3f2SPaul E. McKenney int i; 1393d3f3f3f2SPaul E. McKenney 1394d3f3f3f2SPaul E. McKenney rdp->nxtlist = NULL; 1395d3f3f3f2SPaul E. McKenney for (i = 0; i < RCU_NEXT_SIZE; i++) 1396d3f3f3f2SPaul E. McKenney rdp->nxttail[i] = &rdp->nxtlist; 1397d3f3f3f2SPaul E. McKenney } 1398d3f3f3f2SPaul E. McKenney 1399d3f3f3f2SPaul E. McKenney /* 14004102adabSPaul E. McKenney * Initialize the specified rcu_data structure's callback list to empty. 14014102adabSPaul E. McKenney */ 14024102adabSPaul E. McKenney static void init_callback_list(struct rcu_data *rdp) 14034102adabSPaul E. McKenney { 14044102adabSPaul E. McKenney if (init_nocb_callback_list(rdp)) 14054102adabSPaul E. McKenney return; 1406d3f3f3f2SPaul E. McKenney init_default_callback_list(rdp); 14074102adabSPaul E. McKenney } 14084102adabSPaul E. McKenney 14094102adabSPaul E. McKenney /* 14104102adabSPaul E. McKenney * Determine the value that ->completed will have at the end of the 14114102adabSPaul E. McKenney * next subsequent grace period. 
This is used to tag callbacks so that 14124102adabSPaul E. McKenney * a CPU can invoke callbacks in a timely fashion even if that CPU has 14134102adabSPaul E. McKenney * been dyntick-idle for an extended period with callbacks under the 14144102adabSPaul E. McKenney * influence of RCU_FAST_NO_HZ. 14154102adabSPaul E. McKenney * 14164102adabSPaul E. McKenney * The caller must hold rnp->lock with interrupts disabled. 14174102adabSPaul E. McKenney */ 14184102adabSPaul E. McKenney static unsigned long rcu_cbs_completed(struct rcu_state *rsp, 14194102adabSPaul E. McKenney struct rcu_node *rnp) 14204102adabSPaul E. McKenney { 14214102adabSPaul E. McKenney /* 14224102adabSPaul E. McKenney * If RCU is idle, we just wait for the next grace period. 14234102adabSPaul E. McKenney * But we can only be sure that RCU is idle if we are looking 14244102adabSPaul E. McKenney * at the root rcu_node structure -- otherwise, a new grace 14254102adabSPaul E. McKenney * period might have started, but just not yet gotten around 14264102adabSPaul E. McKenney * to initializing the current non-root rcu_node structure. 14274102adabSPaul E. McKenney */ 14284102adabSPaul E. McKenney if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed) 14294102adabSPaul E. McKenney return rnp->completed + 1; 14304102adabSPaul E. McKenney 14314102adabSPaul E. McKenney /* 14324102adabSPaul E. McKenney * Otherwise, wait for a possible partial grace period and 14334102adabSPaul E. McKenney * then the subsequent full grace period. 14344102adabSPaul E. McKenney */ 14354102adabSPaul E. McKenney return rnp->completed + 2; 14364102adabSPaul E. McKenney } 14374102adabSPaul E. McKenney 14384102adabSPaul E. McKenney /* 14394102adabSPaul E. McKenney * Trace-event helper function for rcu_start_future_gp() and 14404102adabSPaul E. McKenney * rcu_nocb_wait_gp(). 14414102adabSPaul E. McKenney */ 14424102adabSPaul E. McKenney static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, 14434102adabSPaul E. McKenney unsigned long c, const char *s) 14444102adabSPaul E. McKenney { 14454102adabSPaul E. McKenney trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, 14464102adabSPaul E. McKenney rnp->completed, c, rnp->level, 14474102adabSPaul E. McKenney rnp->grplo, rnp->grphi, s); 14484102adabSPaul E. McKenney } 14494102adabSPaul E. McKenney 14504102adabSPaul E. McKenney /* 14514102adabSPaul E. McKenney * Start some future grace period, as needed to handle newly arrived 14524102adabSPaul E. McKenney * callbacks. The required future grace periods are recorded in each 145348a7639cSPaul E. McKenney * rcu_node structure's ->need_future_gp field. Returns true if there 145448a7639cSPaul E. McKenney * is reason to awaken the grace-period kthread. 14554102adabSPaul E. McKenney * 14564102adabSPaul E. McKenney * The caller must hold the specified rcu_node structure's ->lock. 14574102adabSPaul E. McKenney */ 145848a7639cSPaul E. McKenney static bool __maybe_unused 145948a7639cSPaul E. McKenney rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, 146048a7639cSPaul E. McKenney unsigned long *c_out) 14614102adabSPaul E. McKenney { 14624102adabSPaul E. McKenney unsigned long c; 14634102adabSPaul E. McKenney int i; 146448a7639cSPaul E. McKenney bool ret = false; 14654102adabSPaul E. McKenney struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 14664102adabSPaul E. McKenney 14674102adabSPaul E. McKenney /* 14684102adabSPaul E. McKenney * Pick up grace-period number for new callbacks. If this 14694102adabSPaul E. 
	 * grace period is already marked as needed, return to the caller.
	 */
	c = rcu_cbs_completed(rdp->rsp, rnp);
	trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
	if (rnp->need_future_gp[c & 0x1]) {
		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
		goto out;
	}

	/*
	 * If either this rcu_node structure or the root rcu_node structure
	 * believes that a grace period is in progress, then we must wait
	 * for the one following, which is in "c".  Because our request
	 * will be noticed at the end of the current grace period, we don't
	 * need to explicitly start one.  We only do the lockless check
	 * of rnp_root's fields if the current rcu_node structure thinks
	 * there is no grace period in flight, and because we hold rnp->lock,
	 * the only possible change is when rnp_root's two fields are
	 * equal, in which case rnp_root->gpnum might be concurrently
	 * incremented.  But that is OK, as it will just result in our
	 * doing some extra useless work.
	 */
	if (rnp->gpnum != rnp->completed ||
	    READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) {
		rnp->need_future_gp[c & 0x1]++;
		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
		goto out;
	}

	/*
	 * There might be no grace period in progress.  If we don't already
	 * hold it, acquire the root rcu_node structure's lock in order to
	 * start one (if needed).
	 */
	if (rnp != rnp_root) {
		raw_spin_lock(&rnp_root->lock);
		smp_mb__after_unlock_lock();
	}

	/*
	 * Get a new grace-period number.  If there really is no grace
	 * period in progress, it will be smaller than the one we obtained
	 * earlier.  Adjust callbacks as needed.  Note that even no-CBs
	 * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
	 */
	c = rcu_cbs_completed(rdp->rsp, rnp_root);
	for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
		if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
			rdp->nxtcompleted[i] = c;

	/*
	 * If the need for the required grace period is already
	 * recorded, trace and leave.
	 */
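	/*
	 * Worked example (illustrative only): need_future_gp[] has just
	 * two slots, indexed by the low-order bit of the awaited
	 * ->completed value "c".  Because rcu_cbs_completed() only ever
	 * returns a value one or two beyond the current ->completed count,
	 * at most two adjacent future grace periods can be requested at
	 * once, and "c & 0x1" is enough to give each its own slot.
	 */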
McKenney if (rnp_root->need_future_gp[c & 0x1]) { 15244102adabSPaul E. McKenney trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot")); 15254102adabSPaul E. McKenney goto unlock_out; 15264102adabSPaul E. McKenney } 15274102adabSPaul E. McKenney 15284102adabSPaul E. McKenney /* Record the need for the future grace period. */ 15294102adabSPaul E. McKenney rnp_root->need_future_gp[c & 0x1]++; 15304102adabSPaul E. McKenney 15314102adabSPaul E. McKenney /* If a grace period is not already in progress, start one. */ 15324102adabSPaul E. McKenney if (rnp_root->gpnum != rnp_root->completed) { 15334102adabSPaul E. McKenney trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot")); 15344102adabSPaul E. McKenney } else { 15354102adabSPaul E. McKenney trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot")); 153648a7639cSPaul E. McKenney ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); 15374102adabSPaul E. McKenney } 15384102adabSPaul E. McKenney unlock_out: 15394102adabSPaul E. McKenney if (rnp != rnp_root) 15404102adabSPaul E. McKenney raw_spin_unlock(&rnp_root->lock); 154148a7639cSPaul E. McKenney out: 154248a7639cSPaul E. McKenney if (c_out != NULL) 154348a7639cSPaul E. McKenney *c_out = c; 154448a7639cSPaul E. McKenney return ret; 15454102adabSPaul E. McKenney } 15464102adabSPaul E. McKenney 15474102adabSPaul E. McKenney /* 15484102adabSPaul E. McKenney * Clean up any old requests for the just-ended grace period. Also return 15494102adabSPaul E. McKenney * whether any additional grace periods have been requested. Also invoke 15504102adabSPaul E. McKenney * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads 15514102adabSPaul E. McKenney * waiting for this grace period to complete. 15524102adabSPaul E. McKenney */ 15534102adabSPaul E. McKenney static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 15544102adabSPaul E. McKenney { 15554102adabSPaul E. McKenney int c = rnp->completed; 15564102adabSPaul E. McKenney int needmore; 15574102adabSPaul E. McKenney struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 15584102adabSPaul E. McKenney 15594102adabSPaul E. McKenney rcu_nocb_gp_cleanup(rsp, rnp); 15604102adabSPaul E. McKenney rnp->need_future_gp[c & 0x1] = 0; 15614102adabSPaul E. McKenney needmore = rnp->need_future_gp[(c + 1) & 0x1]; 15624102adabSPaul E. McKenney trace_rcu_future_gp(rnp, rdp, c, 15634102adabSPaul E. McKenney needmore ? TPS("CleanupMore") : TPS("Cleanup")); 15644102adabSPaul E. McKenney return needmore; 15654102adabSPaul E. McKenney } 15664102adabSPaul E. McKenney 15674102adabSPaul E. McKenney /* 156848a7639cSPaul E. McKenney * Awaken the grace-period kthread for the specified flavor of RCU. 156948a7639cSPaul E. McKenney * Don't do a self-awaken, and don't bother awakening when there is 157048a7639cSPaul E. McKenney * nothing for the grace-period kthread to do (as in several CPUs 157148a7639cSPaul E. McKenney * raced to awaken, and we lost), and finally don't try to awaken 157248a7639cSPaul E. McKenney * a kthread that has not yet been created. 157348a7639cSPaul E. McKenney */ 157448a7639cSPaul E. McKenney static void rcu_gp_kthread_wake(struct rcu_state *rsp) 157548a7639cSPaul E. McKenney { 157648a7639cSPaul E. McKenney if (current == rsp->gp_kthread || 15777d0ae808SPaul E. McKenney !READ_ONCE(rsp->gp_flags) || 157848a7639cSPaul E. McKenney !rsp->gp_kthread) 157948a7639cSPaul E. McKenney return; 158048a7639cSPaul E. McKenney wake_up(&rsp->gp_wq); 158148a7639cSPaul E. McKenney } 158248a7639cSPaul E. McKenney 158348a7639cSPaul E. 
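/*
 * Illustrative sketch, not part of tree.c: a stand-alone demonstration of
 * the wrap-safe counter comparisons (ULONG_CMP_GE()/ULONG_CMP_LT()) used
 * throughout this file for ->gpnum, ->completed, and jiffies arithmetic.
 * The DEMO_* macros are assumed to mirror the kernel's definitions
 * (unsigned subtraction compared against ULONG_MAX / 2); all demo names
 * are hypothetical.
 */
#include <limits.h>
#include <stdio.h>

#define DEMO_ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
#define DEMO_ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long old = ULONG_MAX - 1;	/* Counter about to wrap. */
	unsigned long new = old + 3;		/* Wraps around to 1. */

	/* Although new < old numerically, new is "after" old modulo wrap. */
	printf("GE(new, old) = %d\n", DEMO_ULONG_CMP_GE(new, old));	/* 1 */
	printf("LT(old, new) = %d\n", DEMO_ULONG_CMP_LT(old, new));	/* 1 */
	return 0;
}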
/*
 * If there is room, assign a ->completed number to any callbacks on
 * this CPU that have not already been assigned.  Also accelerate any
 * callbacks that were previously assigned a ->completed number that has
 * since proven to be too conservative, which can happen if callbacks get
 * assigned a ->completed number while RCU is idle, but with reference to
 * a non-root rcu_node structure.  This function is idempotent, so it does
 * not hurt to call it repeatedly.  Returns a flag saying that we should
 * awaken the RCU grace-period kthread.
 *
 * The caller must hold rnp->lock with interrupts disabled.
 */
static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
			       struct rcu_data *rdp)
{
	unsigned long c;
	int i;
	bool ret;

	/* If the CPU has no callbacks, nothing to do. */
	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
		return false;

	/*
	 * Starting from the sublist containing the callbacks most
	 * recently assigned a ->completed number and working down, find the
	 * first sublist that is not assignable to an upcoming grace period.
	 * Such a sublist has something in it (first two tests) and has
	 * a ->completed number assigned that will complete sooner than
	 * the ->completed number for newly arrived callbacks (last test).
	 *
	 * The key point is that any later sublist can be assigned the
	 * same ->completed number as the newly arrived callbacks, which
	 * means that the callbacks in any of these later sublists can be
	 * grouped into a single sublist, whether or not they have already
	 * been assigned a ->completed number.
	 */
	c = rcu_cbs_completed(rsp, rnp);
	for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
		if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
		    !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
			break;

	/*
	 * If there is no sublist for unassigned callbacks, leave.
	 * At the same time, advance "i" one sublist, so that "i" will
	 * index into the sublist where all the remaining callbacks should
	 * be grouped into.
	 */
	if (++i >= RCU_NEXT_TAIL)
		return false;
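	/*
	 * Worked example (illustrative only): with the DONE, WAIT,
	 * NEXT_READY, and NEXT sublists, suppose WAIT is non-empty and
	 * already tagged with a ->completed value earlier than "c", while
	 * NEXT_READY is empty or already tagged with "c" or later.  The
	 * scan above stops at WAIT, the increment leaves "i" indexing
	 * NEXT_READY, and the loop below merges NEXT_READY and NEXT into
	 * a single sublist tagged "c".
	 */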
McKenney /* 16364102adabSPaul E. McKenney * Assign all subsequent callbacks' ->completed number to the next 16374102adabSPaul E. McKenney * full grace period and group them all in the sublist initially 16384102adabSPaul E. McKenney * indexed by "i". 16394102adabSPaul E. McKenney */ 16404102adabSPaul E. McKenney for (; i <= RCU_NEXT_TAIL; i++) { 16414102adabSPaul E. McKenney rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL]; 16424102adabSPaul E. McKenney rdp->nxtcompleted[i] = c; 16434102adabSPaul E. McKenney } 16444102adabSPaul E. McKenney /* Record any needed additional grace periods. */ 164548a7639cSPaul E. McKenney ret = rcu_start_future_gp(rnp, rdp, NULL); 16464102adabSPaul E. McKenney 16474102adabSPaul E. McKenney /* Trace depending on how much we were able to accelerate. */ 16484102adabSPaul E. McKenney if (!*rdp->nxttail[RCU_WAIT_TAIL]) 16494102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); 16504102adabSPaul E. McKenney else 16514102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); 165248a7639cSPaul E. McKenney return ret; 16534102adabSPaul E. McKenney } 16544102adabSPaul E. McKenney 16554102adabSPaul E. McKenney /* 16564102adabSPaul E. McKenney * Move any callbacks whose grace period has completed to the 16574102adabSPaul E. McKenney * RCU_DONE_TAIL sublist, then compact the remaining sublists and 16584102adabSPaul E. McKenney * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL 16594102adabSPaul E. McKenney * sublist. This function is idempotent, so it does not hurt to 16604102adabSPaul E. McKenney * invoke it repeatedly. As long as it is not invoked -too- often... 166148a7639cSPaul E. McKenney * Returns true if the RCU grace-period kthread needs to be awakened. 16624102adabSPaul E. McKenney * 16634102adabSPaul E. McKenney * The caller must hold rnp->lock with interrupts disabled. 16644102adabSPaul E. McKenney */ 166548a7639cSPaul E. McKenney static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 16664102adabSPaul E. McKenney struct rcu_data *rdp) 16674102adabSPaul E. McKenney { 16684102adabSPaul E. McKenney int i, j; 16694102adabSPaul E. McKenney 16704102adabSPaul E. McKenney /* If the CPU has no callbacks, nothing to do. */ 16714102adabSPaul E. McKenney if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) 167248a7639cSPaul E. McKenney return false; 16734102adabSPaul E. McKenney 16744102adabSPaul E. McKenney /* 16754102adabSPaul E. McKenney * Find all callbacks whose ->completed numbers indicate that they 16764102adabSPaul E. McKenney * are ready to invoke, and put them into the RCU_DONE_TAIL sublist. 16774102adabSPaul E. McKenney */ 16784102adabSPaul E. McKenney for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { 16794102adabSPaul E. McKenney if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i])) 16804102adabSPaul E. McKenney break; 16814102adabSPaul E. McKenney rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i]; 16824102adabSPaul E. McKenney } 16834102adabSPaul E. McKenney /* Clean up any sublist tail pointers that were misordered above. */ 16844102adabSPaul E. McKenney for (j = RCU_WAIT_TAIL; j < i; j++) 16854102adabSPaul E. McKenney rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL]; 16864102adabSPaul E. McKenney 16874102adabSPaul E. McKenney /* Copy down callbacks to fill in empty sublists. */ 16884102adabSPaul E. McKenney for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { 16894102adabSPaul E. 
McKenney if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL]) 16904102adabSPaul E. McKenney break; 16914102adabSPaul E. McKenney rdp->nxttail[j] = rdp->nxttail[i]; 16924102adabSPaul E. McKenney rdp->nxtcompleted[j] = rdp->nxtcompleted[i]; 16934102adabSPaul E. McKenney } 16944102adabSPaul E. McKenney 16954102adabSPaul E. McKenney /* Classify any remaining callbacks. */ 169648a7639cSPaul E. McKenney return rcu_accelerate_cbs(rsp, rnp, rdp); 16974102adabSPaul E. McKenney } 16984102adabSPaul E. McKenney 16994102adabSPaul E. McKenney /* 17004102adabSPaul E. McKenney * Update CPU-local rcu_data state to record the beginnings and ends of 17014102adabSPaul E. McKenney * grace periods. The caller must hold the ->lock of the leaf rcu_node 17024102adabSPaul E. McKenney * structure corresponding to the current CPU, and must have irqs disabled. 170348a7639cSPaul E. McKenney * Returns true if the grace-period kthread needs to be awakened. 17044102adabSPaul E. McKenney */ 170548a7639cSPaul E. McKenney static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, 170648a7639cSPaul E. McKenney struct rcu_data *rdp) 17074102adabSPaul E. McKenney { 170848a7639cSPaul E. McKenney bool ret; 170948a7639cSPaul E. McKenney 17104102adabSPaul E. McKenney /* Handle the ends of any preceding grace periods first. */ 1711e3663b10SPaul E. McKenney if (rdp->completed == rnp->completed && 17127d0ae808SPaul E. McKenney !unlikely(READ_ONCE(rdp->gpwrap))) { 17134102adabSPaul E. McKenney 17144102adabSPaul E. McKenney /* No grace period end, so just accelerate recent callbacks. */ 171548a7639cSPaul E. McKenney ret = rcu_accelerate_cbs(rsp, rnp, rdp); 17164102adabSPaul E. McKenney 17174102adabSPaul E. McKenney } else { 17184102adabSPaul E. McKenney 17194102adabSPaul E. McKenney /* Advance callbacks. */ 172048a7639cSPaul E. McKenney ret = rcu_advance_cbs(rsp, rnp, rdp); 17214102adabSPaul E. McKenney 17224102adabSPaul E. McKenney /* Remember that we saw this grace-period completion. */ 17234102adabSPaul E. McKenney rdp->completed = rnp->completed; 17244102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); 17254102adabSPaul E. McKenney } 17264102adabSPaul E. McKenney 17277d0ae808SPaul E. McKenney if (rdp->gpnum != rnp->gpnum || unlikely(READ_ONCE(rdp->gpwrap))) { 17284102adabSPaul E. McKenney /* 17294102adabSPaul E. McKenney * If the current grace period is waiting for this CPU, 17304102adabSPaul E. McKenney * set up to detect a quiescent state, otherwise don't 17314102adabSPaul E. McKenney * go looking for one. 17324102adabSPaul E. McKenney */ 17334102adabSPaul E. McKenney rdp->gpnum = rnp->gpnum; 17344102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); 17354102adabSPaul E. McKenney rdp->passed_quiesce = 0; 17365cd37193SPaul E. McKenney rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); 17374102adabSPaul E. McKenney rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 17384102adabSPaul E. McKenney zero_cpu_stall_ticks(rdp); 17397d0ae808SPaul E. McKenney WRITE_ONCE(rdp->gpwrap, false); 17404102adabSPaul E. McKenney } 174148a7639cSPaul E. McKenney return ret; 17424102adabSPaul E. McKenney } 17434102adabSPaul E. McKenney 17444102adabSPaul E. McKenney static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) 17454102adabSPaul E. McKenney { 17464102adabSPaul E. McKenney unsigned long flags; 174748a7639cSPaul E. McKenney bool needwake; 17484102adabSPaul E. McKenney struct rcu_node *rnp; 17494102adabSPaul E. McKenney 17504102adabSPaul E. 
McKenney local_irq_save(flags); 17514102adabSPaul E. McKenney rnp = rdp->mynode; 17527d0ae808SPaul E. McKenney if ((rdp->gpnum == READ_ONCE(rnp->gpnum) && 17537d0ae808SPaul E. McKenney rdp->completed == READ_ONCE(rnp->completed) && 17547d0ae808SPaul E. McKenney !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */ 17554102adabSPaul E. McKenney !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 17564102adabSPaul E. McKenney local_irq_restore(flags); 17574102adabSPaul E. McKenney return; 17584102adabSPaul E. McKenney } 17596303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 176048a7639cSPaul E. McKenney needwake = __note_gp_changes(rsp, rnp, rdp); 17614102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 176248a7639cSPaul E. McKenney if (needwake) 176348a7639cSPaul E. McKenney rcu_gp_kthread_wake(rsp); 17644102adabSPaul E. McKenney } 17654102adabSPaul E. McKenney 17660f41c0ddSPaul E. McKenney static void rcu_gp_slow(struct rcu_state *rsp, int delay) 17670f41c0ddSPaul E. McKenney { 17680f41c0ddSPaul E. McKenney if (delay > 0 && 17690f41c0ddSPaul E. McKenney !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) 17700f41c0ddSPaul E. McKenney schedule_timeout_uninterruptible(delay); 17710f41c0ddSPaul E. McKenney } 17720f41c0ddSPaul E. McKenney 17734102adabSPaul E. McKenney /* 17744102adabSPaul E. McKenney * Initialize a new grace period. Return 0 if no grace period required. 17754102adabSPaul E. McKenney */ 17764102adabSPaul E. McKenney static int rcu_gp_init(struct rcu_state *rsp) 17774102adabSPaul E. McKenney { 17780aa04b05SPaul E. McKenney unsigned long oldmask; 17794102adabSPaul E. McKenney struct rcu_data *rdp; 17804102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 17814102adabSPaul E. McKenney 17827d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_activity, jiffies); 17834102adabSPaul E. McKenney raw_spin_lock_irq(&rnp->lock); 17846303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 17857d0ae808SPaul E. McKenney if (!READ_ONCE(rsp->gp_flags)) { 17864102adabSPaul E. McKenney /* Spurious wakeup, tell caller to go back to sleep. */ 17874102adabSPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 17884102adabSPaul E. McKenney return 0; 17894102adabSPaul E. McKenney } 17907d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */ 17914102adabSPaul E. McKenney 17924102adabSPaul E. McKenney if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { 17934102adabSPaul E. McKenney /* 17944102adabSPaul E. McKenney * Grace period already in progress, don't start another. 17954102adabSPaul E. McKenney * Not supposed to be able to happen. 17964102adabSPaul E. McKenney */ 17974102adabSPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 17984102adabSPaul E. McKenney return 0; 17994102adabSPaul E. McKenney } 18004102adabSPaul E. McKenney 18014102adabSPaul E. McKenney /* Advance to a new grace period and initialize state. */ 18024102adabSPaul E. McKenney record_gp_stall_check_time(rsp); 1803765a3f4fSPaul E. McKenney /* Record GP times before starting GP, hence smp_store_release(). */ 1804765a3f4fSPaul E. McKenney smp_store_release(&rsp->gpnum, rsp->gpnum + 1); 18054102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); 18064102adabSPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 18074102adabSPaul E. McKenney 18084102adabSPaul E. McKenney /* 18090aa04b05SPaul E. McKenney * Apply per-leaf buffered online and offline operations to the 18100aa04b05SPaul E. McKenney * rcu_node tree. 
Note that this new grace period need not wait 18110aa04b05SPaul E. McKenney * for subsequent online CPUs, and that quiescent-state forcing 18120aa04b05SPaul E. McKenney * will handle subsequent offline CPUs. 18130aa04b05SPaul E. McKenney */ 18140aa04b05SPaul E. McKenney rcu_for_each_leaf_node(rsp, rnp) { 18150f41c0ddSPaul E. McKenney rcu_gp_slow(rsp, gp_preinit_delay); 18160aa04b05SPaul E. McKenney raw_spin_lock_irq(&rnp->lock); 18170aa04b05SPaul E. McKenney smp_mb__after_unlock_lock(); 18180aa04b05SPaul E. McKenney if (rnp->qsmaskinit == rnp->qsmaskinitnext && 18190aa04b05SPaul E. McKenney !rnp->wait_blkd_tasks) { 18200aa04b05SPaul E. McKenney /* Nothing to do on this leaf rcu_node structure. */ 18210aa04b05SPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 18220aa04b05SPaul E. McKenney continue; 18230aa04b05SPaul E. McKenney } 18240aa04b05SPaul E. McKenney 18250aa04b05SPaul E. McKenney /* Record old state, apply changes to ->qsmaskinit field. */ 18260aa04b05SPaul E. McKenney oldmask = rnp->qsmaskinit; 18270aa04b05SPaul E. McKenney rnp->qsmaskinit = rnp->qsmaskinitnext; 18280aa04b05SPaul E. McKenney 18290aa04b05SPaul E. McKenney /* If zero-ness of ->qsmaskinit changed, propagate up tree. */ 18300aa04b05SPaul E. McKenney if (!oldmask != !rnp->qsmaskinit) { 18310aa04b05SPaul E. McKenney if (!oldmask) /* First online CPU for this rcu_node. */ 18320aa04b05SPaul E. McKenney rcu_init_new_rnp(rnp); 18330aa04b05SPaul E. McKenney else if (rcu_preempt_has_tasks(rnp)) /* blocked tasks */ 18340aa04b05SPaul E. McKenney rnp->wait_blkd_tasks = true; 18350aa04b05SPaul E. McKenney else /* Last offline CPU and can propagate. */ 18360aa04b05SPaul E. McKenney rcu_cleanup_dead_rnp(rnp); 18370aa04b05SPaul E. McKenney } 18380aa04b05SPaul E. McKenney 18390aa04b05SPaul E. McKenney /* 18400aa04b05SPaul E. McKenney * If all waited-on tasks from prior grace period are 18410aa04b05SPaul E. McKenney * done, and if all this rcu_node structure's CPUs are 18420aa04b05SPaul E. McKenney * still offline, propagate up the rcu_node tree and 18430aa04b05SPaul E. McKenney * clear ->wait_blkd_tasks. Otherwise, if one of this 18440aa04b05SPaul E. McKenney * rcu_node structure's CPUs has since come back online, 18450aa04b05SPaul E. McKenney * simply clear ->wait_blkd_tasks (but rcu_cleanup_dead_rnp() 18460aa04b05SPaul E. McKenney * checks for this, so just call it unconditionally). 18470aa04b05SPaul E. McKenney */ 18480aa04b05SPaul E. McKenney if (rnp->wait_blkd_tasks && 18490aa04b05SPaul E. McKenney (!rcu_preempt_has_tasks(rnp) || 18500aa04b05SPaul E. McKenney rnp->qsmaskinit)) { 18510aa04b05SPaul E. McKenney rnp->wait_blkd_tasks = false; 18520aa04b05SPaul E. McKenney rcu_cleanup_dead_rnp(rnp); 18530aa04b05SPaul E. McKenney } 18540aa04b05SPaul E. McKenney 18550aa04b05SPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 18560aa04b05SPaul E. McKenney } 18574102adabSPaul E. McKenney 18584102adabSPaul E. McKenney /* 18594102adabSPaul E. McKenney * Set the quiescent-state-needed bits in all the rcu_node 18604102adabSPaul E. McKenney * structures for all currently online CPUs in breadth-first order, 18614102adabSPaul E. McKenney * starting from the root rcu_node structure, relying on the layout 18624102adabSPaul E. McKenney * of the tree within the rsp->node[] array. Note that other CPUs 18634102adabSPaul E. McKenney * will access only the leaves of the hierarchy, thus seeing that no 18644102adabSPaul E. McKenney * grace period is in progress, at least until the corresponding 18654102adabSPaul E. 
McKenney * leaf node has been initialized. In addition, we have excluded 18664102adabSPaul E. McKenney * CPU-hotplug operations. 18674102adabSPaul E. McKenney * 18684102adabSPaul E. McKenney * The grace period cannot complete until the initialization 18694102adabSPaul E. McKenney * process finishes, because this kthread handles both. 18704102adabSPaul E. McKenney */ 18714102adabSPaul E. McKenney rcu_for_each_node_breadth_first(rsp, rnp) { 18720f41c0ddSPaul E. McKenney rcu_gp_slow(rsp, gp_init_delay); 18734102adabSPaul E. McKenney raw_spin_lock_irq(&rnp->lock); 18746303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 18754102adabSPaul E. McKenney rdp = this_cpu_ptr(rsp->rda); 18764102adabSPaul E. McKenney rcu_preempt_check_blocked_tasks(rnp); 18774102adabSPaul E. McKenney rnp->qsmask = rnp->qsmaskinit; 18787d0ae808SPaul E. McKenney WRITE_ONCE(rnp->gpnum, rsp->gpnum); 18793f47da0fSLai Jiangshan if (WARN_ON_ONCE(rnp->completed != rsp->completed)) 18807d0ae808SPaul E. McKenney WRITE_ONCE(rnp->completed, rsp->completed); 18814102adabSPaul E. McKenney if (rnp == rdp->mynode) 188248a7639cSPaul E. McKenney (void)__note_gp_changes(rsp, rnp, rdp); 18834102adabSPaul E. McKenney rcu_preempt_boost_start_gp(rnp); 18844102adabSPaul E. McKenney trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 18854102adabSPaul E. McKenney rnp->level, rnp->grplo, 18864102adabSPaul E. McKenney rnp->grphi, rnp->qsmask); 18874102adabSPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 1888bde6c3aaSPaul E. McKenney cond_resched_rcu_qs(); 18897d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_activity, jiffies); 18904102adabSPaul E. McKenney } 18914102adabSPaul E. McKenney 18924102adabSPaul E. McKenney return 1; 18934102adabSPaul E. McKenney } 18944102adabSPaul E. McKenney 18954102adabSPaul E. McKenney /* 18964102adabSPaul E. McKenney * Do one round of quiescent-state forcing. 18974102adabSPaul E. McKenney */ 18984102adabSPaul E. McKenney static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) 18994102adabSPaul E. McKenney { 19004102adabSPaul E. McKenney int fqs_state = fqs_state_in; 19014102adabSPaul E. McKenney bool isidle = false; 19024102adabSPaul E. McKenney unsigned long maxj; 19034102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 19044102adabSPaul E. McKenney 19057d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_activity, jiffies); 19064102adabSPaul E. McKenney rsp->n_force_qs++; 19074102adabSPaul E. McKenney if (fqs_state == RCU_SAVE_DYNTICK) { 19084102adabSPaul E. McKenney /* Collect dyntick-idle snapshots. */ 19094102adabSPaul E. McKenney if (is_sysidle_rcu_state(rsp)) { 1910e02b2edfSPranith Kumar isidle = true; 19114102adabSPaul E. McKenney maxj = jiffies - ULONG_MAX / 4; 19124102adabSPaul E. McKenney } 19134102adabSPaul E. McKenney force_qs_rnp(rsp, dyntick_save_progress_counter, 19144102adabSPaul E. McKenney &isidle, &maxj); 19154102adabSPaul E. McKenney rcu_sysidle_report_gp(rsp, isidle, maxj); 19164102adabSPaul E. McKenney fqs_state = RCU_FORCE_QS; 19174102adabSPaul E. McKenney } else { 19184102adabSPaul E. McKenney /* Handle dyntick-idle and offline CPUs. */ 1919675da67fSPaul E. McKenney isidle = true; 19204102adabSPaul E. McKenney force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); 19214102adabSPaul E. McKenney } 19224102adabSPaul E. McKenney /* Clear flag to prevent immediate re-entry. */ 19237d0ae808SPaul E. McKenney if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 19244102adabSPaul E. McKenney raw_spin_lock_irq(&rnp->lock); 19256303b9c8SPaul E. 
McKenney smp_mb__after_unlock_lock(); 19267d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_flags, 19277d0ae808SPaul E. McKenney READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS); 19284102adabSPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 19294102adabSPaul E. McKenney } 19304102adabSPaul E. McKenney return fqs_state; 19314102adabSPaul E. McKenney } 19324102adabSPaul E. McKenney 19334102adabSPaul E. McKenney /* 19344102adabSPaul E. McKenney * Clean up after the old grace period. 19354102adabSPaul E. McKenney */ 19364102adabSPaul E. McKenney static void rcu_gp_cleanup(struct rcu_state *rsp) 19374102adabSPaul E. McKenney { 19384102adabSPaul E. McKenney unsigned long gp_duration; 193948a7639cSPaul E. McKenney bool needgp = false; 19404102adabSPaul E. McKenney int nocb = 0; 19414102adabSPaul E. McKenney struct rcu_data *rdp; 19424102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 19434102adabSPaul E. McKenney 19447d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_activity, jiffies); 19454102adabSPaul E. McKenney raw_spin_lock_irq(&rnp->lock); 19466303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 19474102adabSPaul E. McKenney gp_duration = jiffies - rsp->gp_start; 19484102adabSPaul E. McKenney if (gp_duration > rsp->gp_max) 19494102adabSPaul E. McKenney rsp->gp_max = gp_duration; 19504102adabSPaul E. McKenney 19514102adabSPaul E. McKenney /* 19524102adabSPaul E. McKenney * We know the grace period is complete, but to everyone else 19534102adabSPaul E. McKenney * it appears to still be ongoing. But it is also the case 19544102adabSPaul E. McKenney * that to everyone else it looks like there is nothing that 19554102adabSPaul E. McKenney * they can do to advance the grace period. It is therefore 19564102adabSPaul E. McKenney * safe for us to drop the lock in order to mark the grace 19574102adabSPaul E. McKenney * period as completed in all of the rcu_node structures. 19584102adabSPaul E. McKenney */ 19594102adabSPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 19604102adabSPaul E. McKenney 19614102adabSPaul E. McKenney /* 19624102adabSPaul E. McKenney * Propagate new ->completed value to rcu_node structures so 19634102adabSPaul E. McKenney * that other CPUs don't have to wait until the start of the next 19644102adabSPaul E. McKenney * grace period to process their callbacks. This also avoids 19654102adabSPaul E. McKenney * some nasty RCU grace-period initialization races by forcing 19664102adabSPaul E. McKenney * the end of the current grace period to be completely recorded in 19674102adabSPaul E. McKenney * all of the rcu_node structures before the beginning of the next 19684102adabSPaul E. McKenney * grace period is recorded in any of the rcu_node structures. 19694102adabSPaul E. McKenney */ 19704102adabSPaul E. McKenney rcu_for_each_node_breadth_first(rsp, rnp) { 19714102adabSPaul E. McKenney raw_spin_lock_irq(&rnp->lock); 19726303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 19735c60d25fSPaul E. McKenney WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 19745c60d25fSPaul E. McKenney WARN_ON_ONCE(rnp->qsmask); 19757d0ae808SPaul E. McKenney WRITE_ONCE(rnp->completed, rsp->gpnum); 19764102adabSPaul E. McKenney rdp = this_cpu_ptr(rsp->rda); 19774102adabSPaul E. McKenney if (rnp == rdp->mynode) 197848a7639cSPaul E. McKenney needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; 197978e4bc34SPaul E. McKenney /* smp_mb() provided by prior unlock-lock pair. */ 19804102adabSPaul E. McKenney nocb += rcu_future_gp_cleanup(rsp, rnp); 19814102adabSPaul E. 
McKenney raw_spin_unlock_irq(&rnp->lock); 1982bde6c3aaSPaul E. McKenney cond_resched_rcu_qs(); 19837d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_activity, jiffies); 19840f41c0ddSPaul E. McKenney rcu_gp_slow(rsp, gp_cleanup_delay); 19854102adabSPaul E. McKenney } 19864102adabSPaul E. McKenney rnp = rcu_get_root(rsp); 19874102adabSPaul E. McKenney raw_spin_lock_irq(&rnp->lock); 1988765a3f4fSPaul E. McKenney smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */ 19894102adabSPaul E. McKenney rcu_nocb_gp_set(rnp, nocb); 19904102adabSPaul E. McKenney 1991765a3f4fSPaul E. McKenney /* Declare grace period done. */ 19927d0ae808SPaul E. McKenney WRITE_ONCE(rsp->completed, rsp->gpnum); 19934102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 19944102adabSPaul E. McKenney rsp->fqs_state = RCU_GP_IDLE; 19954102adabSPaul E. McKenney rdp = this_cpu_ptr(rsp->rda); 199648a7639cSPaul E. McKenney /* Advance CBs to reduce false positives below. */ 199748a7639cSPaul E. McKenney needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp; 199848a7639cSPaul E. McKenney if (needgp || cpu_needs_another_gp(rsp, rdp)) { 19997d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); 20004102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, 20017d0ae808SPaul E. McKenney READ_ONCE(rsp->gpnum), 20024102adabSPaul E. McKenney TPS("newreq")); 20034102adabSPaul E. McKenney } 20044102adabSPaul E. McKenney raw_spin_unlock_irq(&rnp->lock); 20054102adabSPaul E. McKenney } 20064102adabSPaul E. McKenney 20074102adabSPaul E. McKenney /* 20084102adabSPaul E. McKenney * Body of kthread that handles grace periods. 20094102adabSPaul E. McKenney */ 20104102adabSPaul E. McKenney static int __noreturn rcu_gp_kthread(void *arg) 20114102adabSPaul E. McKenney { 20124102adabSPaul E. McKenney int fqs_state; 20134102adabSPaul E. McKenney int gf; 20144102adabSPaul E. McKenney unsigned long j; 20154102adabSPaul E. McKenney int ret; 20164102adabSPaul E. McKenney struct rcu_state *rsp = arg; 20174102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 20184102adabSPaul E. McKenney 20195871968dSPaul E. McKenney rcu_bind_gp_kthread(); 20204102adabSPaul E. McKenney for (;;) { 20214102adabSPaul E. McKenney 20224102adabSPaul E. McKenney /* Handle grace-period start. */ 20234102adabSPaul E. McKenney for (;;) { 20244102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, 20257d0ae808SPaul E. McKenney READ_ONCE(rsp->gpnum), 20264102adabSPaul E. McKenney TPS("reqwait")); 2027afea227fSPaul E. McKenney rsp->gp_state = RCU_GP_WAIT_GPS; 20284102adabSPaul E. McKenney wait_event_interruptible(rsp->gp_wq, 20297d0ae808SPaul E. McKenney READ_ONCE(rsp->gp_flags) & 20304102adabSPaul E. McKenney RCU_GP_FLAG_INIT); 203178e4bc34SPaul E. McKenney /* Locking provides needed memory barrier. */ 20324102adabSPaul E. McKenney if (rcu_gp_init(rsp)) 20334102adabSPaul E. McKenney break; 2034bde6c3aaSPaul E. McKenney cond_resched_rcu_qs(); 20357d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_activity, jiffies); 203673a860cdSPaul E. McKenney WARN_ON(signal_pending(current)); 20374102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, 20387d0ae808SPaul E. McKenney READ_ONCE(rsp->gpnum), 20394102adabSPaul E. McKenney TPS("reqwaitsig")); 20404102adabSPaul E. McKenney } 20414102adabSPaul E. McKenney 20424102adabSPaul E. McKenney /* Handle quiescent-state forcing. */ 20434102adabSPaul E. McKenney fqs_state = RCU_SAVE_DYNTICK; 20444102adabSPaul E. McKenney j = jiffies_till_first_fqs; 20454102adabSPaul E. 
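/*
 * Editorial aside (illustrative sketch only, not part of this file):
 * the force-quiescent-state wait loop just below is an instance of a
 * common kthread idiom: sleep until either a request flag is set or a
 * timeout expires, then act accordingly.  A minimal stand-alone version,
 * using hypothetical ex_* names and assuming <linux/wait.h> and
 * <linux/kthread.h>:
 */
#if 0	/* Example only; not built. */
static DECLARE_WAIT_QUEUE_HEAD(ex_wq);
static unsigned long ex_flags;
#define EX_FLAG_WORK	0x1

static int ex_kthread(void *unused)
{
	long ret;

	while (!kthread_should_stop()) {
		/* >0: flag set; 0: timed out; <0: interrupted (ignored). */
		ret = wait_event_interruptible_timeout(ex_wq,
				READ_ONCE(ex_flags) & EX_FLAG_WORK, HZ);
		if (ret > 0)
			WRITE_ONCE(ex_flags,
				   READ_ONCE(ex_flags) & ~EX_FLAG_WORK);
		/* On timeout (ret == 0), poll for overdue work here. */
	}
	return 0;
}

/*
 * A requester sets the flag and wakes the kthread, much as
 * force_quiescent_state() sets RCU_GP_FLAG_FQS and wakes rsp->gp_wq.
 */
static void ex_request_work(void)
{
	WRITE_ONCE(ex_flags, READ_ONCE(ex_flags) | EX_FLAG_WORK);
	wake_up(&ex_wq);
}
#endif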
McKenney if (j > HZ) { 20464102adabSPaul E. McKenney j = HZ; 20474102adabSPaul E. McKenney jiffies_till_first_fqs = HZ; 20484102adabSPaul E. McKenney } 20494102adabSPaul E. McKenney ret = 0; 20504102adabSPaul E. McKenney for (;;) { 20514102adabSPaul E. McKenney if (!ret) 20524102adabSPaul E. McKenney rsp->jiffies_force_qs = jiffies + j; 20534102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, 20547d0ae808SPaul E. McKenney READ_ONCE(rsp->gpnum), 20554102adabSPaul E. McKenney TPS("fqswait")); 2056afea227fSPaul E. McKenney rsp->gp_state = RCU_GP_WAIT_FQS; 20574102adabSPaul E. McKenney ret = wait_event_interruptible_timeout(rsp->gp_wq, 20587d0ae808SPaul E. McKenney ((gf = READ_ONCE(rsp->gp_flags)) & 20594102adabSPaul E. McKenney RCU_GP_FLAG_FQS) || 20607d0ae808SPaul E. McKenney (!READ_ONCE(rnp->qsmask) && 20614102adabSPaul E. McKenney !rcu_preempt_blocked_readers_cgp(rnp)), 20624102adabSPaul E. McKenney j); 206378e4bc34SPaul E. McKenney /* Locking provides needed memory barriers. */ 20644102adabSPaul E. McKenney /* If grace period done, leave loop. */ 20657d0ae808SPaul E. McKenney if (!READ_ONCE(rnp->qsmask) && 20664102adabSPaul E. McKenney !rcu_preempt_blocked_readers_cgp(rnp)) 20674102adabSPaul E. McKenney break; 20684102adabSPaul E. McKenney /* If time for quiescent-state forcing, do it. */ 20694102adabSPaul E. McKenney if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || 20704102adabSPaul E. McKenney (gf & RCU_GP_FLAG_FQS)) { 20714102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, 20727d0ae808SPaul E. McKenney READ_ONCE(rsp->gpnum), 20734102adabSPaul E. McKenney TPS("fqsstart")); 20744102adabSPaul E. McKenney fqs_state = rcu_gp_fqs(rsp, fqs_state); 20754102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, 20767d0ae808SPaul E. McKenney READ_ONCE(rsp->gpnum), 20774102adabSPaul E. McKenney TPS("fqsend")); 2078bde6c3aaSPaul E. McKenney cond_resched_rcu_qs(); 20797d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_activity, jiffies); 20804102adabSPaul E. McKenney } else { 20814102adabSPaul E. McKenney /* Deal with stray signal. */ 2082bde6c3aaSPaul E. McKenney cond_resched_rcu_qs(); 20837d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_activity, jiffies); 208473a860cdSPaul E. McKenney WARN_ON(signal_pending(current)); 20854102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, 20867d0ae808SPaul E. McKenney READ_ONCE(rsp->gpnum), 20874102adabSPaul E. McKenney TPS("fqswaitsig")); 20884102adabSPaul E. McKenney } 20894102adabSPaul E. McKenney j = jiffies_till_next_fqs; 20904102adabSPaul E. McKenney if (j > HZ) { 20914102adabSPaul E. McKenney j = HZ; 20924102adabSPaul E. McKenney jiffies_till_next_fqs = HZ; 20934102adabSPaul E. McKenney } else if (j < 1) { 20944102adabSPaul E. McKenney j = 1; 20954102adabSPaul E. McKenney jiffies_till_next_fqs = 1; 20964102adabSPaul E. McKenney } 20974102adabSPaul E. McKenney } 20984102adabSPaul E. McKenney 20994102adabSPaul E. McKenney /* Handle grace-period end. */ 21004102adabSPaul E. McKenney rcu_gp_cleanup(rsp); 21014102adabSPaul E. McKenney } 21024102adabSPaul E. McKenney } 21034102adabSPaul E. McKenney 21044102adabSPaul E. McKenney /* 21054102adabSPaul E. McKenney * Start a new RCU grace period if warranted, re-initializing the hierarchy 21064102adabSPaul E. McKenney * in preparation for detecting the next grace period. The caller must hold 21074102adabSPaul E. McKenney * the root node's ->lock and hard irqs must be disabled. 21084102adabSPaul E. McKenney * 21094102adabSPaul E. 
McKenney * Note that it is legal for a dying CPU (which is marked as offline) to 21104102adabSPaul E. McKenney * invoke this function. This can happen when the dying CPU reports its 21114102adabSPaul E. McKenney * quiescent state. 211248a7639cSPaul E. McKenney * 211348a7639cSPaul E. McKenney * Returns true if the grace-period kthread must be awakened. 21144102adabSPaul E. McKenney */ 211548a7639cSPaul E. McKenney static bool 21164102adabSPaul E. McKenney rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 21174102adabSPaul E. McKenney struct rcu_data *rdp) 21184102adabSPaul E. McKenney { 21194102adabSPaul E. McKenney if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { 21204102adabSPaul E. McKenney /* 21214102adabSPaul E. McKenney * Either we have not yet spawned the grace-period 21224102adabSPaul E. McKenney * task, this CPU does not need another grace period, 21234102adabSPaul E. McKenney * or a grace period is already in progress. 21244102adabSPaul E. McKenney * Either way, don't start a new grace period. 21254102adabSPaul E. McKenney */ 212648a7639cSPaul E. McKenney return false; 21274102adabSPaul E. McKenney } 21287d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); 21297d0ae808SPaul E. McKenney trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), 21304102adabSPaul E. McKenney TPS("newreq")); 21314102adabSPaul E. McKenney 21324102adabSPaul E. McKenney /* 21334102adabSPaul E. McKenney * We can't do wakeups while holding the rnp->lock, as that 21344102adabSPaul E. McKenney * could cause possible deadlocks with the rq->lock. Defer 213548a7639cSPaul E. McKenney * the wakeup to our caller. 21364102adabSPaul E. McKenney */ 213748a7639cSPaul E. McKenney return true; 21384102adabSPaul E. McKenney } 21394102adabSPaul E. McKenney 21404102adabSPaul E. McKenney /* 21414102adabSPaul E. McKenney * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's 21424102adabSPaul E. McKenney * callbacks. Note that rcu_start_gp_advanced() cannot do this because it 21434102adabSPaul E. McKenney * is invoked indirectly from rcu_advance_cbs(), which would result in 21444102adabSPaul E. McKenney * endless recursion -- or would do so if it wasn't for the self-deadlock 21454102adabSPaul E. McKenney * that is encountered beforehand. 214648a7639cSPaul E. McKenney * 214748a7639cSPaul E. McKenney * Returns true if the grace-period kthread needs to be awakened. 21484102adabSPaul E. McKenney */ 214948a7639cSPaul E. McKenney static bool rcu_start_gp(struct rcu_state *rsp) 21504102adabSPaul E. McKenney { 21514102adabSPaul E. McKenney struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 21524102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 215348a7639cSPaul E. McKenney bool ret = false; 21544102adabSPaul E. McKenney 21554102adabSPaul E. McKenney /* 21564102adabSPaul E. McKenney * If there is no grace period in progress right now, any 21574102adabSPaul E. McKenney * callbacks we have up to this point will be satisfied by the 21584102adabSPaul E. McKenney * next grace period. Also, advancing the callbacks reduces the 21594102adabSPaul E. McKenney * probability of false positives from cpu_needs_another_gp() 21604102adabSPaul E. McKenney * resulting in pointless grace periods. So, advance callbacks 21614102adabSPaul E. McKenney * then start the grace period! 21624102adabSPaul E. McKenney */ 216348a7639cSPaul E. McKenney ret = rcu_advance_cbs(rsp, rnp, rdp) || ret; 216448a7639cSPaul E. 
McKenney ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret; 216548a7639cSPaul E. McKenney return ret; 21664102adabSPaul E. McKenney } 21674102adabSPaul E. McKenney 21684102adabSPaul E. McKenney /* 21694102adabSPaul E. McKenney * Report a full set of quiescent states to the specified rcu_state 21704102adabSPaul E. McKenney * data structure. This involves cleaning up after the prior grace 21714102adabSPaul E. McKenney * period and letting rcu_start_gp() start up the next grace period 21724102adabSPaul E. McKenney * if one is needed. Note that the caller must hold rnp->lock, which 21734102adabSPaul E. McKenney * is released before return. 21744102adabSPaul E. McKenney */ 21754102adabSPaul E. McKenney static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 21764102adabSPaul E. McKenney __releases(rcu_get_root(rsp)->lock) 21774102adabSPaul E. McKenney { 21784102adabSPaul E. McKenney WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 21794102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 21802aa792e6SPranith Kumar rcu_gp_kthread_wake(rsp); 21814102adabSPaul E. McKenney } 21824102adabSPaul E. McKenney 21834102adabSPaul E. McKenney /* 21844102adabSPaul E. McKenney * Similar to rcu_report_qs_rdp(), for which it is a helper function. 21854102adabSPaul E. McKenney * Allows quiescent states for a group of CPUs to be reported at one go 21864102adabSPaul E. McKenney * to the specified rcu_node structure, though all the CPUs in the group 2187654e9533SPaul E. McKenney * must be represented by the same rcu_node structure (which need not be a 2188654e9533SPaul E. McKenney * leaf rcu_node structure, though it often will be). The gps parameter 2189654e9533SPaul E. McKenney * is the grace-period snapshot, which means that the quiescent states 2190654e9533SPaul E. McKenney * are valid only if rnp->gpnum is equal to gps. That structure's lock 2191654e9533SPaul E. McKenney * must be held upon entry, and it is released before return. 21924102adabSPaul E. McKenney */ 21934102adabSPaul E. McKenney static void 21944102adabSPaul E. McKenney rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 2195654e9533SPaul E. McKenney struct rcu_node *rnp, unsigned long gps, unsigned long flags) 21964102adabSPaul E. McKenney __releases(rnp->lock) 21974102adabSPaul E. McKenney { 2198654e9533SPaul E. McKenney unsigned long oldmask = 0; 21994102adabSPaul E. McKenney struct rcu_node *rnp_c; 22004102adabSPaul E. McKenney 22014102adabSPaul E. McKenney /* Walk up the rcu_node hierarchy. */ 22024102adabSPaul E. McKenney for (;;) { 2203654e9533SPaul E. McKenney if (!(rnp->qsmask & mask) || rnp->gpnum != gps) { 22044102adabSPaul E. McKenney 2205654e9533SPaul E. McKenney /* 2206654e9533SPaul E. McKenney * Our bit has already been cleared, or the 2207654e9533SPaul E. McKenney * relevant grace period is already over, so done. 2208654e9533SPaul E. McKenney */ 22094102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 22104102adabSPaul E. McKenney return; 22114102adabSPaul E. McKenney } 2212654e9533SPaul E. McKenney WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ 22134102adabSPaul E. McKenney rnp->qsmask &= ~mask; 22144102adabSPaul E. McKenney trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, 22154102adabSPaul E. McKenney mask, rnp->qsmask, rnp->level, 22164102adabSPaul E. McKenney rnp->grplo, rnp->grphi, 22174102adabSPaul E. McKenney !!rnp->gp_tasks); 22184102adabSPaul E. 
McKenney if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 22194102adabSPaul E. McKenney 22204102adabSPaul E. McKenney /* Other bits still set at this level, so done. */ 22214102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 22224102adabSPaul E. McKenney return; 22234102adabSPaul E. McKenney } 22244102adabSPaul E. McKenney mask = rnp->grpmask; 22254102adabSPaul E. McKenney if (rnp->parent == NULL) { 22264102adabSPaul E. McKenney 22274102adabSPaul E. McKenney /* No more levels. Exit loop holding root lock. */ 22284102adabSPaul E. McKenney 22294102adabSPaul E. McKenney break; 22304102adabSPaul E. McKenney } 22314102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 22324102adabSPaul E. McKenney rnp_c = rnp; 22334102adabSPaul E. McKenney rnp = rnp->parent; 22344102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 22356303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 2236654e9533SPaul E. McKenney oldmask = rnp_c->qsmask; 22374102adabSPaul E. McKenney } 22384102adabSPaul E. McKenney 22394102adabSPaul E. McKenney /* 22404102adabSPaul E. McKenney * Get here if we are the last CPU to pass through a quiescent 22414102adabSPaul E. McKenney * state for this grace period. Invoke rcu_report_qs_rsp() 22424102adabSPaul E. McKenney * to clean up and start the next grace period if one is needed. 22434102adabSPaul E. McKenney */ 22444102adabSPaul E. McKenney rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ 22454102adabSPaul E. McKenney } 22464102adabSPaul E. McKenney 22474102adabSPaul E. McKenney /* 2248cc99a310SPaul E. McKenney * Record a quiescent state for all tasks that were previously queued 2249cc99a310SPaul E. McKenney * on the specified rcu_node structure and that were blocking the current 2250cc99a310SPaul E. McKenney * RCU grace period. The caller must hold the specified rnp->lock with 2251cc99a310SPaul E. McKenney * irqs disabled, and this lock is released upon return, but irqs remain 2252cc99a310SPaul E. McKenney * disabled. 2253cc99a310SPaul E. McKenney */ 22540aa04b05SPaul E. McKenney static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp, 2255cc99a310SPaul E. McKenney struct rcu_node *rnp, unsigned long flags) 2256cc99a310SPaul E. McKenney __releases(rnp->lock) 2257cc99a310SPaul E. McKenney { 2258654e9533SPaul E. McKenney unsigned long gps; 2259cc99a310SPaul E. McKenney unsigned long mask; 2260cc99a310SPaul E. McKenney struct rcu_node *rnp_p; 2261cc99a310SPaul E. McKenney 2262a77da14cSPaul E. McKenney if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || 2263a77da14cSPaul E. McKenney rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 2264cc99a310SPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 2265cc99a310SPaul E. McKenney return; /* Still need more quiescent states! */ 2266cc99a310SPaul E. McKenney } 2267cc99a310SPaul E. McKenney 2268cc99a310SPaul E. McKenney rnp_p = rnp->parent; 2269cc99a310SPaul E. McKenney if (rnp_p == NULL) { 2270cc99a310SPaul E. McKenney /* 2271a77da14cSPaul E. McKenney * Only one rcu_node structure in the tree, so don't 2272a77da14cSPaul E. McKenney * try to report up to its nonexistent parent! 2273cc99a310SPaul E. McKenney */ 2274cc99a310SPaul E. McKenney rcu_report_qs_rsp(rsp, flags); 2275cc99a310SPaul E. McKenney return; 2276cc99a310SPaul E. McKenney } 2277cc99a310SPaul E. McKenney 2278654e9533SPaul E. McKenney /* Report up the rest of the hierarchy, tracking current ->gpnum. */ 2279654e9533SPaul E. McKenney gps = rnp->gpnum; 2280cc99a310SPaul E. 
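/*
 * Editorial aside (illustrative sketch only, not part of this file):
 * rcu_report_qs_rnp() above implements hierarchical completion tracking:
 * clear the caller's bit in the current node's mask, and only the caller
 * that clears the last bit at a level walks up and repeats the operation
 * on the parent.  A simplified stand-alone version, using hypothetical
 * ex_* names and omitting the ->gpnum and blocked-readers checks that
 * the real code needs:
 */
#if 0	/* Example only; not built. */
struct ex_node {
	struct ex_node *parent;
	spinlock_t lock;
	unsigned long pending;	/* One bit per child not yet reported. */
	unsigned long grpmask;	/* This node's bit in parent->pending. */
};

/* Returns true if this was the final report for the whole tree. */
static bool ex_report_done(struct ex_node *np, unsigned long mask)
{
	unsigned long flags;

	for (;;) {
		spin_lock_irqsave(&np->lock, flags);
		np->pending &= ~mask;
		if (np->pending) {
			spin_unlock_irqrestore(&np->lock, flags);
			return false;	/* Siblings still outstanding. */
		}
		if (!np->parent) {
			spin_unlock_irqrestore(&np->lock, flags);
			return true;	/* Root cleared: all done. */
		}
		mask = np->grpmask;
		spin_unlock_irqrestore(&np->lock, flags);
		np = np->parent;
	}
}
#endif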
McKenney mask = rnp->grpmask; 2281cc99a310SPaul E. McKenney raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2282cc99a310SPaul E. McKenney raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ 2283cc99a310SPaul E. McKenney smp_mb__after_unlock_lock(); 2284654e9533SPaul E. McKenney rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags); 2285cc99a310SPaul E. McKenney } 2286cc99a310SPaul E. McKenney 2287cc99a310SPaul E. McKenney /* 22884102adabSPaul E. McKenney * Record a quiescent state for the specified CPU to that CPU's rcu_data 22894102adabSPaul E. McKenney * structure. This must be either called from the specified CPU, or 22904102adabSPaul E. McKenney * called when the specified CPU is known to be offline (and when it is 22914102adabSPaul E. McKenney * also known that no other CPU is concurrently trying to help the offline 22924102adabSPaul E. McKenney * CPU). The lastcomp argument is used to make sure we are still in the 22934102adabSPaul E. McKenney * grace period of interest. We don't want to end the current grace period 22944102adabSPaul E. McKenney * based on quiescent states detected in an earlier grace period! 22954102adabSPaul E. McKenney */ 22964102adabSPaul E. McKenney static void 22974102adabSPaul E. McKenney rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) 22984102adabSPaul E. McKenney { 22994102adabSPaul E. McKenney unsigned long flags; 23004102adabSPaul E. McKenney unsigned long mask; 230148a7639cSPaul E. McKenney bool needwake; 23024102adabSPaul E. McKenney struct rcu_node *rnp; 23034102adabSPaul E. McKenney 23044102adabSPaul E. McKenney rnp = rdp->mynode; 23054102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 23066303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 23075cd37193SPaul E. McKenney if ((rdp->passed_quiesce == 0 && 23085cd37193SPaul E. McKenney rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) || 23095cd37193SPaul E. McKenney rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum || 23105cd37193SPaul E. McKenney rdp->gpwrap) { 23114102adabSPaul E. McKenney 23124102adabSPaul E. McKenney /* 23134102adabSPaul E. McKenney * The grace period in which this quiescent state was 23144102adabSPaul E. McKenney * recorded has ended, so don't report it upwards. 23154102adabSPaul E. McKenney * We will instead need a new quiescent state that lies 23164102adabSPaul E. McKenney * within the current grace period. 23174102adabSPaul E. McKenney */ 23184102adabSPaul E. McKenney rdp->passed_quiesce = 0; /* need qs for new gp. */ 23195cd37193SPaul E. McKenney rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); 23204102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 23214102adabSPaul E. McKenney return; 23224102adabSPaul E. McKenney } 23234102adabSPaul E. McKenney mask = rdp->grpmask; 23244102adabSPaul E. McKenney if ((rnp->qsmask & mask) == 0) { 23254102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 23264102adabSPaul E. McKenney } else { 23274102adabSPaul E. McKenney rdp->qs_pending = 0; 23284102adabSPaul E. McKenney 23294102adabSPaul E. McKenney /* 23304102adabSPaul E. McKenney * This GP can't end until cpu checks in, so all of our 23314102adabSPaul E. McKenney * callbacks can be processed during the next GP. 23324102adabSPaul E. McKenney */ 233348a7639cSPaul E. McKenney needwake = rcu_accelerate_cbs(rsp, rnp, rdp); 23344102adabSPaul E. McKenney 2335654e9533SPaul E. McKenney rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); 2336654e9533SPaul E. 
McKenney /* ^^^ Released rnp->lock */ 233748a7639cSPaul E. McKenney if (needwake) 233848a7639cSPaul E. McKenney rcu_gp_kthread_wake(rsp); 23394102adabSPaul E. McKenney } 23404102adabSPaul E. McKenney } 23414102adabSPaul E. McKenney 23424102adabSPaul E. McKenney /* 23434102adabSPaul E. McKenney * Check to see if there is a new grace period of which this CPU 23444102adabSPaul E. McKenney * is not yet aware, and if so, set up local rcu_data state for it. 23454102adabSPaul E. McKenney * Otherwise, see if this CPU has just passed through its first 23464102adabSPaul E. McKenney * quiescent state for this grace period, and record that fact if so. 23474102adabSPaul E. McKenney */ 23484102adabSPaul E. McKenney static void 23494102adabSPaul E. McKenney rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) 23504102adabSPaul E. McKenney { 23514102adabSPaul E. McKenney /* Check for grace-period ends and beginnings. */ 23524102adabSPaul E. McKenney note_gp_changes(rsp, rdp); 23534102adabSPaul E. McKenney 23544102adabSPaul E. McKenney /* 23554102adabSPaul E. McKenney * Does this CPU still need to do its part for current grace period? 23564102adabSPaul E. McKenney * If no, return and let the other CPUs do their part as well. 23574102adabSPaul E. McKenney */ 23584102adabSPaul E. McKenney if (!rdp->qs_pending) 23594102adabSPaul E. McKenney return; 23604102adabSPaul E. McKenney 23614102adabSPaul E. McKenney /* 23624102adabSPaul E. McKenney * Was there a quiescent state since the beginning of the grace 23634102adabSPaul E. McKenney * period? If no, then exit and wait for the next call. 23644102adabSPaul E. McKenney */ 23655cd37193SPaul E. McKenney if (!rdp->passed_quiesce && 23665cd37193SPaul E. McKenney rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) 23674102adabSPaul E. McKenney return; 23684102adabSPaul E. McKenney 23694102adabSPaul E. McKenney /* 23704102adabSPaul E. McKenney * Tell RCU we are done (but rcu_report_qs_rdp() will be the 23714102adabSPaul E. McKenney * judge of that). 23724102adabSPaul E. McKenney */ 23734102adabSPaul E. McKenney rcu_report_qs_rdp(rdp->cpu, rsp, rdp); 23744102adabSPaul E. McKenney } 23754102adabSPaul E. McKenney 23764102adabSPaul E. McKenney #ifdef CONFIG_HOTPLUG_CPU 23774102adabSPaul E. McKenney 23784102adabSPaul E. McKenney /* 23794102adabSPaul E. McKenney * Send the specified CPU's RCU callbacks to the orphanage. The 23804102adabSPaul E. McKenney * specified CPU must be offline, and the caller must hold the 23814102adabSPaul E. McKenney * ->orphan_lock. 23824102adabSPaul E. McKenney */ 23834102adabSPaul E. McKenney static void 23844102adabSPaul E. McKenney rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, 23854102adabSPaul E. McKenney struct rcu_node *rnp, struct rcu_data *rdp) 23864102adabSPaul E. McKenney { 23874102adabSPaul E. McKenney /* No-CBs CPUs do not have orphanable callbacks. */ 23884102adabSPaul E. McKenney if (rcu_is_nocb_cpu(rdp->cpu)) 23894102adabSPaul E. McKenney return; 23904102adabSPaul E. McKenney 23914102adabSPaul E. McKenney /* 23924102adabSPaul E. McKenney * Orphan the callbacks. First adjust the counts. This is safe 23934102adabSPaul E. McKenney * because _rcu_barrier() excludes CPU-hotplug operations, so it 23944102adabSPaul E. McKenney * cannot be running now. Thus no memory barrier is required. 23954102adabSPaul E. McKenney */ 23964102adabSPaul E. McKenney if (rdp->nxtlist != NULL) { 23974102adabSPaul E. McKenney rsp->qlen_lazy += rdp->qlen_lazy; 23984102adabSPaul E. 
		rsp->qlen += rdp->qlen;
		rdp->n_cbs_orphaned += rdp->qlen;
		rdp->qlen_lazy = 0;
		WRITE_ONCE(rdp->qlen, 0);
	}

	/*
	 * Next, move those callbacks still needing a grace period to
	 * the orphanage, where some other CPU will pick them up.
	 * Some of the callbacks might have gone partway through a grace
	 * period, but that is too bad.  They get to start over because we
	 * cannot assume that grace periods are synchronized across CPUs.
	 * We don't bother updating the ->nxttail[] array yet, instead
	 * we just reset the whole thing later on.
	 */
	if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
		*rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
		rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
		*rdp->nxttail[RCU_DONE_TAIL] = NULL;
	}

	/*
	 * Then move the ready-to-invoke callbacks to the orphanage,
	 * where some other CPU will pick them up.  These will not be
	 * required to pass through another grace period:  They are done.
	 */
	if (rdp->nxtlist != NULL) {
		*rsp->orphan_donetail = rdp->nxtlist;
		rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
	}

	/*
	 * Finally, initialize the rcu_data structure's list to empty and
	 * disallow further callbacks on this CPU.
	 */
	init_callback_list(rdp);
	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
}

/*
 * Adopt the RCU callbacks from the specified rcu_state structure's
 * orphanage.  The caller must hold the ->orphan_lock.
 */
static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
{
	int i;
	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);

	/* No-CBs CPUs are handled specially. */
	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
		return;

	/* Do the accounting first. */
	rdp->qlen_lazy += rsp->qlen_lazy;
	rdp->qlen += rsp->qlen;
	rdp->n_cbs_adopted += rsp->qlen;
	if (rsp->qlen_lazy != rsp->qlen)
		rcu_idle_count_callbacks_posted();
	rsp->qlen_lazy = 0;
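/*
 * Editorial aside (illustrative sketch only, not part of this file):
 * the orphanage code above and below manipulates callback lists through
 * indirect tail pointers (struct rcu_head **), which gives O(1) append
 * and O(1) splicing of whole sublists without walking them.  A minimal
 * stand-alone version of the idiom, using hypothetical ex_* names:
 */
#if 0	/* Example only; not built. */
struct ex_cb {
	struct ex_cb *next;
};

struct ex_list {
	struct ex_cb *head;
	struct ex_cb **tail;	/* Points at the last ->next, or at head. */
};

static void ex_list_init(struct ex_list *l)
{
	l->head = NULL;
	l->tail = &l->head;
}

static void ex_list_enqueue(struct ex_list *l, struct ex_cb *cb)
{
	cb->next = NULL;
	*l->tail = cb;		/* Append without scanning the list. */
	l->tail = &cb->next;
}

/* Move all of src onto the end of dst, leaving src empty. */
static void ex_list_splice(struct ex_list *dst, struct ex_list *src)
{
	if (!src->head)
		return;
	*dst->tail = src->head;
	dst->tail = src->tail;
	ex_list_init(src);
}
#endif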
McKenney rsp->qlen = 0; 24584102adabSPaul E. McKenney 24594102adabSPaul E. McKenney /* 24604102adabSPaul E. McKenney * We do not need a memory barrier here because the only way we 24614102adabSPaul E. McKenney * can get here if there is an rcu_barrier() in flight is if 24624102adabSPaul E. McKenney * we are the task doing the rcu_barrier(). 24634102adabSPaul E. McKenney */ 24644102adabSPaul E. McKenney 24654102adabSPaul E. McKenney /* First adopt the ready-to-invoke callbacks. */ 24664102adabSPaul E. McKenney if (rsp->orphan_donelist != NULL) { 24674102adabSPaul E. McKenney *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL]; 24684102adabSPaul E. McKenney *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist; 24694102adabSPaul E. McKenney for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--) 24704102adabSPaul E. McKenney if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) 24714102adabSPaul E. McKenney rdp->nxttail[i] = rsp->orphan_donetail; 24724102adabSPaul E. McKenney rsp->orphan_donelist = NULL; 24734102adabSPaul E. McKenney rsp->orphan_donetail = &rsp->orphan_donelist; 24744102adabSPaul E. McKenney } 24754102adabSPaul E. McKenney 24764102adabSPaul E. McKenney /* And then adopt the callbacks that still need a grace period. */ 24774102adabSPaul E. McKenney if (rsp->orphan_nxtlist != NULL) { 24784102adabSPaul E. McKenney *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist; 24794102adabSPaul E. McKenney rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail; 24804102adabSPaul E. McKenney rsp->orphan_nxtlist = NULL; 24814102adabSPaul E. McKenney rsp->orphan_nxttail = &rsp->orphan_nxtlist; 24824102adabSPaul E. McKenney } 24834102adabSPaul E. McKenney } 24844102adabSPaul E. McKenney 24854102adabSPaul E. McKenney /* 24864102adabSPaul E. McKenney * Trace the fact that this CPU is going offline. 24874102adabSPaul E. McKenney */ 24884102adabSPaul E. McKenney static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) 24894102adabSPaul E. McKenney { 24904102adabSPaul E. McKenney RCU_TRACE(unsigned long mask); 24914102adabSPaul E. McKenney RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); 24924102adabSPaul E. McKenney RCU_TRACE(struct rcu_node *rnp = rdp->mynode); 24934102adabSPaul E. McKenney 24944102adabSPaul E. McKenney RCU_TRACE(mask = rdp->grpmask); 24954102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, 24964102adabSPaul E. McKenney rnp->gpnum + 1 - !!(rnp->qsmask & mask), 24974102adabSPaul E. McKenney TPS("cpuofl")); 24984102adabSPaul E. McKenney } 24994102adabSPaul E. McKenney 25004102adabSPaul E. McKenney /* 25018af3a5e7SPaul E. McKenney * All CPUs for the specified rcu_node structure have gone offline, 25028af3a5e7SPaul E. McKenney * and all tasks that were preempted within an RCU read-side critical 25038af3a5e7SPaul E. McKenney * section while running on one of those CPUs have since exited their RCU 25048af3a5e7SPaul E. McKenney * read-side critical section. Some other CPU is reporting this fact with 25058af3a5e7SPaul E. McKenney * the specified rcu_node structure's ->lock held and interrupts disabled. 25068af3a5e7SPaul E. McKenney * This function therefore goes up the tree of rcu_node structures, 25078af3a5e7SPaul E. McKenney * clearing the corresponding bits in the ->qsmaskinit fields. Note that 25088af3a5e7SPaul E. McKenney * the leaf rcu_node structure's ->qsmaskinit field has already been 25098af3a5e7SPaul E. McKenney * updated 25108af3a5e7SPaul E. McKenney * 25118af3a5e7SPaul E. McKenney * This function does check that the specified rcu_node structure has 25128af3a5e7SPaul E. 
McKenney * all CPUs offline and no blocked tasks, so it is OK to invoke it 25138af3a5e7SPaul E. McKenney * prematurely. That said, invoking it after the fact will cost you 25148af3a5e7SPaul E. McKenney * a needless lock acquisition. So once it has done its work, don't 25158af3a5e7SPaul E. McKenney * invoke it again. 25168af3a5e7SPaul E. McKenney */ 25178af3a5e7SPaul E. McKenney static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) 25188af3a5e7SPaul E. McKenney { 25198af3a5e7SPaul E. McKenney long mask; 25208af3a5e7SPaul E. McKenney struct rcu_node *rnp = rnp_leaf; 25218af3a5e7SPaul E. McKenney 25228af3a5e7SPaul E. McKenney if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) 25238af3a5e7SPaul E. McKenney return; 25248af3a5e7SPaul E. McKenney for (;;) { 25258af3a5e7SPaul E. McKenney mask = rnp->grpmask; 25268af3a5e7SPaul E. McKenney rnp = rnp->parent; 25278af3a5e7SPaul E. McKenney if (!rnp) 25288af3a5e7SPaul E. McKenney break; 25298af3a5e7SPaul E. McKenney raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 25308af3a5e7SPaul E. McKenney smp_mb__after_unlock_lock(); /* GP memory ordering. */ 25318af3a5e7SPaul E. McKenney rnp->qsmaskinit &= ~mask; 25320aa04b05SPaul E. McKenney rnp->qsmask &= ~mask; 25338af3a5e7SPaul E. McKenney if (rnp->qsmaskinit) { 25348af3a5e7SPaul E. McKenney raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 25358af3a5e7SPaul E. McKenney return; 25368af3a5e7SPaul E. McKenney } 25378af3a5e7SPaul E. McKenney raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 25388af3a5e7SPaul E. McKenney } 25398af3a5e7SPaul E. McKenney } 25408af3a5e7SPaul E. McKenney 25418af3a5e7SPaul E. McKenney /* 254288428cc5SPaul E. McKenney * The CPU is exiting the idle loop into the arch_cpu_idle_dead() 254388428cc5SPaul E. McKenney * function. We now remove it from the rcu_node tree's ->qsmaskinit 254488428cc5SPaul E. McKenney * bit masks. 254588428cc5SPaul E. McKenney */ 254688428cc5SPaul E. McKenney static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) 254788428cc5SPaul E. McKenney { 254888428cc5SPaul E. McKenney unsigned long flags; 254988428cc5SPaul E. McKenney unsigned long mask; 255088428cc5SPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 255188428cc5SPaul E. McKenney struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 255288428cc5SPaul E. McKenney 255388428cc5SPaul E. McKenney /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ 255488428cc5SPaul E. McKenney mask = rdp->grpmask; 255588428cc5SPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 255688428cc5SPaul E. McKenney smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */ 255788428cc5SPaul E. McKenney rnp->qsmaskinitnext &= ~mask; 255888428cc5SPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 255988428cc5SPaul E. McKenney } 256088428cc5SPaul E. McKenney 256188428cc5SPaul E. McKenney /* 25624102adabSPaul E. McKenney * The CPU has been completely removed, and some other CPU is reporting 25634102adabSPaul E. McKenney * this fact from process context. Do the remainder of the cleanup, 25644102adabSPaul E. McKenney * including orphaning the outgoing CPU's RCU callbacks, and also 25654102adabSPaul E. McKenney * adopting them. There can only be one CPU hotplug operation at a time, 25664102adabSPaul E. McKenney * so no other CPU can be attempting to update rcu_cpu_kthread_task. 25674102adabSPaul E. McKenney */ 25684102adabSPaul E. McKenney static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 25694102adabSPaul E. 
{
	unsigned long flags;
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */

	/* Adjust any no-longer-needed kthreads. */
	rcu_boost_kthread_setaffinity(rnp, -1);

	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
	rcu_adopt_orphan_cbs(rsp, flags);
	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);

	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
		  cpu, rdp->qlen, rdp->nxtlist);
}

#else /* #ifdef CONFIG_HOTPLUG_CPU */

static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
{
}

static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
{
}

static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
{
}

static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
{
}

#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */

/*
 * Invoke any RCU callbacks that have made it to the end of their grace
 * period.  Throttle as specified by rdp->blimit.
 */
static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
{
	unsigned long flags;
	struct rcu_head *next, *list, **tail;
	long bl, count, count_lazy;
	int i;

	/* If no callbacks are ready, just return. */
	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
		trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
		trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist),
				    need_resched(), is_idle_task(current),
				    rcu_is_callbacks_kthread());
		return;
	}

	/*
	 * Extract the list of ready callbacks, disabling to prevent
	 * races with call_rcu() from interrupt handlers.
26314102adabSPaul E. McKenney */ 26324102adabSPaul E. McKenney local_irq_save(flags); 26334102adabSPaul E. McKenney WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); 26344102adabSPaul E. McKenney bl = rdp->blimit; 26354102adabSPaul E. McKenney trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl); 26364102adabSPaul E. McKenney list = rdp->nxtlist; 26374102adabSPaul E. McKenney rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 26384102adabSPaul E. McKenney *rdp->nxttail[RCU_DONE_TAIL] = NULL; 26394102adabSPaul E. McKenney tail = rdp->nxttail[RCU_DONE_TAIL]; 26404102adabSPaul E. McKenney for (i = RCU_NEXT_SIZE - 1; i >= 0; i--) 26414102adabSPaul E. McKenney if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) 26424102adabSPaul E. McKenney rdp->nxttail[i] = &rdp->nxtlist; 26434102adabSPaul E. McKenney local_irq_restore(flags); 26444102adabSPaul E. McKenney 26454102adabSPaul E. McKenney /* Invoke callbacks. */ 26464102adabSPaul E. McKenney count = count_lazy = 0; 26474102adabSPaul E. McKenney while (list) { 26484102adabSPaul E. McKenney next = list->next; 26494102adabSPaul E. McKenney prefetch(next); 26504102adabSPaul E. McKenney debug_rcu_head_unqueue(list); 26514102adabSPaul E. McKenney if (__rcu_reclaim(rsp->name, list)) 26524102adabSPaul E. McKenney count_lazy++; 26534102adabSPaul E. McKenney list = next; 26544102adabSPaul E. McKenney /* Stop only if limit reached and CPU has something to do. */ 26554102adabSPaul E. McKenney if (++count >= bl && 26564102adabSPaul E. McKenney (need_resched() || 26574102adabSPaul E. McKenney (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) 26584102adabSPaul E. McKenney break; 26594102adabSPaul E. McKenney } 26604102adabSPaul E. McKenney 26614102adabSPaul E. McKenney local_irq_save(flags); 26624102adabSPaul E. McKenney trace_rcu_batch_end(rsp->name, count, !!list, need_resched(), 26634102adabSPaul E. McKenney is_idle_task(current), 26644102adabSPaul E. McKenney rcu_is_callbacks_kthread()); 26654102adabSPaul E. McKenney 26664102adabSPaul E. McKenney /* Update count, and requeue any remaining callbacks. */ 26674102adabSPaul E. McKenney if (list != NULL) { 26684102adabSPaul E. McKenney *tail = rdp->nxtlist; 26694102adabSPaul E. McKenney rdp->nxtlist = list; 26704102adabSPaul E. McKenney for (i = 0; i < RCU_NEXT_SIZE; i++) 26714102adabSPaul E. McKenney if (&rdp->nxtlist == rdp->nxttail[i]) 26724102adabSPaul E. McKenney rdp->nxttail[i] = tail; 26734102adabSPaul E. McKenney else 26744102adabSPaul E. McKenney break; 26754102adabSPaul E. McKenney } 26764102adabSPaul E. McKenney smp_mb(); /* List handling before counting for rcu_barrier(). */ 26774102adabSPaul E. McKenney rdp->qlen_lazy -= count_lazy; 26787d0ae808SPaul E. McKenney WRITE_ONCE(rdp->qlen, rdp->qlen - count); 26794102adabSPaul E. McKenney rdp->n_cbs_invoked += count; 26804102adabSPaul E. McKenney 26814102adabSPaul E. McKenney /* Reinstate batch limit if we have worked down the excess. */ 26824102adabSPaul E. McKenney if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) 26834102adabSPaul E. McKenney rdp->blimit = blimit; 26844102adabSPaul E. McKenney 26854102adabSPaul E. McKenney /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ 26864102adabSPaul E. McKenney if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { 26874102adabSPaul E. McKenney rdp->qlen_last_fqs_check = 0; 26884102adabSPaul E. McKenney rdp->n_force_qs_snap = rsp->n_force_qs; 26894102adabSPaul E. McKenney } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) 26904102adabSPaul E. 
McKenney rdp->qlen_last_fqs_check = rdp->qlen; 26914102adabSPaul E. McKenney WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); 26924102adabSPaul E. McKenney 26934102adabSPaul E. McKenney local_irq_restore(flags); 26944102adabSPaul E. McKenney 26954102adabSPaul E. McKenney /* Re-invoke RCU core processing if there are callbacks remaining. */ 26964102adabSPaul E. McKenney if (cpu_has_callbacks_ready_to_invoke(rdp)) 26974102adabSPaul E. McKenney invoke_rcu_core(); 26984102adabSPaul E. McKenney } 26994102adabSPaul E. McKenney 27004102adabSPaul E. McKenney /* 27014102adabSPaul E. McKenney * Check to see if this CPU is in a non-context-switch quiescent state 27024102adabSPaul E. McKenney * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 27034102adabSPaul E. McKenney * Also schedule RCU core processing. 27044102adabSPaul E. McKenney * 27054102adabSPaul E. McKenney * This function must be called from hardirq context. It is normally 27064102adabSPaul E. McKenney * invoked from the scheduling-clock interrupt. If rcu_pending returns 27074102adabSPaul E. McKenney * false, there is no point in invoking rcu_check_callbacks(). 27084102adabSPaul E. McKenney */ 2709c3377c2dSPaul E. McKenney void rcu_check_callbacks(int user) 27104102adabSPaul E. McKenney { 27114102adabSPaul E. McKenney trace_rcu_utilization(TPS("Start scheduler-tick")); 27124102adabSPaul E. McKenney increment_cpu_stall_ticks(); 27134102adabSPaul E. McKenney if (user || rcu_is_cpu_rrupt_from_idle()) { 27144102adabSPaul E. McKenney 27154102adabSPaul E. McKenney /* 27164102adabSPaul E. McKenney * Get here if this CPU took its interrupt from user 27174102adabSPaul E. McKenney * mode or from the idle loop, and if this is not a 27184102adabSPaul E. McKenney * nested interrupt. In this case, the CPU is in 27194102adabSPaul E. McKenney * a quiescent state, so note it. 27204102adabSPaul E. McKenney * 27214102adabSPaul E. McKenney * No memory barrier is required here because both 27224102adabSPaul E. McKenney * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local 27234102adabSPaul E. McKenney * variables that other CPUs neither access nor modify, 27244102adabSPaul E. McKenney * at least not while the corresponding CPU is online. 27254102adabSPaul E. McKenney */ 27264102adabSPaul E. McKenney 2727284a8c93SPaul E. McKenney rcu_sched_qs(); 2728284a8c93SPaul E. McKenney rcu_bh_qs(); 27294102adabSPaul E. McKenney 27304102adabSPaul E. McKenney } else if (!in_softirq()) { 27314102adabSPaul E. McKenney 27324102adabSPaul E. McKenney /* 27334102adabSPaul E. McKenney * Get here if this CPU did not take its interrupt from 27344102adabSPaul E. McKenney * softirq, in other words, if it is not interrupting 27354102adabSPaul E. McKenney * a rcu_bh read-side critical section. This is an _bh 27364102adabSPaul E. McKenney * critical section, so note it. 27374102adabSPaul E. McKenney */ 27384102adabSPaul E. McKenney 2739284a8c93SPaul E. McKenney rcu_bh_qs(); 27404102adabSPaul E. McKenney } 274186aea0e6SPaul E. McKenney rcu_preempt_check_callbacks(); 2742e3950ecdSPaul E. McKenney if (rcu_pending()) 27434102adabSPaul E. McKenney invoke_rcu_core(); 27448315f422SPaul E. McKenney if (user) 27458315f422SPaul E. McKenney rcu_note_voluntary_context_switch(current); 27464102adabSPaul E. McKenney trace_rcu_utilization(TPS("End scheduler-tick")); 27474102adabSPaul E. McKenney } 27484102adabSPaul E. McKenney 27494102adabSPaul E. McKenney /* 27504102adabSPaul E. 
McKenney * Scan the leaf rcu_node structures, processing dyntick state for any that 27514102adabSPaul E. McKenney * have not yet encountered a quiescent state, using the function specified. 27524102adabSPaul E. McKenney * Also initiate boosting for any threads blocked on the root rcu_node. 27534102adabSPaul E. McKenney * 27544102adabSPaul E. McKenney * The caller must have suppressed start of new grace periods. 27554102adabSPaul E. McKenney */ 27564102adabSPaul E. McKenney static void force_qs_rnp(struct rcu_state *rsp, 27574102adabSPaul E. McKenney int (*f)(struct rcu_data *rsp, bool *isidle, 27584102adabSPaul E. McKenney unsigned long *maxj), 27594102adabSPaul E. McKenney bool *isidle, unsigned long *maxj) 27604102adabSPaul E. McKenney { 27614102adabSPaul E. McKenney unsigned long bit; 27624102adabSPaul E. McKenney int cpu; 27634102adabSPaul E. McKenney unsigned long flags; 27644102adabSPaul E. McKenney unsigned long mask; 27654102adabSPaul E. McKenney struct rcu_node *rnp; 27664102adabSPaul E. McKenney 27674102adabSPaul E. McKenney rcu_for_each_leaf_node(rsp, rnp) { 2768bde6c3aaSPaul E. McKenney cond_resched_rcu_qs(); 27694102adabSPaul E. McKenney mask = 0; 27704102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 27716303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 27724102adabSPaul E. McKenney if (!rcu_gp_in_progress(rsp)) { 27734102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 27744102adabSPaul E. McKenney return; 27754102adabSPaul E. McKenney } 27764102adabSPaul E. McKenney if (rnp->qsmask == 0) { 2777a77da14cSPaul E. McKenney if (rcu_state_p == &rcu_sched_state || 2778a77da14cSPaul E. McKenney rsp != rcu_state_p || 2779a77da14cSPaul E. McKenney rcu_preempt_blocked_readers_cgp(rnp)) { 2780a77da14cSPaul E. McKenney /* 2781a77da14cSPaul E. McKenney * No point in scanning bits because they 2782a77da14cSPaul E. McKenney * are all zero. But we might need to 2783a77da14cSPaul E. McKenney * priority-boost blocked readers. 2784a77da14cSPaul E. McKenney */ 2785a77da14cSPaul E. McKenney rcu_initiate_boost(rnp, flags); 2786a77da14cSPaul E. McKenney /* rcu_initiate_boost() releases rnp->lock */ 27874102adabSPaul E. McKenney continue; 27884102adabSPaul E. McKenney } 2789a77da14cSPaul E. McKenney if (rnp->parent && 2790a77da14cSPaul E. McKenney (rnp->parent->qsmask & rnp->grpmask)) { 2791a77da14cSPaul E. McKenney /* 2792a77da14cSPaul E. McKenney * Race between grace-period 2793a77da14cSPaul E. McKenney * initialization and task exiting RCU 2794a77da14cSPaul E. McKenney * read-side critical section: Report. 2795a77da14cSPaul E. McKenney */ 2796a77da14cSPaul E. McKenney rcu_report_unblock_qs_rnp(rsp, rnp, flags); 2797a77da14cSPaul E. McKenney /* rcu_report_unblock_qs_rnp() rlses ->lock */ 2798a77da14cSPaul E. McKenney continue; 2799a77da14cSPaul E. McKenney } 2800a77da14cSPaul E. McKenney } 28014102adabSPaul E. McKenney cpu = rnp->grplo; 28024102adabSPaul E. McKenney bit = 1; 28034102adabSPaul E. McKenney for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 28044102adabSPaul E. McKenney if ((rnp->qsmask & bit) != 0) { 2805675da67fSPaul E. McKenney if ((rnp->qsmaskinit & bit) == 0) 2806675da67fSPaul E. McKenney *isidle = false; /* Pending hotplug. */ 28074102adabSPaul E. McKenney if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) 28084102adabSPaul E. McKenney mask |= bit; 28094102adabSPaul E. McKenney } 28104102adabSPaul E. McKenney } 28114102adabSPaul E. McKenney if (mask != 0) { 2812654e9533SPaul E. McKenney /* Idle/offline CPUs, report (releases rnp->lock. 
*/ 2813654e9533SPaul E. McKenney rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); 28140aa04b05SPaul E. McKenney } else { 28150aa04b05SPaul E. McKenney /* Nothing to do here, so just drop the lock. */ 28164102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 28174102adabSPaul E. McKenney } 28184102adabSPaul E. McKenney } 28190aa04b05SPaul E. McKenney } 28204102adabSPaul E. McKenney 28214102adabSPaul E. McKenney /* 28224102adabSPaul E. McKenney * Force quiescent states on reluctant CPUs, and also detect which 28234102adabSPaul E. McKenney * CPUs are in dyntick-idle mode. 28244102adabSPaul E. McKenney */ 28254102adabSPaul E. McKenney static void force_quiescent_state(struct rcu_state *rsp) 28264102adabSPaul E. McKenney { 28274102adabSPaul E. McKenney unsigned long flags; 28284102adabSPaul E. McKenney bool ret; 28294102adabSPaul E. McKenney struct rcu_node *rnp; 28304102adabSPaul E. McKenney struct rcu_node *rnp_old = NULL; 28314102adabSPaul E. McKenney 28324102adabSPaul E. McKenney /* Funnel through hierarchy to reduce memory contention. */ 2833d860d403SShan Wei rnp = __this_cpu_read(rsp->rda->mynode); 28344102adabSPaul E. McKenney for (; rnp != NULL; rnp = rnp->parent) { 28357d0ae808SPaul E. McKenney ret = (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || 28364102adabSPaul E. McKenney !raw_spin_trylock(&rnp->fqslock); 28374102adabSPaul E. McKenney if (rnp_old != NULL) 28384102adabSPaul E. McKenney raw_spin_unlock(&rnp_old->fqslock); 28394102adabSPaul E. McKenney if (ret) { 2840a792563bSPaul E. McKenney rsp->n_force_qs_lh++; 28414102adabSPaul E. McKenney return; 28424102adabSPaul E. McKenney } 28434102adabSPaul E. McKenney rnp_old = rnp; 28444102adabSPaul E. McKenney } 28454102adabSPaul E. McKenney /* rnp_old == rcu_get_root(rsp), rnp == NULL. */ 28464102adabSPaul E. McKenney 28474102adabSPaul E. McKenney /* Reached the root of the rcu_node tree, acquire lock. */ 28484102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp_old->lock, flags); 28496303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 28504102adabSPaul E. McKenney raw_spin_unlock(&rnp_old->fqslock); 28517d0ae808SPaul E. McKenney if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 2852a792563bSPaul E. McKenney rsp->n_force_qs_lh++; 28534102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 28544102adabSPaul E. McKenney return; /* Someone beat us to it. */ 28554102adabSPaul E. McKenney } 28567d0ae808SPaul E. McKenney WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); 28574102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 28582aa792e6SPranith Kumar rcu_gp_kthread_wake(rsp); 28594102adabSPaul E. McKenney } 28604102adabSPaul E. McKenney 28614102adabSPaul E. McKenney /* 28624102adabSPaul E. McKenney * This does the RCU core processing work for the specified rcu_state 28634102adabSPaul E. McKenney * and rcu_data structures. This may be called only from the CPU to 28644102adabSPaul E. McKenney * whom the rdp belongs. 28654102adabSPaul E. McKenney */ 28664102adabSPaul E. McKenney static void 28674102adabSPaul E. McKenney __rcu_process_callbacks(struct rcu_state *rsp) 28684102adabSPaul E. McKenney { 28694102adabSPaul E. McKenney unsigned long flags; 287048a7639cSPaul E. McKenney bool needwake; 2871fa07a58fSChristoph Lameter struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 28724102adabSPaul E. McKenney 28734102adabSPaul E. McKenney WARN_ON_ONCE(rdp->beenonline == 0); 28744102adabSPaul E. McKenney 28754102adabSPaul E. 
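/*
 * Editorial aside (illustrative sketch only, not part of this file):
 * force_quiescent_state() above uses "funnel locking": each caller
 * trylocks its leaf's ->fqslock and climbs toward the root, releasing
 * the lock one level below; any caller that loses a trylock, or that
 * sees the work already requested, drops out, so contention on the
 * root stays bounded.  A simplified stand-alone version, using
 * hypothetical ex_* names:
 */
#if 0	/* Example only; not built. */
struct ex_node {
	struct ex_node *parent;
	spinlock_t funnel_lock;		/* Initialized with spin_lock_init(). */
};

static bool ex_requested;

/* Returns true if this caller got to record the request. */
static bool ex_funnel_request(struct ex_node *leaf)
{
	struct ex_node *np, *np_old = NULL;

	for (np = leaf; np; np = np->parent) {
		bool give_up = READ_ONCE(ex_requested) ||
			       !spin_trylock(&np->funnel_lock);

		if (np_old)
			spin_unlock(&np_old->funnel_lock);
		if (give_up)
			return false;	/* Someone else will handle it. */
		np_old = np;
	}
	/* np_old is now the root and we hold its funnel lock. */
	WRITE_ONCE(ex_requested, true);
	spin_unlock(&np_old->funnel_lock);
	return true;
}
#endif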
McKenney /* Update RCU state based on any recent quiescent states. */ 28764102adabSPaul E. McKenney rcu_check_quiescent_state(rsp, rdp); 28774102adabSPaul E. McKenney 28784102adabSPaul E. McKenney /* Does this CPU require a not-yet-started grace period? */ 28794102adabSPaul E. McKenney local_irq_save(flags); 28804102adabSPaul E. McKenney if (cpu_needs_another_gp(rsp, rdp)) { 28814102adabSPaul E. McKenney raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ 288248a7639cSPaul E. McKenney needwake = rcu_start_gp(rsp); 28834102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 288448a7639cSPaul E. McKenney if (needwake) 288548a7639cSPaul E. McKenney rcu_gp_kthread_wake(rsp); 28864102adabSPaul E. McKenney } else { 28874102adabSPaul E. McKenney local_irq_restore(flags); 28884102adabSPaul E. McKenney } 28894102adabSPaul E. McKenney 28904102adabSPaul E. McKenney /* If there are callbacks ready, invoke them. */ 28914102adabSPaul E. McKenney if (cpu_has_callbacks_ready_to_invoke(rdp)) 28924102adabSPaul E. McKenney invoke_rcu_callbacks(rsp, rdp); 289396d3fd0dSPaul E. McKenney 289496d3fd0dSPaul E. McKenney /* Do any needed deferred wakeups of rcuo kthreads. */ 289596d3fd0dSPaul E. McKenney do_nocb_deferred_wakeup(rdp); 28964102adabSPaul E. McKenney } 28974102adabSPaul E. McKenney 28984102adabSPaul E. McKenney /* 28994102adabSPaul E. McKenney * Do RCU core processing for the current CPU. 29004102adabSPaul E. McKenney */ 29014102adabSPaul E. McKenney static void rcu_process_callbacks(struct softirq_action *unused) 29024102adabSPaul E. McKenney { 29034102adabSPaul E. McKenney struct rcu_state *rsp; 29044102adabSPaul E. McKenney 29054102adabSPaul E. McKenney if (cpu_is_offline(smp_processor_id())) 29064102adabSPaul E. McKenney return; 29074102adabSPaul E. McKenney trace_rcu_utilization(TPS("Start RCU core")); 29084102adabSPaul E. McKenney for_each_rcu_flavor(rsp) 29094102adabSPaul E. McKenney __rcu_process_callbacks(rsp); 29104102adabSPaul E. McKenney trace_rcu_utilization(TPS("End RCU core")); 29114102adabSPaul E. McKenney } 29124102adabSPaul E. McKenney 29134102adabSPaul E. McKenney /* 29144102adabSPaul E. McKenney * Schedule RCU callback invocation. If the specified type of RCU 29154102adabSPaul E. McKenney * does not support RCU priority boosting, just do a direct call, 29164102adabSPaul E. McKenney * otherwise wake up the per-CPU kernel kthread. Note that because we 2917924df8a0SPaul E. McKenney * are running on the current CPU with softirqs disabled, the 29184102adabSPaul E. McKenney * rcu_cpu_kthread_task cannot disappear out from under us. 29194102adabSPaul E. McKenney */ 29204102adabSPaul E. McKenney static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 29214102adabSPaul E. McKenney { 29227d0ae808SPaul E. McKenney if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) 29234102adabSPaul E. McKenney return; 29244102adabSPaul E. McKenney if (likely(!rsp->boost)) { 29254102adabSPaul E. McKenney rcu_do_batch(rsp, rdp); 29264102adabSPaul E. McKenney return; 29274102adabSPaul E. McKenney } 29284102adabSPaul E. McKenney invoke_rcu_callbacks_kthread(); 29294102adabSPaul E. McKenney } 29304102adabSPaul E. McKenney 29314102adabSPaul E. McKenney static void invoke_rcu_core(void) 29324102adabSPaul E. McKenney { 29334102adabSPaul E. McKenney if (cpu_online(smp_processor_id())) 29344102adabSPaul E. McKenney raise_softirq(RCU_SOFTIRQ); 29354102adabSPaul E. McKenney } 29364102adabSPaul E. McKenney 29374102adabSPaul E. McKenney /* 29384102adabSPaul E. 
McKenney * Handle any core-RCU processing required by a call_rcu() invocation. 29394102adabSPaul E. McKenney */ 29404102adabSPaul E. McKenney static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, 29414102adabSPaul E. McKenney struct rcu_head *head, unsigned long flags) 29424102adabSPaul E. McKenney { 294348a7639cSPaul E. McKenney bool needwake; 294448a7639cSPaul E. McKenney 29454102adabSPaul E. McKenney /* 29464102adabSPaul E. McKenney * If called from an extended quiescent state, invoke the RCU 29474102adabSPaul E. McKenney * core in order to force a re-evaluation of RCU's idleness. 29484102adabSPaul E. McKenney */ 29499910affaSYao Dongdong if (!rcu_is_watching()) 29504102adabSPaul E. McKenney invoke_rcu_core(); 29514102adabSPaul E. McKenney 29524102adabSPaul E. McKenney /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ 29534102adabSPaul E. McKenney if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) 29544102adabSPaul E. McKenney return; 29554102adabSPaul E. McKenney 29564102adabSPaul E. McKenney /* 29574102adabSPaul E. McKenney * Force the grace period if too many callbacks or too long waiting. 29584102adabSPaul E. McKenney * Enforce hysteresis, and don't invoke force_quiescent_state() 29594102adabSPaul E. McKenney * if some other CPU has recently done so. Also, don't bother 29604102adabSPaul E. McKenney * invoking force_quiescent_state() if the newly enqueued callback 29614102adabSPaul E. McKenney * is the only one waiting for a grace period to complete. 29624102adabSPaul E. McKenney */ 29634102adabSPaul E. McKenney if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 29644102adabSPaul E. McKenney 29654102adabSPaul E. McKenney /* Are we ignoring a completed grace period? */ 29664102adabSPaul E. McKenney note_gp_changes(rsp, rdp); 29674102adabSPaul E. McKenney 29684102adabSPaul E. McKenney /* Start a new grace period if one not already started. */ 29694102adabSPaul E. McKenney if (!rcu_gp_in_progress(rsp)) { 29704102adabSPaul E. McKenney struct rcu_node *rnp_root = rcu_get_root(rsp); 29714102adabSPaul E. McKenney 29724102adabSPaul E. McKenney raw_spin_lock(&rnp_root->lock); 29736303b9c8SPaul E. McKenney smp_mb__after_unlock_lock(); 297448a7639cSPaul E. McKenney needwake = rcu_start_gp(rsp); 29754102adabSPaul E. McKenney raw_spin_unlock(&rnp_root->lock); 297648a7639cSPaul E. McKenney if (needwake) 297748a7639cSPaul E. McKenney rcu_gp_kthread_wake(rsp); 29784102adabSPaul E. McKenney } else { 29794102adabSPaul E. McKenney /* Give the grace period a kick. */ 29804102adabSPaul E. McKenney rdp->blimit = LONG_MAX; 29814102adabSPaul E. McKenney if (rsp->n_force_qs == rdp->n_force_qs_snap && 29824102adabSPaul E. McKenney *rdp->nxttail[RCU_DONE_TAIL] != head) 29834102adabSPaul E. McKenney force_quiescent_state(rsp); 29844102adabSPaul E. McKenney rdp->n_force_qs_snap = rsp->n_force_qs; 29854102adabSPaul E. McKenney rdp->qlen_last_fqs_check = rdp->qlen; 29864102adabSPaul E. McKenney } 29874102adabSPaul E. McKenney } 29884102adabSPaul E. McKenney } 29894102adabSPaul E. McKenney 29904102adabSPaul E. McKenney /* 29914102adabSPaul E. McKenney * RCU callback function to leak a callback. 29924102adabSPaul E. McKenney */ 29934102adabSPaul E. McKenney static void rcu_leak_callback(struct rcu_head *rhp) 29944102adabSPaul E. McKenney { 29954102adabSPaul E. McKenney } 29964102adabSPaul E. McKenney 29974102adabSPaul E. McKenney /* 29984102adabSPaul E. McKenney * Helper function for call_rcu() and friends. 
The cpu argument will 29994102adabSPaul E. McKenney * normally be -1, indicating "currently running CPU". It may specify 30004102adabSPaul E. McKenney * a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier() 30014102adabSPaul E. McKenney * is expected to specify a CPU. 30024102adabSPaul E. McKenney */ 30034102adabSPaul E. McKenney static void 30044102adabSPaul E. McKenney __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 30054102adabSPaul E. McKenney struct rcu_state *rsp, int cpu, bool lazy) 30064102adabSPaul E. McKenney { 30074102adabSPaul E. McKenney unsigned long flags; 30084102adabSPaul E. McKenney struct rcu_data *rdp; 30094102adabSPaul E. McKenney 30101146edcbSPaul E. McKenney WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */ 30114102adabSPaul E. McKenney if (debug_rcu_head_queue(head)) { 30124102adabSPaul E. McKenney /* Probable double call_rcu(), so leak the callback. */ 30137d0ae808SPaul E. McKenney WRITE_ONCE(head->func, rcu_leak_callback); 30144102adabSPaul E. McKenney WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n"); 30154102adabSPaul E. McKenney return; 30164102adabSPaul E. McKenney } 30174102adabSPaul E. McKenney head->func = func; 30184102adabSPaul E. McKenney head->next = NULL; 30194102adabSPaul E. McKenney 30204102adabSPaul E. McKenney /* 30214102adabSPaul E. McKenney * Opportunistically note grace-period endings and beginnings. 30224102adabSPaul E. McKenney * Note that we might see a beginning right after we see an 30234102adabSPaul E. McKenney * end, but never vice versa, since this CPU has to pass through 30244102adabSPaul E. McKenney * a quiescent state betweentimes. 30254102adabSPaul E. McKenney */ 30264102adabSPaul E. McKenney local_irq_save(flags); 30274102adabSPaul E. McKenney rdp = this_cpu_ptr(rsp->rda); 30284102adabSPaul E. McKenney 30294102adabSPaul E. McKenney /* Add the callback to our list. */ 30304102adabSPaul E. McKenney if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) { 30314102adabSPaul E. McKenney int offline; 30324102adabSPaul E. McKenney 30334102adabSPaul E. McKenney if (cpu != -1) 30344102adabSPaul E. McKenney rdp = per_cpu_ptr(rsp->rda, cpu); 3035143da9c2SPaul E. McKenney if (likely(rdp->mynode)) { 3036143da9c2SPaul E. McKenney /* Post-boot, so this should be for a no-CBs CPU. */ 303796d3fd0dSPaul E. McKenney offline = !__call_rcu_nocb(rdp, head, lazy, flags); 30384102adabSPaul E. McKenney WARN_ON_ONCE(offline); 3039143da9c2SPaul E. McKenney /* Offline CPU, _call_rcu() illegal, leak callback. */ 30404102adabSPaul E. McKenney local_irq_restore(flags); 30414102adabSPaul E. McKenney return; 30424102adabSPaul E. McKenney } 3043143da9c2SPaul E. McKenney /* 3044143da9c2SPaul E. McKenney * Very early boot, before rcu_init(). Initialize if needed 3045143da9c2SPaul E. McKenney * and then drop through to queue the callback. 3046143da9c2SPaul E. McKenney */ 3047143da9c2SPaul E. McKenney BUG_ON(cpu != -1); 304834404ca8SPaul E. McKenney WARN_ON_ONCE(!rcu_is_watching()); 3049143da9c2SPaul E. McKenney if (!likely(rdp->nxtlist)) 3050143da9c2SPaul E. McKenney init_default_callback_list(rdp); 3051143da9c2SPaul E. McKenney } 30527d0ae808SPaul E. McKenney WRITE_ONCE(rdp->qlen, rdp->qlen + 1); 30534102adabSPaul E. McKenney if (lazy) 30544102adabSPaul E. McKenney rdp->qlen_lazy++; 30554102adabSPaul E. McKenney else 30564102adabSPaul E. McKenney rcu_idle_count_callbacks_posted(); 30574102adabSPaul E. McKenney smp_mb(); /* Count before adding callback for rcu_barrier(). */ 30584102adabSPaul E. 
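	/*
	 * Editorial note on the enqueue below: ->nxttail[] implements the
	 * segmented callback list described in tree.h.  Callbacks up to
	 * RCU_DONE_TAIL have had their grace period elapse, RCU_WAIT_TAIL
	 * and RCU_NEXT_READY_TAIL cover callbacks waiting on the current
	 * and next grace periods, and RCU_NEXT_TAIL is where newly arrived
	 * callbacks such as this one are appended.
	 */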
McKenney *rdp->nxttail[RCU_NEXT_TAIL] = head; 30594102adabSPaul E. McKenney rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 30604102adabSPaul E. McKenney 30614102adabSPaul E. McKenney if (__is_kfree_rcu_offset((unsigned long)func)) 30624102adabSPaul E. McKenney trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, 30634102adabSPaul E. McKenney rdp->qlen_lazy, rdp->qlen); 30644102adabSPaul E. McKenney else 30654102adabSPaul E. McKenney trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); 30664102adabSPaul E. McKenney 30674102adabSPaul E. McKenney /* Go handle any RCU core processing required. */ 30684102adabSPaul E. McKenney __call_rcu_core(rsp, rdp, head, flags); 30694102adabSPaul E. McKenney local_irq_restore(flags); 30704102adabSPaul E. McKenney } 30714102adabSPaul E. McKenney 30724102adabSPaul E. McKenney /* 30734102adabSPaul E. McKenney * Queue an RCU-sched callback for invocation after a grace period. 30744102adabSPaul E. McKenney */ 30754102adabSPaul E. McKenney void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 30764102adabSPaul E. McKenney { 30774102adabSPaul E. McKenney __call_rcu(head, func, &rcu_sched_state, -1, 0); 30784102adabSPaul E. McKenney } 30794102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(call_rcu_sched); 30804102adabSPaul E. McKenney 30814102adabSPaul E. McKenney /* 30824102adabSPaul E. McKenney * Queue an RCU callback for invocation after a quicker grace period. 30834102adabSPaul E. McKenney */ 30844102adabSPaul E. McKenney void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 30854102adabSPaul E. McKenney { 30864102adabSPaul E. McKenney __call_rcu(head, func, &rcu_bh_state, -1, 0); 30874102adabSPaul E. McKenney } 30884102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(call_rcu_bh); 30894102adabSPaul E. McKenney 30904102adabSPaul E. McKenney /* 3091495aa969SAndreea-Cristina Bernat * Queue an RCU callback for lazy invocation after a grace period. 3092495aa969SAndreea-Cristina Bernat * This will likely be later named something like "call_rcu_lazy()", 3093495aa969SAndreea-Cristina Bernat * but this change will require some way of tagging the lazy RCU 3094495aa969SAndreea-Cristina Bernat * callbacks in the list of pending callbacks. Until then, this 3095495aa969SAndreea-Cristina Bernat * function may only be called from __kfree_rcu(). 3096495aa969SAndreea-Cristina Bernat */ 3097495aa969SAndreea-Cristina Bernat void kfree_call_rcu(struct rcu_head *head, 3098495aa969SAndreea-Cristina Bernat void (*func)(struct rcu_head *rcu)) 3099495aa969SAndreea-Cristina Bernat { 3100e534165bSUma Sharma __call_rcu(head, func, rcu_state_p, -1, 1); 3101495aa969SAndreea-Cristina Bernat } 3102495aa969SAndreea-Cristina Bernat EXPORT_SYMBOL_GPL(kfree_call_rcu); 3103495aa969SAndreea-Cristina Bernat 3104495aa969SAndreea-Cristina Bernat /* 31054102adabSPaul E. McKenney * Because a context switch is a grace period for RCU-sched and RCU-bh, 31064102adabSPaul E. McKenney * any blocking grace-period wait automatically implies a grace period 31074102adabSPaul E. McKenney * if there is only one CPU online at any point in time during execution 31084102adabSPaul E. McKenney * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to 31094102adabSPaul E. McKenney * occasionally incorrectly indicate that there are multiple CPUs online 31104102adabSPaul E. McKenney * when there was in fact only one the whole time, as this just adds 31114102adabSPaul E. McKenney * some overhead: RCU still operates correctly. 31124102adabSPaul E.
McKenney */ 31134102adabSPaul E. McKenney static inline int rcu_blocking_is_gp(void) 31144102adabSPaul E. McKenney { 31154102adabSPaul E. McKenney int ret; 31164102adabSPaul E. McKenney 31174102adabSPaul E. McKenney might_sleep(); /* Check for RCU read-side critical section. */ 31184102adabSPaul E. McKenney preempt_disable(); 31194102adabSPaul E. McKenney ret = num_online_cpus() <= 1; 31204102adabSPaul E. McKenney preempt_enable(); 31214102adabSPaul E. McKenney return ret; 31224102adabSPaul E. McKenney } 31234102adabSPaul E. McKenney 31244102adabSPaul E. McKenney /** 31254102adabSPaul E. McKenney * synchronize_sched - wait until an rcu-sched grace period has elapsed. 31264102adabSPaul E. McKenney * 31274102adabSPaul E. McKenney * Control will return to the caller some time after a full rcu-sched 31284102adabSPaul E. McKenney * grace period has elapsed, in other words after all currently executing 31294102adabSPaul E. McKenney * rcu-sched read-side critical sections have completed. These read-side 31304102adabSPaul E. McKenney * critical sections are delimited by rcu_read_lock_sched() and 31314102adabSPaul E. McKenney * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), 31324102adabSPaul E. McKenney * local_irq_disable(), and so on may be used in place of 31334102adabSPaul E. McKenney * rcu_read_lock_sched(). 31344102adabSPaul E. McKenney * 31354102adabSPaul E. McKenney * This means that all preempt_disable code sequences, including NMI and 31364102adabSPaul E. McKenney * non-threaded hardware-interrupt handlers, in progress on entry will 31374102adabSPaul E. McKenney * have completed before this primitive returns. However, this does not 31384102adabSPaul E. McKenney * guarantee that softirq handlers will have completed, since in some 31394102adabSPaul E. McKenney * kernels, these handlers can run in process context, and can block. 31404102adabSPaul E. McKenney * 31414102adabSPaul E. McKenney * Note that this guarantee implies further memory-ordering guarantees. 31424102adabSPaul E. McKenney * On systems with more than one CPU, when synchronize_sched() returns, 31434102adabSPaul E. McKenney * each CPU is guaranteed to have executed a full memory barrier since the 31444102adabSPaul E. McKenney * end of its last RCU-sched read-side critical section whose beginning 31454102adabSPaul E. McKenney * preceded the call to synchronize_sched(). In addition, each CPU having 31464102adabSPaul E. McKenney * an RCU read-side critical section that extends beyond the return from 31474102adabSPaul E. McKenney * synchronize_sched() is guaranteed to have executed a full memory barrier 31484102adabSPaul E. McKenney * after the beginning of synchronize_sched() and before the beginning of 31494102adabSPaul E. McKenney * that RCU read-side critical section. Note that these guarantees include 31504102adabSPaul E. McKenney * CPUs that are offline, idle, or executing in user mode, as well as CPUs 31514102adabSPaul E. McKenney * that are executing in the kernel. 31524102adabSPaul E. McKenney * 31534102adabSPaul E. McKenney * Furthermore, if CPU A invoked synchronize_sched(), which returned 31544102adabSPaul E. McKenney * to its caller on CPU B, then both CPU A and CPU B are guaranteed 31554102adabSPaul E. McKenney * to have executed a full memory barrier during the execution of 31564102adabSPaul E. McKenney * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but 31574102adabSPaul E. McKenney * again only if the system has more than one CPU). 31584102adabSPaul E. 
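 *
 * As an illustrative updater-side sketch (the names "gp", "gp_lock", and
 * "new_p" are hypothetical):
 *
 *	p = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
 *	rcu_assign_pointer(gp, new_p);
 *	synchronize_sched();
 *	kfree(p);
 *
 * Readers traversing gp under rcu_read_lock_sched() (or with preemption
 * otherwise disabled) are guaranteed to have finished with the old
 * structure before the kfree() above executes.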
McKenney * 31594102adabSPaul E. McKenney * This primitive provides the guarantees made by the (now removed) 31604102adabSPaul E. McKenney * synchronize_kernel() API. In contrast, synchronize_rcu() only 31614102adabSPaul E. McKenney * guarantees that rcu_read_lock() sections will have completed. 31624102adabSPaul E. McKenney * In "classic RCU", these two guarantees happen to be one and 31634102adabSPaul E. McKenney * the same, but can differ in realtime RCU implementations. 31644102adabSPaul E. McKenney */ 31654102adabSPaul E. McKenney void synchronize_sched(void) 31664102adabSPaul E. McKenney { 31674102adabSPaul E. McKenney rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && 31684102adabSPaul E. McKenney !lock_is_held(&rcu_lock_map) && 31694102adabSPaul E. McKenney !lock_is_held(&rcu_sched_lock_map), 31704102adabSPaul E. McKenney "Illegal synchronize_sched() in RCU-sched read-side critical section"); 31714102adabSPaul E. McKenney if (rcu_blocking_is_gp()) 31724102adabSPaul E. McKenney return; 31735afff48bSPaul E. McKenney if (rcu_gp_is_expedited()) 31744102adabSPaul E. McKenney synchronize_sched_expedited(); 31754102adabSPaul E. McKenney else 31764102adabSPaul E. McKenney wait_rcu_gp(call_rcu_sched); 31774102adabSPaul E. McKenney } 31784102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(synchronize_sched); 31794102adabSPaul E. McKenney 31804102adabSPaul E. McKenney /** 31814102adabSPaul E. McKenney * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. 31824102adabSPaul E. McKenney * 31834102adabSPaul E. McKenney * Control will return to the caller some time after a full rcu_bh grace 31844102adabSPaul E. McKenney * period has elapsed, in other words after all currently executing rcu_bh 31854102adabSPaul E. McKenney * read-side critical sections have completed. RCU read-side critical 31864102adabSPaul E. McKenney * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), 31874102adabSPaul E. McKenney * and may be nested. 31884102adabSPaul E. McKenney * 31894102adabSPaul E. McKenney * See the description of synchronize_sched() for more detailed information 31904102adabSPaul E. McKenney * on memory ordering guarantees. 31914102adabSPaul E. McKenney */ 31924102adabSPaul E. McKenney void synchronize_rcu_bh(void) 31934102adabSPaul E. McKenney { 31944102adabSPaul E. McKenney rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && 31954102adabSPaul E. McKenney !lock_is_held(&rcu_lock_map) && 31964102adabSPaul E. McKenney !lock_is_held(&rcu_sched_lock_map), 31974102adabSPaul E. McKenney "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); 31984102adabSPaul E. McKenney if (rcu_blocking_is_gp()) 31994102adabSPaul E. McKenney return; 32005afff48bSPaul E. McKenney if (rcu_gp_is_expedited()) 32014102adabSPaul E. McKenney synchronize_rcu_bh_expedited(); 32024102adabSPaul E. McKenney else 32034102adabSPaul E. McKenney wait_rcu_gp(call_rcu_bh); 32044102adabSPaul E. McKenney } 32054102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 32064102adabSPaul E. McKenney 3207765a3f4fSPaul E. McKenney /** 3208765a3f4fSPaul E. McKenney * get_state_synchronize_rcu - Snapshot current RCU state 3209765a3f4fSPaul E. McKenney * 3210765a3f4fSPaul E. McKenney * Returns a cookie that is used by a later call to cond_synchronize_rcu() 3211765a3f4fSPaul E. McKenney * to determine whether or not a full grace period has elapsed in the 3212765a3f4fSPaul E. McKenney * meantime. 3213765a3f4fSPaul E. McKenney */ 3214765a3f4fSPaul E. 
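/*
 * Editorial sketch of how this function pairs with cond_synchronize_rcu()
 * below (the caller and do_lengthy_setup_work() are hypothetical):
 *
 *	unsigned long oldstate = get_state_synchronize_rcu();
 *
 *	do_lengthy_setup_work();
 *
 *	cond_synchronize_rcu(oldstate);
 *
 * The final call returns immediately if a full grace period has already
 * elapsed since the snapshot, and otherwise blocks in synchronize_rcu().
 */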
McKenney unsigned long get_state_synchronize_rcu(void) 3215765a3f4fSPaul E. McKenney { 3216765a3f4fSPaul E. McKenney /* 3217765a3f4fSPaul E. McKenney * Any prior manipulation of RCU-protected data must happen 3218765a3f4fSPaul E. McKenney * before the load from ->gpnum. 3219765a3f4fSPaul E. McKenney */ 3220765a3f4fSPaul E. McKenney smp_mb(); /* ^^^ */ 3221765a3f4fSPaul E. McKenney 3222765a3f4fSPaul E. McKenney /* 3223765a3f4fSPaul E. McKenney * Make sure this load happens before the purportedly 3224765a3f4fSPaul E. McKenney * time-consuming work between get_state_synchronize_rcu() 3225765a3f4fSPaul E. McKenney * and cond_synchronize_rcu(). 3226765a3f4fSPaul E. McKenney */ 3227e534165bSUma Sharma return smp_load_acquire(&rcu_state_p->gpnum); 3228765a3f4fSPaul E. McKenney } 3229765a3f4fSPaul E. McKenney EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); 3230765a3f4fSPaul E. McKenney 3231765a3f4fSPaul E. McKenney /** 3232765a3f4fSPaul E. McKenney * cond_synchronize_rcu - Conditionally wait for an RCU grace period 3233765a3f4fSPaul E. McKenney * 3234765a3f4fSPaul E. McKenney * @oldstate: return value from earlier call to get_state_synchronize_rcu() 3235765a3f4fSPaul E. McKenney * 3236765a3f4fSPaul E. McKenney * If a full RCU grace period has elapsed since the earlier call to 3237765a3f4fSPaul E. McKenney * get_state_synchronize_rcu(), just return. Otherwise, invoke 3238765a3f4fSPaul E. McKenney * synchronize_rcu() to wait for a full grace period. 3239765a3f4fSPaul E. McKenney * 3240765a3f4fSPaul E. McKenney * Yes, this function does not take counter wrap into account. But 3241765a3f4fSPaul E. McKenney * counter wrap is harmless. If the counter wraps, we have waited for 3242765a3f4fSPaul E. McKenney * more than 2 billion grace periods (and way more on a 64-bit system!), 3243765a3f4fSPaul E. McKenney * so waiting for one additional grace period should be just fine. 3244765a3f4fSPaul E. McKenney */ 3245765a3f4fSPaul E. McKenney void cond_synchronize_rcu(unsigned long oldstate) 3246765a3f4fSPaul E. McKenney { 3247765a3f4fSPaul E. McKenney unsigned long newstate; 3248765a3f4fSPaul E. McKenney 3249765a3f4fSPaul E. McKenney /* 3250765a3f4fSPaul E. McKenney * Ensure that this load happens before any RCU-destructive 3251765a3f4fSPaul E. McKenney * actions the caller might carry out after we return. 3252765a3f4fSPaul E. McKenney */ 3253e534165bSUma Sharma newstate = smp_load_acquire(&rcu_state_p->completed); 3254765a3f4fSPaul E. McKenney if (ULONG_CMP_GE(oldstate, newstate)) 3255765a3f4fSPaul E. McKenney synchronize_rcu(); 3256765a3f4fSPaul E. McKenney } 3257765a3f4fSPaul E. McKenney EXPORT_SYMBOL_GPL(cond_synchronize_rcu); 3258765a3f4fSPaul E. McKenney 32594102adabSPaul E. McKenney static int synchronize_sched_expedited_cpu_stop(void *data) 32604102adabSPaul E. McKenney { 32614102adabSPaul E. McKenney /* 32624102adabSPaul E. McKenney * There must be a full memory barrier on each affected CPU 32634102adabSPaul E. McKenney * between the time that try_stop_cpus() is called and the 32644102adabSPaul E. McKenney * time that it returns. 32654102adabSPaul E. McKenney * 32664102adabSPaul E. McKenney * In the current initial implementation of cpu_stop, the 32674102adabSPaul E. McKenney * above condition is already met when the control reaches 32684102adabSPaul E. McKenney * this point and the following smp_mb() is not strictly 32694102adabSPaul E. McKenney * necessary. Do smp_mb() anyway for documentation and 32704102adabSPaul E. McKenney * robustness against future implementation changes. 32714102adabSPaul E. 
McKenney */ 32724102adabSPaul E. McKenney smp_mb(); /* See above comment block. */ 32734102adabSPaul E. McKenney return 0; 32744102adabSPaul E. McKenney } 32754102adabSPaul E. McKenney 32764102adabSPaul E. McKenney /** 32774102adabSPaul E. McKenney * synchronize_sched_expedited - Brute-force RCU-sched grace period 32784102adabSPaul E. McKenney * 32794102adabSPaul E. McKenney * Wait for an RCU-sched grace period to elapse, but use a "big hammer" 32804102adabSPaul E. McKenney * approach to force the grace period to end quickly. This consumes 32814102adabSPaul E. McKenney * significant time on all CPUs and is unfriendly to real-time workloads, 32824102adabSPaul E. McKenney * so is thus not recommended for any sort of common-case code. In fact, 32834102adabSPaul E. McKenney * if you are using synchronize_sched_expedited() in a loop, please 32844102adabSPaul E. McKenney * restructure your code to batch your updates, and then use a single 32854102adabSPaul E. McKenney * synchronize_sched() instead. 32864102adabSPaul E. McKenney * 32874102adabSPaul E. McKenney * This implementation can be thought of as an application of ticket 32884102adabSPaul E. McKenney * locking to RCU, with sync_sched_expedited_started and 32894102adabSPaul E. McKenney * sync_sched_expedited_done taking on the roles of the halves 32904102adabSPaul E. McKenney * of the ticket-lock word. Each task atomically increments 32914102adabSPaul E. McKenney * sync_sched_expedited_started upon entry, snapshotting the old value, 32924102adabSPaul E. McKenney * then attempts to stop all the CPUs. If this succeeds, then each 32934102adabSPaul E. McKenney * CPU will have executed a context switch, resulting in an RCU-sched 32944102adabSPaul E. McKenney * grace period. We are then done, so we use atomic_cmpxchg() to 32954102adabSPaul E. McKenney * update sync_sched_expedited_done to match our snapshot -- but 32964102adabSPaul E. McKenney * only if someone else has not already advanced past our snapshot. 32974102adabSPaul E. McKenney * 32984102adabSPaul E. McKenney * On the other hand, if try_stop_cpus() fails, we check the value 32994102adabSPaul E. McKenney * of sync_sched_expedited_done. If it has advanced past our 33004102adabSPaul E. McKenney * initial snapshot, then someone else must have forced a grace period 33014102adabSPaul E. McKenney * some time after we took our snapshot. In this case, our work is 33024102adabSPaul E. McKenney * done for us, and we can simply return. Otherwise, we try again, 33034102adabSPaul E. McKenney * but keep our initial snapshot for purposes of checking for someone 33044102adabSPaul E. McKenney * doing our work for us. 33054102adabSPaul E. McKenney * 33064102adabSPaul E. McKenney * If we fail too many times in a row, we fall back to synchronize_sched(). 33074102adabSPaul E. McKenney */ 33084102adabSPaul E. McKenney void synchronize_sched_expedited(void) 33094102adabSPaul E. McKenney { 3310e0775cefSPaul E. McKenney cpumask_var_t cm; 3311e0775cefSPaul E. McKenney bool cma = false; 3312e0775cefSPaul E. McKenney int cpu; 33134102adabSPaul E. McKenney long firstsnap, s, snap; 33144102adabSPaul E. McKenney int trycount = 0; 33154102adabSPaul E. McKenney struct rcu_state *rsp = &rcu_sched_state; 33164102adabSPaul E. McKenney 33174102adabSPaul E. McKenney /* 33184102adabSPaul E. McKenney * If we are in danger of counter wrap, just do synchronize_sched(). 33194102adabSPaul E. McKenney * By allowing sync_sched_expedited_started to advance no more than 33204102adabSPaul E. 
McKenney * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring 33214102adabSPaul E. McKenney * that more than 3.5 billion CPUs would be required to force a 33224102adabSPaul E. McKenney * counter wrap on a 32-bit system. Quite a few more CPUs would of 33234102adabSPaul E. McKenney * course be required on a 64-bit system. 33244102adabSPaul E. McKenney */ 33254102adabSPaul E. McKenney if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start), 33264102adabSPaul E. McKenney (ulong)atomic_long_read(&rsp->expedited_done) + 33274102adabSPaul E. McKenney ULONG_MAX / 8)) { 33284102adabSPaul E. McKenney synchronize_sched(); 33294102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_wrap); 33304102adabSPaul E. McKenney return; 33314102adabSPaul E. McKenney } 33324102adabSPaul E. McKenney 33334102adabSPaul E. McKenney /* 33344102adabSPaul E. McKenney * Take a ticket. Note that atomic_inc_return() implies a 33354102adabSPaul E. McKenney * full memory barrier. 33364102adabSPaul E. McKenney */ 33374102adabSPaul E. McKenney snap = atomic_long_inc_return(&rsp->expedited_start); 33384102adabSPaul E. McKenney firstsnap = snap; 3339dd56af42SPaul E. McKenney if (!try_get_online_cpus()) { 3340dd56af42SPaul E. McKenney /* CPU hotplug operation in flight, fall back to normal GP. */ 3341dd56af42SPaul E. McKenney wait_rcu_gp(call_rcu_sched); 3342dd56af42SPaul E. McKenney atomic_long_inc(&rsp->expedited_normal); 3343dd56af42SPaul E. McKenney return; 3344dd56af42SPaul E. McKenney } 33454102adabSPaul E. McKenney WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); 33464102adabSPaul E. McKenney 3347e0775cefSPaul E. McKenney /* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */ 3348e0775cefSPaul E. McKenney cma = zalloc_cpumask_var(&cm, GFP_KERNEL); 3349e0775cefSPaul E. McKenney if (cma) { 3350e0775cefSPaul E. McKenney cpumask_copy(cm, cpu_online_mask); 3351e0775cefSPaul E. McKenney cpumask_clear_cpu(raw_smp_processor_id(), cm); 3352e0775cefSPaul E. McKenney for_each_cpu(cpu, cm) { 3353e0775cefSPaul E. McKenney struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 3354e0775cefSPaul E. McKenney 3355e0775cefSPaul E. McKenney if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1)) 3356e0775cefSPaul E. McKenney cpumask_clear_cpu(cpu, cm); 3357e0775cefSPaul E. McKenney } 3358e0775cefSPaul E. McKenney if (cpumask_weight(cm) == 0) 3359e0775cefSPaul E. McKenney goto all_cpus_idle; 3360e0775cefSPaul E. McKenney } 3361e0775cefSPaul E. McKenney 33624102adabSPaul E. McKenney /* 33634102adabSPaul E. McKenney * Each pass through the following loop attempts to force a 33644102adabSPaul E. McKenney * context switch on each CPU. 33654102adabSPaul E. McKenney */ 3366e0775cefSPaul E. McKenney while (try_stop_cpus(cma ? cm : cpu_online_mask, 33674102adabSPaul E. McKenney synchronize_sched_expedited_cpu_stop, 33684102adabSPaul E. McKenney NULL) == -EAGAIN) { 33694102adabSPaul E. McKenney put_online_cpus(); 33704102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_tryfail); 33714102adabSPaul E. McKenney 33724102adabSPaul E. McKenney /* Check to see if someone else did our work for us. */ 33734102adabSPaul E. McKenney s = atomic_long_read(&rsp->expedited_done); 33744102adabSPaul E. McKenney if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { 33754102adabSPaul E. McKenney /* ensure test happens before caller kfree */ 33764e857c58SPeter Zijlstra smp_mb__before_atomic(); /* ^^^ */ 33774102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_workdone1); 3378e0775cefSPaul E. 
McKenney free_cpumask_var(cm); 33794102adabSPaul E. McKenney return; 33804102adabSPaul E. McKenney } 33814102adabSPaul E. McKenney 33824102adabSPaul E. McKenney /* No joy, try again later. Or just synchronize_sched(). */ 33834102adabSPaul E. McKenney if (trycount++ < 10) { 33844102adabSPaul E. McKenney udelay(trycount * num_online_cpus()); 33854102adabSPaul E. McKenney } else { 33864102adabSPaul E. McKenney wait_rcu_gp(call_rcu_sched); 33874102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_normal); 3388e0775cefSPaul E. McKenney free_cpumask_var(cm); 33894102adabSPaul E. McKenney return; 33904102adabSPaul E. McKenney } 33914102adabSPaul E. McKenney 33924102adabSPaul E. McKenney /* Recheck to see if someone else did our work for us. */ 33934102adabSPaul E. McKenney s = atomic_long_read(&rsp->expedited_done); 33944102adabSPaul E. McKenney if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { 33954102adabSPaul E. McKenney /* ensure test happens before caller kfree */ 33964e857c58SPeter Zijlstra smp_mb__before_atomic(); /* ^^^ */ 33974102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_workdone2); 3398e0775cefSPaul E. McKenney free_cpumask_var(cm); 33994102adabSPaul E. McKenney return; 34004102adabSPaul E. McKenney } 34014102adabSPaul E. McKenney 34024102adabSPaul E. McKenney /* 34034102adabSPaul E. McKenney * Refetching sync_sched_expedited_started allows later 34044102adabSPaul E. McKenney * callers to piggyback on our grace period. We retry 34054102adabSPaul E. McKenney * after they started, so our grace period works for them, 34064102adabSPaul E. McKenney * and they started after our first try, so their grace 34074102adabSPaul E. McKenney * period works for us. 34084102adabSPaul E. McKenney */ 3409dd56af42SPaul E. McKenney if (!try_get_online_cpus()) { 3410dd56af42SPaul E. McKenney /* CPU hotplug operation in flight, use normal GP. */ 3411dd56af42SPaul E. McKenney wait_rcu_gp(call_rcu_sched); 3412dd56af42SPaul E. McKenney atomic_long_inc(&rsp->expedited_normal); 3413e0775cefSPaul E. McKenney free_cpumask_var(cm); 3414dd56af42SPaul E. McKenney return; 3415dd56af42SPaul E. McKenney } 34164102adabSPaul E. McKenney snap = atomic_long_read(&rsp->expedited_start); 34174102adabSPaul E. McKenney smp_mb(); /* ensure read is before try_stop_cpus(). */ 34184102adabSPaul E. McKenney } 34194102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_stoppedcpus); 34204102adabSPaul E. McKenney 3421e0775cefSPaul E. McKenney all_cpus_idle: 3422e0775cefSPaul E. McKenney free_cpumask_var(cm); 3423e0775cefSPaul E. McKenney 34244102adabSPaul E. McKenney /* 34254102adabSPaul E. McKenney * Everyone up to our most recent fetch is covered by our grace 34264102adabSPaul E. McKenney * period. Update the counter, but only if our work is still 34274102adabSPaul E. McKenney * relevant -- which it won't be if someone who started later 34284102adabSPaul E. McKenney * than we did already did their update. 34294102adabSPaul E. McKenney */ 34304102adabSPaul E. McKenney do { 34314102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_done_tries); 34324102adabSPaul E. McKenney s = atomic_long_read(&rsp->expedited_done); 34334102adabSPaul E. McKenney if (ULONG_CMP_GE((ulong)s, (ulong)snap)) { 34344102adabSPaul E. McKenney /* ensure test happens before caller kfree */ 34354e857c58SPeter Zijlstra smp_mb__before_atomic(); /* ^^^ */ 34364102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_done_lost); 34374102adabSPaul E. McKenney break; 34384102adabSPaul E. McKenney } 34394102adabSPaul E. 
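		/*
		 * Editorial note: the cmpxchg below publishes our grace
		 * period by advancing ->expedited_done to our snapshot,
		 * but only if no later expedited grace period has already
		 * advanced it past us; a failed cmpxchg simply rereads
		 * ->expedited_done and retries the check above.
		 */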
McKenney } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s); 34404102adabSPaul E. McKenney atomic_long_inc(&rsp->expedited_done_exit); 34414102adabSPaul E. McKenney 34424102adabSPaul E. McKenney put_online_cpus(); 34434102adabSPaul E. McKenney } 34444102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(synchronize_sched_expedited); 34454102adabSPaul E. McKenney 34464102adabSPaul E. McKenney /* 34474102adabSPaul E. McKenney * Check to see if there is any immediate RCU-related work to be done 34484102adabSPaul E. McKenney * by the current CPU, for the specified type of RCU, returning 1 if so. 34494102adabSPaul E. McKenney * The checks are in order of increasing expense: checks that can be 34504102adabSPaul E. McKenney * carried out against CPU-local state are performed first. However, 34514102adabSPaul E. McKenney * we must check for CPU stalls first, else we might not get a chance. 34524102adabSPaul E. McKenney */ 34534102adabSPaul E. McKenney static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) 34544102adabSPaul E. McKenney { 34554102adabSPaul E. McKenney struct rcu_node *rnp = rdp->mynode; 34564102adabSPaul E. McKenney 34574102adabSPaul E. McKenney rdp->n_rcu_pending++; 34584102adabSPaul E. McKenney 34594102adabSPaul E. McKenney /* Check for CPU stalls, if enabled. */ 34604102adabSPaul E. McKenney check_cpu_stall(rsp, rdp); 34614102adabSPaul E. McKenney 3462a096932fSPaul E. McKenney /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */ 3463a096932fSPaul E. McKenney if (rcu_nohz_full_cpu(rsp)) 3464a096932fSPaul E. McKenney return 0; 3465a096932fSPaul E. McKenney 34664102adabSPaul E. McKenney /* Is the RCU core waiting for a quiescent state from this CPU? */ 34674102adabSPaul E. McKenney if (rcu_scheduler_fully_active && 34685cd37193SPaul E. McKenney rdp->qs_pending && !rdp->passed_quiesce && 34695cd37193SPaul E. McKenney rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { 34704102adabSPaul E. McKenney rdp->n_rp_qs_pending++; 34715cd37193SPaul E. McKenney } else if (rdp->qs_pending && 34725cd37193SPaul E. McKenney (rdp->passed_quiesce || 34735cd37193SPaul E. McKenney rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) { 34744102adabSPaul E. McKenney rdp->n_rp_report_qs++; 34754102adabSPaul E. McKenney return 1; 34764102adabSPaul E. McKenney } 34774102adabSPaul E. McKenney 34784102adabSPaul E. McKenney /* Does this CPU have callbacks ready to invoke? */ 34794102adabSPaul E. McKenney if (cpu_has_callbacks_ready_to_invoke(rdp)) { 34804102adabSPaul E. McKenney rdp->n_rp_cb_ready++; 34814102adabSPaul E. McKenney return 1; 34824102adabSPaul E. McKenney } 34834102adabSPaul E. McKenney 34844102adabSPaul E. McKenney /* Has RCU gone idle with this CPU needing another grace period? */ 34854102adabSPaul E. McKenney if (cpu_needs_another_gp(rsp, rdp)) { 34864102adabSPaul E. McKenney rdp->n_rp_cpu_needs_gp++; 34874102adabSPaul E. McKenney return 1; 34884102adabSPaul E. McKenney } 34894102adabSPaul E. McKenney 34904102adabSPaul E. McKenney /* Has another RCU grace period completed? */ 34917d0ae808SPaul E. McKenney if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ 34924102adabSPaul E. McKenney rdp->n_rp_gp_completed++; 34934102adabSPaul E. McKenney return 1; 34944102adabSPaul E. McKenney } 34954102adabSPaul E. McKenney 34964102adabSPaul E. McKenney /* Has a new RCU grace period started? */ 34977d0ae808SPaul E. McKenney if (READ_ONCE(rnp->gpnum) != rdp->gpnum || 34987d0ae808SPaul E. 
McKenney unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */ 34994102adabSPaul E. McKenney rdp->n_rp_gp_started++; 35004102adabSPaul E. McKenney return 1; 35014102adabSPaul E. McKenney } 35024102adabSPaul E. McKenney 350396d3fd0dSPaul E. McKenney /* Does this CPU need a deferred NOCB wakeup? */ 350496d3fd0dSPaul E. McKenney if (rcu_nocb_need_deferred_wakeup(rdp)) { 350596d3fd0dSPaul E. McKenney rdp->n_rp_nocb_defer_wakeup++; 350696d3fd0dSPaul E. McKenney return 1; 350796d3fd0dSPaul E. McKenney } 350896d3fd0dSPaul E. McKenney 35094102adabSPaul E. McKenney /* nothing to do */ 35104102adabSPaul E. McKenney rdp->n_rp_need_nothing++; 35114102adabSPaul E. McKenney return 0; 35124102adabSPaul E. McKenney } 35134102adabSPaul E. McKenney 35144102adabSPaul E. McKenney /* 35154102adabSPaul E. McKenney * Check to see if there is any immediate RCU-related work to be done 35164102adabSPaul E. McKenney * by the current CPU, returning 1 if so. This function is part of the 35174102adabSPaul E. McKenney * RCU implementation; it is -not- an exported member of the RCU API. 35184102adabSPaul E. McKenney */ 3519e3950ecdSPaul E. McKenney static int rcu_pending(void) 35204102adabSPaul E. McKenney { 35214102adabSPaul E. McKenney struct rcu_state *rsp; 35224102adabSPaul E. McKenney 35234102adabSPaul E. McKenney for_each_rcu_flavor(rsp) 3524e3950ecdSPaul E. McKenney if (__rcu_pending(rsp, this_cpu_ptr(rsp->rda))) 35254102adabSPaul E. McKenney return 1; 35264102adabSPaul E. McKenney return 0; 35274102adabSPaul E. McKenney } 35284102adabSPaul E. McKenney 35294102adabSPaul E. McKenney /* 35304102adabSPaul E. McKenney * Return true if the specified CPU has any callback. If all_lazy is 35314102adabSPaul E. McKenney * non-NULL, store an indication of whether all callbacks are lazy. 35324102adabSPaul E. McKenney * (If there are no callbacks, all of them are deemed to be lazy.) 35334102adabSPaul E. McKenney */ 3534aa6da514SPaul E. McKenney static int __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) 35354102adabSPaul E. McKenney { 35364102adabSPaul E. McKenney bool al = true; 35374102adabSPaul E. McKenney bool hc = false; 35384102adabSPaul E. McKenney struct rcu_data *rdp; 35394102adabSPaul E. McKenney struct rcu_state *rsp; 35404102adabSPaul E. McKenney 35414102adabSPaul E. McKenney for_each_rcu_flavor(rsp) { 3542aa6da514SPaul E. McKenney rdp = this_cpu_ptr(rsp->rda); 35434102adabSPaul E. McKenney if (!rdp->nxtlist) 35444102adabSPaul E. McKenney continue; 35454102adabSPaul E. McKenney hc = true; 35464102adabSPaul E. McKenney if (rdp->qlen != rdp->qlen_lazy || !all_lazy) { 35474102adabSPaul E. McKenney al = false; 35484102adabSPaul E. McKenney break; 35494102adabSPaul E. McKenney } 35504102adabSPaul E. McKenney } 35514102adabSPaul E. McKenney if (all_lazy) 35524102adabSPaul E. McKenney *all_lazy = al; 35534102adabSPaul E. McKenney return hc; 35544102adabSPaul E. McKenney } 35554102adabSPaul E. McKenney 35564102adabSPaul E. McKenney /* 35574102adabSPaul E. McKenney * Helper function for _rcu_barrier() tracing. If tracing is disabled, 35584102adabSPaul E. McKenney * the compiler is expected to optimize this away. 35594102adabSPaul E. McKenney */ 35604102adabSPaul E. McKenney static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s, 35614102adabSPaul E. McKenney int cpu, unsigned long done) 35624102adabSPaul E. McKenney { 35634102adabSPaul E. McKenney trace_rcu_barrier(rsp->name, s, cpu, 35644102adabSPaul E. McKenney atomic_read(&rsp->barrier_cpu_count), done); 35654102adabSPaul E. 
McKenney } 35664102adabSPaul E. McKenney 35674102adabSPaul E. McKenney /* 35684102adabSPaul E. McKenney * RCU callback function for _rcu_barrier(). If we are last, wake 35694102adabSPaul E. McKenney * up the task executing _rcu_barrier(). 35704102adabSPaul E. McKenney */ 35714102adabSPaul E. McKenney static void rcu_barrier_callback(struct rcu_head *rhp) 35724102adabSPaul E. McKenney { 35734102adabSPaul E. McKenney struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); 35744102adabSPaul E. McKenney struct rcu_state *rsp = rdp->rsp; 35754102adabSPaul E. McKenney 35764102adabSPaul E. McKenney if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { 35774102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done); 35784102adabSPaul E. McKenney complete(&rsp->barrier_completion); 35794102adabSPaul E. McKenney } else { 35804102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done); 35814102adabSPaul E. McKenney } 35824102adabSPaul E. McKenney } 35834102adabSPaul E. McKenney 35844102adabSPaul E. McKenney /* 35854102adabSPaul E. McKenney * Called with preemption disabled, and from cross-cpu IRQ context. 35864102adabSPaul E. McKenney */ 35874102adabSPaul E. McKenney static void rcu_barrier_func(void *type) 35884102adabSPaul E. McKenney { 35894102adabSPaul E. McKenney struct rcu_state *rsp = type; 3590fa07a58fSChristoph Lameter struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 35914102adabSPaul E. McKenney 35924102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); 35934102adabSPaul E. McKenney atomic_inc(&rsp->barrier_cpu_count); 35944102adabSPaul E. McKenney rsp->call(&rdp->barrier_head, rcu_barrier_callback); 35954102adabSPaul E. McKenney } 35964102adabSPaul E. McKenney 35974102adabSPaul E. McKenney /* 35984102adabSPaul E. McKenney * Orchestrate the specified type of RCU barrier, waiting for all 35994102adabSPaul E. McKenney * RCU callbacks of the specified type to complete. 36004102adabSPaul E. McKenney */ 36014102adabSPaul E. McKenney static void _rcu_barrier(struct rcu_state *rsp) 36024102adabSPaul E. McKenney { 36034102adabSPaul E. McKenney int cpu; 36044102adabSPaul E. McKenney struct rcu_data *rdp; 36057d0ae808SPaul E. McKenney unsigned long snap = READ_ONCE(rsp->n_barrier_done); 36064102adabSPaul E. McKenney unsigned long snap_done; 36074102adabSPaul E. McKenney 36084102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "Begin", -1, snap); 36094102adabSPaul E. McKenney 36104102adabSPaul E. McKenney /* Take mutex to serialize concurrent rcu_barrier() requests. */ 36114102adabSPaul E. McKenney mutex_lock(&rsp->barrier_mutex); 36124102adabSPaul E. McKenney 36134102adabSPaul E. McKenney /* 36144102adabSPaul E. McKenney * Ensure that all prior references, including to ->n_barrier_done, 36154102adabSPaul E. McKenney * are ordered before the _rcu_barrier() machinery. 36164102adabSPaul E. McKenney */ 36174102adabSPaul E. McKenney smp_mb(); /* See above block comment. */ 36184102adabSPaul E. McKenney 36194102adabSPaul E. McKenney /* 36204102adabSPaul E. McKenney * Recheck ->n_barrier_done to see if others did our work for us. 36214102adabSPaul E. McKenney * This means checking ->n_barrier_done for an even-to-odd-to-even 36224102adabSPaul E. McKenney * transition. The "if" expression below therefore rounds the old 36234102adabSPaul E. McKenney * value up to the next even number and adds two before comparing. 36244102adabSPaul E. McKenney */ 36254102adabSPaul E. McKenney snap_done = rsp->n_barrier_done; 36264102adabSPaul E. 
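	/*
	 * Editorial worked example for the early-exit check explained below:
	 * if snap == 4 (even), (4 + 3) & ~0x1 == 6 == snap + 2; if snap == 5
	 * (odd), (5 + 3) & ~0x1 == 8 == snap + 3, matching the two cases
	 * described in the comment that follows.
	 */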
McKenney _rcu_barrier_trace(rsp, "Check", -1, snap_done); 36274102adabSPaul E. McKenney 36284102adabSPaul E. McKenney /* 36294102adabSPaul E. McKenney * If the value in snap is odd, we needed to wait for the current 36304102adabSPaul E. McKenney * rcu_barrier() to complete, then wait for the next one, in other 36314102adabSPaul E. McKenney * words, we need the value of snap_done to be three larger than 36324102adabSPaul E. McKenney * the value of snap. On the other hand, if the value in snap is 36334102adabSPaul E. McKenney * even, we only had to wait for the next rcu_barrier() to complete, 36344102adabSPaul E. McKenney * in other words, we need the value of snap_done to be only two 36354102adabSPaul E. McKenney * greater than the value of snap. The "(snap + 3) & ~0x1" computes 36364102adabSPaul E. McKenney * this for us (thank you, Linus!). 36374102adabSPaul E. McKenney */ 36384102adabSPaul E. McKenney if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) { 36394102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); 36404102adabSPaul E. McKenney smp_mb(); /* caller's subsequent code after above check. */ 36414102adabSPaul E. McKenney mutex_unlock(&rsp->barrier_mutex); 36424102adabSPaul E. McKenney return; 36434102adabSPaul E. McKenney } 36444102adabSPaul E. McKenney 36454102adabSPaul E. McKenney /* 36464102adabSPaul E. McKenney * Increment ->n_barrier_done to avoid duplicate work. Use 36477d0ae808SPaul E. McKenney * WRITE_ONCE() to prevent the compiler from speculating 36484102adabSPaul E. McKenney * the increment to precede the early-exit check. 36494102adabSPaul E. McKenney */ 36507d0ae808SPaul E. McKenney WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1); 36514102adabSPaul E. McKenney WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); 36524102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); 36534102adabSPaul E. McKenney smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ 36544102adabSPaul E. McKenney 36554102adabSPaul E. McKenney /* 36564102adabSPaul E. McKenney * Initialize the count to one rather than to zero in order to 36574102adabSPaul E. McKenney * avoid a too-soon return to zero in case of a short grace period 36584102adabSPaul E. McKenney * (or preemption of this task). Exclude CPU-hotplug operations 36594102adabSPaul E. McKenney * to ensure that no offline CPU has callbacks queued. 36604102adabSPaul E. McKenney */ 36614102adabSPaul E. McKenney init_completion(&rsp->barrier_completion); 36624102adabSPaul E. McKenney atomic_set(&rsp->barrier_cpu_count, 1); 36634102adabSPaul E. McKenney get_online_cpus(); 36644102adabSPaul E. McKenney 36654102adabSPaul E. McKenney /* 36664102adabSPaul E. McKenney * Force each CPU with callbacks to register a new callback. 36674102adabSPaul E. McKenney * When that callback is invoked, we will know that all of the 36684102adabSPaul E. McKenney * corresponding CPU's preceding callbacks have been invoked. 36694102adabSPaul E. McKenney */ 36704102adabSPaul E. McKenney for_each_possible_cpu(cpu) { 36714102adabSPaul E. McKenney if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu)) 36724102adabSPaul E. McKenney continue; 36734102adabSPaul E. McKenney rdp = per_cpu_ptr(rsp->rda, cpu); 36744102adabSPaul E. McKenney if (rcu_is_nocb_cpu(cpu)) { 3675d7e29933SPaul E. McKenney if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) { 3676d7e29933SPaul E. McKenney _rcu_barrier_trace(rsp, "OfflineNoCB", cpu, 3677d7e29933SPaul E. McKenney rsp->n_barrier_done); 3678d7e29933SPaul E. 
McKenney } else { 36794102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, 36804102adabSPaul E. McKenney rsp->n_barrier_done); 368141050a00SPaul E. McKenney smp_mb__before_atomic(); 36824102adabSPaul E. McKenney atomic_inc(&rsp->barrier_cpu_count); 3683d7e29933SPaul E. McKenney __call_rcu(&rdp->barrier_head, 3684d7e29933SPaul E. McKenney rcu_barrier_callback, rsp, cpu, 0); 3685d7e29933SPaul E. McKenney } 36867d0ae808SPaul E. McKenney } else if (READ_ONCE(rdp->qlen)) { 36874102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "OnlineQ", cpu, 36884102adabSPaul E. McKenney rsp->n_barrier_done); 36894102adabSPaul E. McKenney smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); 36904102adabSPaul E. McKenney } else { 36914102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "OnlineNQ", cpu, 36924102adabSPaul E. McKenney rsp->n_barrier_done); 36934102adabSPaul E. McKenney } 36944102adabSPaul E. McKenney } 36954102adabSPaul E. McKenney put_online_cpus(); 36964102adabSPaul E. McKenney 36974102adabSPaul E. McKenney /* 36984102adabSPaul E. McKenney * Now that we have an rcu_barrier_callback() callback on each 36994102adabSPaul E. McKenney * CPU, and thus each counted, remove the initial count. 37004102adabSPaul E. McKenney */ 37014102adabSPaul E. McKenney if (atomic_dec_and_test(&rsp->barrier_cpu_count)) 37024102adabSPaul E. McKenney complete(&rsp->barrier_completion); 37034102adabSPaul E. McKenney 37044102adabSPaul E. McKenney /* Increment ->n_barrier_done to prevent duplicate work. */ 37054102adabSPaul E. McKenney smp_mb(); /* Keep increment after above mechanism. */ 37067d0ae808SPaul E. McKenney WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1); 37074102adabSPaul E. McKenney WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); 37084102adabSPaul E. McKenney _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); 37094102adabSPaul E. McKenney smp_mb(); /* Keep increment before caller's subsequent code. */ 37104102adabSPaul E. McKenney 37114102adabSPaul E. McKenney /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ 37124102adabSPaul E. McKenney wait_for_completion(&rsp->barrier_completion); 37134102adabSPaul E. McKenney 37144102adabSPaul E. McKenney /* Other rcu_barrier() invocations can now safely proceed. */ 37154102adabSPaul E. McKenney mutex_unlock(&rsp->barrier_mutex); 37164102adabSPaul E. McKenney } 37174102adabSPaul E. McKenney 37184102adabSPaul E. McKenney /** 37194102adabSPaul E. McKenney * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. 37204102adabSPaul E. McKenney */ 37214102adabSPaul E. McKenney void rcu_barrier_bh(void) 37224102adabSPaul E. McKenney { 37234102adabSPaul E. McKenney _rcu_barrier(&rcu_bh_state); 37244102adabSPaul E. McKenney } 37254102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(rcu_barrier_bh); 37264102adabSPaul E. McKenney 37274102adabSPaul E. McKenney /** 37284102adabSPaul E. McKenney * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 37294102adabSPaul E. McKenney */ 37304102adabSPaul E. McKenney void rcu_barrier_sched(void) 37314102adabSPaul E. McKenney { 37324102adabSPaul E. McKenney _rcu_barrier(&rcu_sched_state); 37334102adabSPaul E. McKenney } 37344102adabSPaul E. McKenney EXPORT_SYMBOL_GPL(rcu_barrier_sched); 37354102adabSPaul E. McKenney 37364102adabSPaul E. McKenney /* 37370aa04b05SPaul E. McKenney * Propagate ->qsinitmask bits up the rcu_node tree to account for the 37380aa04b05SPaul E. McKenney * first CPU in a given leaf rcu_node structure coming online. The caller 37390aa04b05SPaul E. 
McKenney * must hold the corresponding leaf rcu_node ->lock with interrupts 37400aa04b05SPaul E. McKenney * disabled. 37410aa04b05SPaul E. McKenney */ 37420aa04b05SPaul E. McKenney static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) 37430aa04b05SPaul E. McKenney { 37440aa04b05SPaul E. McKenney long mask; 37450aa04b05SPaul E. McKenney struct rcu_node *rnp = rnp_leaf; 37460aa04b05SPaul E. McKenney 37470aa04b05SPaul E. McKenney for (;;) { 37480aa04b05SPaul E. McKenney mask = rnp->grpmask; 37490aa04b05SPaul E. McKenney rnp = rnp->parent; 37500aa04b05SPaul E. McKenney if (rnp == NULL) 37510aa04b05SPaul E. McKenney return; 37520aa04b05SPaul E. McKenney raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */ 37530aa04b05SPaul E. McKenney rnp->qsmaskinit |= mask; 37540aa04b05SPaul E. McKenney raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ 37550aa04b05SPaul E. McKenney } 37560aa04b05SPaul E. McKenney } 37570aa04b05SPaul E. McKenney 37580aa04b05SPaul E. McKenney /* 37594102adabSPaul E. McKenney * Do boot-time initialization of a CPU's per-CPU RCU data. 37604102adabSPaul E. McKenney */ 37614102adabSPaul E. McKenney static void __init 37624102adabSPaul E. McKenney rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 37634102adabSPaul E. McKenney { 37644102adabSPaul E. McKenney unsigned long flags; 37654102adabSPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 37664102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 37674102adabSPaul E. McKenney 37684102adabSPaul E. McKenney /* Set up local state, ensuring consistent view of global state. */ 37694102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 37704102adabSPaul E. McKenney rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 37714102adabSPaul E. McKenney rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 37724102adabSPaul E. McKenney WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); 37734102adabSPaul E. McKenney WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 37744102adabSPaul E. McKenney rdp->cpu = cpu; 37754102adabSPaul E. McKenney rdp->rsp = rsp; 37764102adabSPaul E. McKenney rcu_boot_init_nocb_percpu_data(rdp); 37774102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 37784102adabSPaul E. McKenney } 37794102adabSPaul E. McKenney 37804102adabSPaul E. McKenney /* 37814102adabSPaul E. McKenney * Initialize a CPU's per-CPU RCU data. Note that only one online or 37824102adabSPaul E. McKenney * offline event can be happening at a given time. Note also that we 37834102adabSPaul E. McKenney * can accept some slop in the rsp->completed access due to the fact 37844102adabSPaul E. McKenney * that this CPU cannot possibly have any RCU callbacks in flight yet. 37854102adabSPaul E. McKenney */ 37864102adabSPaul E. McKenney static void 37879b67122aSIulia Manda rcu_init_percpu_data(int cpu, struct rcu_state *rsp) 37884102adabSPaul E. McKenney { 37894102adabSPaul E. McKenney unsigned long flags; 37904102adabSPaul E. McKenney unsigned long mask; 37914102adabSPaul E. McKenney struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 37924102adabSPaul E. McKenney struct rcu_node *rnp = rcu_get_root(rsp); 37934102adabSPaul E. McKenney 37944102adabSPaul E. McKenney /* Set up local state, ensuring consistent view of global state. */ 37954102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 37964102adabSPaul E. McKenney rdp->beenonline = 1; /* We have now been online. */ 37974102adabSPaul E.
McKenney rdp->qlen_last_fqs_check = 0; 37984102adabSPaul E. McKenney rdp->n_force_qs_snap = rsp->n_force_qs; 37994102adabSPaul E. McKenney rdp->blimit = blimit; 380039c8d313SPaul E. McKenney if (!rdp->nxtlist) 38014102adabSPaul E. McKenney init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ 38024102adabSPaul E. McKenney rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 38034102adabSPaul E. McKenney rcu_sysidle_init_percpu_data(rdp->dynticks); 38044102adabSPaul E. McKenney atomic_set(&rdp->dynticks->dynticks, 38054102adabSPaul E. McKenney (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); 38064102adabSPaul E. McKenney raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 38074102adabSPaul E. McKenney 38080aa04b05SPaul E. McKenney /* 38090aa04b05SPaul E. McKenney * Add CPU to leaf rcu_node pending-online bitmask. Any needed 38100aa04b05SPaul E. McKenney * propagation up the rcu_node tree will happen at the beginning 38110aa04b05SPaul E. McKenney * of the next grace period. 38120aa04b05SPaul E. McKenney */ 38134102adabSPaul E. McKenney rnp = rdp->mynode; 38144102adabSPaul E. McKenney mask = rdp->grpmask; 38154102adabSPaul E. McKenney raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 38160aa04b05SPaul E. McKenney smp_mb__after_unlock_lock(); 38170aa04b05SPaul E. McKenney rnp->qsmaskinitnext |= mask; 38180aa04b05SPaul E. McKenney rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ 38194102adabSPaul E. McKenney rdp->completed = rnp->completed; 38200aa04b05SPaul E. McKenney rdp->passed_quiesce = false; 38215cd37193SPaul E. McKenney rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); 38220aa04b05SPaul E. McKenney rdp->qs_pending = false; 38234102adabSPaul E. McKenney trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); 38240aa04b05SPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 38254102adabSPaul E. McKenney } 38264102adabSPaul E. McKenney 38274102adabSPaul E. McKenney static void rcu_prepare_cpu(int cpu) 38284102adabSPaul E. McKenney { 38294102adabSPaul E. McKenney struct rcu_state *rsp; 38304102adabSPaul E. McKenney 38314102adabSPaul E. McKenney for_each_rcu_flavor(rsp) 38329b67122aSIulia Manda rcu_init_percpu_data(cpu, rsp); 38334102adabSPaul E. McKenney } 38344102adabSPaul E. McKenney 38354102adabSPaul E. McKenney /* 38364102adabSPaul E. McKenney * Handle CPU online/offline notification events. 38374102adabSPaul E. McKenney */ 383888428cc5SPaul E. McKenney int rcu_cpu_notify(struct notifier_block *self, 38394102adabSPaul E. McKenney unsigned long action, void *hcpu) 38404102adabSPaul E. McKenney { 38414102adabSPaul E. McKenney long cpu = (long)hcpu; 3842e534165bSUma Sharma struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); 38434102adabSPaul E. McKenney struct rcu_node *rnp = rdp->mynode; 38444102adabSPaul E. McKenney struct rcu_state *rsp; 38454102adabSPaul E. McKenney 38464102adabSPaul E. McKenney switch (action) { 38474102adabSPaul E. McKenney case CPU_UP_PREPARE: 38484102adabSPaul E. McKenney case CPU_UP_PREPARE_FROZEN: 38494102adabSPaul E. McKenney rcu_prepare_cpu(cpu); 38504102adabSPaul E. McKenney rcu_prepare_kthreads(cpu); 385135ce7f29SPaul E. McKenney rcu_spawn_all_nocb_kthreads(cpu); 38524102adabSPaul E. McKenney break; 38534102adabSPaul E. McKenney case CPU_ONLINE: 38544102adabSPaul E. McKenney case CPU_DOWN_FAILED: 38554102adabSPaul E. McKenney rcu_boost_kthread_setaffinity(rnp, -1); 38564102adabSPaul E. McKenney break; 38574102adabSPaul E. McKenney case CPU_DOWN_PREPARE: 38584102adabSPaul E. 
McKenney rcu_boost_kthread_setaffinity(rnp, cpu); 38594102adabSPaul E. McKenney break; 38604102adabSPaul E. McKenney case CPU_DYING: 38614102adabSPaul E. McKenney case CPU_DYING_FROZEN: 38624102adabSPaul E. McKenney for_each_rcu_flavor(rsp) 38634102adabSPaul E. McKenney rcu_cleanup_dying_cpu(rsp); 38644102adabSPaul E. McKenney break; 386588428cc5SPaul E. McKenney case CPU_DYING_IDLE: 386688428cc5SPaul E. McKenney for_each_rcu_flavor(rsp) { 386788428cc5SPaul E. McKenney rcu_cleanup_dying_idle_cpu(cpu, rsp); 386888428cc5SPaul E. McKenney } 386988428cc5SPaul E. McKenney break; 38704102adabSPaul E. McKenney case CPU_DEAD: 38714102adabSPaul E. McKenney case CPU_DEAD_FROZEN: 38724102adabSPaul E. McKenney case CPU_UP_CANCELED: 38734102adabSPaul E. McKenney case CPU_UP_CANCELED_FROZEN: 3874776d6807SPaul E. McKenney for_each_rcu_flavor(rsp) { 38754102adabSPaul E. McKenney rcu_cleanup_dead_cpu(cpu, rsp); 3876776d6807SPaul E. McKenney do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu)); 3877776d6807SPaul E. McKenney } 38784102adabSPaul E. McKenney break; 38794102adabSPaul E. McKenney default: 38804102adabSPaul E. McKenney break; 38814102adabSPaul E. McKenney } 38824102adabSPaul E. McKenney return NOTIFY_OK; 38834102adabSPaul E. McKenney } 38844102adabSPaul E. McKenney 38854102adabSPaul E. McKenney static int rcu_pm_notify(struct notifier_block *self, 38864102adabSPaul E. McKenney unsigned long action, void *hcpu) 38874102adabSPaul E. McKenney { 38884102adabSPaul E. McKenney switch (action) { 38894102adabSPaul E. McKenney case PM_HIBERNATION_PREPARE: 38904102adabSPaul E. McKenney case PM_SUSPEND_PREPARE: 38914102adabSPaul E. McKenney if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ 38925afff48bSPaul E. McKenney rcu_expedite_gp(); 38934102adabSPaul E. McKenney break; 38944102adabSPaul E. McKenney case PM_POST_HIBERNATION: 38954102adabSPaul E. McKenney case PM_POST_SUSPEND: 38965afff48bSPaul E. McKenney if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ 38975afff48bSPaul E. McKenney rcu_unexpedite_gp(); 38984102adabSPaul E. McKenney break; 38994102adabSPaul E. McKenney default: 39004102adabSPaul E. McKenney break; 39014102adabSPaul E. McKenney } 39024102adabSPaul E. McKenney return NOTIFY_OK; 39034102adabSPaul E. McKenney } 39044102adabSPaul E. McKenney 39054102adabSPaul E. McKenney /* 39069386c0b7SPaul E. McKenney * Spawn the kthreads that handle each RCU flavor's grace periods. 39074102adabSPaul E. McKenney */ 39084102adabSPaul E. McKenney static int __init rcu_spawn_gp_kthread(void) 39094102adabSPaul E. McKenney { 39104102adabSPaul E. McKenney unsigned long flags; 3911a94844b2SPaul E. McKenney int kthread_prio_in = kthread_prio; 39124102adabSPaul E. McKenney struct rcu_node *rnp; 39134102adabSPaul E. McKenney struct rcu_state *rsp; 3914a94844b2SPaul E. McKenney struct sched_param sp; 39154102adabSPaul E. McKenney struct task_struct *t; 39164102adabSPaul E. McKenney 3917a94844b2SPaul E. McKenney /* Force priority into range. */ 3918a94844b2SPaul E. McKenney if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) 3919a94844b2SPaul E. McKenney kthread_prio = 1; 3920a94844b2SPaul E. McKenney else if (kthread_prio < 0) 3921a94844b2SPaul E. McKenney kthread_prio = 0; 3922a94844b2SPaul E. McKenney else if (kthread_prio > 99) 3923a94844b2SPaul E. McKenney kthread_prio = 99; 3924a94844b2SPaul E. McKenney if (kthread_prio != kthread_prio_in) 3925a94844b2SPaul E. McKenney pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n", 3926a94844b2SPaul E. 
McKenney kthread_prio, kthread_prio_in); 3927a94844b2SPaul E. McKenney 39289386c0b7SPaul E. McKenney rcu_scheduler_fully_active = 1; 39294102adabSPaul E. McKenney for_each_rcu_flavor(rsp) { 3930a94844b2SPaul E. McKenney t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name); 39314102adabSPaul E. McKenney BUG_ON(IS_ERR(t)); 39324102adabSPaul E. McKenney rnp = rcu_get_root(rsp); 39334102adabSPaul E. McKenney raw_spin_lock_irqsave(&rnp->lock, flags); 39344102adabSPaul E. McKenney rsp->gp_kthread = t; 3935a94844b2SPaul E. McKenney if (kthread_prio) { 3936a94844b2SPaul E. McKenney sp.sched_priority = kthread_prio; 3937a94844b2SPaul E. McKenney sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 3938a94844b2SPaul E. McKenney } 3939a94844b2SPaul E. McKenney wake_up_process(t); 39404102adabSPaul E. McKenney raw_spin_unlock_irqrestore(&rnp->lock, flags); 39414102adabSPaul E. McKenney } 394235ce7f29SPaul E. McKenney rcu_spawn_nocb_kthreads(); 39439386c0b7SPaul E. McKenney rcu_spawn_boost_kthreads(); 39444102adabSPaul E. McKenney return 0; 39454102adabSPaul E. McKenney } 39464102adabSPaul E. McKenney early_initcall(rcu_spawn_gp_kthread); 39474102adabSPaul E. McKenney 39484102adabSPaul E. McKenney /* 39494102adabSPaul E. McKenney * This function is invoked towards the end of the scheduler's initialization 39504102adabSPaul E. McKenney * process. Before this is called, the idle task might contain 39514102adabSPaul E. McKenney * RCU read-side critical sections (during which time, this idle 39524102adabSPaul E. McKenney * task is booting the system). After this function is called, the 39534102adabSPaul E. McKenney * idle tasks are prohibited from containing RCU read-side critical 39544102adabSPaul E. McKenney * sections. This function also enables RCU lockdep checking. 39554102adabSPaul E. McKenney */ 39564102adabSPaul E. McKenney void rcu_scheduler_starting(void) 39574102adabSPaul E. McKenney { 39584102adabSPaul E. McKenney WARN_ON(num_online_cpus() != 1); 39594102adabSPaul E. McKenney WARN_ON(nr_context_switches() > 0); 39604102adabSPaul E. McKenney rcu_scheduler_active = 1; 39614102adabSPaul E. McKenney } 39624102adabSPaul E. McKenney 39634102adabSPaul E. McKenney /* 39644102adabSPaul E. McKenney * Compute the per-level fanout, either using the exact fanout specified 39657fa27001SPaul E. McKenney * or balancing the tree, depending on the rcu_fanout_exact boot parameter. 39664102adabSPaul E. McKenney */ 39674102adabSPaul E. McKenney static void __init rcu_init_levelspread(struct rcu_state *rsp) 39684102adabSPaul E. McKenney { 39694102adabSPaul E. McKenney int i; 39704102adabSPaul E. McKenney 39717fa27001SPaul E. McKenney if (rcu_fanout_exact) { 397204f34650SPaul E. McKenney rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; 397304f34650SPaul E. McKenney for (i = rcu_num_lvls - 2; i >= 0; i--) 397405c5df31SPaul E. McKenney rsp->levelspread[i] = RCU_FANOUT; 397566292405SPaul E. McKenney } else { 39764102adabSPaul E. McKenney int ccur; 39774102adabSPaul E. McKenney int cprv; 39784102adabSPaul E. McKenney 39794102adabSPaul E. McKenney cprv = nr_cpu_ids; 39804102adabSPaul E. McKenney for (i = rcu_num_lvls - 1; i >= 0; i--) { 39814102adabSPaul E. McKenney ccur = rsp->levelcnt[i]; 39824102adabSPaul E. McKenney rsp->levelspread[i] = (cprv + ccur - 1) / ccur; 39834102adabSPaul E. McKenney cprv = ccur; 39844102adabSPaul E. McKenney } 39854102adabSPaul E. McKenney } 398666292405SPaul E. McKenney } 39874102adabSPaul E. McKenney 39884102adabSPaul E. McKenney /* 39894102adabSPaul E. 
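 * (Editorial worked example for rcu_init_levelspread() above, using
 * hypothetical numbers: with rcu_fanout_exact clear, nr_cpu_ids == 36,
 * rcu_num_lvls == 2, and levelcnt == {1, 3}, the loop computes
 * levelspread[1] = (36 + 3 - 1) / 3 = 12 CPUs per leaf node and
 * levelspread[0] = (3 + 1 - 1) / 1 = 3 leaf nodes under the root.)
 *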

/*
 * Helper function for rcu_init() that initializes one rcu_state structure.
 */
static void __init rcu_init_one(struct rcu_state *rsp,
		struct rcu_data __percpu *rda)
{
	static const char * const buf[] = {
		"rcu_node_0",
		"rcu_node_1",
		"rcu_node_2",
		"rcu_node_3" };  /* Match MAX_RCU_LVLS */
	static const char * const fqs[] = {
		"rcu_node_fqs_0",
		"rcu_node_fqs_1",
		"rcu_node_fqs_2",
		"rcu_node_fqs_3" };  /* Match MAX_RCU_LVLS */
	static u8 fl_mask = 0x1;
	int cpustride = 1;
	int i;
	int j;
	struct rcu_node *rnp;

	BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */

	/* Silence gcc 4.8 false positive about array index out of range. */
	if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS)
		panic("rcu_init_one: rcu_num_lvls out of range");

	/* Initialize the level-tracking arrays. */

	for (i = 0; i < rcu_num_lvls; i++)
		rsp->levelcnt[i] = num_rcu_lvl[i];
	for (i = 1; i < rcu_num_lvls; i++)
		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
	rcu_init_levelspread(rsp);
	rsp->flavor_mask = fl_mask;
	fl_mask <<= 1;

	/* Initialize the elements themselves, starting from the leaves. */

	for (i = rcu_num_lvls - 1; i >= 0; i--) {
		cpustride *= rsp->levelspread[i];
		rnp = rsp->level[i];
		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
			raw_spin_lock_init(&rnp->lock);
			lockdep_set_class_and_name(&rnp->lock,
						   &rcu_node_class[i], buf[i]);
			raw_spin_lock_init(&rnp->fqslock);
			lockdep_set_class_and_name(&rnp->fqslock,
						   &rcu_fqs_class[i], fqs[i]);
			rnp->gpnum = rsp->gpnum;
			rnp->completed = rsp->completed;
			rnp->qsmask = 0;
			rnp->qsmaskinit = 0;
			rnp->grplo = j * cpustride;
			rnp->grphi = (j + 1) * cpustride - 1;
			if (rnp->grphi >= nr_cpu_ids)
				rnp->grphi = nr_cpu_ids - 1;
			if (i == 0) {
				rnp->grpnum = 0;
				rnp->grpmask = 0;
				rnp->parent = NULL;
			} else {
				rnp->grpnum = j % rsp->levelspread[i - 1];
				rnp->grpmask = 1UL << rnp->grpnum;
				rnp->parent = rsp->level[i - 1] +
					      j / rsp->levelspread[i - 1];
			}
			rnp->level = i;
			INIT_LIST_HEAD(&rnp->blkd_tasks);
			rcu_init_one_nocb(rnp);
		}
	}

	init_waitqueue_head(&rsp->gp_wq);
	rnp = rsp->level[rcu_num_lvls - 1];
	for_each_possible_cpu(i) {
		while (i > rnp->grphi)
			rnp++;
		per_cpu_ptr(rsp->rda, i)->mynode = rnp;
		rcu_boot_init_percpu_data(i, rsp);
	}
	list_add(&rsp->flavors, &rcu_struct_flavors);
}
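
/*
 * Editor's note: illustrative figures, not part of the original source.
 * Continuing the hypothetical 96-CPU, rcu_fanout_leaf = 16 geometry from
 * the note above, rcu_init_one() gives the second leaf node grplo = 16,
 * grphi = 31, grpnum = 1, and grpmask = 0x2 within the root, and the
 * final for_each_possible_cpu() loop points, for example, CPU 37's
 * rcu_data ->mynode at the leaf covering CPUs 32-47.  Each call also
 * consumes the next bit of the static fl_mask, so successive rcu_state
 * structures receive flavor_mask values 0x1, 0x2, 0x4, and so on.
 */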

/*
 * Compute the rcu_node tree geometry from kernel parameters.  This cannot
 * replace the definitions in tree.h because those are needed to size
 * the ->node array in the rcu_state structure.
 */
static void __init rcu_init_geometry(void)
{
	ulong d;
	int i;
	int j;
	int n = nr_cpu_ids;
	int rcu_capacity[MAX_RCU_LVLS + 1];

	/*
	 * Initialize any unspecified boot parameters.
	 * The default values of jiffies_till_first_fqs and
	 * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
	 * value (which is a function of HZ), plus one for each
	 * RCU_JIFFIES_FQS_DIV CPUs that might be on the system.
	 */
	d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
	if (jiffies_till_first_fqs == ULONG_MAX)
		jiffies_till_first_fqs = d;
	if (jiffies_till_next_fqs == ULONG_MAX)
		jiffies_till_next_fqs = d;

	/* If the compile-time values are accurate, just leave. */
	if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
	    nr_cpu_ids == NR_CPUS)
		return;
	pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
		rcu_fanout_leaf, nr_cpu_ids);

	/*
	 * Compute the number of nodes that can be handled by an rcu_node
	 * tree with the given number of levels.  Setting rcu_capacity[0]
	 * makes some of the arithmetic easier.
	 */
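	/*
	 * Editor's note: worked example, not part of the original source.
	 * Taking MAX_RCU_LVLS as 4 (matching the four rcu_node_* names in
	 * rcu_init_one()) and assuming rcu_fanout_leaf = 16 with
	 * RCU_FANOUT = 64 -- both values are configuration-dependent --
	 * the loop below yields
	 *
	 *   rcu_capacity[] = { 1, 16, 1024, 65536, 4194304 }
	 *
	 * so a one-level tree handles up to 16 CPUs, a two-level tree up
	 * to 1024, and so on.  A 96-CPU system therefore settles on
	 * rcu_num_lvls = 2 in the level-count loop further down.
	 */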
	rcu_capacity[0] = 1;
	rcu_capacity[1] = rcu_fanout_leaf;
	for (i = 2; i <= MAX_RCU_LVLS; i++)
		rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;

	/*
	 * The boot-time rcu_fanout_leaf parameter is only permitted
	 * to increase the leaf-level fanout, not decrease it.  Of course,
	 * the leaf-level fanout cannot exceed the number of bits in
	 * the rcu_node masks.  Finally, the tree must be able to accommodate
	 * the configured number of CPUs.  Complain and fall back to the
	 * compile-time values if these limits are exceeded.
	 */
	if (rcu_fanout_leaf < RCU_FANOUT_LEAF ||
	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
	    n > rcu_capacity[MAX_RCU_LVLS]) {
		WARN_ON(1);
		return;
	}

	/* Calculate the number of rcu_nodes at each level of the tree. */
	for (i = 1; i <= MAX_RCU_LVLS; i++)
		if (n <= rcu_capacity[i]) {
			for (j = 0; j <= i; j++)
				num_rcu_lvl[j] =
					DIV_ROUND_UP(n, rcu_capacity[i - j]);
			rcu_num_lvls = i;
			for (j = i + 1; j <= MAX_RCU_LVLS; j++)
				num_rcu_lvl[j] = 0;
			break;
		}

	/* Calculate the total number of rcu_node structures. */
	rcu_num_nodes = 0;
	for (i = 0; i <= MAX_RCU_LVLS; i++)
		rcu_num_nodes += num_rcu_lvl[i];
	rcu_num_nodes -= n;
}

/*
 * Dump out the structure of the rcu_node combining tree associated
 * with the rcu_state structure referenced by rsp.
 */
static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
{
	int level = 0;
	struct rcu_node *rnp;

	pr_info("rcu_node tree layout dump\n");
	pr_info(" ");
	rcu_for_each_node_breadth_first(rsp, rnp) {
		if (rnp->level != level) {
			pr_cont("\n");
			pr_info(" ");
			level = rnp->level;
		}
		pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum);
	}
	pr_cont("\n");
}
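
/*
 * Editor's note: sample output, not taken from an actual boot log.
 * For the hypothetical 96-CPU, two-level geometry used in the earlier
 * notes, the dump above would emit roughly:
 *
 *   rcu_node tree layout dump
 *    0:95 ^0
 *    0:15 ^0 16:31 ^1 32:47 ^2 48:63 ^3 64:79 ^4 80:95 ^5
 *
 * one line per level of the tree, each entry showing grplo:grphi ^grpnum
 * for the corresponding rcu_node structure.
 */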

void __init rcu_init(void)
{
	int cpu;

	rcu_early_boot_tests();

	rcu_bootup_announce();
	rcu_init_geometry();
	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
	if (dump_tree)
		rcu_dump_rcu_node_tree(&rcu_sched_state);
	__rcu_init_preempt();
	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

	/*
	 * We don't need protection against CPU-hotplug here because
	 * this is called early in boot, before either interrupts
	 * or the scheduler are operational.
	 */
	cpu_notifier(rcu_cpu_notify, 0);
	pm_notifier(rcu_pm_notify, 0);
	for_each_online_cpu(cpu)
		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
}

#include "tree_plugin.h"
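
/*
 * Editor's note: usage sketch, not part of the original source.  The
 * knobs consumed above are exposed as boot parameters, assuming the
 * usual "rcutree." module-parameter prefix and that module_param()
 * declarations for these variables exist elsewhere in the RCU sources.
 * A command line such as
 *
 *   rcutree.rcu_fanout_leaf=32 rcutree.kthread_prio=1 rcutree.dump_tree=1
 *
 * would ask rcu_init_geometry() to rebuild the combining tree with up to
 * 32 CPUs per leaf, have rcu_spawn_gp_kthread() run the grace-period
 * kthreads at SCHED_FIFO priority 1, and make rcu_init() dump the
 * resulting rcu_node layout at boot.
 */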