/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright (C) IBM Corporation, 2006
 * Copyright (C) Fujitsu, 2012
 *
 * Author: Paul McKenney <paulmck@us.ibm.com>
 *	   Lai Jiangshan <laijs@cn.fujitsu.com>
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
 *
 */

#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/srcu.h>

#include "rcu.h"
#include "rcu_segcblist.h"

/* Holdoff in nanoseconds for auto-expediting. */
#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
module_param(exp_holdoff, ulong, 0444);

/* Overflow-check frequency.  N bits roughly says every 2**N grace periods. */
static ulong counter_wrap_check = (ULONG_MAX >> 2);
module_param(counter_wrap_check, ulong, 0444);
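
/*
 * Both parameters above are read-only at run time (0444).  Assuming the
 * usual KBUILD_MODNAME-based prefix for built-in module parameters
 * ("srcutree." for this file), they would be adjusted from the kernel
 * boot command line, for example:
 *
 *	srcutree.exp_holdoff=50000
 */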

static void srcu_invoke_callbacks(struct work_struct *work);
static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);

/*
 * Initialize SRCU combining tree.  Note that statically allocated
 * srcu_struct structures might already have srcu_read_lock() and
 * srcu_read_unlock() running against them.  So if the is_static parameter
 * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
 */
static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
{
	int cpu;
	int i;
	int level = 0;
	int levelspread[RCU_NUM_LVLS];
	struct srcu_data *sdp;
	struct srcu_node *snp;
	struct srcu_node *snp_first;

	/* Work out the overall tree geometry. */
	sp->level[0] = &sp->node[0];
	for (i = 1; i < rcu_num_lvls; i++)
		sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1];
	rcu_init_levelspread(levelspread, num_rcu_lvl);

	/* Each pass through this loop initializes one srcu_node structure. */
	rcu_for_each_node_breadth_first(sp, snp) {
		raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
		WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
			     ARRAY_SIZE(snp->srcu_data_have_cbs));
		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
			snp->srcu_have_cbs[i] = 0;
			snp->srcu_data_have_cbs[i] = 0;
		}
		snp->srcu_gp_seq_needed_exp = 0;
		snp->grplo = -1;
		snp->grphi = -1;
		if (snp == &sp->node[0]) {
			/* Root node, special case. */
			snp->srcu_parent = NULL;
			continue;
		}

		/* Non-root node. */
		if (snp == sp->level[level + 1])
			level++;
		snp->srcu_parent = sp->level[level - 1] +
				   (snp - sp->level[level]) /
				   levelspread[level - 1];
	}

	/*
	 * Initialize the per-CPU srcu_data array, which feeds into the
	 * leaves of the srcu_node tree.
	 */
	WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
		     ARRAY_SIZE(sdp->srcu_unlock_count));
	level = rcu_num_lvls - 1;
	snp_first = sp->level[level];
	for_each_possible_cpu(cpu) {
		sdp = per_cpu_ptr(sp->sda, cpu);
		raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
		rcu_segcblist_init(&sdp->srcu_cblist);
		sdp->srcu_cblist_invoking = false;
		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
		sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq;
		sdp->mynode = &snp_first[cpu / levelspread[level]];
		for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
			if (snp->grplo < 0)
				snp->grplo = cpu;
			snp->grphi = cpu;
		}
		sdp->cpu = cpu;
		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
		sdp->sp = sp;
		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
		if (is_static)
			continue;

		/* Dynamically allocated, better be no srcu_read_locks()! */
		for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
			sdp->srcu_lock_count[i] = 0;
			sdp->srcu_unlock_count[i] = 0;
		}
	}
}

/*
 * Initialize non-compile-time initialized fields, including the
 * associated srcu_node and srcu_data structures.  The is_static
 * parameter is passed through to init_srcu_struct_nodes(), and
 * also tells us that ->sda has already been wired up to srcu_data.
 */
static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
{
	mutex_init(&sp->srcu_cb_mutex);
	mutex_init(&sp->srcu_gp_mutex);
	sp->srcu_idx = 0;
	sp->srcu_gp_seq = 0;
	sp->srcu_barrier_seq = 0;
	mutex_init(&sp->srcu_barrier_mutex);
	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
	INIT_DELAYED_WORK(&sp->work, process_srcu);
	if (!is_static)
		sp->sda = alloc_percpu(struct srcu_data);
	init_srcu_struct_nodes(sp, is_static);
	sp->srcu_gp_seq_needed_exp = 0;
	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
	return sp->sda ? 0 : -ENOMEM;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC

int __init_srcu_struct(struct srcu_struct *sp, const char *name,
		       struct lock_class_key *key)
{
	/* Don't re-initialize a lock while it is held. */
	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
	lockdep_init_map(&sp->dep_map, name, key, 0);
	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
	return init_srcu_struct_fields(sp, false);
}
EXPORT_SYMBOL_GPL(__init_srcu_struct);

#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

/**
 * init_srcu_struct - initialize a sleep-RCU structure
 * @sp: structure to initialize.
 *
 * Must invoke this on a given srcu_struct before passing that srcu_struct
 * to any other function.  Each srcu_struct represents a separate domain
 * of SRCU protection.
 */
int init_srcu_struct(struct srcu_struct *sp)
{
	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
	return init_srcu_struct_fields(sp, false);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);

#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
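
/*
 * Usage sketch (hypothetical "my_*" names): an srcu_struct is either
 * defined statically, in which case its combining tree is wired up
 * lazily by check_init_srcu_struct() below, or initialized explicitly:
 *
 *	DEFINE_SRCU(my_srcu);			// static, global symbol
 *	DEFINE_STATIC_SRCU(my_static_srcu);	// static, file-local
 *
 *	struct srcu_struct my_dynamic_srcu;	// dynamic initialization
 *
 *	int my_init(void)
 *	{
 *		return init_srcu_struct(&my_dynamic_srcu);
 *	}
 *
 * A dynamically initialized srcu_struct must later be passed to
 * cleanup_srcu_struct() to avoid leaking the per-CPU ->sda array.
 */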

/*
 * First-use initialization of statically allocated srcu_struct
 * structure.  Wiring up the combining tree is more than can be
 * done with compile-time initialization, so this check is added
 * to each update-side SRCU primitive.  Use sp->lock, which -is-
 * compile-time initialized, to resolve races involving multiple
 * CPUs trying to garner first-use privileges.
 */
static void check_init_srcu_struct(struct srcu_struct *sp)
{
	unsigned long flags;

	WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
	/* The smp_load_acquire() pairs with the smp_store_release(). */
	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
		return; /* Already initialized. */
	raw_spin_lock_irqsave_rcu_node(sp, flags);
	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
		raw_spin_unlock_irqrestore_rcu_node(sp, flags);
		return;
	}
	init_srcu_struct_fields(sp, true);
	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
}

/*
 * Returns approximate total of the readers' ->srcu_lock_count[] values
 * for the rank of per-CPU counters specified by idx.
 */
static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
{
	int cpu;
	unsigned long sum = 0;

	for_each_possible_cpu(cpu) {
		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);

		sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
	}
	return sum;
}

/*
 * Returns approximate total of the readers' ->srcu_unlock_count[] values
 * for the rank of per-CPU counters specified by idx.
 */
static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
{
	int cpu;
	unsigned long sum = 0;

	for_each_possible_cpu(cpu) {
		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);

		sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
	}
	return sum;
}

/*
 * Return true if the number of pre-existing readers is determined to
 * be zero.
 */
static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
{
	unsigned long unlocks;

	unlocks = srcu_readers_unlock_idx(sp, idx);

	/*
	 * Make sure that a lock is always counted if the corresponding
	 * unlock is counted.  Needs to be a smp_mb() as the read side may
	 * contain a read from a variable that is written to before the
	 * synchronize_srcu() in the write side.  In this case smp_mb()s
	 * A and B act like the store buffering pattern.
	 *
	 * This smp_mb() also pairs with smp_mb() C to prevent accesses
	 * after the synchronize_srcu() from being executed before the
	 * grace period ends.
	 */
	smp_mb(); /* A */

	/*
	 * If the locks are the same as the unlocks, then there must have
	 * been no readers on this index at some time in between.  This does
	 * not mean that there are no more readers, as one could have read
	 * the current index but not have incremented the lock counter yet.
	 *
	 * So suppose that the updater is preempted here for so long
	 * that more than ULONG_MAX non-nested readers come and go in
	 * the meantime.  It turns out that this cannot result in overflow
	 * because if a reader modifies its unlock count after we read it
	 * above, then that reader's next load of ->srcu_idx is guaranteed
	 * to get the new value, which will cause it to operate on the
	 * other bank of counters, where it cannot contribute to the
	 * overflow of these counters.  This means that there is a maximum
	 * of 2*NR_CPUS increments, which cannot overflow given current
	 * systems, especially not on 64-bit systems.
	 *
	 * OK, how about nesting?  This does impose a limit on nesting
	 * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
	 * especially on 64-bit systems.
	 */
	return srcu_readers_lock_idx(sp, idx) == unlocks;
}

/**
 * srcu_readers_active - returns true if there are readers, and false
 *                       otherwise
 * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
 *
 * Note that this is not an atomic primitive, and can therefore suffer
 * severe errors when invoked on an active srcu_struct.  That said, it
 * can be useful as an error check at cleanup time.
 */
static bool srcu_readers_active(struct srcu_struct *sp)
{
	int cpu;
	unsigned long sum = 0;

	for_each_possible_cpu(cpu) {
		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);

		sum += READ_ONCE(cpuc->srcu_lock_count[0]);
		sum += READ_ONCE(cpuc->srcu_lock_count[1]);
		sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
		sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
	}
	return sum;
}

#define SRCU_INTERVAL		1

/*
 * Return grace-period delay, zero if there are expedited grace
 * periods pending, SRCU_INTERVAL otherwise.
 */
static unsigned long srcu_get_delay(struct srcu_struct *sp)
{
	if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq),
			 READ_ONCE(sp->srcu_gp_seq_needed_exp)))
		return 0;
	return SRCU_INTERVAL;
}

/**
 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
 * @sp: structure to clean up.
 *
 * Must invoke this after you are finished using a given srcu_struct that
 * was initialized via init_srcu_struct(), else you leak memory.
 */
void cleanup_srcu_struct(struct srcu_struct *sp)
{
	int cpu;

	if (WARN_ON(!srcu_get_delay(sp)))
		return; /* Leakage unless caller handles error. */
	if (WARN_ON(srcu_readers_active(sp)))
		return; /* Leakage unless caller handles error. */
	flush_delayed_work(&sp->work);
	for_each_possible_cpu(cpu)
		flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
	    WARN_ON(srcu_readers_active(sp))) {
		pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
		return; /* Caller forgot to stop doing call_srcu()? */
	}
	free_percpu(sp->sda);
	sp->sda = NULL;
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);

/*
 * Counts the new reader in the appropriate per-CPU element of the
 * srcu_struct.
 * Returns an index that must be passed to the matching srcu_read_unlock().
 */
int __srcu_read_lock(struct srcu_struct *sp)
{
	int idx;

	idx = READ_ONCE(sp->srcu_idx) & 0x1;
	this_cpu_inc(sp->sda->srcu_lock_count[idx]);
	smp_mb(); /* B */  /* Avoid leaking the critical section. */
	return idx;
}
EXPORT_SYMBOL_GPL(__srcu_read_lock);

/*
 * Removes the count for the old reader from the appropriate per-CPU
 * element of the srcu_struct.  Note that this may well be a different
 * CPU than that which was incremented by the corresponding srcu_read_lock().
 */
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
	smp_mb(); /* C */  /* Avoid leaking the critical section. */
	this_cpu_inc(sp->sda->srcu_unlock_count[idx]);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
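
/*
 * Reader-side usage sketch (hypothetical "my_srcu", "my_data_ptr", and
 * do_something_sleepable() names): readers normally go through the
 * srcu_read_lock()/srcu_read_unlock() wrappers in include/linux/srcu.h,
 * which invoke the two functions above and may sleep in between:
 *
 *	int idx;
 *	struct my_data *p;
 *
 *	idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(my_data_ptr, &my_srcu);
 *	if (p)
 *		do_something_sleepable(p);
 *	srcu_read_unlock(&my_srcu, idx);
 *
 * The index returned by srcu_read_lock() must be passed to the matching
 * srcu_read_unlock(), which may well run on a different CPU.
 */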

/*
 * We use an adaptive strategy for synchronize_srcu() and especially for
 * synchronize_srcu_expedited().  We spin for a fixed time period
 * (defined below) to allow SRCU readers to exit their read-side critical
 * sections.  If there are still some readers after a few microseconds,
 * we repeatedly block for 1-millisecond time periods.
 */
#define SRCU_RETRY_CHECK_DELAY		5

/*
 * Start an SRCU grace period.
 */
static void srcu_gp_start(struct srcu_struct *sp)
{
	struct srcu_data *sdp = this_cpu_ptr(sp->sda);
	int state;

	lockdep_assert_held(&sp->lock);
	WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&sp->srcu_gp_seq));
	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
				       rcu_seq_snap(&sp->srcu_gp_seq));
	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
	rcu_seq_start(&sp->srcu_gp_seq);
	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
}

/*
 * Track online CPUs to guide callback workqueue placement.
 */
DEFINE_PER_CPU(bool, srcu_online);

void srcu_online_cpu(unsigned int cpu)
{
	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
}

void srcu_offline_cpu(unsigned int cpu)
{
	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
}

/*
 * Place the workqueue handler on the specified CPU if online, otherwise
 * just run it wherever.  This is useful for placing workqueue handlers
 * that are to invoke the specified CPU's callbacks.
 */
static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
				       struct delayed_work *dwork,
				       unsigned long delay)
{
	bool ret;

	preempt_disable();
	if (READ_ONCE(per_cpu(srcu_online, cpu)))
		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
	else
		ret = queue_delayed_work(wq, dwork, delay);
	preempt_enable();
	return ret;
}

/*
 * Schedule callback invocation for the specified srcu_data structure,
 * if possible, on the corresponding CPU.
 */
static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
{
	srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
				   &sdp->work, delay);
}

/*
 * Schedule callback invocation for all srcu_data structures associated
 * with the specified srcu_node structure that have callbacks for the
 * just-completed grace period, the one corresponding to idx.  If possible,
 * schedule this invocation on the corresponding CPUs.
 */
static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
				  unsigned long mask, unsigned long delay)
{
	int cpu;

	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
		if (!(mask & (1 << (cpu - snp->grplo))))
			continue;
		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay);
	}
}

/*
 * Note the end of an SRCU grace period.  Initiates callback invocation
 * and starts a new grace period if needed.
 *
 * The ->srcu_cb_mutex acquisition does not protect any data, but
 * instead prevents more than one grace period from starting while we
 * are initiating callback invocation.  This allows the ->srcu_have_cbs[]
 * array to have a finite number of elements.
 */
static void srcu_gp_end(struct srcu_struct *sp)
{
	unsigned long cbdelay;
	bool cbs;
	int cpu;
	unsigned long flags;
	unsigned long gpseq;
	int idx;
	int idxnext;
	unsigned long mask;
	struct srcu_data *sdp;
	struct srcu_node *snp;

	/* Prevent more than one additional grace period. */
	mutex_lock(&sp->srcu_cb_mutex);

	/* End the current grace period. */
	raw_spin_lock_irq_rcu_node(sp);
	idx = rcu_seq_state(sp->srcu_gp_seq);
	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
	cbdelay = srcu_get_delay(sp);
	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
	rcu_seq_end(&sp->srcu_gp_seq);
	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
		sp->srcu_gp_seq_needed_exp = gpseq;
	raw_spin_unlock_irq_rcu_node(sp);
	mutex_unlock(&sp->srcu_gp_mutex);
	/* A new grace period can start at this point.  But only one. */

	/* Initiate callback invocation as needed. */
	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
	rcu_for_each_node_breadth_first(sp, snp) {
		raw_spin_lock_irq_rcu_node(snp);
		cbs = false;
		if (snp >= sp->level[rcu_num_lvls - 1])
			cbs = snp->srcu_have_cbs[idx] == gpseq;
		snp->srcu_have_cbs[idx] = gpseq;
		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
		if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
			snp->srcu_gp_seq_needed_exp = gpseq;
		mask = snp->srcu_data_have_cbs[idx];
		snp->srcu_data_have_cbs[idx] = 0;
		raw_spin_unlock_irq_rcu_node(snp);
		if (cbs)
			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);

		/* Occasionally prevent srcu_data counter wrap. */
		if (!(gpseq & counter_wrap_check))
			for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
				sdp = per_cpu_ptr(sp->sda, cpu);
				raw_spin_lock_irqsave_rcu_node(sdp, flags);
				if (ULONG_CMP_GE(gpseq,
						 sdp->srcu_gp_seq_needed + 100))
					sdp->srcu_gp_seq_needed = gpseq;
				raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
			}
	}

	/* Callback initiation done, allow grace periods after next. */
	mutex_unlock(&sp->srcu_cb_mutex);

	/* Start a new grace period if needed. */
	raw_spin_lock_irq_rcu_node(sp);
	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
	if (!rcu_seq_state(gpseq) &&
	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
		srcu_gp_start(sp);
		raw_spin_unlock_irq_rcu_node(sp);
		/* Throttle expedited grace periods: Should be rare! */
		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
				    ? 0 : SRCU_INTERVAL);
	} else {
		raw_spin_unlock_irq_rcu_node(sp);
	}
}

/*
 * Funnel-locking scheme to scalably mediate many concurrent expedited
 * grace-period requests.  This function is invoked for the first known
 * expedited request for a grace period that has already been requested,
 * but without expediting.  To start a completely new grace period,
 * whether expedited or not, use srcu_funnel_gp_start() instead.
 */
static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
				  unsigned long s)
{
	unsigned long flags;

	for (; snp != NULL; snp = snp->srcu_parent) {
		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
			return;
		raw_spin_lock_irqsave_rcu_node(snp, flags);
		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
			return;
		}
		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
	}
	raw_spin_lock_irqsave_rcu_node(sp, flags);
	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
		sp->srcu_gp_seq_needed_exp = s;
	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
}

/*
 * Funnel-locking scheme to scalably mediate many concurrent grace-period
 * requests.  The winner has to do the work of actually starting grace
 * period s.  Losers must either ensure that their desired grace-period
 * number is recorded on at least their leaf srcu_node structure, or they
 * must take steps to invoke their own callbacks.
 */
static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
				 unsigned long s, bool do_norm)
{
	unsigned long flags;
	int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
	struct srcu_node *snp = sdp->mynode;
	unsigned long snp_seq;

	/* Each pass through the loop does one level of the srcu_node tree. */
	for (; snp != NULL; snp = snp->srcu_parent) {
		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
			return; /* GP already done and CBs recorded. */
		raw_spin_lock_irqsave_rcu_node(snp, flags);
		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
			snp_seq = snp->srcu_have_cbs[idx];
			if (snp == sdp->mynode && snp_seq == s)
				snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
			if (snp == sdp->mynode && snp_seq != s) {
				srcu_schedule_cbs_sdp(sdp, do_norm
							   ? SRCU_INTERVAL
							   : 0);
				return;
			}
			if (!do_norm)
				srcu_funnel_exp_start(sp, snp, s);
			return;
		}
		snp->srcu_have_cbs[idx] = s;
		if (snp == sdp->mynode)
			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
		if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
			snp->srcu_gp_seq_needed_exp = s;
		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
	}

	/* Top of tree, must ensure the grace period will be started. */
	raw_spin_lock_irqsave_rcu_node(sp, flags);
	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
		/*
		 * Record need for grace period s.  Pair with load
		 * acquire setting up for initialization.
		 */
		smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/
	}
	if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
		sp->srcu_gp_seq_needed_exp = s;

	/* If grace period not already done and none in progress, start it. */
	if (!rcu_seq_done(&sp->srcu_gp_seq, s) &&
	    rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
		srcu_gp_start(sp);
		queue_delayed_work(system_power_efficient_wq, &sp->work,
				   srcu_get_delay(sp));
	}
	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
}

/*
 * Wait until all readers counted by array index idx complete, but
 * loop an additional time if there is an expedited grace period pending.
 * The caller must ensure that ->srcu_idx is not changed while checking.
 */
static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
{
	for (;;) {
		if (srcu_readers_active_idx_check(sp, idx))
			return true;
		if (--trycount + !srcu_get_delay(sp) <= 0)
			return false;
		udelay(SRCU_RETRY_CHECK_DELAY);
	}
}

/*
 * Increment the ->srcu_idx counter so that future SRCU readers will
 * use the other rank of the ->srcu_(un)lock_count[] arrays.  This allows
 * us to wait for pre-existing readers in a starvation-free manner.
 */
static void srcu_flip(struct srcu_struct *sp)
{
	/*
	 * Ensure that if this updater saw a given reader's increment
	 * from __srcu_read_lock(), that reader was using an old value
	 * of ->srcu_idx.  Also ensure that if a given reader sees the
	 * new value of ->srcu_idx, this updater's earlier scans cannot
	 * have seen that reader's increments (which is OK, because this
	 * grace period need not wait on that reader).
	 */
	smp_mb(); /* E */  /* Pairs with B and C. */

	WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);

	/*
	 * Ensure that if the updater misses an __srcu_read_unlock()
	 * increment, that task's next __srcu_read_lock() will see the
	 * above counter update.  Note that both this memory barrier
	 * and the one in srcu_readers_active_idx_check() provide the
	 * guarantee for __srcu_read_lock().
	 */
	smp_mb(); /* D */  /* Pairs with C. */
}

/*
 * If SRCU is likely idle, return true, otherwise return false.
 *
 * Note that it is OK for several current from-idle requests for a new
 * grace period to specify expediting, because they will all end up
 * requesting the same grace period anyhow.  So no loss.
 *
 * Note also that if any CPU (including the current one) is still invoking
 * callbacks, this function will nevertheless say "idle".  This is not
 * ideal, but the overhead of checking all CPUs' callback lists is even
 * less ideal, especially on large systems.  Furthermore, the wakeup
 * can happen before the callback is fully removed, so we have no choice
 * but to accept this type of error.
 *
 * This function is also subject to counter-wrap errors, but let's face
 * it, if this function was preempted for enough time for the counters
 * to wrap, it really doesn't matter whether or not we expedite the grace
 * period.  The extra overhead of a needlessly expedited grace period is
 * negligible when amortized over that time period, and the extra latency
 * of a needlessly non-expedited grace period is similarly negligible.
 */
static bool srcu_might_be_idle(struct srcu_struct *sp)
{
	unsigned long curseq;
	unsigned long flags;
	struct srcu_data *sdp;
	unsigned long t;

	/* If the local srcu_data structure has callbacks, not idle. */
	local_irq_save(flags);
	sdp = this_cpu_ptr(sp->sda);
	if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
		local_irq_restore(flags);
		return false; /* Callbacks already present, so not idle. */
	}
	local_irq_restore(flags);

	/*
	 * No local callbacks, so probabilistically probe global state.
	 * Exact information would require acquiring locks, which would
	 * kill scalability, hence the probabilistic nature of the probe.
	 */

	/* First, see if enough time has passed since the last GP. */
	t = ktime_get_mono_fast_ns();
	if (exp_holdoff == 0 ||
	    time_in_range_open(t, sp->srcu_last_gp_end,
			       sp->srcu_last_gp_end + exp_holdoff))
		return false; /* Too soon after last GP. */

	/* Next, check for probable idleness. */
	curseq = rcu_seq_current(&sp->srcu_gp_seq);
	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
	if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
		return false; /* Grace period in progress, so not idle. */
	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
	if (curseq != rcu_seq_current(&sp->srcu_gp_seq))
		return false; /* GP # changed, so not idle. */
	return true; /* With reasonable probability, idle! */
}

/*
 * SRCU callback function to leak a callback.
 */
static void srcu_leak_callback(struct rcu_head *rhp)
{
}

/*
 * Enqueue an SRCU callback on the srcu_data structure associated with
 * the current CPU and the specified srcu_struct structure, initiating
 * grace-period processing if it is not already running.
 *
 * Note that all CPUs must agree that the grace period extended beyond
 * all pre-existing SRCU read-side critical sections.  On systems with
 * more than one CPU, this means that when "func()" is invoked, each CPU
 * is guaranteed to have executed a full memory barrier since the end of
 * its last corresponding SRCU read-side critical section whose beginning
 * preceded the call to call_srcu().  It also means that each CPU executing
 * an SRCU read-side critical section that continues beyond the start of
 * "func()" must have executed a memory barrier after the call_srcu()
 * but before the beginning of that SRCU read-side critical section.
 * Note that these guarantees include CPUs that are offline, idle, or
 * executing in user mode, as well as CPUs that are executing in the kernel.
 *
 * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the
 * resulting SRCU callback function "func()", then both CPU A and CPU
 * B are guaranteed to execute a full memory barrier during the time
 * interval between the call to call_srcu() and the invocation of "func()".
 * This guarantee applies even if CPU A and CPU B are the same CPU (but
 * again only if the system has more than one CPU).
 *
 * Of course, these guarantees apply only for invocations of call_srcu(),
 * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
 * srcu_struct structure.
 */
void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
		 rcu_callback_t func, bool do_norm)
{
	unsigned long flags;
	bool needexp = false;
	bool needgp = false;
	unsigned long s;
	struct srcu_data *sdp;

	check_init_srcu_struct(sp);
	if (debug_rcu_head_queue(rhp)) {
		/* Probable double call_srcu(), so leak the callback. */
		WRITE_ONCE(rhp->func, srcu_leak_callback);
		WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n");
		return;
	}
	rhp->func = func;
	local_irq_save(flags);
	sdp = this_cpu_ptr(sp->sda);
	raw_spin_lock_rcu_node(sdp);
	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&sp->srcu_gp_seq));
	s = rcu_seq_snap(&sp->srcu_gp_seq);
	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
		sdp->srcu_gp_seq_needed = s;
		needgp = true;
	}
	if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
		sdp->srcu_gp_seq_needed_exp = s;
		needexp = true;
	}
	raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
	if (needgp)
		srcu_funnel_gp_start(sp, sdp, s, do_norm);
	else if (needexp)
		srcu_funnel_exp_start(sp, sdp->mynode, s);
}

/**
 * call_srcu() - Queue a callback for invocation after an SRCU grace period
 * @sp: srcu_struct on which to queue the callback
 * @rhp: structure to be used for queueing the SRCU callback.
 * @func: function to be invoked after the SRCU grace period
 *
 * The callback function will be invoked some time after a full SRCU
 * grace period elapses, in other words after all pre-existing SRCU
 * read-side critical sections have completed.  However, the callback
 * function might well execute concurrently with other SRCU read-side
 * critical sections that started after call_srcu() was invoked.  SRCU
 * read-side critical sections are delimited by srcu_read_lock() and
 * srcu_read_unlock(), and may be nested.
 *
 * The callback will be invoked from process context, but must nevertheless
 * be fast and must not block.
 */
void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
	       rcu_callback_t func)
{
	__call_srcu(sp, rhp, func, true);
}
EXPORT_SYMBOL_GPL(call_srcu);
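
/*
 * Asynchronous-update usage sketch (hypothetical "my_obj", "my_srcu", and
 * my_obj_free_cb() names): the rcu_head is typically embedded in the
 * protected object, and the callback frees that object once all
 * pre-existing readers have completed:
 *
 *	struct my_obj {
 *		struct rcu_head rh;
 *		// ... payload ...
 *	};
 *
 *	static void my_obj_free_cb(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct my_obj, rh));
 *	}
 *
 *	// After unpublishing old_p from all reader-visible pointers:
 *	call_srcu(&my_srcu, &old_p->rh, my_obj_free_cb);
 *
 * The callback runs from workqueue (process) context, so, as noted above,
 * it must be fast and must not block.
 */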

/*
 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
 */
static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
{
	struct rcu_synchronize rcu;

	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
			 lock_is_held(&rcu_bh_lock_map) ||
			 lock_is_held(&rcu_lock_map) ||
			 lock_is_held(&rcu_sched_lock_map),
			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");

	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
		return;
	might_sleep();
	check_init_srcu_struct(sp);
	init_completion(&rcu.completion);
	init_rcu_head_on_stack(&rcu.head);
	__call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
	wait_for_completion(&rcu.completion);
	destroy_rcu_head_on_stack(&rcu.head);
}

/**
 * synchronize_srcu_expedited - Brute-force SRCU grace period
 * @sp: srcu_struct with which to synchronize.
 *
 * Wait for an SRCU grace period to elapse, but be more aggressive about
 * spinning rather than blocking when waiting.
 *
 * Note that synchronize_srcu_expedited() has the same deadlock and
 * memory-ordering properties as does synchronize_srcu().
 */
void synchronize_srcu_expedited(struct srcu_struct *sp)
{
	__synchronize_srcu(sp, rcu_gp_is_normal());
}
EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);

/**
 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
 * @sp: srcu_struct with which to synchronize.
 *
 * Wait for the counts of both indexes to drain to zero.  To avoid starving
 * synchronize_srcu(), first wait for the count of index
 * ((->srcu_idx & 1) ^ 1) to drain to zero, then flip ->srcu_idx and wait
 * for the count of the other index to drain to zero.
 *
 * Can block; must be called from process context.
 *
 * Note that it is illegal to call synchronize_srcu() from the corresponding
 * SRCU read-side critical section; doing so will result in deadlock.
 * However, it is perfectly legal to call synchronize_srcu() on one
 * srcu_struct from some other srcu_struct's read-side critical section,
 * as long as the resulting graph of srcu_structs is acyclic.
 *
 * There are memory-ordering constraints implied by synchronize_srcu().
 * On systems with more than one CPU, when synchronize_srcu() returns,
 * each CPU is guaranteed to have executed a full memory barrier since
 * the end of its last corresponding SRCU read-side critical section
 * whose beginning preceded the call to synchronize_srcu().  In addition,
 * each CPU having an SRCU read-side critical section that extends beyond
 * the return from synchronize_srcu() is guaranteed to have executed a
 * full memory barrier after the beginning of synchronize_srcu() and before
 * the beginning of that SRCU read-side critical section.  Note that these
 * guarantees include CPUs that are offline, idle, or executing in user mode,
 * as well as CPUs that are executing in the kernel.
 *
 * Furthermore, if CPU A invoked synchronize_srcu(), which returned
 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
 * to have executed a full memory barrier during the execution of
 * synchronize_srcu().  This guarantee applies even if CPU A and CPU B
 * are the same CPU, but again only if the system has more than one CPU.
 *
 * Of course, these memory-ordering guarantees apply only when
 * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
 * passed the same srcu_struct structure.
 *
 * If SRCU is likely idle, expedite the first request.  This semantic
 * was provided by Classic SRCU, and is relied upon by its users, so TREE
 * SRCU must also provide it.  Note that detecting idleness is heuristic
 * and subject to both false positives and negatives.
 */
void synchronize_srcu(struct srcu_struct *sp)
{
	if (srcu_might_be_idle(sp) || rcu_gp_is_expedited())
		synchronize_srcu_expedited(sp);
	else
		__synchronize_srcu(sp, true);
}
EXPORT_SYMBOL_GPL(synchronize_srcu);
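
/*
 * Synchronous-update usage sketch (hypothetical "my_data_ptr", "my_srcu",
 * and "my_update_lock" names): the classic publish-then-wait-then-free
 * pattern, with the wait provided by synchronize_srcu() rather than by a
 * call_srcu() callback:
 *
 *	struct my_obj *old_p;
 *
 *	old_p = rcu_dereference_protected(my_data_ptr,
 *					  lockdep_is_held(&my_update_lock));
 *	rcu_assign_pointer(my_data_ptr, new_p);
 *	synchronize_srcu(&my_srcu);	// wait for pre-existing readers
 *	kfree(old_p);
 *
 * Because synchronize_srcu() can block, it must not be invoked from the
 * corresponding read-side critical section or from atomic context.
 */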

/*
 * Callback function for srcu_barrier() use.
 */
static void srcu_barrier_cb(struct rcu_head *rhp)
{
	struct srcu_data *sdp;
	struct srcu_struct *sp;

	sdp = container_of(rhp, struct srcu_data, srcu_barrier_head);
	sp = sdp->sp;
	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
		complete(&sp->srcu_barrier_completion);
}

/**
 * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
 * @sp: srcu_struct on which to wait for in-flight callbacks.
 */
void srcu_barrier(struct srcu_struct *sp)
{
	int cpu;
	struct srcu_data *sdp;
	unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq);

	check_init_srcu_struct(sp);
	mutex_lock(&sp->srcu_barrier_mutex);
	if (rcu_seq_done(&sp->srcu_barrier_seq, s)) {
		smp_mb(); /* Force ordering following return. */
		mutex_unlock(&sp->srcu_barrier_mutex);
		return; /* Someone else did our work for us. */
	}
	rcu_seq_start(&sp->srcu_barrier_seq);
	init_completion(&sp->srcu_barrier_completion);

	/* Initial count prevents reaching zero until all CBs are posted. */
	atomic_set(&sp->srcu_barrier_cpu_cnt, 1);

	/*
	 * Each pass through this loop enqueues a callback, but only
	 * on CPUs already having callbacks enqueued.  Note that if
	 * a CPU already has callbacks enqueued, it must have already
	 * registered the need for a future grace period, so all we
	 * need do is enqueue a callback that will use the same
	 * grace period as the last callback already in the queue.
	 */
	for_each_possible_cpu(cpu) {
		sdp = per_cpu_ptr(sp->sda, cpu);
		raw_spin_lock_irq_rcu_node(sdp);
		atomic_inc(&sp->srcu_barrier_cpu_cnt);
		sdp->srcu_barrier_head.func = srcu_barrier_cb;
		debug_rcu_head_queue(&sdp->srcu_barrier_head);
		if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
					   &sdp->srcu_barrier_head, 0)) {
			debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
			atomic_dec(&sp->srcu_barrier_cpu_cnt);
		}
		raw_spin_unlock_irq_rcu_node(sdp);
	}

	/* Remove the initial count, at which point reaching zero can happen. */
	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
		complete(&sp->srcu_barrier_completion);
	wait_for_completion(&sp->srcu_barrier_completion);

	rcu_seq_end(&sp->srcu_barrier_seq);
	mutex_unlock(&sp->srcu_barrier_mutex);
}
EXPORT_SYMBOL_GPL(srcu_barrier);
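
/*
 * Teardown usage sketch (hypothetical "my_exit" and "my_dynamic_srcu"
 * names): before a dynamically initialized srcu_struct is cleaned up,
 * for example at module unload, all previously posted call_srcu()
 * callbacks must be waited for:
 *
 *	void my_exit(void)
 *	{
 *		// No new call_srcu() invocations from this point on.
 *		srcu_barrier(&my_dynamic_srcu);
 *		cleanup_srcu_struct(&my_dynamic_srcu);
 *	}
 */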

/**
 * srcu_batches_completed - return batches completed.
 * @sp: srcu_struct on which to report batch completion.
 *
 * Report the number of batches, correlated with, but not necessarily
 * precisely the same as, the number of grace periods that have elapsed.
 */
unsigned long srcu_batches_completed(struct srcu_struct *sp)
{
	return sp->srcu_idx;
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);

/*
 * Core SRCU state machine.  Push state bits of ->srcu_gp_seq
 * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has
 * completed in that state.
 */
static void srcu_advance_state(struct srcu_struct *sp)
{
	int idx;

	mutex_lock(&sp->srcu_gp_mutex);

	/*
	 * Because readers might be delayed for an extended period after
	 * fetching ->srcu_idx for their index, at any point in time there
	 * might well be readers using both idx=0 and idx=1.  We therefore
	 * need to wait for readers to clear from both index values before
	 * invoking a callback.
	 *
	 * The load-acquire ensures that we see the accesses performed
	 * by the prior grace period.
	 */
	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
	if (idx == SRCU_STATE_IDLE) {
		raw_spin_lock_irq_rcu_node(sp);
		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
			raw_spin_unlock_irq_rcu_node(sp);
			mutex_unlock(&sp->srcu_gp_mutex);
			return;
		}
		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
		if (idx == SRCU_STATE_IDLE)
			srcu_gp_start(sp);
		raw_spin_unlock_irq_rcu_node(sp);
		if (idx != SRCU_STATE_IDLE) {
			mutex_unlock(&sp->srcu_gp_mutex);
			return; /* Someone else started the grace period. */
		}
	}

	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
		idx = 1 ^ (sp->srcu_idx & 1);
		if (!try_check_zero(sp, idx, 1)) {
			mutex_unlock(&sp->srcu_gp_mutex);
			return; /* readers present, retry later. */
		}
		srcu_flip(sp);
		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
	}

	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {

		/*
		 * SRCU read-side critical sections are normally short,
		 * so check at least twice in quick succession after a flip.
		 */
		idx = 1 ^ (sp->srcu_idx & 1);
		if (!try_check_zero(sp, idx, 2)) {
			mutex_unlock(&sp->srcu_gp_mutex);
			return; /* readers present, retry later. */
		}
		srcu_gp_end(sp);  /* Releases ->srcu_gp_mutex. */
	}
}

/*
 * Invoke a limited number of SRCU callbacks that have passed through
 * their grace period.  If there are more to do, SRCU will reschedule
 * the workqueue.  Note that needed memory barriers have been executed
 * in this task's context by srcu_readers_active_idx_check().
 */
static void srcu_invoke_callbacks(struct work_struct *work)
{
	bool more;
	struct rcu_cblist ready_cbs;
	struct rcu_head *rhp;
	struct srcu_data *sdp;
	struct srcu_struct *sp;

	sdp = container_of(work, struct srcu_data, work.work);
	sp = sdp->sp;
	rcu_cblist_init(&ready_cbs);
	raw_spin_lock_irq_rcu_node(sdp);
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&sp->srcu_gp_seq));
	if (sdp->srcu_cblist_invoking ||
	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
		raw_spin_unlock_irq_rcu_node(sdp);
		return;  /* Someone else on the job or nothing to do. */
	}

	/* We are on the job!  Extract and invoke ready callbacks. */
	sdp->srcu_cblist_invoking = true;
	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
	raw_spin_unlock_irq_rcu_node(sdp);
	rhp = rcu_cblist_dequeue(&ready_cbs);
	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
		debug_rcu_head_unqueue(rhp);
		local_bh_disable();
		rhp->func(rhp);
		local_bh_enable();
	}

	/*
	 * Update counts, accelerate new callbacks, and if needed,
	 * schedule another round of callback invocation.
	 */
	raw_spin_lock_irq_rcu_node(sdp);
	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
				       rcu_seq_snap(&sp->srcu_gp_seq));
	sdp->srcu_cblist_invoking = false;
	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
	raw_spin_unlock_irq_rcu_node(sdp);
	if (more)
		srcu_schedule_cbs_sdp(sdp, 0);
}

/*
 * Finished one round of SRCU grace period.  Start another if there are
 * more SRCU callbacks queued, otherwise put SRCU into not-running state.
 */
static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
{
	bool pushgp = true;

	raw_spin_lock_irq_rcu_node(sp);
	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
			/* All requests fulfilled, time to go idle. */
			pushgp = false;
		}
	} else if (!rcu_seq_state(sp->srcu_gp_seq)) {
		/* Outstanding request and no GP.  Start one. */
		srcu_gp_start(sp);
	}
	raw_spin_unlock_irq_rcu_node(sp);

	if (pushgp)
		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
}

/*
 * This is the work-queue function that handles SRCU grace periods.
 */
void process_srcu(struct work_struct *work)
{
	struct srcu_struct *sp;

	sp = container_of(work, struct srcu_struct, work.work);

	srcu_advance_state(sp);
	srcu_reschedule(sp, srcu_get_delay(sp));
}
EXPORT_SYMBOL_GPL(process_srcu);

void srcutorture_get_gp_data(enum rcutorture_type test_type,
			     struct srcu_struct *sp, int *flags,
			     unsigned long *gpnum, unsigned long *completed)
{
	if (test_type != SRCU_FLAVOR)
		return;
	*flags = 0;
	*completed = rcu_seq_ctr(sp->srcu_gp_seq);
	*gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
}
EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);

static int __init srcu_bootup_announce(void)
{
	pr_info("Hierarchical SRCU implementation.\n");
	if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
		pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
	return 0;
}
early_initcall(srcu_bootup_announce);