// SPDX-License-Identifier: GPL-2.0+
/*
 * RCU segmented callback lists, function definitions
 *
 * Copyright IBM Corporation, 2017
 *
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/rcupdate.h>

#include "rcu_segcblist.h"

/* Initialize simple callback list. */
void rcu_cblist_init(struct rcu_cblist *rclp)
{
	rclp->head = NULL;
	rclp->tail = &rclp->head;
	rclp->len = 0;
	rclp->len_lazy = 0;
}

/*
 * Enqueue an rcu_head structure onto the specified callback list.
 * This function assumes that the callback is non-lazy because it
 * is intended for use by no-CBs CPUs, which do not distinguish
 * between lazy and non-lazy RCU callbacks.
 */
void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp)
{
	*rclp->tail = rhp;
	rclp->tail = &rhp->next;
	WRITE_ONCE(rclp->len, rclp->len + 1);
}

/*
 * Flush the second rcu_cblist structure onto the first one, obliterating
 * any contents of the first. If rhp is non-NULL, enqueue it as the sole
 * element of the second rcu_cblist structure, but ensuring that the second
 * rcu_cblist structure, if initially non-empty, always appears non-empty
 * throughout the process. If rhp is NULL, the second rcu_cblist structure
 * is instead initialized to empty.
 */
void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp,
			      struct rcu_cblist *srclp,
			      struct rcu_head *rhp)
{
	drclp->head = srclp->head;
	if (drclp->head)
		drclp->tail = srclp->tail;
	else
		drclp->tail = &drclp->head;
	drclp->len = srclp->len;
	drclp->len_lazy = srclp->len_lazy;
	if (!rhp) {
		rcu_cblist_init(srclp);
	} else {
		rhp->next = NULL;
		srclp->head = rhp;
		srclp->tail = &rhp->next;
		WRITE_ONCE(srclp->len, 1);
		srclp->len_lazy = 0;
	}
}

/*
 * Dequeue the oldest rcu_head structure from the specified callback
 * list. This function assumes that the callback is non-lazy, but
 * the caller can later invoke rcu_cblist_dequeued_lazy() if it
 * finds otherwise (and if it cares about laziness). This allows
 * different users to have different ways of determining laziness.
 */
struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
{
	struct rcu_head *rhp;

	rhp = rclp->head;
	if (!rhp)
		return NULL;
	rclp->len--;
	rclp->head = rhp->next;
	if (!rclp->head)
		rclp->tail = &rclp->head;
	return rhp;
}

/* Set the length of an rcu_segcblist structure. */
void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
{
#ifdef CONFIG_RCU_NOCB_CPU
	atomic_long_set(&rsclp->len, v);
#else
	WRITE_ONCE(rsclp->len, v);
#endif
}

/*
 * Increase the numeric length of an rcu_segcblist structure by the
 * specified amount, which can be negative. This can cause the ->len
 * field to disagree with the actual number of callbacks on the structure.
 * This increase is fully ordered with respect to the caller's accesses
 * both before and after.
 */
void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
{
#ifdef CONFIG_RCU_NOCB_CPU
	smp_mb__before_atomic(); /* Up to the caller! */
	atomic_long_add(v, &rsclp->len);
	smp_mb__after_atomic(); /* Up to the caller! */
#else
	smp_mb(); /* Up to the caller! */
	WRITE_ONCE(rsclp->len, rsclp->len + v);
	smp_mb(); /* Up to the caller! */
#endif
}
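/*
 * Illustrative sketch only, not part of the original file: drain a
 * simple rcu_cblist by repeatedly dequeueing the oldest callback and
 * invoking it. The hypothetical example_cblist_drain() assumes that
 * the caller provides any needed mutual exclusion for @rclp.
 */
static void __maybe_unused example_cblist_drain(struct rcu_cblist *rclp)
{
	struct rcu_head *rhp;

	while ((rhp = rcu_cblist_dequeue(rclp)) != NULL)
		rhp->func(rhp); /* Invoke callback; ->len was decremented. */
}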
/*
 * Increase the numeric length of an rcu_segcblist structure by one.
 * This can cause the ->len field to disagree with the actual number of
 * callbacks on the structure. This increase is fully ordered with respect
 * to the caller's accesses both before and after.
 */
void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp)
{
	rcu_segcblist_add_len(rsclp, 1);
}

/*
 * Exchange the numeric length of the specified rcu_segcblist structure
 * with the specified value. This can cause the ->len field to disagree
 * with the actual number of callbacks on the structure. This exchange is
 * fully ordered with respect to the caller's accesses both before and after.
 */
long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v)
{
#ifdef CONFIG_RCU_NOCB_CPU
	return atomic_long_xchg(&rsclp->len, v);
#else
	long ret = rsclp->len;

	smp_mb(); /* Up to the caller! */
	WRITE_ONCE(rsclp->len, v);
	smp_mb(); /* Up to the caller! */
	return ret;
#endif
}

/*
 * Initialize an rcu_segcblist structure.
 */
void rcu_segcblist_init(struct rcu_segcblist *rsclp)
{
	int i;

	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
	rsclp->head = NULL;
	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
		rsclp->tails[i] = &rsclp->head;
	rcu_segcblist_set_len(rsclp, 0);
	rsclp->len_lazy = 0;
	rsclp->enabled = 1;
}

/*
 * Disable the specified rcu_segcblist structure, so that callbacks can
 * no longer be posted to it. This structure must be empty.
 */
void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
{
	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
	rsclp->enabled = 0;
}

/*
 * Mark the specified rcu_segcblist structure as offloaded. This
 * structure must be empty.
 */
void rcu_segcblist_offload(struct rcu_segcblist *rsclp)
{
	rsclp->offloaded = 1;
}

/*
 * Does the specified rcu_segcblist structure contain callbacks that
 * are ready to be invoked?
 */
bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
{
	return rcu_segcblist_is_enabled(rsclp) &&
	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
}

/*
 * Does the specified rcu_segcblist structure contain callbacks that
 * are still pending, that is, not yet ready to be invoked?
 */
bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
{
	return rcu_segcblist_is_enabled(rsclp) &&
	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
}

/*
 * Return a pointer to the first callback in the specified rcu_segcblist
 * structure. This is useful for diagnostics.
 */
struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
{
	if (rcu_segcblist_is_enabled(rsclp))
		return rsclp->head;
	return NULL;
}
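/*
 * Illustrative sketch only, not part of the original file: the
 * enable/disable life cycle. A hypothetical example_try_disable()
 * shows why rcu_segcblist_disable() insists on an empty list --
 * callbacks must be drained (or moved elsewhere) first.
 */
static bool __maybe_unused example_try_disable(struct rcu_segcblist *rsclp)
{
	if (!rcu_segcblist_empty(rsclp))
		return false; /* Callbacks remain, cannot disable yet. */
	rcu_segcblist_disable(rsclp);
	return true;
}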
/*
 * Return a pointer to the first pending callback in the specified
 * rcu_segcblist structure. This is useful just after posting a given
 * callback -- if that callback is the first pending callback, then
 * you cannot rely on someone else having already started up the required
 * grace period.
 */
struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
{
	if (rcu_segcblist_is_enabled(rsclp))
		return *rsclp->tails[RCU_DONE_TAIL];
	return NULL;
}

/*
 * Return false if there are no CBs awaiting grace periods, otherwise,
 * return true and store the nearest waited-upon grace period into *lp.
 */
bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp)
{
	if (!rcu_segcblist_pend_cbs(rsclp))
		return false;
	*lp = rsclp->gp_seq[RCU_WAIT_TAIL];
	return true;
}

/*
 * Enqueue the specified callback onto the specified rcu_segcblist
 * structure, updating accounting as needed. Note that the ->len
 * field may be accessed locklessly, hence the WRITE_ONCE().
 * The ->len field is used by rcu_barrier() and friends to determine
 * if it must post a callback on this structure, and it is OK
 * for rcu_barrier() to sometimes post callbacks needlessly, but
 * absolutely not OK for it to ever miss posting a callback.
 */
void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
			   struct rcu_head *rhp, bool lazy)
{
	rcu_segcblist_inc_len(rsclp);
	if (lazy)
		rsclp->len_lazy++;
	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
	rhp->next = NULL;
	WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp);
	WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next);
}

/*
 * Entrain the specified callback onto the specified rcu_segcblist at
 * the end of the last non-empty segment. If the entire rcu_segcblist
 * is empty, make no change, but return false.
 *
 * This is intended for use by rcu_barrier()-like primitives, -not-
 * for normal grace-period use. IMPORTANT: The callback you enqueue
 * will wait for all prior callbacks, NOT necessarily for a grace
 * period. You have been warned.
 */
bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
			   struct rcu_head *rhp, bool lazy)
{
	int i;

	if (rcu_segcblist_n_cbs(rsclp) == 0)
		return false;
	rcu_segcblist_inc_len(rsclp);
	if (lazy)
		rsclp->len_lazy++;
	smp_mb(); /* Ensure counts are updated before callback is entrained. */
	rhp->next = NULL;
	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] != rsclp->tails[i - 1])
			break;
	WRITE_ONCE(*rsclp->tails[i], rhp);
	for (; i <= RCU_NEXT_TAIL; i++)
		WRITE_ONCE(rsclp->tails[i], &rhp->next);
	return true;
}

/*
 * Extract only the counts from the specified rcu_segcblist structure,
 * and place them in the specified rcu_cblist structure. This function
 * supports both callback orphaning and invocation, hence the separation
 * of counts and callbacks. (Callbacks ready for invocation must be
 * orphaned and adopted separately from pending callbacks, but counts
 * apply to all callbacks. Locking must be used to make sure that
 * both orphaned-callbacks lists are consistent.)
 */
void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
				 struct rcu_cblist *rclp)
{
	rclp->len_lazy += rsclp->len_lazy;
	rsclp->len_lazy = 0;
	rclp->len = rcu_segcblist_xchg_len(rsclp, 0);
}
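/*
 * Illustrative sketch only, not part of the original file: a
 * hypothetical example_gp_needed() uses rcu_segcblist_nextgp() to
 * decide whether the nearest awaited grace period has yet to complete,
 * given the caller's current grace-period sequence number cur_seq.
 */
static bool __maybe_unused example_gp_needed(struct rcu_segcblist *rsclp,
					     unsigned long cur_seq)
{
	unsigned long gp_seq;

	if (!rcu_segcblist_nextgp(rsclp, &gp_seq))
		return false; /* No callbacks are awaiting a grace period. */
	return ULONG_CMP_LT(cur_seq, gp_seq); /* Awaited GP still ahead? */
}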
/*
 * Extract only those callbacks ready to be invoked from the specified
 * rcu_segcblist structure and place them in the specified rcu_cblist
 * structure.
 */
void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
				    struct rcu_cblist *rclp)
{
	int i;

	if (!rcu_segcblist_ready_cbs(rsclp))
		return; /* Nothing to do. */
	*rclp->tail = rsclp->head;
	WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]);
	WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
			WRITE_ONCE(rsclp->tails[i], &rsclp->head);
}

/*
 * Extract only those callbacks still pending (not yet ready to be
 * invoked) from the specified rcu_segcblist structure and place them in
 * the specified rcu_cblist structure. Note that this loses information
 * about any callbacks that might have been partway done waiting for
 * their grace period. Too bad! They will have to start over.
 */
void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
				    struct rcu_cblist *rclp)
{
	int i;

	if (!rcu_segcblist_pend_cbs(rsclp))
		return; /* Nothing to do. */
	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
	WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
		WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]);
}

/*
 * Insert counts from the specified rcu_cblist structure in the
 * specified rcu_segcblist structure.
 */
void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
				struct rcu_cblist *rclp)
{
	rsclp->len_lazy += rclp->len_lazy;
	rcu_segcblist_add_len(rsclp, rclp->len);
	rclp->len_lazy = 0;
	rclp->len = 0;
}

/*
 * Move callbacks from the specified rcu_cblist to the beginning of the
 * done-callbacks segment of the specified rcu_segcblist.
 */
void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
				   struct rcu_cblist *rclp)
{
	int i;

	if (!rclp->head)
		return; /* No callbacks to move. */
	*rclp->tail = rsclp->head;
	WRITE_ONCE(rsclp->head, rclp->head);
	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
		if (&rsclp->head == rsclp->tails[i])
			WRITE_ONCE(rsclp->tails[i], rclp->tail);
		else
			break;
	rclp->head = NULL;
	rclp->tail = &rclp->head;
}

/*
 * Move callbacks from the specified rcu_cblist to the end of the
 * new-callbacks segment of the specified rcu_segcblist.
 */
void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
				   struct rcu_cblist *rclp)
{
	if (!rclp->head)
		return; /* Nothing to do. */
	WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head);
	WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail);
	rclp->head = NULL;
	rclp->tail = &rclp->head;
}
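/*
 * Illustrative sketch only, not part of the original file: the
 * extract/insert helpers above are designed to be paired. This
 * hypothetical example_do_batch() pulls the ready-to-invoke callbacks
 * into a local rcu_cblist, invokes them, requeues any leftovers, and
 * applies the now-negative local ->len as a delta -- which is why
 * rcu_cblist_dequeue() is permitted to drive ->len negative.
 */
static void __maybe_unused example_do_batch(struct rcu_segcblist *rsclp)
{
	struct rcu_cblist rcl;
	struct rcu_head *rhp;

	rcu_cblist_init(&rcl);
	rcu_segcblist_extract_done_cbs(rsclp, &rcl);
	while ((rhp = rcu_cblist_dequeue(&rcl)) != NULL)
		rhp->func(rhp); /* Each dequeue decrements rcl.len. */
	rcu_segcblist_insert_done_cbs(rsclp, &rcl); /* Requeue leftovers. */
	rcu_segcblist_insert_count(rsclp, &rcl); /* Apply negative delta. */
}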
/*
 * Advance the callbacks in the specified rcu_segcblist structure based
 * on the current value passed in for the grace-period counter.
 */
void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
{
	int i, j;

	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
		return;

	/*
	 * Find all callbacks whose ->gp_seq numbers indicate that they
	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
	 */
	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
			break;
		WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
	}

	/* If no callbacks moved, nothing more need be done. */
	if (i == RCU_WAIT_TAIL)
		return;

	/* Clean up tail pointers that might have been misordered above. */
	for (j = RCU_WAIT_TAIL; j < i; j++)
		WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);

	/*
	 * Callbacks moved, so clean up the misordered ->tails[] pointers
	 * that now point into the middle of the list of ready-to-invoke
	 * callbacks. The overall effect is to copy down the later pointers
	 * into the gap that was created by the now-ready segments.
	 */
	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
			break; /* No more callbacks. */
		WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]);
		rsclp->gp_seq[j] = rsclp->gp_seq[i];
	}
}
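/*
 * Illustrative sketch only, not part of the original file: advancing is
 * typically followed by a check for newly ready callbacks, as in this
 * hypothetical example_advance_and_check(), with seq supplied by the
 * caller's grace-period bookkeeping.
 */
static bool __maybe_unused example_advance_and_check(struct rcu_segcblist *rsclp,
						     unsigned long seq)
{
	rcu_segcblist_advance(rsclp, seq);
	return rcu_segcblist_ready_cbs(rsclp);
}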
/*
 * "Accelerate" callbacks based on more-accurate grace-period information.
 * The reason for this is that RCU does not synchronize the beginnings and
 * ends of grace periods, and that callbacks are posted locally. This in
 * turn means that the callbacks must be labelled conservatively early
 * on, as getting exact information would degrade both performance and
 * scalability. When more accurate grace-period information becomes
 * available, previously posted callbacks can be "accelerated", marking
 * them to complete at the end of the earlier grace period.
 *
 * This function operates on an rcu_segcblist structure, and also the
 * grace-period sequence number seq at which new callbacks would become
 * ready to invoke. Returns true if there are callbacks that won't be
 * ready to invoke until seq, false otherwise.
 */
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
{
	int i;

	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
		return false;

	/*
	 * Find the segment preceding the oldest segment of callbacks
	 * whose ->gp_seq[] completion is at or after that passed in via
	 * "seq", skipping any empty segments. This oldest segment, along
	 * with any later segments, can be merged in with any newly arrived
	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
	 * as their ->gp_seq[] grace-period completion sequence number.
	 */
	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
			break;

	/*
	 * If all the segments contain callbacks that correspond to
	 * earlier grace-period sequence numbers than "seq", leave.
	 * Assuming that the rcu_segcblist structure has enough
	 * segments in its arrays, this can only happen if some of
	 * the non-done segments contain callbacks that really are
	 * ready to invoke. This situation will get straightened
	 * out by the next call to rcu_segcblist_advance().
	 *
	 * Also advance to the oldest segment of callbacks whose
	 * ->gp_seq[] completion is at or after that passed in via "seq",
	 * skipping any empty segments.
	 */
	if (++i >= RCU_NEXT_TAIL)
		return false;

	/*
	 * Merge all later callbacks, including newly arrived callbacks,
	 * into the segment located by the for-loop above. Assign "seq"
	 * as the ->gp_seq[] value in order to correctly handle the case
	 * where there were no pending callbacks in the rcu_segcblist
	 * structure other than in the RCU_NEXT_TAIL segment.
	 */
	for (; i < RCU_NEXT_TAIL; i++) {
		WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]);
		rsclp->gp_seq[i] = seq;
	}
	return true;
}

/*
 * Merge the source rcu_segcblist structure into the destination
 * rcu_segcblist structure, then initialize the source. Any pending
 * callbacks from the source get to start over. It is best to
 * advance and accelerate both the destination and the source
 * before merging.
 */
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
			 struct rcu_segcblist *src_rsclp)
{
	struct rcu_cblist donecbs;
	struct rcu_cblist pendcbs;

	rcu_cblist_init(&donecbs);
	rcu_cblist_init(&pendcbs);
	rcu_segcblist_extract_count(src_rsclp, &donecbs);
	rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
	rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
	rcu_segcblist_insert_count(dst_rsclp, &donecbs);
	rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
	rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
	rcu_segcblist_init(src_rsclp);
}
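/*
 * Illustrative sketch only, not part of the original file: a
 * hypothetical example_adopt_cbs() adopts another list's callbacks,
 * first advancing both lists as the comment above rcu_segcblist_merge()
 * recommends. Locking, and any prior call to rcu_segcblist_accelerate(),
 * are left to the caller.
 */
static void __maybe_unused example_adopt_cbs(struct rcu_segcblist *dst,
					     struct rcu_segcblist *src,
					     unsigned long seq)
{
	rcu_segcblist_advance(dst, seq);
	rcu_segcblist_advance(src, seq);
	rcu_segcblist_merge(dst, src); /* src is reinitialized to empty. */
}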