1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * RCU segmented callback lists, function definitions 4 * 5 * Copyright IBM Corporation, 2017 6 * 7 * Authors: Paul E. McKenney <paulmck@linux.ibm.com> 8 */ 9 10 #include <linux/types.h> 11 #include <linux/kernel.h> 12 #include <linux/interrupt.h> 13 #include <linux/rcupdate.h> 14 15 #include "rcu_segcblist.h" 16 17 /* Initialize simple callback list. */ 18 void rcu_cblist_init(struct rcu_cblist *rclp) 19 { 20 rclp->head = NULL; 21 rclp->tail = &rclp->head; 22 rclp->len = 0; 23 } 24 25 /* 26 * Enqueue an rcu_head structure onto the specified callback list. 27 */ 28 void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp) 29 { 30 *rclp->tail = rhp; 31 rclp->tail = &rhp->next; 32 WRITE_ONCE(rclp->len, rclp->len + 1); 33 } 34 35 /* 36 * Flush the second rcu_cblist structure onto the first one, obliterating 37 * any contents of the first. If rhp is non-NULL, enqueue it as the sole 38 * element of the second rcu_cblist structure, but ensuring that the second 39 * rcu_cblist structure, if initially non-empty, always appears non-empty 40 * throughout the process. If rdp is NULL, the second rcu_cblist structure 41 * is instead initialized to empty. 42 */ 43 void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp, 44 struct rcu_cblist *srclp, 45 struct rcu_head *rhp) 46 { 47 drclp->head = srclp->head; 48 if (drclp->head) 49 drclp->tail = srclp->tail; 50 else 51 drclp->tail = &drclp->head; 52 drclp->len = srclp->len; 53 if (!rhp) { 54 rcu_cblist_init(srclp); 55 } else { 56 rhp->next = NULL; 57 srclp->head = rhp; 58 srclp->tail = &rhp->next; 59 WRITE_ONCE(srclp->len, 1); 60 } 61 } 62 63 /* 64 * Dequeue the oldest rcu_head structure from the specified callback 65 * list. 66 */ 67 struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) 68 { 69 struct rcu_head *rhp; 70 71 rhp = rclp->head; 72 if (!rhp) 73 return NULL; 74 rclp->len--; 75 rclp->head = rhp->next; 76 if (!rclp->head) 77 rclp->tail = &rclp->head; 78 return rhp; 79 } 80 81 /* Set the length of an rcu_segcblist structure. */ 82 static void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v) 83 { 84 #ifdef CONFIG_RCU_NOCB_CPU 85 atomic_long_set(&rsclp->len, v); 86 #else 87 WRITE_ONCE(rsclp->len, v); 88 #endif 89 } 90 91 /* 92 * Increase the numeric length of an rcu_segcblist structure by the 93 * specified amount, which can be negative. This can cause the ->len 94 * field to disagree with the actual number of callbacks on the structure. 95 * This increase is fully ordered with respect to the callers accesses 96 * both before and after. 97 */ 98 static void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v) 99 { 100 #ifdef CONFIG_RCU_NOCB_CPU 101 smp_mb__before_atomic(); /* Up to the caller! */ 102 atomic_long_add(v, &rsclp->len); 103 smp_mb__after_atomic(); /* Up to the caller! */ 104 #else 105 smp_mb(); /* Up to the caller! */ 106 WRITE_ONCE(rsclp->len, rsclp->len + v); 107 smp_mb(); /* Up to the caller! */ 108 #endif 109 } 110 111 /* 112 * Increase the numeric length of an rcu_segcblist structure by one. 113 * This can cause the ->len field to disagree with the actual number of 114 * callbacks on the structure. This increase is fully ordered with respect 115 * to the callers accesses both before and after. 116 */ 117 void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp) 118 { 119 rcu_segcblist_add_len(rsclp, 1); 120 } 121 122 /* 123 * Exchange the numeric length of the specified rcu_segcblist structure 124 * with the specified value. This can cause the ->len field to disagree 125 * with the actual number of callbacks on the structure. This exchange is 126 * fully ordered with respect to the callers accesses both before and after. 127 */ 128 static long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v) 129 { 130 #ifdef CONFIG_RCU_NOCB_CPU 131 return atomic_long_xchg(&rsclp->len, v); 132 #else 133 long ret = rsclp->len; 134 135 smp_mb(); /* Up to the caller! */ 136 WRITE_ONCE(rsclp->len, v); 137 smp_mb(); /* Up to the caller! */ 138 return ret; 139 #endif 140 } 141 142 /* 143 * Initialize an rcu_segcblist structure. 144 */ 145 void rcu_segcblist_init(struct rcu_segcblist *rsclp) 146 { 147 int i; 148 149 BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq)); 150 BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq)); 151 rsclp->head = NULL; 152 for (i = 0; i < RCU_CBLIST_NSEGS; i++) 153 rsclp->tails[i] = &rsclp->head; 154 rcu_segcblist_set_len(rsclp, 0); 155 rsclp->enabled = 1; 156 } 157 158 /* 159 * Disable the specified rcu_segcblist structure, so that callbacks can 160 * no longer be posted to it. This structure must be empty. 161 */ 162 void rcu_segcblist_disable(struct rcu_segcblist *rsclp) 163 { 164 WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); 165 WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); 166 rsclp->enabled = 0; 167 } 168 169 /* 170 * Mark the specified rcu_segcblist structure as offloaded. This 171 * structure must be empty. 172 */ 173 void rcu_segcblist_offload(struct rcu_segcblist *rsclp) 174 { 175 rsclp->offloaded = 1; 176 } 177 178 /* 179 * Does the specified rcu_segcblist structure contain callbacks that 180 * are ready to be invoked? 181 */ 182 bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp) 183 { 184 return rcu_segcblist_is_enabled(rsclp) && 185 &rsclp->head != READ_ONCE(rsclp->tails[RCU_DONE_TAIL]); 186 } 187 188 /* 189 * Does the specified rcu_segcblist structure contain callbacks that 190 * are still pending, that is, not yet ready to be invoked? 191 */ 192 bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp) 193 { 194 return rcu_segcblist_is_enabled(rsclp) && 195 !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL); 196 } 197 198 /* 199 * Return a pointer to the first callback in the specified rcu_segcblist 200 * structure. This is useful for diagnostics. 201 */ 202 struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp) 203 { 204 if (rcu_segcblist_is_enabled(rsclp)) 205 return rsclp->head; 206 return NULL; 207 } 208 209 /* 210 * Return a pointer to the first pending callback in the specified 211 * rcu_segcblist structure. This is useful just after posting a given 212 * callback -- if that callback is the first pending callback, then 213 * you cannot rely on someone else having already started up the required 214 * grace period. 215 */ 216 struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) 217 { 218 if (rcu_segcblist_is_enabled(rsclp)) 219 return *rsclp->tails[RCU_DONE_TAIL]; 220 return NULL; 221 } 222 223 /* 224 * Return false if there are no CBs awaiting grace periods, otherwise, 225 * return true and store the nearest waited-upon grace period into *lp. 226 */ 227 bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp) 228 { 229 if (!rcu_segcblist_pend_cbs(rsclp)) 230 return false; 231 *lp = rsclp->gp_seq[RCU_WAIT_TAIL]; 232 return true; 233 } 234 235 /* 236 * Enqueue the specified callback onto the specified rcu_segcblist 237 * structure, updating accounting as needed. Note that the ->len 238 * field may be accessed locklessly, hence the WRITE_ONCE(). 239 * The ->len field is used by rcu_barrier() and friends to determine 240 * if it must post a callback on this structure, and it is OK 241 * for rcu_barrier() to sometimes post callbacks needlessly, but 242 * absolutely not OK for it to ever miss posting a callback. 243 */ 244 void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, 245 struct rcu_head *rhp) 246 { 247 rcu_segcblist_inc_len(rsclp); 248 smp_mb(); /* Ensure counts are updated before callback is enqueued. */ 249 rhp->next = NULL; 250 WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp); 251 WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next); 252 } 253 254 /* 255 * Entrain the specified callback onto the specified rcu_segcblist at 256 * the end of the last non-empty segment. If the entire rcu_segcblist 257 * is empty, make no change, but return false. 258 * 259 * This is intended for use by rcu_barrier()-like primitives, -not- 260 * for normal grace-period use. IMPORTANT: The callback you enqueue 261 * will wait for all prior callbacks, NOT necessarily for a grace 262 * period. You have been warned. 263 */ 264 bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, 265 struct rcu_head *rhp) 266 { 267 int i; 268 269 if (rcu_segcblist_n_cbs(rsclp) == 0) 270 return false; 271 rcu_segcblist_inc_len(rsclp); 272 smp_mb(); /* Ensure counts are updated before callback is entrained. */ 273 rhp->next = NULL; 274 for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--) 275 if (rsclp->tails[i] != rsclp->tails[i - 1]) 276 break; 277 WRITE_ONCE(*rsclp->tails[i], rhp); 278 for (; i <= RCU_NEXT_TAIL; i++) 279 WRITE_ONCE(rsclp->tails[i], &rhp->next); 280 return true; 281 } 282 283 /* 284 * Extract only the counts from the specified rcu_segcblist structure, 285 * and place them in the specified rcu_cblist structure. This function 286 * supports both callback orphaning and invocation, hence the separation 287 * of counts and callbacks. (Callbacks ready for invocation must be 288 * orphaned and adopted separately from pending callbacks, but counts 289 * apply to all callbacks. Locking must be used to make sure that 290 * both orphaned-callbacks lists are consistent.) 291 */ 292 void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, 293 struct rcu_cblist *rclp) 294 { 295 rclp->len = rcu_segcblist_xchg_len(rsclp, 0); 296 } 297 298 /* 299 * Extract only those callbacks ready to be invoked from the specified 300 * rcu_segcblist structure and place them in the specified rcu_cblist 301 * structure. 302 */ 303 void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, 304 struct rcu_cblist *rclp) 305 { 306 int i; 307 308 if (!rcu_segcblist_ready_cbs(rsclp)) 309 return; /* Nothing to do. */ 310 *rclp->tail = rsclp->head; 311 WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]); 312 WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL); 313 rclp->tail = rsclp->tails[RCU_DONE_TAIL]; 314 for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) 315 if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) 316 WRITE_ONCE(rsclp->tails[i], &rsclp->head); 317 } 318 319 /* 320 * Extract only those callbacks still pending (not yet ready to be 321 * invoked) from the specified rcu_segcblist structure and place them in 322 * the specified rcu_cblist structure. Note that this loses information 323 * about any callbacks that might have been partway done waiting for 324 * their grace period. Too bad! They will have to start over. 325 */ 326 void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, 327 struct rcu_cblist *rclp) 328 { 329 int i; 330 331 if (!rcu_segcblist_pend_cbs(rsclp)) 332 return; /* Nothing to do. */ 333 *rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; 334 rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; 335 WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL); 336 for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) 337 WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]); 338 } 339 340 /* 341 * Insert counts from the specified rcu_cblist structure in the 342 * specified rcu_segcblist structure. 343 */ 344 void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, 345 struct rcu_cblist *rclp) 346 { 347 rcu_segcblist_add_len(rsclp, rclp->len); 348 rclp->len = 0; 349 } 350 351 /* 352 * Move callbacks from the specified rcu_cblist to the beginning of the 353 * done-callbacks segment of the specified rcu_segcblist. 354 */ 355 void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, 356 struct rcu_cblist *rclp) 357 { 358 int i; 359 360 if (!rclp->head) 361 return; /* No callbacks to move. */ 362 *rclp->tail = rsclp->head; 363 WRITE_ONCE(rsclp->head, rclp->head); 364 for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) 365 if (&rsclp->head == rsclp->tails[i]) 366 WRITE_ONCE(rsclp->tails[i], rclp->tail); 367 else 368 break; 369 rclp->head = NULL; 370 rclp->tail = &rclp->head; 371 } 372 373 /* 374 * Move callbacks from the specified rcu_cblist to the end of the 375 * new-callbacks segment of the specified rcu_segcblist. 376 */ 377 void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, 378 struct rcu_cblist *rclp) 379 { 380 if (!rclp->head) 381 return; /* Nothing to do. */ 382 WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head); 383 WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail); 384 } 385 386 /* 387 * Advance the callbacks in the specified rcu_segcblist structure based 388 * on the current value passed in for the grace-period counter. 389 */ 390 void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq) 391 { 392 int i, j; 393 394 WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); 395 if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) 396 return; 397 398 /* 399 * Find all callbacks whose ->gp_seq numbers indicate that they 400 * are ready to invoke, and put them into the RCU_DONE_TAIL segment. 401 */ 402 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { 403 if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) 404 break; 405 WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]); 406 } 407 408 /* If no callbacks moved, nothing more need be done. */ 409 if (i == RCU_WAIT_TAIL) 410 return; 411 412 /* Clean up tail pointers that might have been misordered above. */ 413 for (j = RCU_WAIT_TAIL; j < i; j++) 414 WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]); 415 416 /* 417 * Callbacks moved, so clean up the misordered ->tails[] pointers 418 * that now point into the middle of the list of ready-to-invoke 419 * callbacks. The overall effect is to copy down the later pointers 420 * into the gap that was created by the now-ready segments. 421 */ 422 for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { 423 if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) 424 break; /* No more callbacks. */ 425 WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]); 426 rsclp->gp_seq[j] = rsclp->gp_seq[i]; 427 } 428 } 429 430 /* 431 * "Accelerate" callbacks based on more-accurate grace-period information. 432 * The reason for this is that RCU does not synchronize the beginnings and 433 * ends of grace periods, and that callbacks are posted locally. This in 434 * turn means that the callbacks must be labelled conservatively early 435 * on, as getting exact information would degrade both performance and 436 * scalability. When more accurate grace-period information becomes 437 * available, previously posted callbacks can be "accelerated", marking 438 * them to complete at the end of the earlier grace period. 439 * 440 * This function operates on an rcu_segcblist structure, and also the 441 * grace-period sequence number seq at which new callbacks would become 442 * ready to invoke. Returns true if there are callbacks that won't be 443 * ready to invoke until seq, false otherwise. 444 */ 445 bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq) 446 { 447 int i; 448 449 WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); 450 if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) 451 return false; 452 453 /* 454 * Find the segment preceding the oldest segment of callbacks 455 * whose ->gp_seq[] completion is at or after that passed in via 456 * "seq", skipping any empty segments. This oldest segment, along 457 * with any later segments, can be merged in with any newly arrived 458 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq" 459 * as their ->gp_seq[] grace-period completion sequence number. 460 */ 461 for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--) 462 if (rsclp->tails[i] != rsclp->tails[i - 1] && 463 ULONG_CMP_LT(rsclp->gp_seq[i], seq)) 464 break; 465 466 /* 467 * If all the segments contain callbacks that correspond to 468 * earlier grace-period sequence numbers than "seq", leave. 469 * Assuming that the rcu_segcblist structure has enough 470 * segments in its arrays, this can only happen if some of 471 * the non-done segments contain callbacks that really are 472 * ready to invoke. This situation will get straightened 473 * out by the next call to rcu_segcblist_advance(). 474 * 475 * Also advance to the oldest segment of callbacks whose 476 * ->gp_seq[] completion is at or after that passed in via "seq", 477 * skipping any empty segments. 478 * 479 * Note that segment "i" (and any lower-numbered segments 480 * containing older callbacks) will be unaffected, and their 481 * grace-period numbers remain unchanged. For example, if i == 482 * WAIT_TAIL, then neither WAIT_TAIL nor DONE_TAIL will be touched. 483 * Instead, the CBs in NEXT_TAIL will be merged with those in 484 * NEXT_READY_TAIL and the grace-period number of NEXT_READY_TAIL 485 * would be updated. NEXT_TAIL would then be empty. 486 */ 487 if (rcu_segcblist_restempty(rsclp, i) || ++i >= RCU_NEXT_TAIL) 488 return false; 489 490 /* 491 * Merge all later callbacks, including newly arrived callbacks, 492 * into the segment located by the for-loop above. Assign "seq" 493 * as the ->gp_seq[] value in order to correctly handle the case 494 * where there were no pending callbacks in the rcu_segcblist 495 * structure other than in the RCU_NEXT_TAIL segment. 496 */ 497 for (; i < RCU_NEXT_TAIL; i++) { 498 WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]); 499 rsclp->gp_seq[i] = seq; 500 } 501 return true; 502 } 503 504 /* 505 * Merge the source rcu_segcblist structure into the destination 506 * rcu_segcblist structure, then initialize the source. Any pending 507 * callbacks from the source get to start over. It is best to 508 * advance and accelerate both the destination and the source 509 * before merging. 510 */ 511 void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp, 512 struct rcu_segcblist *src_rsclp) 513 { 514 struct rcu_cblist donecbs; 515 struct rcu_cblist pendcbs; 516 517 rcu_cblist_init(&donecbs); 518 rcu_cblist_init(&pendcbs); 519 rcu_segcblist_extract_count(src_rsclp, &donecbs); 520 rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs); 521 rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs); 522 rcu_segcblist_insert_count(dst_rsclp, &donecbs); 523 rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs); 524 rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs); 525 rcu_segcblist_init(src_rsclp); 526 } 527