// SPDX-License-Identifier: GPL-2.0+
/*
 * RCU segmented callback lists, function definitions
 *
 * Copyright IBM Corporation, 2017
 *
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/rcupdate.h>

#include "rcu_segcblist.h"

/* Initialize simple callback list. */
void rcu_cblist_init(struct rcu_cblist *rclp)
{
	rclp->head = NULL;
	rclp->tail = &rclp->head;
	rclp->len = 0;
	rclp->len_lazy = 0;
}

/*
 * Dequeue the oldest rcu_head structure from the specified callback
 * list. This function assumes that the callback is non-lazy, but
 * the caller can later invoke rcu_cblist_dequeued_lazy() if it
 * finds otherwise (and if it cares about laziness). This allows
 * different users to have different ways of determining laziness.
 */
struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
{
	struct rcu_head *rhp;

	rhp = rclp->head;
	if (!rhp)
		return NULL;
	rclp->len--;
	rclp->head = rhp->next;
	if (!rclp->head)
		rclp->tail = &rclp->head;
	return rhp;
}

/*
 * Initialize an rcu_segcblist structure.
 */
void rcu_segcblist_init(struct rcu_segcblist *rsclp)
{
	int i;

	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
	rsclp->head = NULL;
	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
		rsclp->tails[i] = &rsclp->head;
	rsclp->len = 0;
	rsclp->len_lazy = 0;
	rsclp->enabled = 1;
}

/*
 * Disable the specified rcu_segcblist structure, so that callbacks can
 * no longer be posted to it. This structure must be empty.
 */
void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
{
	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
	rsclp->enabled = 0;
}

/*
 * Mark the specified rcu_segcblist structure as offloaded. This
 * structure must be empty.
 */
void rcu_segcblist_offload(struct rcu_segcblist *rsclp)
{
	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
	rsclp->offloaded = 1;
}

/*
 * Does the specified rcu_segcblist structure contain callbacks that
 * are ready to be invoked?
 */
bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
{
	return rcu_segcblist_is_enabled(rsclp) &&
	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
}

/*
 * Does the specified rcu_segcblist structure contain callbacks that
 * are still pending, that is, not yet ready to be invoked?
 */
bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
{
	return rcu_segcblist_is_enabled(rsclp) &&
	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
}

/*
 * Return a pointer to the first callback in the specified rcu_segcblist
 * structure. This is useful for diagnostics.
 */
struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
{
	if (rcu_segcblist_is_enabled(rsclp))
		return rsclp->head;
	return NULL;
}

/*
 * Return a pointer to the first pending callback in the specified
 * rcu_segcblist structure. This is useful just after posting a given
 * callback -- if that callback is the first pending callback, then
 * you cannot rely on someone else having already started up the required
 * grace period.
 */
struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
{
	if (rcu_segcblist_is_enabled(rsclp))
		return *rsclp->tails[RCU_DONE_TAIL];
	return NULL;
}
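/*
 * Illustrative sketch, not part of the original file: right after
 * posting a callback, a caller can check whether that callback is now
 * the first pending one, in which case it cannot rely on anyone else
 * having started the needed grace period. The helper my_start_gp()
 * is hypothetical:
 *
 *	rcu_segcblist_enqueue(rsclp, rhp, false);
 *	if (rcu_segcblist_first_pend_cb(rsclp) == rhp)
 *		my_start_gp();
 */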
/*
 * Enqueue the specified callback onto the specified rcu_segcblist
 * structure, updating accounting as needed. Note that the ->len
 * field may be accessed locklessly, hence the WRITE_ONCE().
 * The ->len field is used by rcu_barrier() and friends to determine
 * if it must post a callback on this structure, and it is OK
 * for rcu_barrier() to sometimes post callbacks needlessly, but
 * absolutely not OK for it to ever miss posting a callback.
 */
void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
			   struct rcu_head *rhp, bool lazy)
{
	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
	if (lazy)
		rsclp->len_lazy++;
	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
	rhp->next = NULL;
	WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp);
	WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next);
}

/*
 * Entrain the specified callback onto the specified rcu_segcblist at
 * the end of the last non-empty segment. If the entire rcu_segcblist
 * is empty, make no change, but return false.
 *
 * This is intended for use by rcu_barrier()-like primitives, -not-
 * for normal grace-period use. IMPORTANT: The callback you enqueue
 * will wait for all prior callbacks, NOT necessarily for a grace
 * period. You have been warned.
 */
bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
			   struct rcu_head *rhp, bool lazy)
{
	int i;

	if (rcu_segcblist_n_cbs(rsclp) == 0)
		return false;
	WRITE_ONCE(rsclp->len, rsclp->len + 1);
	if (lazy)
		rsclp->len_lazy++;
	smp_mb(); /* Ensure counts are updated before callback is entrained. */
	rhp->next = NULL;
	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] != rsclp->tails[i - 1])
			break;
	WRITE_ONCE(*rsclp->tails[i], rhp);
	for (; i <= RCU_NEXT_TAIL; i++)
		WRITE_ONCE(rsclp->tails[i], &rhp->next);
	return true;
}
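/*
 * Illustrative sketch, not part of the original file: an
 * rcu_barrier()-like primitive might entrain its own callback so that
 * it runs only after every callback already queued here. If the list
 * is empty, there is nothing to wait for, so the callback can be
 * invoked directly. The names barrier_cb and my_barrier_callback()
 * are hypothetical:
 *
 *	barrier_cb.func = my_barrier_callback;
 *	if (!rcu_segcblist_entrain(rsclp, &barrier_cb, false))
 *		my_barrier_callback(&barrier_cb);
 */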
/*
 * Extract only the counts from the specified rcu_segcblist structure,
 * and place them in the specified rcu_cblist structure. This function
 * supports both callback orphaning and invocation, hence the separation
 * of counts and callbacks. (Callbacks ready for invocation must be
 * orphaned and adopted separately from pending callbacks, but counts
 * apply to all callbacks. Locking must be used to make sure that
 * both orphaned-callbacks lists are consistent.)
 */
void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
				 struct rcu_cblist *rclp)
{
	rclp->len_lazy += rsclp->len_lazy;
	rclp->len += rsclp->len;
	rsclp->len_lazy = 0;
	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
}

/*
 * Extract only those callbacks ready to be invoked from the specified
 * rcu_segcblist structure and place them in the specified rcu_cblist
 * structure.
 */
void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
				    struct rcu_cblist *rclp)
{
	int i;

	if (!rcu_segcblist_ready_cbs(rsclp))
		return; /* Nothing to do. */
	*rclp->tail = rsclp->head;
	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
	WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
			WRITE_ONCE(rsclp->tails[i], &rsclp->head);
}

/*
 * Extract only those callbacks still pending (not yet ready to be
 * invoked) from the specified rcu_segcblist structure and place them in
 * the specified rcu_cblist structure. Note that this loses information
 * about any callbacks that might have been partway done waiting for
 * their grace period. Too bad! They will have to start over.
 */
void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
				    struct rcu_cblist *rclp)
{
	int i;

	if (!rcu_segcblist_pend_cbs(rsclp))
		return; /* Nothing to do. */
	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
	WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
		WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]);
}

/*
 * Insert counts from the specified rcu_cblist structure in the
 * specified rcu_segcblist structure.
 */
void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
				struct rcu_cblist *rclp)
{
	rsclp->len_lazy += rclp->len_lazy;
	/* ->len sampled locklessly. */
	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
	rclp->len_lazy = 0;
	rclp->len = 0;
}

/*
 * Move callbacks from the specified rcu_cblist to the beginning of the
 * done-callbacks segment of the specified rcu_segcblist.
 */
void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
				   struct rcu_cblist *rclp)
{
	int i;

	if (!rclp->head)
		return; /* No callbacks to move. */
	*rclp->tail = rsclp->head;
	rsclp->head = rclp->head;
	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
		if (&rsclp->head == rsclp->tails[i])
			WRITE_ONCE(rsclp->tails[i], rclp->tail);
		else
			break;
	rclp->head = NULL;
	rclp->tail = &rclp->head;
}

/*
 * Move callbacks from the specified rcu_cblist to the end of the
 * new-callbacks segment of the specified rcu_segcblist.
 */
void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
				   struct rcu_cblist *rclp)
{
	if (!rclp->head)
		return; /* Nothing to do. */
	WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head);
	WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail);
	rclp->head = NULL;
	rclp->tail = &rclp->head;
}
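/*
 * Illustrative sketch, not part of the original file: invoking ready
 * callbacks by extracting them into a simple rcu_cblist and draining
 * it, which is roughly the shape of RCU's callback-invocation step.
 * Locking and count adjustment are the caller's responsibility and
 * are omitted here:
 *
 *	struct rcu_cblist ready;
 *	struct rcu_head *rhp;
 *
 *	rcu_cblist_init(&ready);
 *	rcu_segcblist_extract_done_cbs(rsclp, &ready);
 *	while ((rhp = rcu_cblist_dequeue(&ready)) != NULL)
 *		rhp->func(rhp);
 */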
/*
 * Advance the callbacks in the specified rcu_segcblist structure based
 * on the current value passed in for the grace-period counter.
 */
void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
{
	int i, j;

	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
		return;

	/*
	 * Find all callbacks whose ->gp_seq numbers indicate that they
	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
	 */
	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
			break;
		WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
	}

	/* If no callbacks moved, nothing more need be done. */
	if (i == RCU_WAIT_TAIL)
		return;

	/* Clean up tail pointers that might have been misordered above. */
	for (j = RCU_WAIT_TAIL; j < i; j++)
		WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);

	/*
	 * Callbacks moved, so clean up the misordered ->tails[] pointers
	 * that now point into the middle of the list of ready-to-invoke
	 * callbacks. The overall effect is to copy down the later pointers
	 * into the gap that was created by the now-ready segments.
	 */
	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
			break; /* No more callbacks. */
		WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]);
		rsclp->gp_seq[j] = rsclp->gp_seq[i];
	}
}
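/*
 * Illustrative sketch, not part of the original file: a typical caller
 * advances the list against the current grace-period number and then
 * checks whether any callbacks became ready. The helpers
 * my_get_gp_seq() and my_invoke_ready_cbs() are hypothetical
 * stand-ins for the caller's grace-period source and invocation path:
 *
 *	rcu_segcblist_advance(rsclp, my_get_gp_seq());
 *	if (rcu_segcblist_ready_cbs(rsclp))
 *		my_invoke_ready_cbs(rsclp);
 */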
/*
 * "Accelerate" callbacks based on more-accurate grace-period information.
 * The reason for this is that RCU does not synchronize the beginnings and
 * ends of grace periods, and that callbacks are posted locally. This in
 * turn means that the callbacks must be labelled conservatively early
 * on, as getting exact information would degrade both performance and
 * scalability. When more accurate grace-period information becomes
 * available, previously posted callbacks can be "accelerated", marking
 * them to complete at the end of the earlier grace period.
 *
 * This function operates on an rcu_segcblist structure, and also the
 * grace-period sequence number seq at which new callbacks would become
 * ready to invoke. Returns true if there are callbacks that won't be
 * ready to invoke until seq, false otherwise.
 */
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
{
	int i;

	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
		return false;

	/*
	 * Find the segment preceding the oldest segment of callbacks
	 * whose ->gp_seq[] completion is at or after that passed in via
	 * "seq", skipping any empty segments. This oldest segment, along
	 * with any later segments, can be merged in with any newly arrived
	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
	 * as their ->gp_seq[] grace-period completion sequence number.
	 */
	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
			break;

	/*
	 * If all the segments contain callbacks that correspond to
	 * earlier grace-period sequence numbers than "seq", leave.
	 * Assuming that the rcu_segcblist structure has enough
	 * segments in its arrays, this can only happen if some of
	 * the non-done segments contain callbacks that really are
	 * ready to invoke. This situation will get straightened
	 * out by the next call to rcu_segcblist_advance().
	 *
	 * Also advance to the oldest segment of callbacks whose
	 * ->gp_seq[] completion is at or after that passed in via "seq",
	 * skipping any empty segments.
	 */
	if (++i >= RCU_NEXT_TAIL)
		return false;

	/*
	 * Merge all later callbacks, including newly arrived callbacks,
	 * into the segment located by the for-loop above. Assign "seq"
	 * as the ->gp_seq[] value in order to correctly handle the case
	 * where there were no pending callbacks in the rcu_segcblist
	 * structure other than in the RCU_NEXT_TAIL segment.
	 */
	for (; i < RCU_NEXT_TAIL; i++) {
		WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]);
		rsclp->gp_seq[i] = seq;
	}
	return true;
}

/*
 * Merge the source rcu_segcblist structure into the destination
 * rcu_segcblist structure, then initialize the source. Any pending
 * callbacks from the source get to start over. It is best to
 * advance and accelerate both the destination and the source
 * before merging.
 */
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
			 struct rcu_segcblist *src_rsclp)
{
	struct rcu_cblist donecbs;
	struct rcu_cblist pendcbs;

	rcu_cblist_init(&donecbs);
	rcu_cblist_init(&pendcbs);
	rcu_segcblist_extract_count(src_rsclp, &donecbs);
	rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
	rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
	rcu_segcblist_insert_count(dst_rsclp, &donecbs);
	rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
	rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
	rcu_segcblist_init(src_rsclp);
}
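/*
 * Illustrative sketch, not part of the original file: migrating all
 * callbacks from one list to another, roughly the shape of RCU's
 * CPU-hotplug callback migration. As the comment above notes,
 * advancing and accelerating both lists first minimizes the number of
 * callbacks forced to start over. The helper my_get_gp_seq() is a
 * hypothetical source of grace-period sequence numbers, and the
 * caller is assumed to hold whatever locks are needed:
 *
 *	rcu_segcblist_advance(src_rsclp, my_get_gp_seq());
 *	rcu_segcblist_accelerate(src_rsclp, my_get_gp_seq());
 *	rcu_segcblist_advance(dst_rsclp, my_get_gp_seq());
 *	rcu_segcblist_accelerate(dst_rsclp, my_get_gp_seq());
 *	rcu_segcblist_merge(dst_rsclp, src_rsclp);
 */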