// SPDX-License-Identifier: GPL-2.0+
/*
 * RCU segmented callback lists, function definitions
 *
 * Copyright IBM Corporation, 2017
 *
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/rcupdate.h>

#include "rcu_segcblist.h"

/* Initialize simple callback list. */
void rcu_cblist_init(struct rcu_cblist *rclp)
{
	rclp->head = NULL;
	rclp->tail = &rclp->head;
	rclp->len = 0;
	rclp->len_lazy = 0;
}

/*
 * Dequeue the oldest rcu_head structure from the specified callback
 * list. This function assumes that the callback is non-lazy, but
 * the caller can later invoke rcu_cblist_dequeued_lazy() if it
 * finds otherwise (and if it cares about laziness). This allows
 * different users to have different ways of determining laziness.
 */
struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
{
	struct rcu_head *rhp;

	rhp = rclp->head;
	if (!rhp)
		return NULL;
	rclp->len--;
	rclp->head = rhp->next;
	if (!rclp->head)
		rclp->tail = &rclp->head;
	return rhp;
}

/*
 * Initialize an rcu_segcblist structure.
 */
void rcu_segcblist_init(struct rcu_segcblist *rsclp)
{
	int i;

	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
	rsclp->head = NULL;
	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
		rsclp->tails[i] = &rsclp->head;
	rsclp->len = 0;
	rsclp->len_lazy = 0;
	rsclp->enabled = 1;
}

/*
 * Disable the specified rcu_segcblist structure, so that callbacks can
 * no longer be posted to it. This structure must be empty.
 */
void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
{
	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
	rsclp->enabled = 0;
}

/*
 * Mark the specified rcu_segcblist structure as offloaded. This
 * structure must be empty.
 */
void rcu_segcblist_offload(struct rcu_segcblist *rsclp)
{
	rsclp->offloaded = 1;
}

/*
 * Does the specified rcu_segcblist structure contain callbacks that
 * are ready to be invoked?
 */
bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
{
	return rcu_segcblist_is_enabled(rsclp) &&
	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
}

/*
 * Does the specified rcu_segcblist structure contain callbacks that
 * are still pending, that is, not yet ready to be invoked?
 */
bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
{
	return rcu_segcblist_is_enabled(rsclp) &&
	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
}

/*
 * Return a pointer to the first callback in the specified rcu_segcblist
 * structure. This is useful for diagnostics.
 */
struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
{
	if (rcu_segcblist_is_enabled(rsclp))
		return rsclp->head;
	return NULL;
}
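
/*
 * Illustrative sketch, not part of the original file: a minimal
 * example of how a caller that has pulled callbacks into a simple
 * rcu_cblist might drain and invoke them with rcu_cblist_dequeue().
 * The function name example_invoke_cblist() is hypothetical, and
 * laziness accounting is ignored for brevity.
 */
static inline void example_invoke_cblist(struct rcu_cblist *rclp)
{
	struct rcu_head *rhp;

	while ((rhp = rcu_cblist_dequeue(rclp)) != NULL)
		rhp->func(rhp); /* Invoke the RCU callback. */
}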

/*
 * Return a pointer to the first pending callback in the specified
 * rcu_segcblist structure. This is useful just after posting a given
 * callback -- if that callback is the first pending callback, then
 * you cannot rely on someone else having already started up the required
 * grace period.
 */
struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
{
	if (rcu_segcblist_is_enabled(rsclp))
		return *rsclp->tails[RCU_DONE_TAIL];
	return NULL;
}

/*
 * Enqueue the specified callback onto the specified rcu_segcblist
 * structure, updating accounting as needed. Note that the ->len
 * field may be accessed locklessly, hence the WRITE_ONCE().
 * The ->len field is used by rcu_barrier() and friends to determine
 * if it must post a callback on this structure, and it is OK
 * for rcu_barrier() to sometimes post callbacks needlessly, but
 * absolutely not OK for it to ever miss posting a callback.
 */
void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
			   struct rcu_head *rhp, bool lazy)
{
	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
	if (lazy)
		rsclp->len_lazy++;
	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
	rhp->next = NULL;
	WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp);
	WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next);
}

/*
 * Entrain the specified callback onto the specified rcu_segcblist at
 * the end of the last non-empty segment. If the entire rcu_segcblist
 * is empty, make no change, but return false.
 *
 * This is intended for use by rcu_barrier()-like primitives, -not-
 * for normal grace-period use. IMPORTANT: The callback you enqueue
 * will wait for all prior callbacks, NOT necessarily for a grace
 * period. You have been warned.
 */
bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
			   struct rcu_head *rhp, bool lazy)
{
	int i;

	if (rcu_segcblist_n_cbs(rsclp) == 0)
		return false;
	WRITE_ONCE(rsclp->len, rsclp->len + 1);
	if (lazy)
		rsclp->len_lazy++;
	smp_mb(); /* Ensure counts are updated before callback is entrained. */
	rhp->next = NULL;
	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] != rsclp->tails[i - 1])
			break;
	WRITE_ONCE(*rsclp->tails[i], rhp);
	for (; i <= RCU_NEXT_TAIL; i++)
		WRITE_ONCE(rsclp->tails[i], &rhp->next);
	return true;
}
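
/*
 * Illustrative sketch, not part of the original file: posting a
 * callback and then using rcu_segcblist_first_pend_cb() as described
 * above to decide whether a grace period must be started. The name
 * example_post_cb() and the start_gp() hook are hypothetical; a real
 * caller would hold the lock protecting rsclp.
 */
static inline void example_post_cb(struct rcu_segcblist *rsclp,
				   struct rcu_head *rhp,
				   void (*func)(struct rcu_head *),
				   void (*start_gp)(void))
{
	rhp->func = func;
	rcu_segcblist_enqueue(rsclp, rhp, false); /* Non-lazy. */
	/*
	 * If rhp is now the first pending callback, nobody else can
	 * be relied on to have started the required grace period.
	 */
	if (rcu_segcblist_first_pend_cb(rsclp) == rhp)
		start_gp();
}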

/*
 * Extract only the counts from the specified rcu_segcblist structure,
 * and place them in the specified rcu_cblist structure. This function
 * supports both callback orphaning and invocation, hence the separation
 * of counts and callbacks. (Callbacks ready for invocation must be
 * orphaned and adopted separately from pending callbacks, but counts
 * apply to all callbacks. Locking must be used to make sure that
 * both orphaned-callbacks lists are consistent.)
 */
void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
				 struct rcu_cblist *rclp)
{
	rclp->len_lazy += rsclp->len_lazy;
	rclp->len += rsclp->len;
	rsclp->len_lazy = 0;
	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
}

/*
 * Extract only those callbacks ready to be invoked from the specified
 * rcu_segcblist structure and place them in the specified rcu_cblist
 * structure.
 */
void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
				    struct rcu_cblist *rclp)
{
	int i;

	if (!rcu_segcblist_ready_cbs(rsclp))
		return; /* Nothing to do. */
	*rclp->tail = rsclp->head;
	WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]);
	WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
			WRITE_ONCE(rsclp->tails[i], &rsclp->head);
}

/*
 * Extract only those callbacks still pending (not yet ready to be
 * invoked) from the specified rcu_segcblist structure and place them in
 * the specified rcu_cblist structure. Note that this loses information
 * about any callbacks that might have been partway done waiting for
 * their grace period. Too bad! They will have to start over.
 */
void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
				    struct rcu_cblist *rclp)
{
	int i;

	if (!rcu_segcblist_pend_cbs(rsclp))
		return; /* Nothing to do. */
	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
	WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
		WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]);
}

/*
 * Insert counts from the specified rcu_cblist structure in the
 * specified rcu_segcblist structure.
 */
void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
				struct rcu_cblist *rclp)
{
	rsclp->len_lazy += rclp->len_lazy;
	/* ->len sampled locklessly. */
	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
	rclp->len_lazy = 0;
	rclp->len = 0;
}

/*
 * Move callbacks from the specified rcu_cblist to the beginning of the
 * done-callbacks segment of the specified rcu_segcblist.
 */
void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
				   struct rcu_cblist *rclp)
{
	int i;

	if (!rclp->head)
		return; /* No callbacks to move. */
	*rclp->tail = rsclp->head;
	WRITE_ONCE(rsclp->head, rclp->head);
	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
		if (&rsclp->head == rsclp->tails[i])
			WRITE_ONCE(rsclp->tails[i], rclp->tail);
		else
			break;
	rclp->head = NULL;
	rclp->tail = &rclp->head;
}

/*
 * Move callbacks from the specified rcu_cblist to the end of the
 * new-callbacks segment of the specified rcu_segcblist.
 */
void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
				   struct rcu_cblist *rclp)
{
	if (!rclp->head)
		return; /* Nothing to do. */
	WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head);
	WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail);
	rclp->head = NULL;
	rclp->tail = &rclp->head;
}
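
/*
 * Illustrative sketch, not part of the original file: the extract
 * functions above are meant to be used in count-then-callbacks pairs
 * under the caller's locking, as rcu_segcblist_merge() below does.
 * This hypothetical example_extract_all() empties rsclp into two
 * local rcu_cblist structures supplied by the caller.
 */
static inline void example_extract_all(struct rcu_segcblist *rsclp,
				       struct rcu_cblist *donecbs,
				       struct rcu_cblist *pendcbs)
{
	rcu_cblist_init(donecbs);
	rcu_cblist_init(pendcbs);
	rcu_segcblist_extract_count(rsclp, donecbs); /* Counts first. */
	rcu_segcblist_extract_done_cbs(rsclp, donecbs);
	rcu_segcblist_extract_pend_cbs(rsclp, pendcbs);
}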

/*
 * Advance the callbacks in the specified rcu_segcblist structure based
 * on the current value passed in for the grace-period counter.
 */
void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
{
	int i, j;

	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
		return;

	/*
	 * Find all callbacks whose ->gp_seq numbers indicate that they
	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
	 */
	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
			break;
		WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
	}

	/* If no callbacks moved, nothing more need be done. */
	if (i == RCU_WAIT_TAIL)
		return;

	/* Clean up tail pointers that might have been misordered above. */
	for (j = RCU_WAIT_TAIL; j < i; j++)
		WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);

	/*
	 * Callbacks moved, so clean up the misordered ->tails[] pointers
	 * that now point into the middle of the list of ready-to-invoke
	 * callbacks. The overall effect is to copy down the later pointers
	 * into the gap that was created by the now-ready segments.
	 */
	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
			break; /* No more callbacks. */
		WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]);
		rsclp->gp_seq[j] = rsclp->gp_seq[i];
	}
}
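
/*
 * Illustrative sketch, not part of the original file: callers
 * typically advance callbacks against the current grace-period
 * sequence number and then re-accelerate the remainder against a
 * future one (see rcu_segcblist_accelerate() below). This assumes
 * the rcu_seq_current() and rcu_seq_snap() helpers from
 * kernel/rcu/rcu.h; example_note_gp_progress() is hypothetical.
 */
static inline bool example_note_gp_progress(struct rcu_segcblist *rsclp,
					    unsigned long *gp_seq)
{
	rcu_segcblist_advance(rsclp, rcu_seq_current(gp_seq));
	return rcu_segcblist_accelerate(rsclp, rcu_seq_snap(gp_seq));
}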

/*
 * "Accelerate" callbacks based on more-accurate grace-period information.
 * The reason for this is that RCU does not synchronize the beginnings and
 * ends of grace periods, and that callbacks are posted locally. This in
 * turn means that the callbacks must be labelled conservatively early
 * on, as getting exact information would degrade both performance and
 * scalability. When more accurate grace-period information becomes
 * available, previously posted callbacks can be "accelerated", marking
 * them to complete at the end of the earlier grace period.
 *
 * This function operates on an rcu_segcblist structure, and also the
 * grace-period sequence number seq at which new callbacks would become
 * ready to invoke. Returns true if there are callbacks that won't be
 * ready to invoke until seq, false otherwise.
 */
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
{
	int i;

	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
		return false;

	/*
	 * Find the segment preceding the oldest segment of callbacks
	 * whose ->gp_seq[] completion is at or after that passed in via
	 * "seq", skipping any empty segments. This oldest segment, along
	 * with any later segments, can be merged in with any newly arrived
	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
	 * as their ->gp_seq[] grace-period completion sequence number.
	 */
	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
			break;

	/*
	 * If all the segments contain callbacks that correspond to
	 * earlier grace-period sequence numbers than "seq", leave.
	 * Assuming that the rcu_segcblist structure has enough
	 * segments in its arrays, this can only happen if some of
	 * the non-done segments contain callbacks that really are
	 * ready to invoke. This situation will get straightened
	 * out by the next call to rcu_segcblist_advance().
	 *
	 * Also advance to the oldest segment of callbacks whose
	 * ->gp_seq[] completion is at or after that passed in via "seq",
	 * skipping any empty segments.
	 */
	if (++i >= RCU_NEXT_TAIL)
		return false;

	/*
	 * Merge all later callbacks, including newly arrived callbacks,
	 * into the segment located by the for-loop above. Assign "seq"
	 * as the ->gp_seq[] value in order to correctly handle the case
	 * where there were no pending callbacks in the rcu_segcblist
	 * structure other than in the RCU_NEXT_TAIL segment.
	 */
	for (; i < RCU_NEXT_TAIL; i++) {
		WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]);
		rsclp->gp_seq[i] = seq;
	}
	return true;
}

/*
 * Merge the source rcu_segcblist structure into the destination
 * rcu_segcblist structure, then initialize the source. Any pending
 * callbacks from the source get to start over. It is best to
 * advance and accelerate both the destination and the source
 * before merging.
 */
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
			 struct rcu_segcblist *src_rsclp)
{
	struct rcu_cblist donecbs;
	struct rcu_cblist pendcbs;

	rcu_cblist_init(&donecbs);
	rcu_cblist_init(&pendcbs);
	rcu_segcblist_extract_count(src_rsclp, &donecbs);
	rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
	rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
	rcu_segcblist_insert_count(dst_rsclp, &donecbs);
	rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
	rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
	rcu_segcblist_init(src_rsclp);
}
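
/*
 * Illustrative sketch, not part of the original file: one use of
 * rcu_segcblist_merge() is migrating callbacks from a list that is
 * going out of service (for example, on an outgoing CPU) into a
 * surviving list, leaving the source empty and reinitialized. The
 * name example_migrate_cbs() is hypothetical; the caller is assumed
 * to hold whatever locks protect both structures.
 */
static inline void example_migrate_cbs(struct rcu_segcblist *dst,
				       struct rcu_segcblist *src)
{
	if (rcu_segcblist_empty(src))
		return; /* Nothing to migrate. */
	rcu_segcblist_merge(dst, src);
	WARN_ON_ONCE(!rcu_segcblist_empty(src));
}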