// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "%s: " fmt "\n", __func__

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/percpu-refcount.h>

/*
 * Initially, a percpu refcount is just a set of percpu counters. Initially, we
 * don't try to detect the ref hitting 0 - which means that get/put can just
 * increment or decrement the local counter. Note that the counter on a
 * particular cpu can (and will) wrap - this is fine, when we go to shutdown the
 * percpu counters will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative the sum of all the
 * percpu_count vars will be equal to what it would have been if all the gets
 * and puts were done to a single integer, even if some of the percpu integers
 * overflow or underflow).
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non percpu mode before the initial ref is dropped everything
 * works.
 *
 * Converting to non percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */

#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))
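
/*
 * A purely illustrative worked example of the modular-arithmetic claim
 * above, using 4-bit counters for brevity: if cpu0 performs 20 gets its
 * counter reads 4 (20 mod 16), and if cpu1 performs 18 puts its counter
 * reads 14 (-18 mod 16). Their sum, 18, is congruent to 2 mod 16, which
 * matches the 20 - 18 = 2 net gets actually outstanding. PERCPU_COUNT_BIAS
 * plays a complementary role for &ref->count: in percpu mode the atomic
 * counter starts at BIAS plus the initial ref, so it cannot reach 0 until
 * the percpu sum has been folded in (and the bias subtracted) during the
 * switch to atomic mode.
 */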

static DEFINE_SPINLOCK(percpu_ref_switch_lock);
static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);

static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
	return (unsigned long __percpu *)
		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
}

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @flags: PERCPU_REF_INIT_* flags
 * @gfp: allocation mask to use
 *
 * Initializes @ref. If @flags is zero, @ref starts in percpu mode with a
 * refcount of 1; analogous to atomic_long_set(ref, 1). See the
 * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
		    unsigned int flags, gfp_t gfp)
{
	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
			     __alignof__(unsigned long));
	unsigned long start_count = 0;

	ref->percpu_count_ptr = (unsigned long)
		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
	if (!ref->percpu_count_ptr)
		return -ENOMEM;

	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
	ref->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT;

	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) {
		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
		ref->allow_reinit = true;
	} else {
		start_count += PERCPU_COUNT_BIAS;
	}

	if (flags & PERCPU_REF_INIT_DEAD)
		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	else
		start_count++;

	atomic_long_set(&ref->count, start_count);

	ref->release = release;
	ref->confirm_switch = NULL;
	return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref. The caller is responsible for ensuring that
 * @ref is no longer in active use. The usual places to invoke this
 * function from are the @ref->release() callback or the init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

	if (percpu_count) {
		/* non-NULL confirm_switch indicates switching in progress */
		WARN_ON_ONCE(ref->confirm_switch);
		free_percpu(percpu_count);
		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
	}
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);
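
/*
 * Minimal usage sketch (illustrative only; struct foo, foo_release() and
 * foo_create() are hypothetical). It shows the two places percpu_ref_exit()
 * is normally called from: the release callback and the init failure path
 * of the embedding object.
 *
 *	struct foo {
 *		struct percpu_ref ref;
 *	};
 *
 *	static void foo_release(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		percpu_ref_exit(&foo->ref);
 *		kfree(foo);
 *	}
 *
 *	static struct foo *foo_create(gfp_t gfp)
 *	{
 *		struct foo *foo = kzalloc(sizeof(*foo), gfp);
 *
 *		if (!foo)
 *			return NULL;
 *		if (percpu_ref_init(&foo->ref, foo_release, 0, gfp)) {
 *			kfree(foo);
 *			return NULL;
 *		}
 *		return foo;
 *	}
 */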

static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);

	ref->confirm_switch(ref);
	ref->confirm_switch = NULL;
	wake_up_all(&percpu_ref_switch_waitq);

	if (!ref->allow_reinit)
		percpu_ref_exit(ref);

	/* drop ref from percpu_ref_switch_to_atomic() */
	percpu_ref_put(ref);
}

static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	unsigned long count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		count += *per_cpu_ptr(percpu_count, cpu);

	pr_debug("global %ld percpu %ld",
		 atomic_long_read(&ref->count), (long)count);

	/*
	 * It's crucial that we sum the percpu counters _before_ adding the sum
	 * to &ref->count; since gets could be happening on one cpu while puts
	 * happen on another, adding a single cpu's count could cause
	 * @ref->count to hit 0 before we've got a consistent value - but the
	 * sum of all the counts will be consistent and correct.
	 *
	 * Subtracting the bias value then has to happen _after_ adding count to
	 * &ref->count; we need the bias value to prevent &ref->count from
	 * reaching 0 before we add the percpu counts. But doing it at the same
	 * time is equivalent and saves us atomic operations:
	 */
	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);

	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
		  "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
		  ref->release, atomic_long_read(&ref->count));

	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
	percpu_ref_call_confirm_rcu(rcu);
}

static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
}

static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
					  percpu_ref_func_t *confirm_switch)
{
	if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
		if (confirm_switch)
			confirm_switch(ref);
		return;
	}

	/* switching from percpu to atomic */
	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

	/*
	 * Non-NULL ->confirm_switch is used to indicate that switching is
	 * in progress. Use noop one if unspecified.
	 */
	ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;

	percpu_ref_get(ref);	/* put after confirmation */
	call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
}

static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	int cpu;

	BUG_ON(!percpu_count);

	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
		return;

	if (WARN_ON_ONCE(!ref->allow_reinit))
		return;

	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);

	/*
	 * Restore per-cpu operation. smp_store_release() is paired
	 * with READ_ONCE() in __ref_is_percpu() and guarantees that the
	 * zeroing is visible to all percpu accesses which can see the
	 * following __PERCPU_REF_ATOMIC clearing.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(percpu_count, cpu) = 0;

	smp_store_release(&ref->percpu_count_ptr,
			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}

static void __percpu_ref_switch_mode(struct percpu_ref *ref,
				     percpu_ref_func_t *confirm_switch)
{
	lockdep_assert_held(&percpu_ref_switch_lock);

	/*
	 * If the previous ATOMIC switching hasn't finished yet, wait for
	 * its completion. If the caller ensures that ATOMIC switching
	 * isn't in progress, this function can be called from any context.
	 */
	wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
			    percpu_ref_switch_lock);

	if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
		__percpu_ref_switch_to_atomic(ref, confirm_switch);
	else
		__percpu_ref_switch_to_percpu(ref);
}
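
/*
 * Illustrative summary of the decision above: after waiting out any
 * in-flight atomic switch, __percpu_ref_switch_mode() picks the target
 * mode as follows:
 *
 *	force_atomic	__PERCPU_REF_DEAD	resulting mode
 *	false		false			percpu
 *	false		true			atomic
 *	true		any			atomic
 */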

/**
 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 * @confirm_switch: optional confirmation callback
 *
 * There's no reason to use this function for the usual reference counting.
 * Use percpu_ref_kill[_and_confirm]().
 *
 * Schedule switching of @ref to atomic mode. All its percpu counts will
 * be collected to the main atomic counter. On completion, when all CPUs
 * are guaranteed to be in atomic mode, @confirm_switch, which may not
 * block, is invoked. This function may be invoked concurrently with all
 * the get/put operations and can safely be mixed with kill and reinit
 * operations. Note that @ref will stay in atomic mode across kill/reinit
 * cycles until percpu_ref_switch_to_percpu() is called.
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_switch)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->force_atomic = true;
	__percpu_ref_switch_mode(ref, confirm_switch);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic);

/**
 * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 *
 * Schedule switching the ref to atomic mode, and wait for the
 * switch to complete. Caller must ensure that no other thread
 * will switch back to percpu mode.
 */
void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref)
{
	percpu_ref_switch_to_atomic(ref, NULL);
	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync);

/**
 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
 * @ref: percpu_ref to switch to percpu mode
 *
 * There's no reason to use this function for the usual reference counting.
 * To re-use an expired ref, use percpu_ref_reinit().
 *
 * Switch @ref to percpu mode. This function may be invoked concurrently
 * with all the get/put operations and can safely be mixed with kill and
 * reinit operations. This function reverses the sticky atomic state set
 * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic(). If @ref is
 * dying or dead, the actual switching takes place on the following
 * percpu_ref_reinit().
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->force_atomic = false;
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
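
/*
 * Illustrative sketch (foo and foo->ref are hypothetical): a caller can
 * force a ref into atomic mode around a slow path where every get/put
 * should hit the single atomic counter, and restore percpu operation
 * afterwards:
 *
 *	percpu_ref_switch_to_atomic_sync(&foo->ref);
 *	... slow-path work; all gets/puts are now atomic ...
 *	percpu_ref_switch_to_percpu(&foo->ref);
 *
 * The _sync variant may sleep, and per the comment above the caller must
 * ensure that no other thread switches the ref back to percpu mode in
 * the meantime.
 */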

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs at which point all
 * further invocations of percpu_ref_tryget_live() will fail. See
 * percpu_ref_tryget_live() for details.
 *
 * This function normally doesn't block and can be called from any context
 * but it may block if @confirm_kill is specified and @ref is in the
 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
 *
 * There are no implied RCU grace periods between kill and release.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
		  "%s called more than once on %ps!", __func__, ref->release);

	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	__percpu_ref_switch_mode(ref, confirm_kill);
	percpu_ref_put(ref);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD. @ref must have been
 * initialized successfully and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
	WARN_ON_ONCE(!percpu_ref_is_zero(ref));

	percpu_ref_resurrect(ref);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);

/**
 * percpu_ref_resurrect - modify a percpu refcount from dead to live
 * @ref: percpu_ref to resurrect
 *
 * Modify @ref so that it's in the same state as before percpu_ref_kill() was
 * called. @ref must be dead but must not yet have exited.
 *
 * If @ref->release() frees @ref then the caller is responsible for
 * guaranteeing that @ref->release() does not get called while this
 * function is in progress.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_resurrect(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
	WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));

	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
	percpu_ref_get(ref);
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
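
/*
 * Illustrative sketch of a confirmed kill followed by a later resurrect
 * (struct foo, foo->ref, foo->kill_done and foo_confirm_kill() are
 * hypothetical):
 *
 *	static void foo_confirm_kill(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		complete(&foo->kill_done);	(may not block; complete() is fine)
 *	}
 *
 *	percpu_ref_kill_and_confirm(&foo->ref, foo_confirm_kill);
 *	wait_for_completion(&foo->kill_done);
 *	(from here on percpu_ref_tryget_live() fails; existing users still
 *	 hold refs and the release callback runs once they all drop them)
 *	...
 *	percpu_ref_resurrect(&foo->ref);	(only valid while release
 *						 has not yet been called)
 */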