xref: /openbmc/linux/lib/rcuref.c (revision 0f9b4c3ca5fdf3e177266ef994071b1a03f07318)
1ee1ee6dbSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2ee1ee6dbSThomas Gleixner 
3ee1ee6dbSThomas Gleixner /*
4ee1ee6dbSThomas Gleixner  * rcuref - A scalable reference count implementation for RCU managed objects
5ee1ee6dbSThomas Gleixner  *
6ee1ee6dbSThomas Gleixner  * rcuref is provided to replace open coded reference count implementations
7ee1ee6dbSThomas Gleixner  * based on atomic_t. It protects explicitly RCU managed objects which can
8ee1ee6dbSThomas Gleixner  * be visible even after the last reference has been dropped and the object
9ee1ee6dbSThomas Gleixner  * is heading towards destruction.
10ee1ee6dbSThomas Gleixner  *
11ee1ee6dbSThomas Gleixner  * A common usage pattern is:
12ee1ee6dbSThomas Gleixner  *
13ee1ee6dbSThomas Gleixner  * get()
14ee1ee6dbSThomas Gleixner  *	rcu_read_lock();
15ee1ee6dbSThomas Gleixner  *	p = get_ptr();
16ee1ee6dbSThomas Gleixner  *	if (p && !atomic_inc_not_zero(&p->refcnt))
17ee1ee6dbSThomas Gleixner  *		p = NULL;
18ee1ee6dbSThomas Gleixner  *	rcu_read_unlock();
19ee1ee6dbSThomas Gleixner  *	return p;
20ee1ee6dbSThomas Gleixner  *
21ee1ee6dbSThomas Gleixner  * put()
22ee1ee6dbSThomas Gleixner  *	if (!atomic_dec_return(&p->refcnt)) {
23ee1ee6dbSThomas Gleixner  *		remove_ptr(p);
24ee1ee6dbSThomas Gleixner  *		kfree_rcu(p, rcu);
25ee1ee6dbSThomas Gleixner  *	}
26ee1ee6dbSThomas Gleixner  *
27ee1ee6dbSThomas Gleixner  * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has
28ee1ee6dbSThomas Gleixner  * O(N^2) behaviour under contention with N concurrent operations.
29ee1ee6dbSThomas Gleixner  *
30ee1ee6dbSThomas Gleixner  * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales
31ee1ee6dbSThomas Gleixner  * better under contention.
32ee1ee6dbSThomas Gleixner  *
33ee1ee6dbSThomas Gleixner  * Why not refcount?
34ee1ee6dbSThomas Gleixner  * =================
35ee1ee6dbSThomas Gleixner  *
36ee1ee6dbSThomas Gleixner  * In principle it should be possible to make refcount use the rcuref
37ee1ee6dbSThomas Gleixner  * scheme, but the destruction race described below cannot be prevented
38ee1ee6dbSThomas Gleixner  * unless the protected object is RCU managed.
39ee1ee6dbSThomas Gleixner  *
40ee1ee6dbSThomas Gleixner  * Theory of operation
41ee1ee6dbSThomas Gleixner  * ===================
42ee1ee6dbSThomas Gleixner  *
43ee1ee6dbSThomas Gleixner  * rcuref uses an unsigned integer reference counter. As long as the
44ee1ee6dbSThomas Gleixner  * counter value is greater than or equal to RCUREF_ONEREF and not larger
45ee1ee6dbSThomas Gleixner  * than RCUREF_MAXREF the reference is alive:
46ee1ee6dbSThomas Gleixner  *
47ee1ee6dbSThomas Gleixner  * ONEREF   MAXREF                SATURATED             RELEASED      DEAD    NOREF
48ee1ee6dbSThomas Gleixner  * 0        0x7FFFFFFF 0x80000000 0xA0000000 0xBFFFFFFF 0xC0000000 0xE0000000 0xFFFFFFFF
49ee1ee6dbSThomas Gleixner  * <---valid --------> <-------saturation zone--------> <-----dead zone----->
50ee1ee6dbSThomas Gleixner  *
51ee1ee6dbSThomas Gleixner  * The get() and put() operations do unconditional increments and
52ee1ee6dbSThomas Gleixner  * decrements. The result is checked after the operation. This optimizes
53ee1ee6dbSThomas Gleixner  * for the fast path.
54ee1ee6dbSThomas Gleixner  *
55ee1ee6dbSThomas Gleixner  * If the reference count is saturated or dead, then the increments and
56ee1ee6dbSThomas Gleixner  * decrements are not harmful as the reference count still stays in the
57ee1ee6dbSThomas Gleixner  * respective zones and is always set back to SATURATED resp. DEAD. The
58ee1ee6dbSThomas Gleixner  * zones have room for 2^28 racing operations in each direction, which
59ee1ee6dbSThomas Gleixner  * makes it practically impossible to escape the zones.
60ee1ee6dbSThomas Gleixner  *
61ee1ee6dbSThomas Gleixner  * Once the last reference is dropped the reference count becomes
62ee1ee6dbSThomas Gleixner  * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The
63ee1ee6dbSThomas Gleixner  * slowpath then tries to set the reference count from RCUREF_NOREF to
64ee1ee6dbSThomas Gleixner  * RCUREF_DEAD via a cmpxchg(). This opens a small window where a
65ee1ee6dbSThomas Gleixner  * concurrent rcuref_get() can acquire the reference count and bring it
66ee1ee6dbSThomas Gleixner  * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD.
67ee1ee6dbSThomas Gleixner  *
68ee1ee6dbSThomas Gleixner  * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in
69ee1ee6dbSThomas Gleixner  * DEAD + 1, which is inside the dead zone. If that happens the reference
70ee1ee6dbSThomas Gleixner  * count is put back to DEAD.
71ee1ee6dbSThomas Gleixner  *
72ee1ee6dbSThomas Gleixner  * The actual race is possible due to the unconditional increment and
73ee1ee6dbSThomas Gleixner  * decrements in rcuref_get() and rcuref_put():
74ee1ee6dbSThomas Gleixner  *
75ee1ee6dbSThomas Gleixner  *	T1				T2
76ee1ee6dbSThomas Gleixner  *	get()				put()
77ee1ee6dbSThomas Gleixner  *					if (atomic_add_negative(-1, &ref->refcnt))
78ee1ee6dbSThomas Gleixner  *		succeeds->			atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);
79ee1ee6dbSThomas Gleixner  *
80ee1ee6dbSThomas Gleixner  *	atomic_add_negative(1, &ref->refcnt);	<- Elevates refcount to DEAD + 1
81ee1ee6dbSThomas Gleixner  *
82ee1ee6dbSThomas Gleixner  * As the result of T1's add is negative, the get() goes into the slow path
83ee1ee6dbSThomas Gleixner  * and observes refcnt being in the dead zone which makes the operation fail.
84ee1ee6dbSThomas Gleixner  *
85ee1ee6dbSThomas Gleixner  * Possible critical states:
86ee1ee6dbSThomas Gleixner  *
87ee1ee6dbSThomas Gleixner  *	Context Counter	References	Operation
88ee1ee6dbSThomas Gleixner  *	T1	0	1		init()
89ee1ee6dbSThomas Gleixner  *	T2	1	2		get()
90ee1ee6dbSThomas Gleixner  *	T1	0	1		put()
91ee1ee6dbSThomas Gleixner  *	T2     -1	0		put() tries to mark dead
92ee1ee6dbSThomas Gleixner  *	T1	0	1		get()
93ee1ee6dbSThomas Gleixner  *	T2	0	1		put() mark dead fails
94ee1ee6dbSThomas Gleixner  *	T1     -1	0		put() tries to mark dead
95ee1ee6dbSThomas Gleixner  *	T1    DEAD	0		put() mark dead succeeds
96ee1ee6dbSThomas Gleixner  *	T2    DEAD+1	0		get() fails and puts it back to DEAD
97ee1ee6dbSThomas Gleixner  *
98ee1ee6dbSThomas Gleixner  * Of course there are more complex scenarios, but the above illustrates
99ee1ee6dbSThomas Gleixner  * the working principle. The rest is left to the imagination of the
100ee1ee6dbSThomas Gleixner  * reader.
101ee1ee6dbSThomas Gleixner  *
102ee1ee6dbSThomas Gleixner  * Deconstruction race
103ee1ee6dbSThomas Gleixner  * ===================
104ee1ee6dbSThomas Gleixner  *
105ee1ee6dbSThomas Gleixner  * The release operation must be protected by prohibiting a grace period in
106ee1ee6dbSThomas Gleixner  * order to prevent a possible use after free:
107ee1ee6dbSThomas Gleixner  *
108ee1ee6dbSThomas Gleixner  *	T1				T2
109ee1ee6dbSThomas Gleixner  *	put()				get()
110ee1ee6dbSThomas Gleixner  *	// ref->refcnt = ONEREF
111ee1ee6dbSThomas Gleixner  *	if (!atomic_add_negative(-1, &ref->refcnt))
112ee1ee6dbSThomas Gleixner  *		return false;				<- Not taken
113ee1ee6dbSThomas Gleixner  *
114ee1ee6dbSThomas Gleixner  *	// ref->refcnt == NOREF
115ee1ee6dbSThomas Gleixner  *	--> preemption
116ee1ee6dbSThomas Gleixner  *					// Elevates ref->refcnt to ONEREF
117ee1ee6dbSThomas Gleixner  *					if (!atomic_add_negative(1, &ref->refcnt))
118ee1ee6dbSThomas Gleixner  *						return true;			<- taken
119ee1ee6dbSThomas Gleixner  *
120ee1ee6dbSThomas Gleixner  *					if (put(&p->ref)) { <-- Succeeds
121ee1ee6dbSThomas Gleixner  *						remove_pointer(p);
122ee1ee6dbSThomas Gleixner  *						kfree_rcu(p, rcu);
123ee1ee6dbSThomas Gleixner  *					}
124ee1ee6dbSThomas Gleixner  *
125ee1ee6dbSThomas Gleixner  *		RCU grace period ends, object is freed
126ee1ee6dbSThomas Gleixner  *
127ee1ee6dbSThomas Gleixner  *	atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);	<- UAF
128ee1ee6dbSThomas Gleixner  *
129ee1ee6dbSThomas Gleixner  * This is prevented by disabling preemption around the put() operation as
130ee1ee6dbSThomas Gleixner  * that's in most kernel configurations cheaper than a rcu_read_lock() /
131ee1ee6dbSThomas Gleixner  * rcu_read_unlock() pair and in many cases even a NOOP. In any case it
132ee1ee6dbSThomas Gleixner  * prevents the grace period which keeps the object alive until all put()
133ee1ee6dbSThomas Gleixner  * operations complete.
134ee1ee6dbSThomas Gleixner  *
135ee1ee6dbSThomas Gleixner  * Saturation protection
136ee1ee6dbSThomas Gleixner  * =====================
137ee1ee6dbSThomas Gleixner  *
138ee1ee6dbSThomas Gleixner  * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX).
139ee1ee6dbSThomas Gleixner  * Once this is exceeded the reference count becomes stale by setting it
140ee1ee6dbSThomas Gleixner  * to RCUREF_SATURATED, which will cause a memory leak, but it prevents
141ee1ee6dbSThomas Gleixner  * wrap arounds which obviously cause worse problems than a memory
142ee1ee6dbSThomas Gleixner  * leak. When saturation is reached a warning is emitted.
143ee1ee6dbSThomas Gleixner  *
144ee1ee6dbSThomas Gleixner  * Race conditions
145ee1ee6dbSThomas Gleixner  * ===============
146ee1ee6dbSThomas Gleixner  *
147ee1ee6dbSThomas Gleixner  * All reference count increment/decrement operations are unconditional and
148ee1ee6dbSThomas Gleixner  * only verified after the fact. This optimizes for the good case and takes
149ee1ee6dbSThomas Gleixner  * the occasional race vs. a dead or already saturated refcount into
150ee1ee6dbSThomas Gleixner  * account. The saturation and dead zones are large enough to accommodate
151ee1ee6dbSThomas Gleixner  * for that.
152ee1ee6dbSThomas Gleixner  *
153ee1ee6dbSThomas Gleixner  * Memory ordering
154ee1ee6dbSThomas Gleixner  * ===============
155ee1ee6dbSThomas Gleixner  *
156ee1ee6dbSThomas Gleixner  * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
157ee1ee6dbSThomas Gleixner  * and provide only what is strictly required for refcounts.
158ee1ee6dbSThomas Gleixner  *
159ee1ee6dbSThomas Gleixner  * The increments are fully relaxed; these will not provide ordering. The
160ee1ee6dbSThomas Gleixner  * rationale is that whatever is used to obtain the object to increase the
161ee1ee6dbSThomas Gleixner  * reference count on will provide the ordering. For locked data
162ee1ee6dbSThomas Gleixner  * structures, it's the lock acquire, for RCU/lockless data structures it's
163ee1ee6dbSThomas Gleixner  * the dependent load.
164ee1ee6dbSThomas Gleixner  *
165ee1ee6dbSThomas Gleixner  * rcuref_get() provides a control dependency ordering future stores which
166ee1ee6dbSThomas Gleixner  * ensures that the object is not modified when acquiring a reference
167ee1ee6dbSThomas Gleixner  * fails.
168ee1ee6dbSThomas Gleixner  *
169ee1ee6dbSThomas Gleixner  * rcuref_put() provides release order, i.e. all prior loads and stores
170ee1ee6dbSThomas Gleixner  * will be issued before. It also provides a control dependency ordering
171ee1ee6dbSThomas Gleixner  * against the subsequent destruction of the object.
172ee1ee6dbSThomas Gleixner  *
173ee1ee6dbSThomas Gleixner  * If rcuref_put() successfully dropped the last reference and marked the
174ee1ee6dbSThomas Gleixner  * object DEAD it also provides acquire ordering.
175ee1ee6dbSThomas Gleixner  */
176ee1ee6dbSThomas Gleixner 
177ee1ee6dbSThomas Gleixner #include <linux/export.h>
178ee1ee6dbSThomas Gleixner #include <linux/rcuref.h>
179ee1ee6dbSThomas Gleixner 
180ee1ee6dbSThomas Gleixner /**
181ee1ee6dbSThomas Gleixner  * rcuref_get_slowpath - Slowpath of rcuref_get()
182ee1ee6dbSThomas Gleixner  * @ref:	Pointer to the reference count
183ee1ee6dbSThomas Gleixner  *
184ee1ee6dbSThomas Gleixner  * Invoked when the reference count is outside of the valid zone.
185ee1ee6dbSThomas Gleixner  *
186ee1ee6dbSThomas Gleixner  * Return:
187ee1ee6dbSThomas Gleixner  *	False if the reference count was already marked dead
188ee1ee6dbSThomas Gleixner  *
189ee1ee6dbSThomas Gleixner  *	True if the reference count is saturated, which prevents the
190ee1ee6dbSThomas Gleixner  *	object from being deconstructed ever.
191ee1ee6dbSThomas Gleixner  */
rcuref_get_slowpath(rcuref_t * ref)192ee1ee6dbSThomas Gleixner bool rcuref_get_slowpath(rcuref_t *ref)
193ee1ee6dbSThomas Gleixner {
194ee1ee6dbSThomas Gleixner 	unsigned int cnt = atomic_read(&ref->refcnt);
195ee1ee6dbSThomas Gleixner 
196ee1ee6dbSThomas Gleixner 	/*
197ee1ee6dbSThomas Gleixner 	 * If the reference count was already marked dead, undo the
198ee1ee6dbSThomas Gleixner 	 * increment so it stays in the middle of the dead zone and return
199ee1ee6dbSThomas Gleixner 	 * fail.
200ee1ee6dbSThomas Gleixner 	 */
201ee1ee6dbSThomas Gleixner 	if (cnt >= RCUREF_RELEASED) {
202ee1ee6dbSThomas Gleixner 		atomic_set(&ref->refcnt, RCUREF_DEAD);
203ee1ee6dbSThomas Gleixner 		return false;
204ee1ee6dbSThomas Gleixner 	}
205ee1ee6dbSThomas Gleixner 
206ee1ee6dbSThomas Gleixner 	/*
207ee1ee6dbSThomas Gleixner 	 * If it was saturated, warn and mark it so. In case the increment
208ee1ee6dbSThomas Gleixner 	 * was already on a saturated value restore the saturation
209ee1ee6dbSThomas Gleixner 	 * marker. This keeps it in the middle of the saturation zone and
210ee1ee6dbSThomas Gleixner 	 * prevents the reference count from overflowing. This leaks the
211ee1ee6dbSThomas Gleixner 	 * object memory, but prevents the obvious reference count overflow
212ee1ee6dbSThomas Gleixner 	 * damage.
213ee1ee6dbSThomas Gleixner 	 */
214ee1ee6dbSThomas Gleixner 	if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory"))
215ee1ee6dbSThomas Gleixner 		atomic_set(&ref->refcnt, RCUREF_SATURATED);
216ee1ee6dbSThomas Gleixner 	return true;
217ee1ee6dbSThomas Gleixner }
218ee1ee6dbSThomas Gleixner EXPORT_SYMBOL_GPL(rcuref_get_slowpath);
219ee1ee6dbSThomas Gleixner 
220ee1ee6dbSThomas Gleixner /**
221ee1ee6dbSThomas Gleixner  * rcuref_put_slowpath - Slowpath of __rcuref_put()
222ee1ee6dbSThomas Gleixner  * @ref:	Pointer to the reference count
223*1d26aaa8SThomas Gleixner  * @cnt:	The resulting value of the fastpath decrement
224ee1ee6dbSThomas Gleixner  *
225ee1ee6dbSThomas Gleixner  * Invoked when the reference count is outside of the valid zone.
226ee1ee6dbSThomas Gleixner  *
227ee1ee6dbSThomas Gleixner  * Return:
228ee1ee6dbSThomas Gleixner  *	True if this was the last reference with no future references
229ee1ee6dbSThomas Gleixner  *	possible. This signals the caller that it can safely schedule the
230ee1ee6dbSThomas Gleixner  *	object, which is protected by the reference counter, for
231ee1ee6dbSThomas Gleixner  *	deconstruction.
232ee1ee6dbSThomas Gleixner  *
233ee1ee6dbSThomas Gleixner  *	False if there are still active references or the put() raced
234ee1ee6dbSThomas Gleixner  *	with a concurrent get()/put() pair. Caller is not allowed to
235ee1ee6dbSThomas Gleixner  *	deconstruct the protected object.
236ee1ee6dbSThomas Gleixner  */
rcuref_put_slowpath(rcuref_t * ref,unsigned int cnt)237*1d26aaa8SThomas Gleixner bool rcuref_put_slowpath(rcuref_t *ref, unsigned int cnt)
238ee1ee6dbSThomas Gleixner {
239ee1ee6dbSThomas Gleixner 	/* Did this drop the last reference? */
240ee1ee6dbSThomas Gleixner 	if (likely(cnt == RCUREF_NOREF)) {
241ee1ee6dbSThomas Gleixner 		/*
242ee1ee6dbSThomas Gleixner 		 * Carefully try to set the reference count to RCUREF_DEAD.
243ee1ee6dbSThomas Gleixner 		 *
244ee1ee6dbSThomas Gleixner 		 * This can fail if a concurrent get() operation has
245ee1ee6dbSThomas Gleixner 		 * elevated it again or the corresponding put() even marked
246ee1ee6dbSThomas Gleixner 		 * it dead already. Both are valid situations and do not
247ee1ee6dbSThomas Gleixner 		 * require a retry. If this fails the caller is not
248ee1ee6dbSThomas Gleixner 		 * allowed to deconstruct the object.
249ee1ee6dbSThomas Gleixner 		 */
250ee1ee6dbSThomas Gleixner 		if (atomic_cmpxchg_release(&ref->refcnt, RCUREF_NOREF, RCUREF_DEAD) != RCUREF_NOREF)
251ee1ee6dbSThomas Gleixner 			return false;
252ee1ee6dbSThomas Gleixner 
253ee1ee6dbSThomas Gleixner 		/*
254ee1ee6dbSThomas Gleixner 		 * The caller can safely schedule the object for
255ee1ee6dbSThomas Gleixner 		 * deconstruction. Provide acquire ordering.
256ee1ee6dbSThomas Gleixner 		 */
257ee1ee6dbSThomas Gleixner 		smp_acquire__after_ctrl_dep();
258ee1ee6dbSThomas Gleixner 		return true;
259ee1ee6dbSThomas Gleixner 	}
260ee1ee6dbSThomas Gleixner 
261ee1ee6dbSThomas Gleixner 	/*
262ee1ee6dbSThomas Gleixner 	 * If the reference count was already in the dead zone, then this
263ee1ee6dbSThomas Gleixner 	 * put() operation is imbalanced. Warn, put the reference count back to
264ee1ee6dbSThomas Gleixner 	 * DEAD and tell the caller to not deconstruct the object.
265ee1ee6dbSThomas Gleixner 	 */
266ee1ee6dbSThomas Gleixner 	if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) {
267ee1ee6dbSThomas Gleixner 		atomic_set(&ref->refcnt, RCUREF_DEAD);
268ee1ee6dbSThomas Gleixner 		return false;
269ee1ee6dbSThomas Gleixner 	}
270ee1ee6dbSThomas Gleixner 
271ee1ee6dbSThomas Gleixner 	/*
272ee1ee6dbSThomas Gleixner 	 * This is a put() operation on a saturated refcount. Restore the
273ee1ee6dbSThomas Gleixner 	 * mean saturation value and tell the caller to not deconstruct the
274ee1ee6dbSThomas Gleixner 	 * object.
275ee1ee6dbSThomas Gleixner 	 */
276ee1ee6dbSThomas Gleixner 	if (cnt > RCUREF_MAXREF)
277ee1ee6dbSThomas Gleixner 		atomic_set(&ref->refcnt, RCUREF_SATURATED);
278ee1ee6dbSThomas Gleixner 	return false;
279ee1ee6dbSThomas Gleixner }
280ee1ee6dbSThomas Gleixner EXPORT_SYMBOL_GPL(rcuref_put_slowpath);
281