xref: /openbmc/qemu/util/lockcnt.c (revision c5a5839856119a3644dcc0775a046ed0ee3081c3)
1 /*
2  * QemuLockCnt implementation
3  *
4  * Copyright Red Hat, Inc. 2017
5  *
6  * Author:
7  *   Paolo Bonzini <pbonzini@redhat.com>
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/thread.h"
11 #include "qemu/atomic.h"
12 #include "trace.h"
13 
14 #ifdef CONFIG_LINUX
15 #include "qemu/futex.h"
16 
17 /* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
18  * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
19  * this is not the most relaxing citation I could make...).  It is similar
20  * to mutex2 in the paper.
21  */
22 
23 #define QEMU_LOCKCNT_STATE_MASK    3
24 #define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
25 #define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
26 #define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */
27 
28 #define QEMU_LOCKCNT_COUNT_STEP    4
29 #define QEMU_LOCKCNT_COUNT_SHIFT   2
30 
31 void qemu_lockcnt_init(QemuLockCnt *lockcnt)
32 {
33     lockcnt->count = 0;
34 }
35 
36 void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
37 {
38 }
39 
40 /* *val is the current value of lockcnt->count.
41  *
42  * If the lock is free, try a cmpxchg from *val to new_if_free; return
43  * true and set *val to the old value found by the cmpxchg in
44  * lockcnt->count.
45  *
46  * If the lock is taken, wait for it to be released and return false
47  * *without trying again to take the lock*.  Again, set *val to the
48  * new value of lockcnt->count.
49  *
50  * If *waited is true on return, new_if_free's bottom two bits must not
51  * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
52  * does not know if there are other waiters.  Furthermore, after *waited
53  * is set the caller has effectively acquired the lock.  If it returns
54  * with the lock not taken, it must wake another futex waiter.
55  */
56 static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
57                                          int new_if_free, bool *waited)
58 {
59     /* Fast path for when the lock is free.  */
60     if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
61         int expected = *val;
62 
63         trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
64         *val = atomic_cmpxchg(&lockcnt->count, expected, new_if_free);
65         if (*val == expected) {
66             trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
67             *val = new_if_free;
68             return true;
69         }
70     }
71 
72     /* The slow path moves from locked to waiting if necessary, then
73      * does a futex wait.  Both steps can be repeated ad nauseam,
74      * only getting out of the loop if we can have another shot at the
75      * fast path.  Once we can, get out to compute the new destination
76      * value for the fast path.
77      */
78     while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
79         if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
80             int expected = *val;
81             int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;
82 
83             trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
84             *val = atomic_cmpxchg(&lockcnt->count, expected, new);
85             if (*val == expected) {
86                 *val = new;
87             }
88             continue;
89         }
90 
91         if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
92             *waited = true;
93             trace_lockcnt_futex_wait(lockcnt, *val);
94             qemu_futex_wait(&lockcnt->count, *val);
95             *val = atomic_read(&lockcnt->count);
96             trace_lockcnt_futex_wait_resume(lockcnt, *val);
97             continue;
98         }
99 
100         abort();
101     }
102     return false;
103 }
104 
105 static void lockcnt_wake(QemuLockCnt *lockcnt)
106 {
107     trace_lockcnt_futex_wake(lockcnt);
108     qemu_futex_wake(&lockcnt->count, 1);
109 }
110 
111 void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
112 {
113     int val = atomic_read(&lockcnt->count);
114     bool waited = false;
115 
116     for (;;) {
117         if (val >= QEMU_LOCKCNT_COUNT_STEP) {
118             int expected = val;
119             val = atomic_cmpxchg(&lockcnt->count, val, val + QEMU_LOCKCNT_COUNT_STEP);
120             if (val == expected) {
121                 break;
122             }
123         } else {
124             /* The fast path is (0, unlocked)->(1, unlocked).  */
125             if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
126                                              &waited)) {
127                 break;
128             }
129         }
130     }
131 
132     /* If we were woken by another thread, we should also wake one because
133      * we are effectively releasing the lock that was given to us.  This is
134      * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
135      * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
136      * wake someone.
137      */
138     if (waited) {
139         lockcnt_wake(lockcnt);
140     }
141 }
142 
143 void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
144 {
145     atomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
146 }
147 
148 /* Decrement a counter, and return locked if it is decremented to zero.
149  * If the function returns true, it is impossible for the counter to
150  * become nonzero until the next qemu_lockcnt_unlock.
151  */
152 bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
153 {
154     int val = atomic_read(&lockcnt->count);
155     int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
156     bool waited = false;
157 
158     for (;;) {
159         if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
160             int expected = val;
161             val = atomic_cmpxchg(&lockcnt->count, val, val - QEMU_LOCKCNT_COUNT_STEP);
162             if (val == expected) {
163                 break;
164             }
165         } else {
166             /* If count is going 1->0, take the lock. The fast path is
167              * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
168              */
169             if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
170                 return true;
171             }
172 
173             if (waited) {
174                 /* At this point we do not know if there are more waiters.  Assume
175                  * there are.
176                  */
177                 locked_state = QEMU_LOCKCNT_STATE_WAITING;
178             }
179         }
180     }
181 
182     /* If we were woken by another thread, but we're returning in unlocked
183      * state, we should also wake a thread because we are effectively
184      * releasing the lock that was given to us.  This is the case where
185      * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
186      * bits, and qemu_lockcnt_unlock would find it and wake someone.
187      */
188     if (waited) {
189         lockcnt_wake(lockcnt);
190     }
191     return false;
192 }
193 
194 /* If the counter is one, decrement it and return locked.  Otherwise do
195  * nothing.
196  *
197  * If the function returns true, it is impossible for the counter to
198  * become nonzero until the next qemu_lockcnt_unlock.
199  */
200 bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
201 {
202     int val = atomic_read(&lockcnt->count);
203     int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
204     bool waited = false;
205 
206     while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
207         /* If count is going 1->0, take the lock. The fast path is
208          * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
209          */
210         if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
211             return true;
212         }
213 
214         if (waited) {
215             /* At this point we do not know if there are more waiters.  Assume
216              * there are.
217              */
218             locked_state = QEMU_LOCKCNT_STATE_WAITING;
219         }
220     }
221 
222     /* If we were woken by another thread, but we're returning in unlocked
223      * state, we should also wake a thread because we are effectively
224      * releasing the lock that was given to us.  This is the case where
225      * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
226      * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
227      */
228     if (waited) {
229         lockcnt_wake(lockcnt);
230     }
231     return false;
232 }
233 
234 void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
235 {
236     int val = atomic_read(&lockcnt->count);
237     int step = QEMU_LOCKCNT_STATE_LOCKED;
238     bool waited = false;
239 
240     /* The third argument is only used if the low bits of val are 0
241      * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
242      * state.
243      */
244     while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
245         if (waited) {
246             /* At this point we do not know if there are more waiters.  Assume
247              * there are.
248              */
249             step = QEMU_LOCKCNT_STATE_WAITING;
250         }
251     }
252 }
253 
254 void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
255 {
256     int expected, new, val;
257 
258     val = atomic_read(&lockcnt->count);
259     do {
260         expected = val;
261         new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
262         trace_lockcnt_unlock_attempt(lockcnt, val, new);
263         val = atomic_cmpxchg(&lockcnt->count, val, new);
264     } while (val != expected);
265 
266     trace_lockcnt_unlock_success(lockcnt, val, new);
267     if (val & QEMU_LOCKCNT_STATE_WAITING) {
268         lockcnt_wake(lockcnt);
269     }
270 }
271 
272 void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
273 {
274     int expected, new, val;
275 
276     val = atomic_read(&lockcnt->count);
277     do {
278         expected = val;
279         new = val & ~QEMU_LOCKCNT_STATE_MASK;
280         trace_lockcnt_unlock_attempt(lockcnt, val, new);
281         val = atomic_cmpxchg(&lockcnt->count, val, new);
282     } while (val != expected);
283 
284     trace_lockcnt_unlock_success(lockcnt, val, new);
285     if (val & QEMU_LOCKCNT_STATE_WAITING) {
286         lockcnt_wake(lockcnt);
287     }
288 }
289 
290 unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
291 {
292     return atomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
293 }
294 #else
295 void qemu_lockcnt_init(QemuLockCnt *lockcnt)
296 {
297     qemu_mutex_init(&lockcnt->mutex);
298     lockcnt->count = 0;
299 }
300 
301 void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
302 {
303     qemu_mutex_destroy(&lockcnt->mutex);
304 }
305 
306 void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
307 {
308     int old;
309     for (;;) {
310         old = atomic_read(&lockcnt->count);
311         if (old == 0) {
312             qemu_lockcnt_lock(lockcnt);
313             qemu_lockcnt_inc_and_unlock(lockcnt);
314             return;
315         } else {
316             if (atomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
317                 return;
318             }
319         }
320     }
321 }
322 
323 void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
324 {
325     atomic_dec(&lockcnt->count);
326 }
327 
328 /* Decrement a counter, and return locked if it is decremented to zero.
329  * It is impossible for the counter to become nonzero while the mutex
330  * is taken.
331  */
332 bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
333 {
334     int val = atomic_read(&lockcnt->count);
335     while (val > 1) {
336         int old = atomic_cmpxchg(&lockcnt->count, val, val - 1);
337         if (old != val) {
338             val = old;
339             continue;
340         }
341 
342         return false;
343     }
344 
345     qemu_lockcnt_lock(lockcnt);
346     if (atomic_fetch_dec(&lockcnt->count) == 1) {
347         return true;
348     }
349 
350     qemu_lockcnt_unlock(lockcnt);
351     return false;
352 }
353 
354 /* Decrement a counter and return locked if it is decremented to zero.
355  * Otherwise do nothing.
356  *
357  * It is impossible for the counter to become nonzero while the mutex
358  * is taken.
359  */
360 bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
361 {
362     /* No need for acquire semantics if we return false.  */
363     int val = atomic_read(&lockcnt->count);
364     if (val > 1) {
365         return false;
366     }
367 
368     qemu_lockcnt_lock(lockcnt);
369     if (atomic_fetch_dec(&lockcnt->count) == 1) {
370         return true;
371     }
372 
373     qemu_lockcnt_inc_and_unlock(lockcnt);
374     return false;
375 }
376 
377 void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
378 {
379     qemu_mutex_lock(&lockcnt->mutex);
380 }
381 
382 void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
383 {
384     atomic_inc(&lockcnt->count);
385     qemu_mutex_unlock(&lockcnt->mutex);
386 }
387 
388 void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
389 {
390     qemu_mutex_unlock(&lockcnt->mutex);
391 }
392 
393 unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
394 {
395     return atomic_read(&lockcnt->count);
396 }
397 #endif
398