xref: /openbmc/qemu/util/lockcnt.c (revision 61f3c91a)
1 /*
2  * QemuLockCnt implementation
3  *
4  * Copyright Red Hat, Inc. 2017
5  *
6  * Author:
7  *   Paolo Bonzini <pbonzini@redhat.com>
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/thread.h"
11 #include "qemu/atomic.h"
12 #include "trace.h"
13 
14 #ifdef CONFIG_LINUX
15 #include "qemu/futex.h"
16 
17 /* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
18  * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
19  * this is not the most relaxing citation I could make...).  It is similar
20  * to mutex2 in the paper.
21  */
22 
23 #define QEMU_LOCKCNT_STATE_MASK    3
24 #define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
25 #define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
26 #define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */
27 
28 #define QEMU_LOCKCNT_COUNT_STEP    4
29 #define QEMU_LOCKCNT_COUNT_SHIFT   2
30 
31 void qemu_lockcnt_init(QemuLockCnt *lockcnt)
32 {
33     lockcnt->count = 0;
34 }
35 
36 void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
37 {
38 }
39 
40 /* *val is the current value of lockcnt->count.
41  *
42  * If the lock is free, try a cmpxchg from *val to new_if_free; return
43  * true and set *val to the old value found by the cmpxchg in
44  * lockcnt->count.
45  *
46  * If the lock is taken, wait for it to be released and return false
47  * *without trying again to take the lock*.  Again, set *val to the
48  * new value of lockcnt->count.
49  *
50  * If *waited is true on return, new_if_free's bottom two bits must not
51  * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
52  * does not know if there are other waiters.  Furthermore, after *waited
53  * is set the caller has effectively acquired the lock.  If it returns
54  * with the lock not taken, it must wake another futex waiter.
55  */
56 static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
57                                          int new_if_free, bool *waited)
58 {
59     /* Fast path for when the lock is free.  */
60     if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
61         int expected = *val;
62 
63         trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
64         *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
65         if (*val == expected) {
66             trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
67             *val = new_if_free;
68             return true;
69         }
70     }
71 
72     /* The slow path moves from locked to waiting if necessary, then
73      * does a futex wait.  Both steps can be repeated ad nauseam,
74      * only getting out of the loop if we can have another shot at the
75      * fast path.  Once we can, get out to compute the new destination
76      * value for the fast path.
77      */
78     while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
79         if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
80             int expected = *val;
81             int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;
82 
83             trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
84             *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
85             if (*val == expected) {
86                 *val = new;
87             }
88             continue;
89         }
90 
91         if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
92             *waited = true;
93             trace_lockcnt_futex_wait(lockcnt, *val);
94             qemu_futex_wait(&lockcnt->count, *val);
95             *val = qatomic_read(&lockcnt->count);
96             trace_lockcnt_futex_wait_resume(lockcnt, *val);
97             continue;
98         }
99 
100         abort();
101     }
102     return false;
103 }
104 
105 static void lockcnt_wake(QemuLockCnt *lockcnt)
106 {
107     trace_lockcnt_futex_wake(lockcnt);
108     qemu_futex_wake(&lockcnt->count, 1);
109 }
110 
111 void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
112 {
113     int val = qatomic_read(&lockcnt->count);
114     bool waited = false;
115 
116     for (;;) {
117         if (val >= QEMU_LOCKCNT_COUNT_STEP) {
118             int expected = val;
119             val = qatomic_cmpxchg(&lockcnt->count, val,
120                                   val + QEMU_LOCKCNT_COUNT_STEP);
121             if (val == expected) {
122                 break;
123             }
124         } else {
125             /* The fast path is (0, unlocked)->(1, unlocked).  */
126             if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
127                                              &waited)) {
128                 break;
129             }
130         }
131     }
132 
133     /* If we were woken by another thread, we should also wake one because
134      * we are effectively releasing the lock that was given to us.  This is
135      * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
136      * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
137      * wake someone.
138      */
139     if (waited) {
140         lockcnt_wake(lockcnt);
141     }
142 }
143 
144 void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
145 {
146     qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
147 }
148 
149 /* Decrement a counter, and return locked if it is decremented to zero.
150  * If the function returns true, it is impossible for the counter to
151  * become nonzero until the next qemu_lockcnt_unlock.
152  */
153 bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
154 {
155     int val = qatomic_read(&lockcnt->count);
156     int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
157     bool waited = false;
158 
159     for (;;) {
160         if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
161             int expected = val;
162             val = qatomic_cmpxchg(&lockcnt->count, val,
163                                   val - QEMU_LOCKCNT_COUNT_STEP);
164             if (val == expected) {
165                 break;
166             }
167         } else {
168             /* If count is going 1->0, take the lock. The fast path is
169              * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
170              */
171             if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
172                 return true;
173             }
174 
175             if (waited) {
176                 /* At this point we do not know if there are more waiters.  Assume
177                  * there are.
178                  */
179                 locked_state = QEMU_LOCKCNT_STATE_WAITING;
180             }
181         }
182     }
183 
184     /* If we were woken by another thread, but we're returning in unlocked
185      * state, we should also wake a thread because we are effectively
186      * releasing the lock that was given to us.  This is the case where
187      * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
188      * bits, and qemu_lockcnt_unlock would find it and wake someone.
189      */
190     if (waited) {
191         lockcnt_wake(lockcnt);
192     }
193     return false;
194 }
195 
196 /* If the counter is one, decrement it and return locked.  Otherwise do
197  * nothing.
198  *
199  * If the function returns true, it is impossible for the counter to
200  * become nonzero until the next qemu_lockcnt_unlock.
201  */
202 bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
203 {
204     int val = qatomic_read(&lockcnt->count);
205     int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
206     bool waited = false;
207 
208     while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
209         /* If count is going 1->0, take the lock. The fast path is
210          * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
211          */
212         if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
213             return true;
214         }
215 
216         if (waited) {
217             /* At this point we do not know if there are more waiters.  Assume
218              * there are.
219              */
220             locked_state = QEMU_LOCKCNT_STATE_WAITING;
221         }
222     }
223 
224     /* If we were woken by another thread, but we're returning in unlocked
225      * state, we should also wake a thread because we are effectively
226      * releasing the lock that was given to us.  This is the case where
227      * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
228      * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
229      */
230     if (waited) {
231         lockcnt_wake(lockcnt);
232     }
233     return false;
234 }
235 
236 void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
237 {
238     int val = qatomic_read(&lockcnt->count);
239     int step = QEMU_LOCKCNT_STATE_LOCKED;
240     bool waited = false;
241 
242     /* The third argument is only used if the low bits of val are 0
243      * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
244      * state.
245      */
246     while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
247         if (waited) {
248             /* At this point we do not know if there are more waiters.  Assume
249              * there are.
250              */
251             step = QEMU_LOCKCNT_STATE_WAITING;
252         }
253     }
254 }
255 
256 void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
257 {
258     int expected, new, val;
259 
260     val = qatomic_read(&lockcnt->count);
261     do {
262         expected = val;
263         new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
264         trace_lockcnt_unlock_attempt(lockcnt, val, new);
265         val = qatomic_cmpxchg(&lockcnt->count, val, new);
266     } while (val != expected);
267 
268     trace_lockcnt_unlock_success(lockcnt, val, new);
269     if (val & QEMU_LOCKCNT_STATE_WAITING) {
270         lockcnt_wake(lockcnt);
271     }
272 }
273 
274 void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
275 {
276     int expected, new, val;
277 
278     val = qatomic_read(&lockcnt->count);
279     do {
280         expected = val;
281         new = val & ~QEMU_LOCKCNT_STATE_MASK;
282         trace_lockcnt_unlock_attempt(lockcnt, val, new);
283         val = qatomic_cmpxchg(&lockcnt->count, val, new);
284     } while (val != expected);
285 
286     trace_lockcnt_unlock_success(lockcnt, val, new);
287     if (val & QEMU_LOCKCNT_STATE_WAITING) {
288         lockcnt_wake(lockcnt);
289     }
290 }
291 
292 unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
293 {
294     return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
295 }
296 #else
297 void qemu_lockcnt_init(QemuLockCnt *lockcnt)
298 {
299     qemu_mutex_init(&lockcnt->mutex);
300     lockcnt->count = 0;
301 }
302 
303 void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
304 {
305     qemu_mutex_destroy(&lockcnt->mutex);
306 }
307 
308 void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
309 {
310     int old;
311     for (;;) {
312         old = qatomic_read(&lockcnt->count);
313         if (old == 0) {
314             qemu_lockcnt_lock(lockcnt);
315             qemu_lockcnt_inc_and_unlock(lockcnt);
316             return;
317         } else {
318             if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
319                 return;
320             }
321         }
322     }
323 }
324 
325 void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
326 {
327     qatomic_dec(&lockcnt->count);
328 }
329 
330 /* Decrement a counter, and return locked if it is decremented to zero.
331  * It is impossible for the counter to become nonzero while the mutex
332  * is taken.
333  */
334 bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
335 {
336     int val = qatomic_read(&lockcnt->count);
337     while (val > 1) {
338         int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
339         if (old != val) {
340             val = old;
341             continue;
342         }
343 
344         return false;
345     }
346 
347     qemu_lockcnt_lock(lockcnt);
348     if (qatomic_fetch_dec(&lockcnt->count) == 1) {
349         return true;
350     }
351 
352     qemu_lockcnt_unlock(lockcnt);
353     return false;
354 }
355 
356 /* Decrement a counter and return locked if it is decremented to zero.
357  * Otherwise do nothing.
358  *
359  * It is impossible for the counter to become nonzero while the mutex
360  * is taken.
361  */
362 bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
363 {
364     /* No need for acquire semantics if we return false.  */
365     int val = qatomic_read(&lockcnt->count);
366     if (val > 1) {
367         return false;
368     }
369 
370     qemu_lockcnt_lock(lockcnt);
371     if (qatomic_fetch_dec(&lockcnt->count) == 1) {
372         return true;
373     }
374 
375     qemu_lockcnt_inc_and_unlock(lockcnt);
376     return false;
377 }
378 
379 void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
380 {
381     qemu_mutex_lock(&lockcnt->mutex);
382 }
383 
384 void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
385 {
386     qatomic_inc(&lockcnt->count);
387     qemu_mutex_unlock(&lockcnt->mutex);
388 }
389 
390 void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
391 {
392     qemu_mutex_unlock(&lockcnt->mutex);
393 }
394 
395 unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
396 {
397     return qatomic_read(&lockcnt->count);
398 }
399 #endif
400