xref: /openbmc/qemu/util/lockcnt.c (revision f774a677507966222624a9b2859f06ede7608100)
1 /*
2  * QemuLockCnt implementation
3  *
4  * Copyright Red Hat, Inc. 2017
5  *
6  * Author:
7  *   Paolo Bonzini <pbonzini@redhat.com>
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/lockcnt.h"
11 #include "qemu/thread.h"
12 #include "qemu/atomic.h"
13 #include "trace.h"
14 
15 #ifdef CONFIG_LINUX
16 #include "qemu/futex.h"
17 
18 /* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
19  * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
20  * this is not the most relaxing citation I could make...).  It is similar
21  * to mutex2 in the paper.
22  */
23 
24 #define QEMU_LOCKCNT_STATE_MASK    3
25 #define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
26 #define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
27 #define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */
28 
29 #define QEMU_LOCKCNT_COUNT_STEP    4
30 #define QEMU_LOCKCNT_COUNT_SHIFT   2
31 
qemu_lockcnt_init(QemuLockCnt * lockcnt)32 void qemu_lockcnt_init(QemuLockCnt *lockcnt)
33 {
34     lockcnt->count = 0;
35 }
36 
qemu_lockcnt_destroy(QemuLockCnt * lockcnt)37 void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
38 {
39 }
40 
41 /* *val is the current value of lockcnt->count.
42  *
43  * If the lock is free, try a cmpxchg from *val to new_if_free; return
44  * true and set *val to the old value found by the cmpxchg in
45  * lockcnt->count.
46  *
47  * If the lock is taken, wait for it to be released and return false
48  * *without trying again to take the lock*.  Again, set *val to the
49  * new value of lockcnt->count.
50  *
51  * If *waited is true on return, new_if_free's bottom two bits must not
52  * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
53  * does not know if there are other waiters.  Furthermore, after *waited
54  * is set the caller has effectively acquired the lock.  If it returns
55  * with the lock not taken, it must wake another futex waiter.
56  */
qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt * lockcnt,int * val,int new_if_free,bool * waited)57 static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
58                                          int new_if_free, bool *waited)
59 {
60     /* Fast path for when the lock is free.  */
61     if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
62         int expected = *val;
63 
64         trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
65         *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
66         if (*val == expected) {
67             trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
68             *val = new_if_free;
69             return true;
70         }
71     }
72 
73     /* The slow path moves from locked to waiting if necessary, then
74      * does a futex wait.  Both steps can be repeated ad nauseam,
75      * only getting out of the loop if we can have another shot at the
76      * fast path.  Once we can, get out to compute the new destination
77      * value for the fast path.
78      */
79     while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
80         if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
81             int expected = *val;
82             int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;
83 
84             trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
85             *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
86             if (*val == expected) {
87                 *val = new;
88             }
89             continue;
90         }
91 
92         if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
93             *waited = true;
94             trace_lockcnt_futex_wait(lockcnt, *val);
95             qemu_futex_wait(&lockcnt->count, *val);
96             *val = qatomic_read(&lockcnt->count);
97             trace_lockcnt_futex_wait_resume(lockcnt, *val);
98             continue;
99         }
100 
101         abort();
102     }
103     return false;
104 }
105 
lockcnt_wake(QemuLockCnt * lockcnt)106 static void lockcnt_wake(QemuLockCnt *lockcnt)
107 {
108     trace_lockcnt_futex_wake(lockcnt);
109     qemu_futex_wake(&lockcnt->count, 1);
110 }
111 
qemu_lockcnt_inc(QemuLockCnt * lockcnt)112 void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
113 {
114     int val = qatomic_read(&lockcnt->count);
115     bool waited = false;
116 
117     for (;;) {
118         if (val >= QEMU_LOCKCNT_COUNT_STEP) {
119             int expected = val;
120             val = qatomic_cmpxchg(&lockcnt->count, val,
121                                   val + QEMU_LOCKCNT_COUNT_STEP);
122             if (val == expected) {
123                 break;
124             }
125         } else {
126             /* The fast path is (0, unlocked)->(1, unlocked).  */
127             if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
128                                              &waited)) {
129                 break;
130             }
131         }
132     }
133 
134     /* If we were woken by another thread, we should also wake one because
135      * we are effectively releasing the lock that was given to us.  This is
136      * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
137      * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
138      * wake someone.
139      */
140     if (waited) {
141         lockcnt_wake(lockcnt);
142     }
143 }
144 
qemu_lockcnt_dec(QemuLockCnt * lockcnt)145 void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
146 {
147     qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
148 }
149 
150 /* Decrement a counter, and return locked if it is decremented to zero.
151  * If the function returns true, it is impossible for the counter to
152  * become nonzero until the next qemu_lockcnt_unlock.
153  */
qemu_lockcnt_dec_and_lock(QemuLockCnt * lockcnt)154 bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
155 {
156     int val = qatomic_read(&lockcnt->count);
157     int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
158     bool waited = false;
159 
160     for (;;) {
161         if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
162             int expected = val;
163             val = qatomic_cmpxchg(&lockcnt->count, val,
164                                   val - QEMU_LOCKCNT_COUNT_STEP);
165             if (val == expected) {
166                 break;
167             }
168         } else {
169             /* If count is going 1->0, take the lock. The fast path is
170              * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
171              */
172             if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
173                 return true;
174             }
175 
176             if (waited) {
177                 /* At this point we do not know if there are more waiters.  Assume
178                  * there are.
179                  */
180                 locked_state = QEMU_LOCKCNT_STATE_WAITING;
181             }
182         }
183     }
184 
185     /* If we were woken by another thread, but we're returning in unlocked
186      * state, we should also wake a thread because we are effectively
187      * releasing the lock that was given to us.  This is the case where
188      * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
189      * bits, and qemu_lockcnt_unlock would find it and wake someone.
190      */
191     if (waited) {
192         lockcnt_wake(lockcnt);
193     }
194     return false;
195 }
196 
197 /* If the counter is one, decrement it and return locked.  Otherwise do
198  * nothing.
199  *
200  * If the function returns true, it is impossible for the counter to
201  * become nonzero until the next qemu_lockcnt_unlock.
202  */
qemu_lockcnt_dec_if_lock(QemuLockCnt * lockcnt)203 bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
204 {
205     int val = qatomic_read(&lockcnt->count);
206     int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
207     bool waited = false;
208 
209     while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
210         /* If count is going 1->0, take the lock. The fast path is
211          * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
212          */
213         if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
214             return true;
215         }
216 
217         if (waited) {
218             /* At this point we do not know if there are more waiters.  Assume
219              * there are.
220              */
221             locked_state = QEMU_LOCKCNT_STATE_WAITING;
222         }
223     }
224 
225     /* If we were woken by another thread, but we're returning in unlocked
226      * state, we should also wake a thread because we are effectively
227      * releasing the lock that was given to us.  This is the case where
228      * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
229      * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
230      */
231     if (waited) {
232         lockcnt_wake(lockcnt);
233     }
234     return false;
235 }
236 
qemu_lockcnt_lock(QemuLockCnt * lockcnt)237 void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
238 {
239     int val = qatomic_read(&lockcnt->count);
240     int step = QEMU_LOCKCNT_STATE_LOCKED;
241     bool waited = false;
242 
243     /* The third argument is only used if the low bits of val are 0
244      * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
245      * state.
246      */
247     while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
248         if (waited) {
249             /* At this point we do not know if there are more waiters.  Assume
250              * there are.
251              */
252             step = QEMU_LOCKCNT_STATE_WAITING;
253         }
254     }
255 }
256 
qemu_lockcnt_inc_and_unlock(QemuLockCnt * lockcnt)257 void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
258 {
259     int expected, new, val;
260 
261     val = qatomic_read(&lockcnt->count);
262     do {
263         expected = val;
264         new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
265         trace_lockcnt_unlock_attempt(lockcnt, val, new);
266         val = qatomic_cmpxchg(&lockcnt->count, val, new);
267     } while (val != expected);
268 
269     trace_lockcnt_unlock_success(lockcnt, val, new);
270     if (val & QEMU_LOCKCNT_STATE_WAITING) {
271         lockcnt_wake(lockcnt);
272     }
273 }
274 
qemu_lockcnt_unlock(QemuLockCnt * lockcnt)275 void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
276 {
277     int expected, new, val;
278 
279     val = qatomic_read(&lockcnt->count);
280     do {
281         expected = val;
282         new = val & ~QEMU_LOCKCNT_STATE_MASK;
283         trace_lockcnt_unlock_attempt(lockcnt, val, new);
284         val = qatomic_cmpxchg(&lockcnt->count, val, new);
285     } while (val != expected);
286 
287     trace_lockcnt_unlock_success(lockcnt, val, new);
288     if (val & QEMU_LOCKCNT_STATE_WAITING) {
289         lockcnt_wake(lockcnt);
290     }
291 }
292 
qemu_lockcnt_count(QemuLockCnt * lockcnt)293 unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
294 {
295     return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
296 }
297 #else
qemu_lockcnt_init(QemuLockCnt * lockcnt)298 void qemu_lockcnt_init(QemuLockCnt *lockcnt)
299 {
300     qemu_mutex_init(&lockcnt->mutex);
301     lockcnt->count = 0;
302 }
303 
qemu_lockcnt_destroy(QemuLockCnt * lockcnt)304 void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
305 {
306     qemu_mutex_destroy(&lockcnt->mutex);
307 }
308 
qemu_lockcnt_inc(QemuLockCnt * lockcnt)309 void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
310 {
311     int old;
312     for (;;) {
313         old = qatomic_read(&lockcnt->count);
314         if (old == 0) {
315             qemu_lockcnt_lock(lockcnt);
316             qemu_lockcnt_inc_and_unlock(lockcnt);
317             return;
318         } else {
319             if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
320                 return;
321             }
322         }
323     }
324 }
325 
qemu_lockcnt_dec(QemuLockCnt * lockcnt)326 void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
327 {
328     qatomic_dec(&lockcnt->count);
329 }
330 
331 /* Decrement a counter, and return locked if it is decremented to zero.
332  * It is impossible for the counter to become nonzero while the mutex
333  * is taken.
334  */
qemu_lockcnt_dec_and_lock(QemuLockCnt * lockcnt)335 bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
336 {
337     int val = qatomic_read(&lockcnt->count);
338     while (val > 1) {
339         int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
340         if (old != val) {
341             val = old;
342             continue;
343         }
344 
345         return false;
346     }
347 
348     qemu_lockcnt_lock(lockcnt);
349     if (qatomic_fetch_dec(&lockcnt->count) == 1) {
350         return true;
351     }
352 
353     qemu_lockcnt_unlock(lockcnt);
354     return false;
355 }
356 
357 /* Decrement a counter and return locked if it is decremented to zero.
358  * Otherwise do nothing.
359  *
360  * It is impossible for the counter to become nonzero while the mutex
361  * is taken.
362  */
qemu_lockcnt_dec_if_lock(QemuLockCnt * lockcnt)363 bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
364 {
365     /* No need for acquire semantics if we return false.  */
366     int val = qatomic_read(&lockcnt->count);
367     if (val > 1) {
368         return false;
369     }
370 
371     qemu_lockcnt_lock(lockcnt);
372     if (qatomic_fetch_dec(&lockcnt->count) == 1) {
373         return true;
374     }
375 
376     qemu_lockcnt_inc_and_unlock(lockcnt);
377     return false;
378 }
379 
qemu_lockcnt_lock(QemuLockCnt * lockcnt)380 void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
381 {
382     qemu_mutex_lock(&lockcnt->mutex);
383 }
384 
qemu_lockcnt_inc_and_unlock(QemuLockCnt * lockcnt)385 void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
386 {
387     qatomic_inc(&lockcnt->count);
388     qemu_mutex_unlock(&lockcnt->mutex);
389 }
390 
qemu_lockcnt_unlock(QemuLockCnt * lockcnt)391 void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
392 {
393     qemu_mutex_unlock(&lockcnt->mutex);
394 }
395 
qemu_lockcnt_count(QemuLockCnt * lockcnt)396 unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
397 {
398     return qatomic_read(&lockcnt->count);
399 }
400 #endif
401