1 /*
2 * QemuLockCnt implementation
3 *
4 * Copyright Red Hat, Inc. 2017
5 *
6 * Author:
7 * Paolo Bonzini <pbonzini@redhat.com>
8 */
9 #include "qemu/osdep.h"
10 #include "qemu/lockcnt.h"
11 #include "qemu/thread.h"
12 #include "qemu/atomic.h"
13 #include "trace.h"
14
15 #ifdef CONFIG_LINUX
16 #include "qemu/futex.h"
17
18 /* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
19 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
20 * this is not the most relaxing citation I could make...). It is similar
21 * to mutex2 in the paper.
22 */
23
24 #define QEMU_LOCKCNT_STATE_MASK 3
25 #define QEMU_LOCKCNT_STATE_FREE 0 /* free, uncontended */
26 #define QEMU_LOCKCNT_STATE_LOCKED 1 /* locked, uncontended */
27 #define QEMU_LOCKCNT_STATE_WAITING 2 /* locked, contended */
28
29 #define QEMU_LOCKCNT_COUNT_STEP 4
30 #define QEMU_LOCKCNT_COUNT_SHIFT 2
31
qemu_lockcnt_init(QemuLockCnt * lockcnt)32 void qemu_lockcnt_init(QemuLockCnt *lockcnt)
33 {
34 lockcnt->count = 0;
35 }
36
qemu_lockcnt_destroy(QemuLockCnt * lockcnt)37 void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
38 {
39 }
40
41 /* *val is the current value of lockcnt->count.
42 *
43 * If the lock is free, try a cmpxchg from *val to new_if_free; return
44 * true and set *val to the old value found by the cmpxchg in
45 * lockcnt->count.
46 *
47 * If the lock is taken, wait for it to be released and return false
48 * *without trying again to take the lock*. Again, set *val to the
49 * new value of lockcnt->count.
50 *
51 * If *waited is true on return, new_if_free's bottom two bits must not
52 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
53 * does not know if there are other waiters. Furthermore, after *waited
54 * is set the caller has effectively acquired the lock. If it returns
55 * with the lock not taken, it must wake another futex waiter.
56 */
qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt * lockcnt,int * val,int new_if_free,bool * waited)57 static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
58 int new_if_free, bool *waited)
59 {
60 /* Fast path for when the lock is free. */
61 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
62 int expected = *val;
63
64 trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
65 *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
66 if (*val == expected) {
67 trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
68 *val = new_if_free;
69 return true;
70 }
71 }
72
73 /* The slow path moves from locked to waiting if necessary, then
74 * does a futex wait. Both steps can be repeated ad nauseam,
75 * only getting out of the loop if we can have another shot at the
76 * fast path. Once we can, get out to compute the new destination
77 * value for the fast path.
78 */
79 while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
80 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
81 int expected = *val;
82 int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;
83
84 trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
85 *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
86 if (*val == expected) {
87 *val = new;
88 }
89 continue;
90 }
91
92 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
93 *waited = true;
94 trace_lockcnt_futex_wait(lockcnt, *val);
95 qemu_futex_wait(&lockcnt->count, *val);
96 *val = qatomic_read(&lockcnt->count);
97 trace_lockcnt_futex_wait_resume(lockcnt, *val);
98 continue;
99 }
100
101 abort();
102 }
103 return false;
104 }
105
lockcnt_wake(QemuLockCnt * lockcnt)106 static void lockcnt_wake(QemuLockCnt *lockcnt)
107 {
108 trace_lockcnt_futex_wake(lockcnt);
109 qemu_futex_wake(&lockcnt->count, 1);
110 }
111
qemu_lockcnt_inc(QemuLockCnt * lockcnt)112 void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
113 {
114 int val = qatomic_read(&lockcnt->count);
115 bool waited = false;
116
117 for (;;) {
118 if (val >= QEMU_LOCKCNT_COUNT_STEP) {
119 int expected = val;
120 val = qatomic_cmpxchg(&lockcnt->count, val,
121 val + QEMU_LOCKCNT_COUNT_STEP);
122 if (val == expected) {
123 break;
124 }
125 } else {
126 /* The fast path is (0, unlocked)->(1, unlocked). */
127 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
128 &waited)) {
129 break;
130 }
131 }
132 }
133
134 /* If we were woken by another thread, we should also wake one because
135 * we are effectively releasing the lock that was given to us. This is
136 * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
137 * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
138 * wake someone.
139 */
140 if (waited) {
141 lockcnt_wake(lockcnt);
142 }
143 }
144
qemu_lockcnt_dec(QemuLockCnt * lockcnt)145 void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
146 {
147 qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
148 }
149
150 /* Decrement a counter, and return locked if it is decremented to zero.
151 * If the function returns true, it is impossible for the counter to
152 * become nonzero until the next qemu_lockcnt_unlock.
153 */
qemu_lockcnt_dec_and_lock(QemuLockCnt * lockcnt)154 bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
155 {
156 int val = qatomic_read(&lockcnt->count);
157 int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
158 bool waited = false;
159
160 for (;;) {
161 if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
162 int expected = val;
163 val = qatomic_cmpxchg(&lockcnt->count, val,
164 val - QEMU_LOCKCNT_COUNT_STEP);
165 if (val == expected) {
166 break;
167 }
168 } else {
169 /* If count is going 1->0, take the lock. The fast path is
170 * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
171 */
172 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
173 return true;
174 }
175
176 if (waited) {
177 /* At this point we do not know if there are more waiters. Assume
178 * there are.
179 */
180 locked_state = QEMU_LOCKCNT_STATE_WAITING;
181 }
182 }
183 }
184
185 /* If we were woken by another thread, but we're returning in unlocked
186 * state, we should also wake a thread because we are effectively
187 * releasing the lock that was given to us. This is the case where
188 * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
189 * bits, and qemu_lockcnt_unlock would find it and wake someone.
190 */
191 if (waited) {
192 lockcnt_wake(lockcnt);
193 }
194 return false;
195 }
196
197 /* If the counter is one, decrement it and return locked. Otherwise do
198 * nothing.
199 *
200 * If the function returns true, it is impossible for the counter to
201 * become nonzero until the next qemu_lockcnt_unlock.
202 */
qemu_lockcnt_dec_if_lock(QemuLockCnt * lockcnt)203 bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
204 {
205 int val = qatomic_read(&lockcnt->count);
206 int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
207 bool waited = false;
208
209 while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
210 /* If count is going 1->0, take the lock. The fast path is
211 * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
212 */
213 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
214 return true;
215 }
216
217 if (waited) {
218 /* At this point we do not know if there are more waiters. Assume
219 * there are.
220 */
221 locked_state = QEMU_LOCKCNT_STATE_WAITING;
222 }
223 }
224
225 /* If we were woken by another thread, but we're returning in unlocked
226 * state, we should also wake a thread because we are effectively
227 * releasing the lock that was given to us. This is the case where
228 * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
229 * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
230 */
231 if (waited) {
232 lockcnt_wake(lockcnt);
233 }
234 return false;
235 }
236
qemu_lockcnt_lock(QemuLockCnt * lockcnt)237 void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
238 {
239 int val = qatomic_read(&lockcnt->count);
240 int step = QEMU_LOCKCNT_STATE_LOCKED;
241 bool waited = false;
242
243 /* The third argument is only used if the low bits of val are 0
244 * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
245 * state.
246 */
247 while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
248 if (waited) {
249 /* At this point we do not know if there are more waiters. Assume
250 * there are.
251 */
252 step = QEMU_LOCKCNT_STATE_WAITING;
253 }
254 }
255 }
256
qemu_lockcnt_inc_and_unlock(QemuLockCnt * lockcnt)257 void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
258 {
259 int expected, new, val;
260
261 val = qatomic_read(&lockcnt->count);
262 do {
263 expected = val;
264 new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
265 trace_lockcnt_unlock_attempt(lockcnt, val, new);
266 val = qatomic_cmpxchg(&lockcnt->count, val, new);
267 } while (val != expected);
268
269 trace_lockcnt_unlock_success(lockcnt, val, new);
270 if (val & QEMU_LOCKCNT_STATE_WAITING) {
271 lockcnt_wake(lockcnt);
272 }
273 }
274
qemu_lockcnt_unlock(QemuLockCnt * lockcnt)275 void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
276 {
277 int expected, new, val;
278
279 val = qatomic_read(&lockcnt->count);
280 do {
281 expected = val;
282 new = val & ~QEMU_LOCKCNT_STATE_MASK;
283 trace_lockcnt_unlock_attempt(lockcnt, val, new);
284 val = qatomic_cmpxchg(&lockcnt->count, val, new);
285 } while (val != expected);
286
287 trace_lockcnt_unlock_success(lockcnt, val, new);
288 if (val & QEMU_LOCKCNT_STATE_WAITING) {
289 lockcnt_wake(lockcnt);
290 }
291 }
292
qemu_lockcnt_count(QemuLockCnt * lockcnt)293 unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
294 {
295 return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
296 }
297 #else
qemu_lockcnt_init(QemuLockCnt * lockcnt)298 void qemu_lockcnt_init(QemuLockCnt *lockcnt)
299 {
300 qemu_mutex_init(&lockcnt->mutex);
301 lockcnt->count = 0;
302 }
303
qemu_lockcnt_destroy(QemuLockCnt * lockcnt)304 void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
305 {
306 qemu_mutex_destroy(&lockcnt->mutex);
307 }
308
qemu_lockcnt_inc(QemuLockCnt * lockcnt)309 void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
310 {
311 int old;
312 for (;;) {
313 old = qatomic_read(&lockcnt->count);
314 if (old == 0) {
315 qemu_lockcnt_lock(lockcnt);
316 qemu_lockcnt_inc_and_unlock(lockcnt);
317 return;
318 } else {
319 if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
320 return;
321 }
322 }
323 }
324 }
325
qemu_lockcnt_dec(QemuLockCnt * lockcnt)326 void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
327 {
328 qatomic_dec(&lockcnt->count);
329 }
330
331 /* Decrement a counter, and return locked if it is decremented to zero.
332 * It is impossible for the counter to become nonzero while the mutex
333 * is taken.
334 */
qemu_lockcnt_dec_and_lock(QemuLockCnt * lockcnt)335 bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
336 {
337 int val = qatomic_read(&lockcnt->count);
338 while (val > 1) {
339 int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
340 if (old != val) {
341 val = old;
342 continue;
343 }
344
345 return false;
346 }
347
348 qemu_lockcnt_lock(lockcnt);
349 if (qatomic_fetch_dec(&lockcnt->count) == 1) {
350 return true;
351 }
352
353 qemu_lockcnt_unlock(lockcnt);
354 return false;
355 }
356
357 /* Decrement a counter and return locked if it is decremented to zero.
358 * Otherwise do nothing.
359 *
360 * It is impossible for the counter to become nonzero while the mutex
361 * is taken.
362 */
qemu_lockcnt_dec_if_lock(QemuLockCnt * lockcnt)363 bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
364 {
365 /* No need for acquire semantics if we return false. */
366 int val = qatomic_read(&lockcnt->count);
367 if (val > 1) {
368 return false;
369 }
370
371 qemu_lockcnt_lock(lockcnt);
372 if (qatomic_fetch_dec(&lockcnt->count) == 1) {
373 return true;
374 }
375
376 qemu_lockcnt_inc_and_unlock(lockcnt);
377 return false;
378 }
379
qemu_lockcnt_lock(QemuLockCnt * lockcnt)380 void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
381 {
382 qemu_mutex_lock(&lockcnt->mutex);
383 }
384
qemu_lockcnt_inc_and_unlock(QemuLockCnt * lockcnt)385 void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
386 {
387 qatomic_inc(&lockcnt->count);
388 qemu_mutex_unlock(&lockcnt->mutex);
389 }
390
qemu_lockcnt_unlock(QemuLockCnt * lockcnt)391 void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
392 {
393 qemu_mutex_unlock(&lockcnt->mutex);
394 }
395
qemu_lockcnt_count(QemuLockCnt * lockcnt)396 unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
397 {
398 return qatomic_read(&lockcnt->count);
399 }
400 #endif
401