/*
 * QemuLockCnt implementation
 *
 * Copyright Red Hat, Inc. 2017
 *
 * Author:
 *   Paolo Bonzini <pbonzini@redhat.com>
 */
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"

#ifdef CONFIG_LINUX
#include "qemu/futex.h"

/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
 * this is not the most relaxing citation I could make...).  It is similar
 * to mutex2 in the paper.
 */

#define QEMU_LOCKCNT_STATE_MASK    3
#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */

#define QEMU_LOCKCNT_COUNT_STEP    4
#define QEMU_LOCKCNT_COUNT_SHIFT   2
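
/*
 * Illustrative sketch only: the two helpers below are not part of the
 * lockcnt implementation and are not used elsewhere in this file.  They
 * merely show how a value read from lockcnt->count decomposes into the
 * futex lock state (bits 0-1) and the reference count (bits 2-31)
 * described above.
 */
static inline int lockcnt_example_state(int val)
{
    /* One of QEMU_LOCKCNT_STATE_FREE/LOCKED/WAITING.  */
    return val & QEMU_LOCKCNT_STATE_MASK;
}

static inline int lockcnt_example_counter(int val)
{
    /* Number of references currently held.  */
    return val >> QEMU_LOCKCNT_COUNT_SHIFT;
}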

void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
}

/* *val is the current value of lockcnt->count.
 *
 * If the lock is free, try a cmpxchg from *val to new_if_free; return
 * true and set *val to the old value found by the cmpxchg in
 * lockcnt->count.
 *
 * If the lock is taken, wait for it to be released and return false
 * *without trying again to take the lock*.  Again, set *val to the
 * new value of lockcnt->count.
 *
 * If *waited is true on return, new_if_free's bottom two bits must not
 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
 * does not know if there are other waiters.  Furthermore, after *waited
 * is set the caller has effectively acquired the lock.  If it returns
 * with the lock not taken, it must wake another futex waiter.
 */
static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
                                         int new_if_free, bool *waited)
{
    /* Fast path for when the lock is free.  */
    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
        int expected = *val;

        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
        *val = atomic_cmpxchg(&lockcnt->count, expected, new_if_free);
        if (*val == expected) {
            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
            *val = new_if_free;
            return true;
        }
    }

    /* The slow path moves from locked to waiting if necessary, then
     * does a futex wait.  Both steps can be repeated ad nauseam,
     * only getting out of the loop if we can have another shot at the
     * fast path.  Once we can, get out to compute the new destination
     * value for the fast path.
     */
    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
            int expected = *val;
            int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;

            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
            *val = atomic_cmpxchg(&lockcnt->count, expected, new);
            if (*val == expected) {
                *val = new;
            }
            continue;
        }

        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
            *waited = true;
            trace_lockcnt_futex_wait(lockcnt, *val);
            qemu_futex_wait(&lockcnt->count, *val);
            *val = atomic_read(&lockcnt->count);
            trace_lockcnt_futex_wait_resume(lockcnt, *val);
            continue;
        }

        abort();
    }
    return false;
}

static void lockcnt_wake(QemuLockCnt *lockcnt)
{
    trace_lockcnt_futex_wake(lockcnt);
    qemu_futex_wake(&lockcnt->count, 1);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    bool waited = false;

    for (;;) {
        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = atomic_cmpxchg(&lockcnt->count, val, val + QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* The fast path is (0, unlocked)->(1, unlocked).  */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
                                             &waited)) {
                break;
            }
        }
    }

    /* If we were woken by another thread, we should also wake one because
     * we are effectively releasing the lock that was given to us.  This is
     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
     * wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    atomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    for (;;) {
        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = atomic_cmpxchg(&lockcnt->count, val, val - QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* If count is going 1->0, take the lock.  The fast path is
             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
             */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
                return true;
            }

            if (waited) {
                /* At this point we do not know if there are more waiters.  Assume
                 * there are.
                 */
                locked_state = QEMU_LOCKCNT_STATE_WAITING;
            }
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}
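
/*
 * Illustrative sketch only (not used by this file): a typical visitor
 * pattern that a lockcnt protects.  Readers take a reference around their
 * walk; whoever drops the last reference also gets the lock and can reclaim
 * whatever was left behind for deletion.  The "pending_free" pointer and the
 * function name are hypothetical, only the qemu_lockcnt_* calls are real API.
 */
static inline void lockcnt_example_visit(QemuLockCnt *lockcnt, void **pending_free)
{
    qemu_lockcnt_inc(lockcnt);
    /* ... walk the protected data structure without holding the lock ... */

    if (qemu_lockcnt_dec_and_lock(lockcnt)) {
        /* The count is zero and the lock is held: no other visitor can
         * reach the abandoned object any more, so it is safe to free it.
         */
        g_free(*pending_free);
        *pending_free = NULL;
        qemu_lockcnt_unlock(lockcnt);
    }
}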

/* If the counter is one, decrement it and return locked.  Otherwise do
 * nothing.
 *
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
        /* If count is going 1->0, take the lock.  The fast path is
         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
         */
        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
            return true;
        }

        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            locked_state = QEMU_LOCKCNT_STATE_WAITING;
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int step = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    /* The third argument is only used if the low bits of val are 0
     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
     * state.
     */
    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            step = QEMU_LOCKCNT_STATE_WAITING;
        }
    }
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = atomic_read(&lockcnt->count);
    do {
        expected = val;
        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = atomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = atomic_read(&lockcnt->count);
    do {
        expected = val;
        new = val & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = atomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return atomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
}
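
/*
 * Illustrative sketch only (not used by this file): the writer-side pattern
 * that matches the reader sketch above.  The lock serializes writers, and
 * qemu_lockcnt_inc_and_unlock hands the lock back while atomically taking a
 * reference, so the newly installed object cannot be reclaimed before the
 * writer is done with it.  The pointer names are hypothetical.
 */
static inline void lockcnt_example_writer(QemuLockCnt *lockcnt, void **datap,
                                          void *new_data)
{
    qemu_lockcnt_lock(lockcnt);
    if (*datap == NULL) {
        atomic_set(datap, new_data);
        /* Release the lock and simultaneously become a reader of the
         * object we just published.
         */
        qemu_lockcnt_inc_and_unlock(lockcnt);
        /* ... use new_data ... */
        qemu_lockcnt_dec(lockcnt);
    } else {
        /* Another writer got here first; discard our copy.  */
        qemu_lockcnt_unlock(lockcnt);
        g_free(new_data);
    }
}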
#else
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    qemu_mutex_init(&lockcnt->mutex);
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    qemu_mutex_destroy(&lockcnt->mutex);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int old;
    for (;;) {
        old = atomic_read(&lockcnt->count);
        if (old == 0) {
            qemu_lockcnt_lock(lockcnt);
            qemu_lockcnt_inc_and_unlock(lockcnt);
            return;
        } else {
            if (atomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
                return;
            }
        }
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    atomic_dec(&lockcnt->count);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    while (val > 1) {
        int old = atomic_cmpxchg(&lockcnt->count, val, val - 1);
        if (old != val) {
            val = old;
            continue;
        }

        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (atomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_unlock(lockcnt);
    return false;
}

/* Decrement a counter and return locked if it is decremented to zero.
 * Otherwise do nothing.
 *
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    /* No need for acquire semantics if we return false.  */
    int val = atomic_read(&lockcnt->count);
    if (val > 1) {
        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (atomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_inc_and_unlock(lockcnt);
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    qemu_mutex_lock(&lockcnt->mutex);
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    atomic_inc(&lockcnt->count);
    qemu_mutex_unlock(&lockcnt->mutex);
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    qemu_mutex_unlock(&lockcnt->mutex);
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return atomic_read(&lockcnt->count);
}
#endif
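
/*
 * Illustrative sketch only (not used by this file): qemu_lockcnt_dec_if_lock
 * lets a reader opportunistically become the last reference and reclaim an
 * abandoned object without ever blocking; if that is not possible it simply
 * drops its reference.  The pointer name and function name are hypothetical.
 */
static inline void lockcnt_example_drop_ref(QemuLockCnt *lockcnt, void **pending_free)
{
    if (qemu_lockcnt_dec_if_lock(lockcnt)) {
        /* We held the only reference and now hold the lock.  */
        g_free(*pending_free);
        *pending_free = NULL;
        qemu_lockcnt_unlock(lockcnt);
    } else {
        /* Other references remain and ours was not dropped; drop it the
         * plain way.
         */
        qemu_lockcnt_dec(lockcnt);
    }
}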