/*
 * QemuLockCnt implementation
 *
 * Copyright Red Hat, Inc. 2017
 *
 * Author:
 *   Paolo Bonzini <pbonzini@redhat.com>
 */
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"

#ifdef CONFIG_LINUX
#include "qemu/futex.h"

/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
 * this is not the most relaxing citation I could make...).  It is similar
 * to mutex2 in the paper.
 */

#define QEMU_LOCKCNT_STATE_MASK    3
#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */

#define QEMU_LOCKCNT_COUNT_STEP    4
#define QEMU_LOCKCNT_COUNT_SHIFT   2
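
/* Illustrative decoding of a few count values under this layout (an
 * editorial sketch derived from the macros above, not part of the original
 * commentary): counter == count >> QEMU_LOCKCNT_COUNT_SHIFT and
 * state == count & QEMU_LOCKCNT_STATE_MASK, so for example
 *
 *   count == 0x0  ->  counter 0, QEMU_LOCKCNT_STATE_FREE
 *   count == 0x4  ->  counter 1, QEMU_LOCKCNT_STATE_FREE
 *   count == 0x5  ->  counter 1, QEMU_LOCKCNT_STATE_LOCKED
 *   count == 0x6  ->  counter 1, QEMU_LOCKCNT_STATE_WAITING
 *   count == 0x9  ->  counter 2, QEMU_LOCKCNT_STATE_LOCKED
 */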

void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
}

/* *val is the current value of lockcnt->count.
 *
 * If the lock is free, try a cmpxchg from *val to new_if_free; return
 * true and set *val to new_if_free if the cmpxchg succeeds, otherwise
 * set *val to the value that the cmpxchg found in lockcnt->count.
 *
 * If the lock is taken, wait for it to be released and return false
 * *without trying again to take the lock*.  Again, set *val to the
 * new value of lockcnt->count.
 *
 * If *waited is true on return, new_if_free's bottom two bits must not
 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
 * does not know if there are other waiters.  Furthermore, after *waited
 * is set the caller has effectively acquired the lock.  If it returns
 * with the lock not taken, it must wake another futex waiter.
 */
static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
                                         int new_if_free, bool *waited)
{
    /* Fast path for when the lock is free.  */
    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
        int expected = *val;

        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
        *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
        if (*val == expected) {
            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
            *val = new_if_free;
            return true;
        }
    }

    /* The slow path moves from locked to waiting if necessary, then
     * does a futex wait.  Both steps can be repeated ad nauseam,
     * only getting out of the loop if we can have another shot at the
     * fast path.  Once we can, get out to compute the new destination
     * value for the fast path.
     */
    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
            int expected = *val;
            int new = expected - QEMU_LOCKCNT_STATE_LOCKED
                + QEMU_LOCKCNT_STATE_WAITING;

            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
            *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
            if (*val == expected) {
                *val = new;
            }
            continue;
        }

        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
            *waited = true;
            trace_lockcnt_futex_wait(lockcnt, *val);
            qemu_futex_wait(&lockcnt->count, *val);
            *val = qatomic_read(&lockcnt->count);
            trace_lockcnt_futex_wait_resume(lockcnt, *val);
            continue;
        }

        abort();
    }
    return false;
}

static void lockcnt_wake(QemuLockCnt *lockcnt)
{
    trace_lockcnt_futex_wake(lockcnt);
    qemu_futex_wake(&lockcnt->count, 1);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    bool waited = false;

    for (;;) {
        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val + QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* The fast path is (0, unlocked)->(1, unlocked).  */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
                                             &waited)) {
                break;
            }
        }
    }

    /* If we were woken by another thread, we should also wake one because
     * we are effectively releasing the lock that was given to us.  This is
     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
     * wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
}
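
/* Editorial usage sketch (not part of the original file): a reader of a data
 * structure protected by a QemuLockCnt typically brackets its visit with an
 * increment and a decrement, e.g. with a hypothetical "obj" guarded by a
 * hypothetical "obj_lockcnt":
 *
 *     qemu_lockcnt_inc(&obj_lockcnt);
 *     if (obj) {
 *         ...read-only access to obj...
 *     }
 *     qemu_lockcnt_dec(&obj_lockcnt);
 *
 * As long as the counter is nonzero, a writer using qemu_lockcnt_dec_and_lock
 * or qemu_lockcnt_lock/qemu_lockcnt_count cannot conclude that obj is
 * unreferenced, so it will not free it under the reader's feet.
 */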

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    for (;;) {
        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val - QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* If count is going 1->0, take the lock. The fast path is
             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
             */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
                return true;
            }

            if (waited) {
                /* At this point we do not know if there are more waiters.  Assume
                 * there are.
                 */
                locked_state = QEMU_LOCKCNT_STATE_WAITING;
            }
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}
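
/* Editorial sketch of a writer-side pattern built on qemu_lockcnt_dec_and_lock
 * (hypothetical names "obj" and "obj_lockcnt"; not part of the original file):
 *
 *     if (qemu_lockcnt_dec_and_lock(&obj_lockcnt)) {
 *         g_free(obj);
 *         obj = NULL;
 *         qemu_lockcnt_unlock(&obj_lockcnt);
 *     }
 *
 * When the call returns true the counter has reached zero and the lock is
 * held, so no reader is inside the structure and none can enter until
 * qemu_lockcnt_unlock.  When it returns false the counter was merely
 * decremented and the object must be left alone.
 */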

/* If the counter is one, decrement it and return locked.  Otherwise do
 * nothing.
 *
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
        /* If count is going 1->0, take the lock. The fast path is
         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
         */
        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
            return true;
        }

        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            locked_state = QEMU_LOCKCNT_STATE_WAITING;
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int step = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    /* The third argument is only used if the low bits of val are 0
     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
     * state.
     */
    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            step = QEMU_LOCKCNT_STATE_WAITING;
        }
    }
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = val & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
}
#else
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    qemu_mutex_init(&lockcnt->mutex);
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    qemu_mutex_destroy(&lockcnt->mutex);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int old;
    for (;;) {
        old = qatomic_read(&lockcnt->count);
        if (old == 0) {
            qemu_lockcnt_lock(lockcnt);
            qemu_lockcnt_inc_and_unlock(lockcnt);
            return;
        } else {
            if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
                return;
            }
        }
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_dec(&lockcnt->count);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    while (val > 1) {
        int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
        if (old != val) {
            val = old;
            continue;
        }

        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_unlock(lockcnt);
    return false;
}

/* Decrement a counter and return locked if it is decremented to zero.
 * Otherwise do nothing.
 *
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    /* No need for acquire semantics if we return false.  */
    int val = qatomic_read(&lockcnt->count);
    if (val > 1) {
        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_inc_and_unlock(lockcnt);
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    qemu_mutex_lock(&lockcnt->mutex);
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    qatomic_inc(&lockcnt->count);
    qemu_mutex_unlock(&lockcnt->mutex);
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    qemu_mutex_unlock(&lockcnt->mutex);
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count);
}
#endif
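
/* Editorial note, not part of the original file: an alternative writer-side
 * pattern that works with both implementations above is to take the lock
 * first and then check the counter, e.g. with hypothetical "obj" and
 * "obj_lockcnt":
 *
 *     qemu_lockcnt_lock(&obj_lockcnt);
 *     if (qemu_lockcnt_count(&obj_lockcnt) == 0) {
 *         g_free(obj);
 *         obj = NULL;
 *     }
 *     qemu_lockcnt_unlock(&obj_lockcnt);
 *
 * New readers calling qemu_lockcnt_inc block while the lock is held and the
 * counter is zero, so the check and the free cannot race with a visit.
 */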