/*
 * QemuLockCnt implementation
 *
 * Copyright Red Hat, Inc. 2017
 *
 * Author:
 *   Paolo Bonzini <pbonzini@redhat.com>
 */
#include "qemu/osdep.h"
#include "qemu/lockcnt.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"

#ifdef CONFIG_LINUX
#include "qemu/futex.h"

/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
 * this is not the most relaxing citation I could make...).  It is similar
 * to mutex2 in the paper.
 */

#define QEMU_LOCKCNT_STATE_MASK    3
#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */

#define QEMU_LOCKCNT_COUNT_STEP    4
#define QEMU_LOCKCNT_COUNT_SHIFT   2

void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
}

/* *val is the current value of lockcnt->count.
 *
 * If the lock is free, try a cmpxchg from *val to new_if_free; return
 * true and set *val to the old value found by the cmpxchg in
 * lockcnt->count.
 *
 * If the lock is taken, wait for it to be released and return false
 * *without trying again to take the lock*.  Again, set *val to the
 * new value of lockcnt->count.
 *
 * If *waited is true on return, new_if_free's bottom two bits must not
 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
 * does not know if there are other waiters.  Furthermore, after *waited
 * is set the caller has effectively acquired the lock.  If it returns
 * with the lock not taken, it must wake another futex waiter.
 */
static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
                                         int new_if_free, bool *waited)
{
    /* Fast path for when the lock is free.  */
    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
        int expected = *val;

        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
        *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
        if (*val == expected) {
            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
            *val = new_if_free;
            return true;
        }
    }

    /* The slow path moves from locked to waiting if necessary, then
     * does a futex wait.  Both steps can be repeated ad nauseam,
     * only getting out of the loop if we can have another shot at the
     * fast path.  Once we can, get out to compute the new destination
     * value for the fast path.
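     *
     * For example, a thread that observes the state bits as LOCKED first
     * flips them to WAITING with a cmpxchg, then sleeps on the futex;
     * when woken it re-reads lockcnt->count and loops until the state
     * bits are FREE again.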
     */
    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
            int expected = *val;
            int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;

            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
            *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
            if (*val == expected) {
                *val = new;
            }
            continue;
        }

        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
            *waited = true;
            trace_lockcnt_futex_wait(lockcnt, *val);
            qemu_futex_wait(&lockcnt->count, *val);
            *val = qatomic_read(&lockcnt->count);
            trace_lockcnt_futex_wait_resume(lockcnt, *val);
            continue;
        }

        abort();
    }
    return false;
}

static void lockcnt_wake(QemuLockCnt *lockcnt)
{
    trace_lockcnt_futex_wake(lockcnt);
    qemu_futex_wake(&lockcnt->count, 1);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    bool waited = false;

    for (;;) {
        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val + QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* The fast path is (0, unlocked)->(1, unlocked).  */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
                                             &waited)) {
                break;
            }
        }
    }

    /* If we were woken by another thread, we should also wake one because
     * we are effectively releasing the lock that was given to us.  This is
     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
     * wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    for (;;) {
        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val - QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* If count is going 1->0, take the lock. The fast path is
             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
             */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
                return true;
            }

            if (waited) {
                /* At this point we do not know if there are more waiters.  Assume
                 * there are.
                 */
                locked_state = QEMU_LOCKCNT_STATE_WAITING;
            }
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

/* If the counter is one, decrement it and return locked.  Otherwise do
 * nothing.
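 *
 * Unlike qemu_lockcnt_dec_and_lock, the counter is left unchanged when it
 * is greater than one, so a false return means no reference was given up.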
 *
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
        /* If count is going 1->0, take the lock. The fast path is
         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
         */
        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
            return true;
        }

        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            locked_state = QEMU_LOCKCNT_STATE_WAITING;
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int step = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    /* The third argument is only used if the low bits of val are 0
     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
     * state.
     */
    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            step = QEMU_LOCKCNT_STATE_WAITING;
        }
    }
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = val & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
}
#else
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    qemu_mutex_init(&lockcnt->mutex);
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    qemu_mutex_destroy(&lockcnt->mutex);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int old;
    for (;;) {
        old = qatomic_read(&lockcnt->count);
        if (old == 0) {
            qemu_lockcnt_lock(lockcnt);
            qemu_lockcnt_inc_and_unlock(lockcnt);
            return;
        } else {
            if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
                return;
            }
        }
    }
}

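/* qemu_lockcnt_inc above goes through the mutex when the counter is zero,
 * so that the count cannot go from zero to nonzero behind the back of a
 * thread that holds the lock.  A plain decrement can never violate that
 * guarantee, so qemu_lockcnt_dec needs no synchronization beyond the
 * atomic operation itself.
 */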
void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_dec(&lockcnt->count);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    while (val > 1) {
        int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
        if (old != val) {
            val = old;
            continue;
        }

        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_unlock(lockcnt);
    return false;
}

/* Decrement a counter and return locked if it is decremented to zero.
 * Otherwise do nothing.
 *
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    /* No need for acquire semantics if we return false.  */
    int val = qatomic_read(&lockcnt->count);
    if (val > 1) {
        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_inc_and_unlock(lockcnt);
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    qemu_mutex_lock(&lockcnt->mutex);
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    qatomic_inc(&lockcnt->count);
    qemu_mutex_unlock(&lockcnt->mutex);
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    qemu_mutex_unlock(&lockcnt->mutex);
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count);
}
#endif
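
/* A sketch of the intended visitor/reclaimer pattern, with hypothetical
 * names ("xyz", "xyz_lockcnt") used purely for illustration; see the
 * QemuLockCnt documentation under docs/devel/ for the full discussion:
 *
 *     // visitor: take a reference, access the protected pointer
 *     qemu_lockcnt_inc(&xyz_lockcnt);
 *     if (xyz) {
 *         ... access xyz ...
 *     }
 *     // drop the reference; free xyz if we were the last visitor
 *     if (qemu_lockcnt_dec_and_lock(&xyz_lockcnt)) {
 *         g_free(xyz);
 *         xyz = NULL;
 *         qemu_lockcnt_unlock(&xyz_lockcnt);
 *     }
 */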