// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 *  See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include <trace/events/lock.h>

#include "rtmutex_common.h"

#ifndef WW_RT
# define build_ww_mutex()	(false)
# define ww_container_of(rtm)	NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

#else
# define build_ww_mutex()	(true)
# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It can also be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit 0 before looking at the lock, and the owner may be
 * NULL during this small window, hence this can be a transitional state.
 *
 * (**) There is a small window when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */

static __always_inline struct task_struct *
rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	return (struct task_struct *)val;
}
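
#if 0
/*
 * Purely illustrative decode sketch (compiled out, helper name made up):
 * the counterpart of rt_mutex_owner_encode() above. This is essentially
 * what rt_mutex_owner() in rtmutex_common.h boils down to -- bit 0 carries
 * RT_MUTEX_HAS_WAITERS, the remaining bits are the owner task pointer.
 */
static struct task_struct *rt_mutex_owner_decode_sketch(struct rt_mutex_base *lock)
{
	unsigned long val = (unsigned long)READ_ONCE(lock->owner);

	/*
	 * Bit 0 is the "has waiters" flag; while it is set, the fast
	 * cmpxchg based acquire/release paths must not be taken.
	 */
	return (struct task_struct *)(val & ~RT_MUTEX_HAS_WAITERS);
}
#endif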

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
	/*
	 * lock->wait_lock is held but explicit acquire semantics are needed
	 * for a new lock owner so WRITE_ONCE is insufficient.
	 */
	xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
}

static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
{
	/* lock->wait_lock is held so the unlock provides release semantics. */
	WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void
fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0	CPU 1			CPU2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *					rt_mutex_lock(l)
	 *					lock(l->lock)
	 *					l->owner = T1 | HAS_WAITERS;
	 *					enqueue(T3)
	 *					boost()
	 *					  unlock(l->lock)
	 *					block()
	 *		signal(->T2)		signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *					lock(l->lock)
	 *					dequeue(T3)
	 *					 ==> wait list is empty
	 *					deboost()
	 *					 unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l) {
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		    l->owner = owner
	 *		      ==> l->owner = T1
	 *		  }
	 *					lock(l->lock)
	 * rt_mutex_unlock(l)			fixup_rt_mutex_waiters()
	 *					  if (wait_list_empty(l) {
	 *					    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *					    l->owner = owner
	 *					      ==> l->owner = T1
	 *					  }
	 *
	 * With the check for the waiter bit in place T3 on CPU2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS) {
		/*
		 * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
		 * why xchg_acquire() is used for updating owner for
		 * locking and WRITE_ONCE() for unlocking.
		 *
		 * WRITE_ONCE() would work for the acquire case too, but
		 * in case that the lock acquisition failed it might
		 * force other lockers into the slow path unnecessarily.
		 */
		if (acquire_lock)
			xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
		else
			WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
	}
}

/*
 * We can speed up the acquire/release if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_release(&lock->owner, &old, new);
}

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg_relaxed(p, owner,
				 owner | RT_MUTEX_HAS_WAITERS) != owner);

	/*
	 * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
	 * operations in the event of contention. Ensure the successful
	 * cmpxchg is visible.
	 */
	smp_mb__after_atomic();
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
	int prio = task->prio;

	if (!rt_prio(prio))
		return DEFAULT_PRIO;

	return prio;
}

/*
 * Update the waiter->tree copy of the sort keys.
 */
static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	lockdep_assert_held(&waiter->lock->wait_lock);
	lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));

	waiter->tree.prio = __waiter_prio(task);
	waiter->tree.deadline = task->dl.deadline;
}

/*
 * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
 */
static __always_inline void
waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	lockdep_assert_held(&waiter->lock->wait_lock);
	lockdep_assert_held(&task->pi_lock);
	lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));

	waiter->pi_tree.prio = waiter->tree.prio;
	waiter->pi_tree.deadline = waiter->tree.deadline;
}

/*
 * Only use with rt_waiter_node_{less,equal}()
 */
#define task_to_waiter_node(p)	\
	&(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
#define task_to_waiter(p)	\
	&(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }

static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
					       struct rt_waiter_node *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->deadline, right->deadline);

	return 0;
}

static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
						struct rt_waiter_node *right)
{
	if (left->prio != right->prio)
		return 0;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 0 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return left->deadline == right->deadline;

	return 1;
}

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
				  struct rt_mutex_waiter *top_waiter)
{
	if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
		return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
	/*
	 * Note that RT tasks are excluded from same priority (lateral)
	 * steals to prevent the introduction of an unbounded latency.
	 */
	if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
		return false;

	return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
#else
	return false;
#endif
}

#define __node_2_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, tree.entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
	struct rt_mutex_waiter *aw = __node_2_waiter(a);
	struct rt_mutex_waiter *bw = __node_2_waiter(b);

	if (rt_waiter_node_less(&aw->tree, &bw->tree))
		return 1;

	if (!build_ww_mutex())
		return 0;

	if (rt_waiter_node_less(&bw->tree, &aw->tree))
		return 0;

	/* NOTE: relies on waiter->ww_ctx being set before insertion */
	if (aw->ww_ctx) {
		if (!bw->ww_ctx)
			return 1;

		return (signed long)(aw->ww_ctx->stamp -
				     bw->ww_ctx->stamp) < 0;
	}

	return 0;
}
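
#if 0
/*
 * Illustrative sketch (compiled out, function name made up): the
 * ww_ctx->stamp comparison above orders waiters of equal priority by the
 * age of their acquire context, which is what lets the usual ww_mutex
 * acquire/backoff pattern converge. A minimal two-lock version of that
 * pattern, using only the public ww_mutex API, might look like the
 * following; see Documentation/locking/ww-mutex-design.rst for the full
 * patterns, including ww_mutex_lock_slow().
 */
static void ww_backoff_sketch(struct ww_mutex *a, struct ww_mutex *b,
			      struct ww_class *class)
{
	struct ww_acquire_ctx ctx;

	ww_acquire_init(&ctx, class);
retry:
	ww_mutex_lock(a, &ctx);
	if (ww_mutex_lock(b, &ctx) == -EDEADLK) {
		/*
		 * The older context wins: drop what we hold and retry,
		 * taking the contended lock first this time.
		 */
		ww_mutex_unlock(a);
		swap(a, b);
		goto retry;
	}
	ww_acquire_done(&ctx);

	/* ... both locks held ... */

	ww_mutex_unlock(a);
	ww_mutex_unlock(b);
	ww_acquire_fini(&ctx);
}
#endif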

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	if (RB_EMPTY_NODE(&waiter->tree.entry))
		return;

	rb_erase_cached(&waiter->tree.entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree.entry);
}

#define __node_2_rt_node(node) \
	rb_entry((node), struct rt_waiter_node, entry)

static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
	return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&task->pi_lock);

	rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&task->pi_lock);

	if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
		return;

	rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree.entry);
}

static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
						 struct task_struct *p)
{
	struct task_struct *pi_task = NULL;

	lockdep_assert_held(&lock->wait_lock);
	lockdep_assert(rt_mutex_owner(lock) == p);
	lockdep_assert_held(&p->pi_lock);

	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

	rt_mutex_setprio(p, pi_task);
}

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
						     struct task_struct *task,
						     unsigned int wake_state)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
			WARN_ON_ONCE(wqh->rtlock_task);
		get_task_struct(task);
		wqh->rtlock_task = task;
	} else {
		wake_q_add(&wqh->head, task);
	}
}

static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
						struct rt_mutex_waiter *w)
{
	rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
		put_task_struct(wqh->rtlock_task);
		wqh->rtlock_task = NULL;
	}

	if (!wake_q_empty(&wqh->head))
		wake_up_q(&wqh->head);

	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
	preempt_enable();
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
			      enum rtmutex_chainwalk chwalk)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
		return waiter != NULL;
	return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [Pn] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Normal locking order:
 *
 *   rtmutex->wait_lock
 *     task->pi_lock
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 * again:
 *	  loop_sanity_check();
 * retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P1]
 * [2]	  waiter = task->pi_blocked_on;		[P1]
 * [3]	  check_exit_conditions_1();		[P1]
 * [4]	  lock = waiter->lock;			[P1]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P1] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P1]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P1] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P1] + [L]
 * [8]	  unlock(task->pi_lock);		release [P1]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P2]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
 * [12]	  check_exit_conditions_4();		[P2] + [L]
 * [13]	  unlock(task->pi_lock);		release [P2]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 *
 * Where P1 is the blocking task and P2 is the lock owner; going up one step
 * the owner becomes the next blocked task etc.
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
					      enum rtmutex_chainwalk chwalk,
					      struct rt_mutex_base *orig_lock,
					      struct rt_mutex_base *next_lock,
					      struct rt_mutex_waiter *orig_waiter,
					      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex_base *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
	 * consider:
	 *
	 *   P1: A, ww_A, ww_B
	 *   P2: ww_B, ww_A
	 *   P3: A
	 *
	 * P3 should not return -EDEADLK because it gets trapped in the cycle
	 * created by P1 and P2 (which will resolve -- and runs into
	 * max_lock_depth above). Therefore disable detect_deadlock such that
	 * the below termination condition can trigger once all relevant tasks
	 * are boosted.
	 *
	 * Even when we start with ww_mutex we can disable deadlock detection,
	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
	 * Suppressing it here however is not sufficient since we might still
	 * hit [6] due to adjustment driven iteration.
	 *
	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
	 * utterly fail to report it; lockdep should.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
		detect_deadlock = false;

	/*
	 * Drop out when the task has no waiters. Note that top_waiter
	 * can be NULL when we are in deboosting mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock; per holding task->pi_lock we can't unblock
	 * and guarantee @lock's existence.
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 *
	 * Per the above, holding task->pi_lock guarantees lock exists, so
	 * inverting this lock order is infeasible from a life-time
	 * perspective.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		ret = -EDEADLK;

		/*
		 * When the deadlock is due to ww_mutex; also see above. Don't
		 * report the deadlock and instead let the ww_mutex wound/die
		 * logic pick which of the contending threads gets -EDEADLK.
		 *
		 * NOTE: assumes the cycle only contains a single ww_class; any
		 * other configuration and we fail to report; also, see
		 * lockdep.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
			ret = 0;

		raw_spin_unlock(&lock->wait_lock);
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue [7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = get_task_struct(rt_mutex_owner(lock));
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);

	/*
	 * Update the waiter prio fields now that we're dequeued.
	 *
	 * These values can have changed through either:
	 *
	 *   sys_sched_set_scheduler() / sys_sched_setattr()
	 *
	 * or
	 *
	 *   DL CBS enforcement advancing the effective deadline.
	 */
	waiter_update_prio(waiter, task);

	rt_mutex_enqueue(lock, waiter);

	/*
	 * [8] Release the (blocking) task in preparation for
	 * taking the owner task in [10].
	 *
	 * Since we hold lock->wait_lock, task cannot unblock, even if we
	 * release task->pi_lock.
	 */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		top_waiter = rt_mutex_top_waiter(lock);
		if (prerequeue_top_waiter != top_waiter)
			wake_up_state(top_waiter->task, top_waiter->wake_state);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/*
	 * [10] Grab the next task, i.e. the owner of @lock
	 *
	 * Per holding lock->wait_lock and checking for !owner above, there
	 * must be an owner and it cannot go away.
	 */
	task = get_task_struct(rt_mutex_owner(lock));
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		waiter_clone_prio(waiter, task);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(lock, task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		waiter_clone_prio(waiter, task);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(lock, task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock's waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

		/*
		 * If waiter is the highest priority waiter of @lock,
		 * or allowed to steal it, take it over.
		 */
		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
			/*
			 * We can acquire the lock. Remove the waiter from the
			 * lock waiters tree.
			 */
			rt_mutex_dequeue(lock, waiter);
		} else {
			return 0;
		}
	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/* Check whether the trylock can steal it. */
			if (!rt_mutex_steal(task_to_waiter(task),
					    rt_mutex_top_waiter(lock)))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task's
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

 takeit:
	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex_base *next_lock;
	int chain_walk = 0, res;

	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 *
	 * Except for ww_mutex, in that case the chain walk must already deal
	 * with spurious cycles, see the comments at [3] and [6].
	 */
	if (owner == task && !(build_ww_mutex() && ww_ctx))
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	waiter->task = task;
	waiter->lock = lock;
	waiter_update_prio(waiter, task);
	waiter_clone_prio(waiter, task);

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (build_ww_mutex() && ww_ctx) {
		struct rt_mutex *rtm;

		/* Check whether the waiter should back out immediately */
		rtm = container_of(lock, struct rt_mutex, rtmutex);
		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
		if (res) {
			raw_spin_lock(&task->pi_lock);
			rt_mutex_dequeue(lock, waiter);
			task->pi_blocked_on = NULL;
			raw_spin_unlock(&task->pi_lock);
			return res;
		}
	}

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		rt_mutex_adjust_prio(lock, owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
					    struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter *waiter;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(lock, current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
	 */
	preempt_disable();
	rt_mutex_wake_q_add(wqh, waiter);
	raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	int ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock, true);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = __rt_mutex_slowtrylock(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 1;

	return rt_mutex_slowtrylock(lock);
}
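
#if 0
/*
 * Usage sketch (compiled out; the function and mutex names are made up):
 * the public rt_mutex_trylock()/rt_mutex_lock()/rt_mutex_unlock() wrappers
 * built on top of this file (see rtmutex_api.c) funnel into
 * __rt_mutex_trylock()/__rt_mutex_lock() above and __rt_mutex_unlock()
 * below. A caller that prefers not to block but can fall back to sleeping
 * might look roughly like this:
 */
static DEFINE_RT_MUTEX(example_mutex);

static void example_lock_or_trylock(bool may_sleep)
{
	if (rt_mutex_trylock(&example_mutex)) {
		/* Got it without blocking. */
	} else if (may_sleep) {
		/* Contended: block and PI-boost the current owner. */
		rt_mutex_lock(&example_mutex);
	} else {
		return;
	}

	/* ... critical section ... */

	rt_mutex_unlock(&example_mutex);
}
#endif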

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
	DEFINE_RT_WAKE_Q(wqh);
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock); <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(&wqh, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
		return;

	rt_mutex_slowunlock(lock);
}

#ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	bool res = true;

	rcu_read_lock();
	for (;;) {
		/* If owner changed, trylock again. */
		if (owner != rt_mutex_owner(lock))
			break;
		/*
		 * Ensure that @owner is dereferenced after checking that
		 * the lock owner still matches @owner. If that fails,
		 * @owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();
		/*
		 * Stop spinning when:
		 *  - the lock owner has been scheduled out
		 *  - current is no longer the top waiter
		 *  - current is requested to reschedule (redundant
		 *    for CONFIG_PREEMPT_RCU=y)
		 *  - the VCPU on which owner runs is preempted
		 */
		if (!owner_on_cpu(owner) || need_resched() ||
		    !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
			res = false;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}
#else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	return false;
}
#endif

#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *	- rtmutex, futex on all kernels
 *	- mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_base *next_lock;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	rt_mutex_adjust_prio(lock, owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:		 the rt_mutex to take
 * @ww_ctx:		 WW mutex context pointer
 * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
 *			 or TASK_UNINTERRUPTIBLE)
 * @timeout:		 the pre-initialized and started timer, or NULL for none
 * @waiter:		 the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
					   struct ww_acquire_ctx *ww_ctx,
					   unsigned int state,
					   struct hrtimer_sleeper *timeout,
					   struct rt_mutex_waiter *waiter)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct task_struct *owner;
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		if (timeout && !timeout->task) {
			ret = -ETIMEDOUT;
			break;
		}
		if (signal_pending_state(state, current)) {
			ret = -EINTR;
			break;
		}

		if (build_ww_mutex() && ww_ctx) {
			ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
			if (ret)
				break;
		}

		if (waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
			schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
					     struct rt_mutex_base *lock,
					     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	if (build_ww_mutex() && w->ww_ctx)
		return;

	raw_spin_unlock_irq(&lock->wait_lock);

	WARN(1, "rtmutex deadlock detected\n");

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 * @chwalk:	Indicator whether full or partial chainwalk is requested
 * @waiter:	Initialized waiter for blocking
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
				       struct ww_acquire_ctx *ww_ctx,
				       unsigned int state,
				       enum rtmutex_chainwalk chwalk,
				       struct rt_mutex_waiter *waiter)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct ww_mutex *ww = ww_container_of(rtm);
	int ret;

	lockdep_assert_held(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		if (build_ww_mutex() && ww_ctx) {
			__ww_mutex_check_waiters(rtm, ww_ctx);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		return 0;
	}

	set_current_state(state);

	trace_contention_begin(lock, LCB_F_RT);

	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
	if (likely(!ret))
		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);

	if (likely(!ret)) {
		/* acquired the lock */
		if (build_ww_mutex() && ww_ctx) {
			if (!ww_ctx->is_wait_die)
				__ww_mutex_check_waiters(rtm, ww_ctx);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
	} else {
		__set_current_state(TASK_RUNNING);
		remove_waiter(lock, waiter);
		rt_mutex_handle_deadlock(ret, chwalk, lock, waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock, true);

	trace_contention_end(lock, ret);

	return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
					     struct ww_acquire_ctx *ww_ctx,
					     unsigned int state)
{
	struct rt_mutex_waiter waiter;
	int ret;

	rt_mutex_init_waiter(&waiter);
	waiter.ww_ctx = ww_ctx;

	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
				  &waiter);

	debug_rt_mutex_free_waiter(&waiter);
	return ret;
}

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state)
{
	unsigned long flags;
	int ret;

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */

#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:	The underlying RT mutex
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter waiter;
	struct task_struct *owner;

	lockdep_assert_held(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, current, NULL))
		return;

	rt_mutex_init_rtlock_waiter(&waiter);

	/* Save current state and set state to TASK_RTLOCK_WAIT */
	current_save_and_set_rtlock_wait_state();

	trace_contention_begin(lock, LCB_F_RT);

	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);

	for (;;) {
		/* Try to acquire the lock again */
		if (try_to_take_rt_mutex(lock, current, &waiter))
			break;

		if (&waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
			schedule_rtlock();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(TASK_RTLOCK_WAIT);
	}

	/* Restore the task state */
	current_restore_rtlock_saved_state();

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
	 * We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock, true);
	debug_rt_mutex_free_waiter(&waiter);

	trace_contention_end(lock, 0);
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	rtlock_slowlock_locked(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */
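
#if 0
/*
 * Context sketch (compiled out; lock and function names are made up): on
 * PREEMPT_RT kernels, spinlock_t is substituted with a sleeping lock built
 * on rt_mutex_base, so ordinary spinlock code like the below ends up in
 * rtlock_slowlock_locked() above when the lock is contended. The call
 * sites do not change; only the underlying implementation does.
 */
static DEFINE_SPINLOCK(example_lock);

static void example_spinlock_user(void)
{
	spin_lock(&example_lock);
	/* ... critical section, preemptible on PREEMPT_RT ... */
	spin_unlock(&example_lock);
}
#endif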