// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 * See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include "rtmutex_common.h"

#ifndef WW_RT
# define build_ww_mutex()	(false)
# define ww_container_of(rtm)	NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

#else
# define build_ww_mutex()	(true)
# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit 0 before looking at the lock, and the owner may
 * be NULL in this small window, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
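 *
 * In both cases the slow path callers use fixup_rt_mutex_waiters() below
 * to clear the bit again once the wait tree turns out to be empty.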
 */

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	WRITE_ONCE(lock->owner, (struct task_struct *)val);
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0	CPU 1		CPU2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *				rt_mutex_lock(l)
	 *				lock(l->lock)
	 *				l->owner = T1 | HAS_WAITERS;
	 *				enqueue(T3)
	 *				boost()
	 *				  unlock(l->lock)
	 *				block()
	 *		signal(->T2)	signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *				lock(l->lock)
	 *				dequeue(T3)
	 *				 ==> wait list is empty
	 *				deboost()
	 *				  unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l)) {
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		    l->owner = owner
	 *		      ==> l->owner = T1
	 *		  }
	 *				lock(l->lock)
	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
	 *				  if (wait_list_empty(l)) {
	 *				    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *				    l->owner = owner
	 *				      ==> l->owner = T1
	 *				  }
	 *
	 * With the check for the waiter bit in place T3 on CPU2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS)
		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_release(&lock->owner, &old, new);
}

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
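 *
 * The cmpxchg() loop below only ORs in RT_MUTEX_HAS_WAITERS and retries if
 * the owner changes underneath; it never changes the owner pointer itself.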
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg_relaxed(p, owner,
				 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;

}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
	int prio = task->prio;

	if (!rt_prio(prio))
		return DEFAULT_PRIO;

	return prio;
}

static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	waiter->prio = __waiter_prio(task);
	waiter->deadline = task->dl.deadline;
}

/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p)	\
	&(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }

static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
						struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
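	 * (Lower numerical ->prio means higher priority; for two deadline
	 * tasks the earlier absolute deadline wins.)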
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->deadline, right->deadline);

	return 0;
}

static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
						 struct rt_mutex_waiter *right)
{
	if (left->prio != right->prio)
		return 0;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 0 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return left->deadline == right->deadline;

	return 1;
}

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
				  struct rt_mutex_waiter *top_waiter)
{
	if (rt_mutex_waiter_less(waiter, top_waiter))
		return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
	/*
	 * Note that RT tasks are excluded from same priority (lateral)
	 * steals to prevent the introduction of an unbounded latency.
	 */
	if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
		return false;

	return rt_mutex_waiter_equal(waiter, top_waiter);
#else
	return false;
#endif
}

#define __node_2_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, tree_entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
	struct rt_mutex_waiter *aw = __node_2_waiter(a);
	struct rt_mutex_waiter *bw = __node_2_waiter(b);

	if (rt_mutex_waiter_less(aw, bw))
		return 1;

	if (!build_ww_mutex())
		return 0;

	if (rt_mutex_waiter_less(bw, aw))
		return 0;

	/* NOTE: relies on waiter->ww_ctx being set before insertion */
	if (aw->ww_ctx) {
		if (!bw->ww_ctx)
			return 1;

		return (signed long)(aw->ww_ctx->stamp -
				     bw->ww_ctx->stamp) < 0;
	}

	return 0;
}

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	rb_erase_cached(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

#define __node_2_pi_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)

static __always_inline bool
__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
	return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
{
	struct task_struct *pi_task = NULL;

	lockdep_assert_held(&p->pi_lock);

	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

	rt_mutex_setprio(p, pi_task);
}

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
						     struct task_struct *task,
						     unsigned int wake_state)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
			WARN_ON_ONCE(wqh->rtlock_task);
		get_task_struct(task);
		wqh->rtlock_task = task;
	} else {
		wake_q_add(&wqh->head, task);
	}
}

static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
						struct rt_mutex_waiter *w)
{
	rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
		put_task_struct(wqh->rtlock_task);
		wqh->rtlock_task = NULL;
	}

	if (!wake_q_empty(&wqh->head))
		wake_up_q(&wqh->head);

	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
	preempt_enable();
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
			      enum rtmutex_chainwalk chwalk)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
		return waiter != NULL;
	return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 * again:
 *	  loop_sanity_check();
 * retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
					      enum rtmutex_chainwalk chwalk,
					      struct rt_mutex_base *orig_lock,
					      struct rt_mutex_base *next_lock,
					      struct rt_mutex_waiter *orig_waiter,
					      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex_base *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
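	 * If so there is no owner left to boost and the chain walk stops here.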
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
	 * consider:
	 *
	 *   P1: A, ww_A, ww_B
	 *   P2: ww_B, ww_A
	 *   P3: A
	 *
	 * P3 should not return -EDEADLK because it gets trapped in the cycle
	 * created by P1 and P2 (which will resolve -- and runs into
	 * max_lock_depth above). Therefore disable detect_deadlock such that
	 * the below termination condition can trigger once all relevant tasks
	 * are boosted.
	 *
	 * Even when we start with ww_mutex we can disable deadlock detection,
	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
	 * Suppressing it here however is not sufficient since we might still
	 * hit [6] due to adjustment driven iteration.
	 *
	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
	 * utterly fail to report it; lockdep should.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
		detect_deadlock = false;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		ret = -EDEADLK;

		/*
		 * When the deadlock is due to ww_mutex; also see above. Don't
		 * report the deadlock and instead let the ww_mutex wound/die
		 * logic pick which of the contending threads gets -EDEADLK.
		 *
		 * NOTE: assumes the cycle only contains a single ww_class; any
		 * other configuration and we fail to report; also, see
		 * lockdep.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
			ret = 0;

		raw_spin_unlock(&lock->wait_lock);
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue[7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = get_task_struct(rt_mutex_owner(lock));
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);

	/*
	 * Update the waiter prio fields now that we're dequeued.
	 *
	 * These values can have changed through either:
	 *
	 *   sys_sched_set_scheduler() / sys_sched_setattr()
	 *
	 * or
	 *
	 *   DL CBS enforcement advancing the effective deadline.
	 *
	 * Even though pi_waiters also uses these fields, and that tree is only
	 * updated in [11], we can do this here, since we hold [L], which
	 * serializes all pi_waiters access and rb_erase() does not care about
	 * the values of the node being removed.
	 */
	waiter_update_prio(waiter, task);

	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
			wake_up_state(waiter->task, waiter->wake_state);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = get_task_struct(rt_mutex_owner(lock));
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner tasks pi waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner tasks pi waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

		/*
		 * If waiter is the highest priority waiter of @lock,
		 * or allowed to steal it, take it over.
		 */
		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
			/*
			 * We can acquire the lock. Remove the waiter from the
			 * lock waiters tree.
			 */
			rt_mutex_dequeue(lock, waiter);
		} else {
			return 0;
		}
	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/* Check whether the trylock can steal it. */
			if (!rt_mutex_steal(task_to_waiter(task),
					    rt_mutex_top_waiter(lock)))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

 takeit:
	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex_base *next_lock;
	int chain_walk = 0, res;

	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 *
	 * Except for ww_mutex, in that case the chain walk must already deal
	 * with spurious cycles, see the comments at [3] and [6].
	 */
	if (owner == task && !(build_ww_mutex() && ww_ctx))
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	waiter->task = task;
	waiter->lock = lock;
	waiter_update_prio(waiter, task);

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (build_ww_mutex() && ww_ctx) {
		struct rt_mutex *rtm;

		/* Check whether the waiter should back out immediately */
		rtm = container_of(lock, struct rt_mutex, rtmutex);
		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
		if (res) {
			raw_spin_lock(&task->pi_lock);
			rt_mutex_dequeue(lock, waiter);
			task->pi_blocked_on = NULL;
			raw_spin_unlock(&task->pi_lock);
			return res;
		}
	}

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current tasks pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
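 * The actual wakeup happens in rt_mutex_wake_up_q() after the caller has
 * dropped lock->wait_lock.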
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
					    struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter *waiter;

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
	 */
	preempt_disable();
	rt_mutex_wake_q_add(wqh, waiter);
	raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	int ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = __rt_mutex_slowtrylock(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 1;

	return rt_mutex_slowtrylock(lock);
}

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
	DEFINE_RT_WAKE_Q(wqh);
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock); <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(&wqh, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
		return;

	rt_mutex_slowunlock(lock);
}

#ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	bool res = true;

	rcu_read_lock();
	for (;;) {
		/* If owner changed, trylock again. */
		if (owner != rt_mutex_owner(lock))
			break;
		/*
		 * Ensure that @owner is dereferenced after checking that
		 * the lock owner still matches @owner. If that fails,
		 * @owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();
		/*
		 * Stop spinning when:
		 *  - the lock owner has been scheduled out
		 *  - current is no longer the top waiter
		 *  - current is requested to reschedule (redundant
		 *    for CONFIG_PREEMPT_RCU=y)
		 *  - the VCPU on which owner runs is preempted
		 */
		if (!owner_on_cpu(owner) || need_resched() ||
		    !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
			res = false;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}
#else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	return false;
}
#endif

#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *  - rtmutex, futex on all kernels
 *  - mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_base *next_lock;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:	 the rt_mutex to take
 * @ww_ctx:	 WW mutex context pointer
 * @state:	 the state the task should block in (TASK_INTERRUPTIBLE
 *		 or TASK_UNINTERRUPTIBLE)
 * @timeout:	 the pre-initialized and started timer, or NULL for none
 * @waiter:	 the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
					   struct ww_acquire_ctx *ww_ctx,
					   unsigned int state,
					   struct hrtimer_sleeper *timeout,
					   struct rt_mutex_waiter *waiter)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct task_struct *owner;
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		if (timeout && !timeout->task) {
			ret = -ETIMEDOUT;
			break;
		}
		if (signal_pending_state(state, current)) {
			ret = -EINTR;
			break;
		}

		if (build_ww_mutex() && ww_ctx) {
			ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
			if (ret)
				break;
		}

		if (waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
			schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
					     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	if (build_ww_mutex() && w->ww_ctx)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	WARN(1, "rtmutex deadlock detected\n");
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 * @chwalk:	Indicator whether full or partial chainwalk is requested
 * @waiter:	Initialized waiter for blocking
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
				       struct ww_acquire_ctx *ww_ctx,
				       unsigned int state,
				       enum rtmutex_chainwalk chwalk,
				       struct rt_mutex_waiter *waiter)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct ww_mutex *ww = ww_container_of(rtm);
	int ret;

	lockdep_assert_held(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		if (build_ww_mutex() && ww_ctx) {
			__ww_mutex_check_waiters(rtm, ww_ctx);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		return 0;
	}

	set_current_state(state);

	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
	if (likely(!ret))
		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);

	if (likely(!ret)) {
		/* acquired the lock */
		if (build_ww_mutex() && ww_ctx) {
			if (!ww_ctx->is_wait_die)
				__ww_mutex_check_waiters(rtm, ww_ctx);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
	} else {
		__set_current_state(TASK_RUNNING);
		remove_waiter(lock, waiter);
		rt_mutex_handle_deadlock(ret, chwalk, waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);
	return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
					     struct ww_acquire_ctx *ww_ctx,
					     unsigned int state)
{
	struct rt_mutex_waiter waiter;
	int ret;

	rt_mutex_init_waiter(&waiter);
	waiter.ww_ctx = ww_ctx;

	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
				  &waiter);

	debug_rt_mutex_free_waiter(&waiter);
	return ret;
}

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state)
{
	unsigned long flags;
	int ret;

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
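	 *
	 * (rtlock_slowlock() below uses the same irqsave pattern for the
	 * spinlock substitution on RT.)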
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */

#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:	The underlying RT mutex
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter waiter;
	struct task_struct *owner;

	lockdep_assert_held(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, current, NULL))
		return;

	rt_mutex_init_rtlock_waiter(&waiter);

	/* Save current state and set state to TASK_RTLOCK_WAIT */
	current_save_and_set_rtlock_wait_state();

	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);

	for (;;) {
		/* Try to acquire the lock again */
		if (try_to_take_rt_mutex(lock, current, &waiter))
			break;

		if (&waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
			schedule_rtlock();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(TASK_RTLOCK_WAIT);
	}

	/* Restore the task state */
	current_restore_rtlock_saved_state();

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
	 * We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock);
	debug_rt_mutex_free_waiter(&waiter);
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	rtlock_slowlock_locked(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */