// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 *  See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include "rtmutex_common.h"

#ifndef WW_RT
# define build_ww_mutex()	(false)
# define ww_container_of(rtm)	NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

#else
# define build_ww_mutex()	(true)
# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set the bit0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
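
/*
 * Illustration only: with the encoding above, the owner task is recovered
 * by masking off the state bit, roughly (see rt_mutex_owner() in
 * rtmutex_common.h):
 *
 *	owner = (unsigned long)READ_ONCE(lock->owner);
 *	return (struct task_struct *)(owner & ~RT_MUTEX_HAS_WAITERS);
 *
 * so both "free" rows of the table decode to NULL and both "held" rows
 * decode to the owning task, independent of the waiters bit.
 */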

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	WRITE_ONCE(lock->owner, (struct task_struct *)val);
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0	CPU 1		CPU2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *				rt_mutex_lock(l)
	 *				lock(l->lock)
	 *				l->owner = T1 | HAS_WAITERS;
	 *				enqueue(T3)
	 *				boost()
	 *				  unlock(l->lock)
	 *				block()
	 *		signal(->T2)	signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *				lock(l->lock)
	 *				dequeue(T3)
	 *				 ==> wait list is empty
	 *				deboost()
	 *				 unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l) {
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		    l->owner = owner
	 *		      ==> l->owner = T1
	 *		  }
	 *				lock(l->lock)
	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
	 *				  if (wait_list_empty(l) {
	 *				    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *				    l->owner = owner
	 *				      ==> l->owner = T1
	 *				  }
	 *
	 * With the check for the waiter bit in place T3 on CPU2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS)
		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_release(&lock->owner, &old, new);
}
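
/*
 * Illustration only: the fast paths later in this file use these helpers
 * roughly as
 *
 *	acquire:  rt_mutex_cmpxchg_acquire(lock, NULL, current)
 *	release:  rt_mutex_cmpxchg_release(lock, current, NULL)
 *
 * i.e. an uncontended lock toggles between NULL and current without ever
 * taking lock->wait_lock; see __rt_mutex_trylock(), __rt_mutex_lock() and
 * __rt_mutex_unlock() below.
 */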

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg_relaxed(p, owner,
				 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
	int prio = task->prio;

	if (!rt_prio(prio))
		return DEFAULT_PRIO;

	return prio;
}

static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	waiter->prio = __waiter_prio(task);
	waiter->deadline = task->dl.deadline;
}

/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p)	\
	&(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
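
/*
 * Illustration only, assuming the usual task->prio mapping: lower ->prio
 * means higher priority, so a SCHED_DEADLINE waiter (negative prio) orders
 * before any SCHED_FIFO/RR waiter, which in turn orders before SCHED_OTHER
 * waiters, all of which __waiter_prio() flattens to DEFAULT_PRIO. Deadlines
 * only break ties between two DL waiters; for ww_mutex builds, ww_ctx
 * stamps additionally order otherwise-equal waiters (see __waiter_less()
 * below).
 */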

static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
						struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->deadline, right->deadline);

	return 0;
}

static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
						 struct rt_mutex_waiter *right)
{
	if (left->prio != right->prio)
		return 0;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 0 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return left->deadline == right->deadline;

	return 1;
}

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
				  struct rt_mutex_waiter *top_waiter)
{
	if (rt_mutex_waiter_less(waiter, top_waiter))
		return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
	/*
	 * Note that RT tasks are excluded from same priority (lateral)
	 * steals to prevent the introduction of an unbounded latency.
	 */
	if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
		return false;

	return rt_mutex_waiter_equal(waiter, top_waiter);
#else
	return false;
#endif
}

#define __node_2_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, tree_entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
	struct rt_mutex_waiter *aw = __node_2_waiter(a);
	struct rt_mutex_waiter *bw = __node_2_waiter(b);

	if (rt_mutex_waiter_less(aw, bw))
		return 1;

	if (!build_ww_mutex())
		return 0;

	if (rt_mutex_waiter_less(bw, aw))
		return 0;

	/* NOTE: relies on waiter->ww_ctx being set before insertion */
	if (aw->ww_ctx) {
		if (!bw->ww_ctx)
			return 1;

		return (signed long)(aw->ww_ctx->stamp -
				     bw->ww_ctx->stamp) < 0;
	}

	return 0;
}

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	rb_erase_cached(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

#define __node_2_pi_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)

static __always_inline bool
__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
	return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
{
	struct task_struct *pi_task = NULL;

	lockdep_assert_held(&p->pi_lock);

	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

	rt_mutex_setprio(p, pi_task);
}
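
/*
 * Illustration only: two trees are involved. Each lock keeps all of its
 * waiters in lock->waiters, while a task's ->pi_waiters holds just the top
 * waiter of every rtmutex that task currently owns. If, say, task T owns
 * L1 (top waiter at prio 10) and L2 (top waiter at prio 40), T->pi_waiters
 * contains those two entries and rt_mutex_adjust_prio() boosts T to prio
 * 10 via rt_mutex_setprio().
 */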

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
						struct rt_mutex_waiter *w)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) {
		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
			WARN_ON_ONCE(wqh->rtlock_task);
		get_task_struct(w->task);
		wqh->rtlock_task = w->task;
	} else {
		wake_q_add(&wqh->head, w->task);
	}
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
		put_task_struct(wqh->rtlock_task);
		wqh->rtlock_task = NULL;
	}

	if (!wake_q_empty(&wqh->head))
		wake_up_q(&wqh->head);

	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
	preempt_enable();
}
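
/*
 * Illustration only: the expected calling pattern for these wrappers (see
 * rt_mutex_slowunlock() below) is
 *
 *	preempt_disable();			(in mark_wakeup_next_waiter())
 *	rt_mutex_wake_q_add(wqh, waiter);
 *	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 *	rt_mutex_wake_up_q(wqh);		(wakes waiters, then preempt_enable())
 *
 * i.e. waiters are collected under wait_lock and only woken after it has
 * been dropped, with preemption disabled across the deboost/wakeup window.
 */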

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
			      enum rtmutex_chainwalk chwalk)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
		return waiter != NULL;
	return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 *
 * again:
 *	  loop_sanity_check();
 * retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
					      enum rtmutex_chainwalk chwalk,
					      struct rt_mutex_base *orig_lock,
					      struct rt_mutex_base *next_lock,
					      struct rt_mutex_waiter *orig_waiter,
					      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex_base *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
	 * consider:
	 *
	 *   P1: A, ww_A, ww_B
	 *   P2: ww_B, ww_A
	 *   P3: A
	 *
	 * P3 should not return -EDEADLK because it gets trapped in the cycle
	 * created by P1 and P2 (which will resolve -- and runs into
	 * max_lock_depth above). Therefore disable detect_deadlock such that
	 * the below termination condition can trigger once all relevant tasks
	 * are boosted.
	 *
	 * Even when we start with ww_mutex we can disable deadlock detection,
	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
	 * Suppressing it here however is not sufficient since we might still
	 * hit [6] due to adjustment driven iteration.
	 *
	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
	 * utterly fail to report it; lockdep should.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
		detect_deadlock = false;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}
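
	/*
	 * Illustration only: the regular nesting order elsewhere in this
	 * file (e.g. task_blocks_on_rt_mutex()) is
	 *
	 *	raw_spin_lock_irq(&lock->wait_lock);
	 *	  raw_spin_lock(&task->pi_lock);
	 *
	 * The chain walk holds task->pi_lock first and then needs
	 * lock->wait_lock, which is why [5] above can only trylock and must
	 * drop pi_lock and retry on failure.
	 */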

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		ret = -EDEADLK;

		/*
		 * When the deadlock is due to ww_mutex; also see above. Don't
		 * report the deadlock and instead let the ww_mutex wound/die
		 * logic pick which of the contending threads gets -EDEADLK.
		 *
		 * NOTE: assumes the cycle only contains a single ww_class; any
		 * other configuration and we fail to report; also, see
		 * lockdep.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
			ret = 0;

		raw_spin_unlock(&lock->wait_lock);
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue [7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = get_task_struct(rt_mutex_owner(lock));
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);

	/*
	 * Update the waiter prio fields now that we're dequeued.
	 *
	 * These values can have changed through either:
	 *
	 *   sys_sched_set_scheduler() / sys_sched_setattr()
	 *
	 * or
	 *
	 *   DL CBS enforcement advancing the effective deadline.
	 *
	 * Even though pi_waiters also uses these fields, and that tree is only
	 * updated in [11], we can do this here, since we hold [L], which
	 * serializes all pi_waiters access and rb_erase() does not care about
	 * the values of the node being removed.
	 */
	waiter_update_prio(waiter, task);

	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
			wake_up_state(waiter->task, waiter->wake_state);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = get_task_struct(rt_mutex_owner(lock));
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi_waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi_waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
	put_task_struct(task);

	return ret;
}
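
/*
 * Illustration only: if T1 blocks on L2 owned by T2, and T2 is itself
 * blocked on L3 owned by T3, boosting T1 walks L2 -> T2 -> L3 -> T3 one
 * step at a time, holding at most one wait_lock and one pi_lock per step
 * as described in the protection scope table above. The walk stops at T3
 * because T3 is not blocked on anything (next_lock == NULL).
 */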

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

		/*
		 * If waiter is the highest priority waiter of @lock,
		 * or allowed to steal it, take it over.
		 */
		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
			/*
			 * We can acquire the lock. Remove the waiter from the
			 * lock waiters tree.
			 */
			rt_mutex_dequeue(lock, waiter);
		} else {
			return 0;
		}
	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/* Check whether the trylock can steal it. */
			if (!rt_mutex_steal(task_to_waiter(task),
					    rt_mutex_top_waiter(lock)))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

takeit:
	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	return 1;
}
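
/*
 * Illustration only: a trylock style caller is expected to pair this with
 * fixup_rt_mutex_waiters() under wait_lock, roughly (see
 * __rt_mutex_slowtrylock() below):
 *
 *	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 *	ret = try_to_take_rt_mutex(lock, current, NULL);
 *	fixup_rt_mutex_waiters(lock);
 *	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 *
 * because mark_rt_mutex_waiters() above may leave the waiters bit set even
 * though the acquisition failed.
 */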

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex_base *next_lock;
	int chain_walk = 0, res;

	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	waiter->task = task;
	waiter->lock = lock;
	waiter_update_prio(waiter, task);

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (build_ww_mutex() && ww_ctx) {
		struct rt_mutex *rtm;

		/* Check whether the waiter should back out immediately */
		rtm = container_of(lock, struct rt_mutex, rtmutex);
		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
		if (res) {
			raw_spin_lock(&task->pi_lock);
			rt_mutex_dequeue(lock, waiter);
			task->pi_blocked_on = NULL;
			raw_spin_unlock(&task->pi_lock);
			return res;
		}
	}

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}
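
/*
 * Illustration only: the function above kicks off a chain walk only when
 * blocking @task changed the owner's top waiter (so a boost may have to
 * propagate) or full deadlock detection was requested, and even then only
 * if the owner is itself blocked (next_lock != NULL). E.g. T blocking on L
 * owned by O needs no walk while O is running; if T became L's top waiter
 * and O is blocked on L2, the walk continues at L2's owner.
 */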

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
					    struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter *waiter;

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
	 */
	preempt_disable();
	rt_mutex_wake_q_add(wqh, waiter);
	raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	int ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = __rt_mutex_slowtrylock(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 1;

	return rt_mutex_slowtrylock(lock);
}

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
	DEFINE_RT_WAKE_Q(wqh);
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock); <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(&wqh, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
		return;

	rt_mutex_slowunlock(lock);
}

#ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	bool res = true;

	rcu_read_lock();
	for (;;) {
		/* If owner changed, trylock again. */
		if (owner != rt_mutex_owner(lock))
			break;
		/*
		 * Ensure that @owner is dereferenced after checking that
		 * the lock owner still matches @owner. If that fails,
		 * @owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();
		/*
		 * Stop spinning when:
		 *  - the lock owner has been scheduled out
		 *  - current is no longer the top waiter
		 *  - current is requested to reschedule (redundant
		 *    for CONFIG_PREEMPT_RCU=y)
		 *  - the VCPU on which owner runs is preempted
		 */
		if (!owner->on_cpu || need_resched() ||
		    !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
		    vcpu_is_preempted(task_cpu(owner))) {
			res = false;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}
#else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	return false;
}
#endif
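
/*
 * Illustration only: the slow lock loops below use this as an adaptive
 * spin, roughly
 *
 *	owner = (we are the top waiter) ? rt_mutex_owner(lock) : NULL;
 *	raw_spin_unlock_irq(&lock->wait_lock);
 *	if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
 *		schedule();
 *
 * i.e. only the top waiter spins, and only while the owner is running on a
 * CPU; everybody else (and the !SMP build) just sleeps.
 */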

#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *	- rtmutex, futex on all kernels
 *	- mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_base *next_lock;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:		 the rt_mutex to take
 * @ww_ctx:		 WW mutex context pointer
 * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
 *			 or TASK_UNINTERRUPTIBLE)
 * @timeout:		 the pre-initialized and started timer, or NULL for none
 * @waiter:		 the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
					   struct ww_acquire_ctx *ww_ctx,
					   unsigned int state,
					   struct hrtimer_sleeper *timeout,
					   struct rt_mutex_waiter *waiter)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct task_struct *owner;
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		if (timeout && !timeout->task) {
			ret = -ETIMEDOUT;
			break;
		}
		if (signal_pending_state(state, current)) {
			ret = -EINTR;
			break;
		}

		if (build_ww_mutex() && ww_ctx) {
			ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
			if (ret)
				break;
		}

		if (waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
			schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}
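
/*
 * Illustration only, assuming hrtimer_sleeper semantics: callers that want
 * a timed lock pass a started hrtimer_sleeper whose expiry callback clears
 * timeout->task and wakes the blocked task, so the loop above observes
 * !timeout->task on its next iteration and returns -ETIMEDOUT.
 */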

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
					     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	if (build_ww_mutex() && w->ww_ctx)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	WARN(1, "rtmutex deadlock detected\n");
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 * @chwalk:	Indicator whether full or partial chainwalk is requested
 * @waiter:	Initialized waiter for blocking
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
				       struct ww_acquire_ctx *ww_ctx,
				       unsigned int state,
				       enum rtmutex_chainwalk chwalk,
				       struct rt_mutex_waiter *waiter)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct ww_mutex *ww = ww_container_of(rtm);
	int ret;

	lockdep_assert_held(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		if (build_ww_mutex() && ww_ctx) {
			__ww_mutex_check_waiters(rtm, ww_ctx);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		return 0;
	}

	set_current_state(state);

	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
	if (likely(!ret))
		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);

	if (likely(!ret)) {
		/* acquired the lock */
		if (build_ww_mutex() && ww_ctx) {
			if (!ww_ctx->is_wait_die)
				__ww_mutex_check_waiters(rtm, ww_ctx);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
	} else {
		__set_current_state(TASK_RUNNING);
		remove_waiter(lock, waiter);
		rt_mutex_handle_deadlock(ret, chwalk, waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);
	return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
					     struct ww_acquire_ctx *ww_ctx,
					     unsigned int state)
{
	struct rt_mutex_waiter waiter;
	int ret;

	rt_mutex_init_waiter(&waiter);
	waiter.ww_ctx = ww_ctx;

	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
				  &waiter);

	debug_rt_mutex_free_waiter(&waiter);
	return ret;
}
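
/*
 * Illustration only: @state selects how the sleep may be interrupted. E.g.
 * the plain rt_mutex_lock() wrappers pass TASK_UNINTERRUPTIBLE, while the
 * interruptible/killable variants pass TASK_INTERRUPTIBLE/TASK_KILLABLE and
 * get -EINTR out of rt_mutex_slowlock_block() via signal_pending_state().
 */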

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state)
{
	unsigned long flags;
	int ret;

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */

#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:	The underlying RT mutex
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter waiter;
	struct task_struct *owner;

	lockdep_assert_held(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, current, NULL))
		return;

	rt_mutex_init_rtlock_waiter(&waiter);

	/* Save current state and set state to TASK_RTLOCK_WAIT */
	current_save_and_set_rtlock_wait_state();

	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);

	for (;;) {
		/* Try to acquire the lock again */
		if (try_to_take_rt_mutex(lock, current, &waiter))
			break;

		if (&waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
			schedule_rtlock();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(TASK_RTLOCK_WAIT);
	}

	/* Restore the task state */
	current_restore_rtlock_saved_state();

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
	 * We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock);
	debug_rt_mutex_free_waiter(&waiter);
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	rtlock_slowlock_locked(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */