1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * RT-Mutexes: simple blocking mutual exclusion locks with PI support 4 * 5 * started by Ingo Molnar and Thomas Gleixner. 6 * 7 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 8 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 9 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt 10 * Copyright (C) 2006 Esben Nielsen 11 * 12 * See Documentation/locking/rt-mutex-design.rst for details. 13 */ 14 #include <linux/spinlock.h> 15 #include <linux/export.h> 16 #include <linux/sched/signal.h> 17 #include <linux/sched/rt.h> 18 #include <linux/sched/deadline.h> 19 #include <linux/sched/wake_q.h> 20 #include <linux/sched/debug.h> 21 #include <linux/timer.h> 22 23 #include "rtmutex_common.h" 24 25 /* 26 * lock->owner state tracking: 27 * 28 * lock->owner holds the task_struct pointer of the owner. Bit 0 29 * is used to keep track of the "lock has waiters" state. 30 * 31 * owner bit0 32 * NULL 0 lock is free (fast acquire possible) 33 * NULL 1 lock is free and has waiters and the top waiter 34 * is going to take the lock* 35 * taskpointer 0 lock is held (fast release possible) 36 * taskpointer 1 lock is held and has waiters** 37 * 38 * The fast atomic compare exchange based acquire and release is only 39 * possible when bit 0 of lock->owner is 0. 40 * 41 * (*) It also can be a transitional state when grabbing the lock 42 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, 43 * we need to set the bit0 before looking at the lock, and the owner may be 44 * NULL in this small time, hence this can be a transitional state. 45 * 46 * (**) There is a small time when bit 0 is set but there are no 47 * waiters. This can happen when grabbing the lock in the slow path. 48 * To prevent a cmpxchg of the owner releasing the lock, we need to 49 * set this bit before looking at the lock. 
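 *
 * As a rough illustration of this encoding (a sketch only; the real
 * helpers live in rtmutex_common.h), the owner task pointer and the
 * waiters bit share one pointer-sized word:
 *
 *	#define RT_MUTEX_HAS_WAITERS	1UL
 *
 *	static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
 *	{
 *		unsigned long owner = (unsigned long)READ_ONCE(lock->owner);
 *
 *		return (struct task_struct *)(owner & ~RT_MUTEX_HAS_WAITERS);
 *	}
 *
 * i.e. readers mask out bit 0 before treating the value as a task
 * pointer and writers OR the bit back in as required.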
50 */ 51 52 static void 53 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) 54 { 55 unsigned long val = (unsigned long)owner; 56 57 if (rt_mutex_has_waiters(lock)) 58 val |= RT_MUTEX_HAS_WAITERS; 59 60 lock->owner = (struct task_struct *)val; 61 } 62 63 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) 64 { 65 lock->owner = (struct task_struct *) 66 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 67 } 68 69 static void fixup_rt_mutex_waiters(struct rt_mutex *lock) 70 { 71 unsigned long owner, *p = (unsigned long *) &lock->owner; 72 73 if (rt_mutex_has_waiters(lock)) 74 return; 75 76 /* 77 * The rbtree has no waiters enqueued, now make sure that the 78 * lock->owner still has the waiters bit set, otherwise the 79 * following can happen: 80 * 81 * CPU 0 CPU 1 CPU2 82 * l->owner=T1 83 * rt_mutex_lock(l) 84 * lock(l->lock) 85 * l->owner = T1 | HAS_WAITERS; 86 * enqueue(T2) 87 * boost() 88 * unlock(l->lock) 89 * block() 90 * 91 * rt_mutex_lock(l) 92 * lock(l->lock) 93 * l->owner = T1 | HAS_WAITERS; 94 * enqueue(T3) 95 * boost() 96 * unlock(l->lock) 97 * block() 98 * signal(->T2) signal(->T3) 99 * lock(l->lock) 100 * dequeue(T2) 101 * deboost() 102 * unlock(l->lock) 103 * lock(l->lock) 104 * dequeue(T3) 105 * ==> wait list is empty 106 * deboost() 107 * unlock(l->lock) 108 * lock(l->lock) 109 * fixup_rt_mutex_waiters() 110 * if (wait_list_empty(l) { 111 * l->owner = owner 112 * owner = l->owner & ~HAS_WAITERS; 113 * ==> l->owner = T1 114 * } 115 * lock(l->lock) 116 * rt_mutex_unlock(l) fixup_rt_mutex_waiters() 117 * if (wait_list_empty(l) { 118 * owner = l->owner & ~HAS_WAITERS; 119 * cmpxchg(l->owner, T1, NULL) 120 * ===> Success (l->owner = NULL) 121 * 122 * l->owner = owner 123 * ==> l->owner = T1 124 * } 125 * 126 * With the check for the waiter bit in place T3 on CPU2 will not 127 * overwrite. All tasks fiddling with the waiters bit are 128 * serialized by l->lock, so nothing else can modify the waiters 129 * bit. If the bit is set then nothing can change l->owner either 130 * so the simple RMW is safe. The cmpxchg() will simply fail if it 131 * happens in the middle of the RMW because the waiters bit is 132 * still set. 133 */ 134 owner = READ_ONCE(*p); 135 if (owner & RT_MUTEX_HAS_WAITERS) 136 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); 137 } 138 139 /* 140 * We can speed up the acquire/release, if there's no debugging state to be 141 * set up. 142 */ 143 #ifndef CONFIG_DEBUG_RT_MUTEXES 144 # define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c) 145 # define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c) 146 # define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c) 147 148 /* 149 * Callers must hold the ->wait_lock -- which is the whole purpose as we force 150 * all future threads that attempt to [Rmw] the lock to the slowpath. As such 151 * relaxed semantics suffice. 
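 *
 * For illustration: with debugging disabled, the fast path built on the
 * cmpxchg helpers above boils down to a single compare-and-exchange on
 * lock->owner, roughly what rt_mutex_fastlock() further down does:
 *
 *	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
 *		return 0;
 *
 *	return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
 *
 * Any lock word with bit 0 set can never compare equal to a plain NULL
 * or task pointer, so such lockers are funneled into the slow path and
 * end up serialized on ->wait_lock, which is why the relaxed ordering
 * here is sufficient.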
152 */ 153 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 154 { 155 unsigned long owner, *p = (unsigned long *) &lock->owner; 156 157 do { 158 owner = *p; 159 } while (cmpxchg_relaxed(p, owner, 160 owner | RT_MUTEX_HAS_WAITERS) != owner); 161 } 162 163 /* 164 * Safe fastpath aware unlock: 165 * 1) Clear the waiters bit 166 * 2) Drop lock->wait_lock 167 * 3) Try to unlock the lock with cmpxchg 168 */ 169 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 170 unsigned long flags) 171 __releases(lock->wait_lock) 172 { 173 struct task_struct *owner = rt_mutex_owner(lock); 174 175 clear_rt_mutex_waiters(lock); 176 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 177 /* 178 * If a new waiter comes in between the unlock and the cmpxchg 179 * we have two situations: 180 * 181 * unlock(wait_lock); 182 * lock(wait_lock); 183 * cmpxchg(p, owner, 0) == owner 184 * mark_rt_mutex_waiters(lock); 185 * acquire(lock); 186 * or: 187 * 188 * unlock(wait_lock); 189 * lock(wait_lock); 190 * mark_rt_mutex_waiters(lock); 191 * 192 * cmpxchg(p, owner, 0) != owner 193 * enqueue_waiter(); 194 * unlock(wait_lock); 195 * lock(wait_lock); 196 * wake waiter(); 197 * unlock(wait_lock); 198 * lock(wait_lock); 199 * acquire(lock); 200 */ 201 return rt_mutex_cmpxchg_release(lock, owner, NULL); 202 } 203 204 #else 205 # define rt_mutex_cmpxchg_relaxed(l,c,n) (0) 206 # define rt_mutex_cmpxchg_acquire(l,c,n) (0) 207 # define rt_mutex_cmpxchg_release(l,c,n) (0) 208 209 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 210 { 211 lock->owner = (struct task_struct *) 212 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 213 } 214 215 /* 216 * Simple slow path only version: lock->owner is protected by lock->wait_lock. 217 */ 218 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 219 unsigned long flags) 220 __releases(lock->wait_lock) 221 { 222 lock->owner = NULL; 223 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 224 return true; 225 } 226 #endif 227 228 /* 229 * Only use with rt_mutex_waiter_{less,equal}() 230 */ 231 #define task_to_waiter(p) \ 232 &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } 233 234 static inline int 235 rt_mutex_waiter_less(struct rt_mutex_waiter *left, 236 struct rt_mutex_waiter *right) 237 { 238 if (left->prio < right->prio) 239 return 1; 240 241 /* 242 * If both waiters have dl_prio(), we check the deadlines of the 243 * associated tasks. 244 * If left waiter has a dl_prio(), and we didn't return 1 above, 245 * then right waiter has a dl_prio() too. 246 */ 247 if (dl_prio(left->prio)) 248 return dl_time_before(left->deadline, right->deadline); 249 250 return 0; 251 } 252 253 static inline int 254 rt_mutex_waiter_equal(struct rt_mutex_waiter *left, 255 struct rt_mutex_waiter *right) 256 { 257 if (left->prio != right->prio) 258 return 0; 259 260 /* 261 * If both waiters have dl_prio(), we check the deadlines of the 262 * associated tasks. 263 * If left waiter has a dl_prio(), and we didn't return 0 above, 264 * then right waiter has a dl_prio() too. 
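	 *
	 * A short worked example with made-up values: a SCHED_DEADLINE
	 * waiter always has a smaller ->prio than any RT waiter, so it
	 * sorts first; two DL waiters of equal ->prio are then ordered
	 * by absolute deadline, earliest first:
	 *
	 *	A: prio = -1 (DL), deadline = 100us
	 *	B: prio = -1 (DL), deadline = 200us
	 *	C: prio = 10 (RT)
	 *
	 *	rt_mutex_waiter_less(A, B)  == 1  (earlier deadline wins)
	 *	rt_mutex_waiter_less(A, C)  == 1  (DL beats RT)
	 *	rt_mutex_waiter_equal(A, B) == 0  (deadlines differ)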
265 */ 266 if (dl_prio(left->prio)) 267 return left->deadline == right->deadline; 268 269 return 1; 270 } 271 272 static void 273 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 274 { 275 struct rb_node **link = &lock->waiters.rb_root.rb_node; 276 struct rb_node *parent = NULL; 277 struct rt_mutex_waiter *entry; 278 bool leftmost = true; 279 280 while (*link) { 281 parent = *link; 282 entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); 283 if (rt_mutex_waiter_less(waiter, entry)) { 284 link = &parent->rb_left; 285 } else { 286 link = &parent->rb_right; 287 leftmost = false; 288 } 289 } 290 291 rb_link_node(&waiter->tree_entry, parent, link); 292 rb_insert_color_cached(&waiter->tree_entry, &lock->waiters, leftmost); 293 } 294 295 static void 296 rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 297 { 298 if (RB_EMPTY_NODE(&waiter->tree_entry)) 299 return; 300 301 rb_erase_cached(&waiter->tree_entry, &lock->waiters); 302 RB_CLEAR_NODE(&waiter->tree_entry); 303 } 304 305 static void 306 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 307 { 308 struct rb_node **link = &task->pi_waiters.rb_root.rb_node; 309 struct rb_node *parent = NULL; 310 struct rt_mutex_waiter *entry; 311 bool leftmost = true; 312 313 while (*link) { 314 parent = *link; 315 entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); 316 if (rt_mutex_waiter_less(waiter, entry)) { 317 link = &parent->rb_left; 318 } else { 319 link = &parent->rb_right; 320 leftmost = false; 321 } 322 } 323 324 rb_link_node(&waiter->pi_tree_entry, parent, link); 325 rb_insert_color_cached(&waiter->pi_tree_entry, &task->pi_waiters, leftmost); 326 } 327 328 static void 329 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 330 { 331 if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) 332 return; 333 334 rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters); 335 RB_CLEAR_NODE(&waiter->pi_tree_entry); 336 } 337 338 static void rt_mutex_adjust_prio(struct task_struct *p) 339 { 340 struct task_struct *pi_task = NULL; 341 342 lockdep_assert_held(&p->pi_lock); 343 344 if (task_has_pi_waiters(p)) 345 pi_task = task_top_pi_waiter(p)->task; 346 347 rt_mutex_setprio(p, pi_task); 348 } 349 350 /* 351 * Deadlock detection is conditional: 352 * 353 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted 354 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. 355 * 356 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always 357 * conducted independent of the detect argument. 358 * 359 * If the waiter argument is NULL this indicates the deboost path and 360 * deadlock detection is disabled independent of the detect argument 361 * and the config settings. 362 */ 363 static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, 364 enum rtmutex_chainwalk chwalk) 365 { 366 /* 367 * This is just a wrapper function for the following call, 368 * because debug_rt_mutex_detect_deadlock() smells like a magic 369 * debug feature and I wanted to keep the cond function in the 370 * main source file along with the comments instead of having 371 * two of the same in the headers. 372 */ 373 return debug_rt_mutex_detect_deadlock(waiter, chwalk); 374 } 375 376 /* 377 * Max number of times we'll walk the boosting chain: 378 */ 379 int max_lock_depth = 1024; 380 381 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) 382 { 383 return p->pi_blocked_on ? 
p->pi_blocked_on->lock : NULL; 384 } 385 386 /* 387 * Adjust the priority chain. Also used for deadlock detection. 388 * Decreases task's usage by one - may thus free the task. 389 * 390 * @task: the task owning the mutex (owner) for which a chain walk is 391 * probably needed 392 * @chwalk: do we have to carry out deadlock detection? 393 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 394 * things for a task that has just got its priority adjusted, and 395 * is waiting on a mutex) 396 * @next_lock: the mutex on which the owner of @orig_lock was blocked before 397 * we dropped its pi_lock. Is never dereferenced, only used for 398 * comparison to detect lock chain changes. 399 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated 400 * its priority to the mutex owner (can be NULL in the case 401 * depicted above or if the top waiter is gone away and we are 402 * actually deboosting the owner) 403 * @top_task: the current top waiter 404 * 405 * Returns 0 or -EDEADLK. 406 * 407 * Chain walk basics and protection scope 408 * 409 * [R] refcount on task 410 * [P] task->pi_lock held 411 * [L] rtmutex->wait_lock held 412 * 413 * Step Description Protected by 414 * function arguments: 415 * @task [R] 416 * @orig_lock if != NULL @top_task is blocked on it 417 * @next_lock Unprotected. Cannot be 418 * dereferenced. Only used for 419 * comparison. 420 * @orig_waiter if != NULL @top_task is blocked on it 421 * @top_task current, or in case of proxy 422 * locking protected by calling 423 * code 424 * again: 425 * loop_sanity_check(); 426 * retry: 427 * [1] lock(task->pi_lock); [R] acquire [P] 428 * [2] waiter = task->pi_blocked_on; [P] 429 * [3] check_exit_conditions_1(); [P] 430 * [4] lock = waiter->lock; [P] 431 * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L] 432 * unlock(task->pi_lock); release [P] 433 * goto retry; 434 * } 435 * [6] check_exit_conditions_2(); [P] + [L] 436 * [7] requeue_lock_waiter(lock, waiter); [P] + [L] 437 * [8] unlock(task->pi_lock); release [P] 438 * put_task_struct(task); release [R] 439 * [9] check_exit_conditions_3(); [L] 440 * [10] task = owner(lock); [L] 441 * get_task_struct(task); [L] acquire [R] 442 * lock(task->pi_lock); [L] acquire [P] 443 * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L] 444 * [12] check_exit_conditions_4(); [P] + [L] 445 * [13] unlock(task->pi_lock); release [P] 446 * unlock(lock->wait_lock); release [L] 447 * goto again; 448 */ 449 static int rt_mutex_adjust_prio_chain(struct task_struct *task, 450 enum rtmutex_chainwalk chwalk, 451 struct rt_mutex *orig_lock, 452 struct rt_mutex *next_lock, 453 struct rt_mutex_waiter *orig_waiter, 454 struct task_struct *top_task) 455 { 456 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 457 struct rt_mutex_waiter *prerequeue_top_waiter; 458 int ret = 0, depth = 0; 459 struct rt_mutex *lock; 460 bool detect_deadlock; 461 bool requeue = true; 462 463 detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); 464 465 /* 466 * The (de)boosting is a step by step approach with a lot of 467 * pitfalls. We want this to be preemptible and we want hold a 468 * maximum of two locks per step. So we have to check 469 * carefully whether things change under us. 470 */ 471 again: 472 /* 473 * We limit the lock chain length for each invocation. 474 */ 475 if (++depth > max_lock_depth) { 476 static int prev_max; 477 478 /* 479 * Print this only once. If the admin changes the limit, 480 * print a new message when reaching the limit again. 
481 */ 482 if (prev_max != max_lock_depth) { 483 prev_max = max_lock_depth; 484 printk(KERN_WARNING "Maximum lock depth %d reached " 485 "task: %s (%d)\n", max_lock_depth, 486 top_task->comm, task_pid_nr(top_task)); 487 } 488 put_task_struct(task); 489 490 return -EDEADLK; 491 } 492 493 /* 494 * We are fully preemptible here and only hold the refcount on 495 * @task. So everything can have changed under us since the 496 * caller or our own code below (goto retry/again) dropped all 497 * locks. 498 */ 499 retry: 500 /* 501 * [1] Task cannot go away as we did a get_task() before ! 502 */ 503 raw_spin_lock_irq(&task->pi_lock); 504 505 /* 506 * [2] Get the waiter on which @task is blocked on. 507 */ 508 waiter = task->pi_blocked_on; 509 510 /* 511 * [3] check_exit_conditions_1() protected by task->pi_lock. 512 */ 513 514 /* 515 * Check whether the end of the boosting chain has been 516 * reached or the state of the chain has changed while we 517 * dropped the locks. 518 */ 519 if (!waiter) 520 goto out_unlock_pi; 521 522 /* 523 * Check the orig_waiter state. After we dropped the locks, 524 * the previous owner of the lock might have released the lock. 525 */ 526 if (orig_waiter && !rt_mutex_owner(orig_lock)) 527 goto out_unlock_pi; 528 529 /* 530 * We dropped all locks after taking a refcount on @task, so 531 * the task might have moved on in the lock chain or even left 532 * the chain completely and blocks now on an unrelated lock or 533 * on @orig_lock. 534 * 535 * We stored the lock on which @task was blocked in @next_lock, 536 * so we can detect the chain change. 537 */ 538 if (next_lock != waiter->lock) 539 goto out_unlock_pi; 540 541 /* 542 * Drop out, when the task has no waiters. Note, 543 * top_waiter can be NULL, when we are in the deboosting 544 * mode! 545 */ 546 if (top_waiter) { 547 if (!task_has_pi_waiters(task)) 548 goto out_unlock_pi; 549 /* 550 * If deadlock detection is off, we stop here if we 551 * are not the top pi waiter of the task. If deadlock 552 * detection is enabled we continue, but stop the 553 * requeueing in the chain walk. 554 */ 555 if (top_waiter != task_top_pi_waiter(task)) { 556 if (!detect_deadlock) 557 goto out_unlock_pi; 558 else 559 requeue = false; 560 } 561 } 562 563 /* 564 * If the waiter priority is the same as the task priority 565 * then there is no further priority adjustment necessary. If 566 * deadlock detection is off, we stop the chain walk. If its 567 * enabled we continue, but stop the requeueing in the chain 568 * walk. 569 */ 570 if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { 571 if (!detect_deadlock) 572 goto out_unlock_pi; 573 else 574 requeue = false; 575 } 576 577 /* 578 * [4] Get the next lock 579 */ 580 lock = waiter->lock; 581 /* 582 * [5] We need to trylock here as we are holding task->pi_lock, 583 * which is the reverse lock order versus the other rtmutex 584 * operations. 585 */ 586 if (!raw_spin_trylock(&lock->wait_lock)) { 587 raw_spin_unlock_irq(&task->pi_lock); 588 cpu_relax(); 589 goto retry; 590 } 591 592 /* 593 * [6] check_exit_conditions_2() protected by task->pi_lock and 594 * lock->wait_lock. 595 * 596 * Deadlock detection. If the lock is the same as the original 597 * lock which caused us to walk the lock chain or if the 598 * current lock is owned by the task which initiated the chain 599 * walk, we detected a deadlock. 
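	 *
	 * As an illustrative two lock ABBA example of what this catches:
	 *
	 *	T1 holds A and blocks on B
	 *	T2 holds B and blocks on A
	 *
	 * Walking the chain for T2's block on A follows A's owner T1 to
	 * the lock T1 is blocked on (B) and finds that B is owned by
	 * T2 == @top_task, so the walk stops here with -EDEADLK instead
	 * of cycling around the loop forever.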
600 */ 601 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 602 debug_rt_mutex_deadlock(chwalk, orig_waiter, lock); 603 raw_spin_unlock(&lock->wait_lock); 604 ret = -EDEADLK; 605 goto out_unlock_pi; 606 } 607 608 /* 609 * If we just follow the lock chain for deadlock detection, no 610 * need to do all the requeue operations. To avoid a truckload 611 * of conditionals around the various places below, just do the 612 * minimum chain walk checks. 613 */ 614 if (!requeue) { 615 /* 616 * No requeue[7] here. Just release @task [8] 617 */ 618 raw_spin_unlock(&task->pi_lock); 619 put_task_struct(task); 620 621 /* 622 * [9] check_exit_conditions_3 protected by lock->wait_lock. 623 * If there is no owner of the lock, end of chain. 624 */ 625 if (!rt_mutex_owner(lock)) { 626 raw_spin_unlock_irq(&lock->wait_lock); 627 return 0; 628 } 629 630 /* [10] Grab the next task, i.e. owner of @lock */ 631 task = rt_mutex_owner(lock); 632 get_task_struct(task); 633 raw_spin_lock(&task->pi_lock); 634 635 /* 636 * No requeue [11] here. We just do deadlock detection. 637 * 638 * [12] Store whether owner is blocked 639 * itself. Decision is made after dropping the locks 640 */ 641 next_lock = task_blocked_on_lock(task); 642 /* 643 * Get the top waiter for the next iteration 644 */ 645 top_waiter = rt_mutex_top_waiter(lock); 646 647 /* [13] Drop locks */ 648 raw_spin_unlock(&task->pi_lock); 649 raw_spin_unlock_irq(&lock->wait_lock); 650 651 /* If owner is not blocked, end of chain. */ 652 if (!next_lock) 653 goto out_put_task; 654 goto again; 655 } 656 657 /* 658 * Store the current top waiter before doing the requeue 659 * operation on @lock. We need it for the boost/deboost 660 * decision below. 661 */ 662 prerequeue_top_waiter = rt_mutex_top_waiter(lock); 663 664 /* [7] Requeue the waiter in the lock waiter tree. */ 665 rt_mutex_dequeue(lock, waiter); 666 667 /* 668 * Update the waiter prio fields now that we're dequeued. 669 * 670 * These values can have changed through either: 671 * 672 * sys_sched_set_scheduler() / sys_sched_setattr() 673 * 674 * or 675 * 676 * DL CBS enforcement advancing the effective deadline. 677 * 678 * Even though pi_waiters also uses these fields, and that tree is only 679 * updated in [11], we can do this here, since we hold [L], which 680 * serializes all pi_waiters access and rb_erase() does not care about 681 * the values of the node being removed. 682 */ 683 waiter->prio = task->prio; 684 waiter->deadline = task->dl.deadline; 685 686 rt_mutex_enqueue(lock, waiter); 687 688 /* [8] Release the task */ 689 raw_spin_unlock(&task->pi_lock); 690 put_task_struct(task); 691 692 /* 693 * [9] check_exit_conditions_3 protected by lock->wait_lock. 694 * 695 * We must abort the chain walk if there is no lock owner even 696 * in the dead lock detection case, as we have nothing to 697 * follow here. This is the end of the chain we are walking. 698 */ 699 if (!rt_mutex_owner(lock)) { 700 /* 701 * If the requeue [7] above changed the top waiter, 702 * then we need to wake the new top waiter up to try 703 * to get the lock. 704 */ 705 if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) 706 wake_up_process(rt_mutex_top_waiter(lock)->task); 707 raw_spin_unlock_irq(&lock->wait_lock); 708 return 0; 709 } 710 711 /* [10] Grab the next task, i.e. 
the owner of @lock */ 712 task = rt_mutex_owner(lock); 713 get_task_struct(task); 714 raw_spin_lock(&task->pi_lock); 715 716 /* [11] requeue the pi waiters if necessary */ 717 if (waiter == rt_mutex_top_waiter(lock)) { 718 /* 719 * The waiter became the new top (highest priority) 720 * waiter on the lock. Replace the previous top waiter 721 * in the owner tasks pi waiters tree with this waiter 722 * and adjust the priority of the owner. 723 */ 724 rt_mutex_dequeue_pi(task, prerequeue_top_waiter); 725 rt_mutex_enqueue_pi(task, waiter); 726 rt_mutex_adjust_prio(task); 727 728 } else if (prerequeue_top_waiter == waiter) { 729 /* 730 * The waiter was the top waiter on the lock, but is 731 * no longer the top prority waiter. Replace waiter in 732 * the owner tasks pi waiters tree with the new top 733 * (highest priority) waiter and adjust the priority 734 * of the owner. 735 * The new top waiter is stored in @waiter so that 736 * @waiter == @top_waiter evaluates to true below and 737 * we continue to deboost the rest of the chain. 738 */ 739 rt_mutex_dequeue_pi(task, waiter); 740 waiter = rt_mutex_top_waiter(lock); 741 rt_mutex_enqueue_pi(task, waiter); 742 rt_mutex_adjust_prio(task); 743 } else { 744 /* 745 * Nothing changed. No need to do any priority 746 * adjustment. 747 */ 748 } 749 750 /* 751 * [12] check_exit_conditions_4() protected by task->pi_lock 752 * and lock->wait_lock. The actual decisions are made after we 753 * dropped the locks. 754 * 755 * Check whether the task which owns the current lock is pi 756 * blocked itself. If yes we store a pointer to the lock for 757 * the lock chain change detection above. After we dropped 758 * task->pi_lock next_lock cannot be dereferenced anymore. 759 */ 760 next_lock = task_blocked_on_lock(task); 761 /* 762 * Store the top waiter of @lock for the end of chain walk 763 * decision below. 764 */ 765 top_waiter = rt_mutex_top_waiter(lock); 766 767 /* [13] Drop the locks */ 768 raw_spin_unlock(&task->pi_lock); 769 raw_spin_unlock_irq(&lock->wait_lock); 770 771 /* 772 * Make the actual exit decisions [12], based on the stored 773 * values. 774 * 775 * We reached the end of the lock chain. Stop right here. No 776 * point to go back just to figure that out. 777 */ 778 if (!next_lock) 779 goto out_put_task; 780 781 /* 782 * If the current waiter is not the top waiter on the lock, 783 * then we can stop the chain walk here if we are not in full 784 * deadlock detection mode. 785 */ 786 if (!detect_deadlock && waiter != top_waiter) 787 goto out_put_task; 788 789 goto again; 790 791 out_unlock_pi: 792 raw_spin_unlock_irq(&task->pi_lock); 793 out_put_task: 794 put_task_struct(task); 795 796 return ret; 797 } 798 799 /* 800 * Try to take an rt-mutex 801 * 802 * Must be called with lock->wait_lock held and interrupts disabled 803 * 804 * @lock: The lock to be acquired. 805 * @task: The task which wants to acquire the lock 806 * @waiter: The waiter that is queued to the lock's wait tree if the 807 * callsite called task_blocked_on_lock(), otherwise NULL 808 */ 809 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, 810 struct rt_mutex_waiter *waiter) 811 { 812 lockdep_assert_held(&lock->wait_lock); 813 814 /* 815 * Before testing whether we can acquire @lock, we set the 816 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all 817 * other tasks which try to modify @lock into the slow path 818 * and they serialize on @lock->wait_lock. 
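	 *
	 * Concretely, once bit 0 is set here, a concurrent fast path
	 * attempt such as
	 *
	 *	rt_mutex_cmpxchg_acquire(lock, NULL, current)
	 *	rt_mutex_cmpxchg_release(lock, current, NULL)
	 *
	 * can no longer succeed, because lock->owner never compares
	 * equal to a plain NULL or task pointer while the bit is set;
	 * those tasks fall back to the slow path and queue up on
	 * @lock->wait_lock.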
819 * 820 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state 821 * as explained at the top of this file if and only if: 822 * 823 * - There is a lock owner. The caller must fixup the 824 * transient state if it does a trylock or leaves the lock 825 * function due to a signal or timeout. 826 * 827 * - @task acquires the lock and there are no other 828 * waiters. This is undone in rt_mutex_set_owner(@task) at 829 * the end of this function. 830 */ 831 mark_rt_mutex_waiters(lock); 832 833 /* 834 * If @lock has an owner, give up. 835 */ 836 if (rt_mutex_owner(lock)) 837 return 0; 838 839 /* 840 * If @waiter != NULL, @task has already enqueued the waiter 841 * into @lock waiter tree. If @waiter == NULL then this is a 842 * trylock attempt. 843 */ 844 if (waiter) { 845 /* 846 * If waiter is not the highest priority waiter of 847 * @lock, give up. 848 */ 849 if (waiter != rt_mutex_top_waiter(lock)) 850 return 0; 851 852 /* 853 * We can acquire the lock. Remove the waiter from the 854 * lock waiters tree. 855 */ 856 rt_mutex_dequeue(lock, waiter); 857 858 } else { 859 /* 860 * If the lock has waiters already we check whether @task is 861 * eligible to take over the lock. 862 * 863 * If there are no other waiters, @task can acquire 864 * the lock. @task->pi_blocked_on is NULL, so it does 865 * not need to be dequeued. 866 */ 867 if (rt_mutex_has_waiters(lock)) { 868 /* 869 * If @task->prio is greater than or equal to 870 * the top waiter priority (kernel view), 871 * @task lost. 872 */ 873 if (!rt_mutex_waiter_less(task_to_waiter(task), 874 rt_mutex_top_waiter(lock))) 875 return 0; 876 877 /* 878 * The current top waiter stays enqueued. We 879 * don't have to change anything in the lock 880 * waiters order. 881 */ 882 } else { 883 /* 884 * No waiters. Take the lock without the 885 * pi_lock dance.@task->pi_blocked_on is NULL 886 * and we have no waiters to enqueue in @task 887 * pi waiters tree. 888 */ 889 goto takeit; 890 } 891 } 892 893 /* 894 * Clear @task->pi_blocked_on. Requires protection by 895 * @task->pi_lock. Redundant operation for the @waiter == NULL 896 * case, but conditionals are more expensive than a redundant 897 * store. 898 */ 899 raw_spin_lock(&task->pi_lock); 900 task->pi_blocked_on = NULL; 901 /* 902 * Finish the lock acquisition. @task is the new owner. If 903 * other waiters exist we have to insert the highest priority 904 * waiter into @task->pi_waiters tree. 905 */ 906 if (rt_mutex_has_waiters(lock)) 907 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); 908 raw_spin_unlock(&task->pi_lock); 909 910 takeit: 911 /* We got the lock. */ 912 debug_rt_mutex_lock(lock); 913 914 /* 915 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there 916 * are still waiters or clears it. 917 */ 918 rt_mutex_set_owner(lock, task); 919 920 return 1; 921 } 922 923 /* 924 * Task blocks on lock. 925 * 926 * Prepare waiter and propagate pi chain 927 * 928 * This must be called with lock->wait_lock held and interrupts disabled 929 */ 930 static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 931 struct rt_mutex_waiter *waiter, 932 struct task_struct *task, 933 enum rtmutex_chainwalk chwalk) 934 { 935 struct task_struct *owner = rt_mutex_owner(lock); 936 struct rt_mutex_waiter *top_waiter = waiter; 937 struct rt_mutex *next_lock; 938 int chain_walk = 0, res; 939 940 lockdep_assert_held(&lock->wait_lock); 941 942 /* 943 * Early deadlock detection. We really don't want the task to 944 * enqueue on itself just to untangle the mess later. It's not 945 * only an optimization. 
We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	waiter->task = task;
	waiter->lock = lock;
	waiter->prio = task->prio;
	waiter->deadline = task->dl.deadline;

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
				    struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_postunlock();
	 */
	preempt_disable();
	wake_q_add(wake_q, waiter->task);
	raw_spin_unlock(&current->pi_lock);
}

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex *next_lock;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	next_lock = waiter->lock;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
				   next_lock, NULL, task);
}

void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
	debug_rt_mutex_init_waiter(waiter);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
	RB_CLEAR_NODE(&waiter->tree_entry);
	waiter->task = NULL;
}

/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock:	 the rt_mutex to take
 * @state:	 the state the task should block in (TASK_INTERRUPTIBLE
 *		 or TASK_UNINTERRUPTIBLE)
 * @timeout:	 the pre-initialized and started timer, or NULL for none
 * @waiter:	 the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter)
{
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (likely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		raw_spin_unlock_irq(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}

static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
				     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	rt_mutex_print_deadlock(w);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  enum rtmutex_chainwalk chwalk)
{
	struct rt_mutex_waiter waiter;
	unsigned long flags;
	int ret = 0;

	rt_mutex_init_waiter(&waiter);

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
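	 *
	 * In other words (illustrative only), the difference is between
	 *
	 *	raw_spin_lock_irq(&lock->wait_lock);
	 *	raw_spin_unlock_irq(&lock->wait_lock);
	 *
	 * which unconditionally re-enables interrupts on unlock, and the
	 * variant used below, which restores the previous state:
	 *
	 *	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	 *	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	 *
	 * so an early boot caller that legitimately runs with interrupts
	 * disabled does not get them force-enabled behind its back.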
1249 */ 1250 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1251 1252 /* Try to acquire the lock again: */ 1253 if (try_to_take_rt_mutex(lock, current, NULL)) { 1254 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1255 return 0; 1256 } 1257 1258 set_current_state(state); 1259 1260 /* Setup the timer, when timeout != NULL */ 1261 if (unlikely(timeout)) 1262 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); 1263 1264 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); 1265 1266 if (likely(!ret)) 1267 /* sleep on the mutex */ 1268 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); 1269 1270 if (unlikely(ret)) { 1271 __set_current_state(TASK_RUNNING); 1272 remove_waiter(lock, &waiter); 1273 rt_mutex_handle_deadlock(ret, chwalk, &waiter); 1274 } 1275 1276 /* 1277 * try_to_take_rt_mutex() sets the waiter bit 1278 * unconditionally. We might have to fix that up. 1279 */ 1280 fixup_rt_mutex_waiters(lock); 1281 1282 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1283 1284 /* Remove pending timer: */ 1285 if (unlikely(timeout)) 1286 hrtimer_cancel(&timeout->timer); 1287 1288 debug_rt_mutex_free_waiter(&waiter); 1289 1290 return ret; 1291 } 1292 1293 static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock) 1294 { 1295 int ret = try_to_take_rt_mutex(lock, current, NULL); 1296 1297 /* 1298 * try_to_take_rt_mutex() sets the lock waiters bit 1299 * unconditionally. Clean this up. 1300 */ 1301 fixup_rt_mutex_waiters(lock); 1302 1303 return ret; 1304 } 1305 1306 /* 1307 * Slow path try-lock function: 1308 */ 1309 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) 1310 { 1311 unsigned long flags; 1312 int ret; 1313 1314 /* 1315 * If the lock already has an owner we fail to get the lock. 1316 * This can be done without taking the @lock->wait_lock as 1317 * it is only being read, and this is a trylock anyway. 1318 */ 1319 if (rt_mutex_owner(lock)) 1320 return 0; 1321 1322 /* 1323 * The mutex has currently no owner. Lock the wait lock and try to 1324 * acquire the lock. We use irqsave here to support early boot calls. 1325 */ 1326 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1327 1328 ret = __rt_mutex_slowtrylock(lock); 1329 1330 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1331 1332 return ret; 1333 } 1334 1335 /* 1336 * Slow path to release a rt-mutex. 1337 * 1338 * Return whether the current task needs to call rt_mutex_postunlock(). 1339 */ 1340 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, 1341 struct wake_q_head *wake_q) 1342 { 1343 unsigned long flags; 1344 1345 /* irqsave required to support early boot calls */ 1346 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1347 1348 debug_rt_mutex_unlock(lock); 1349 1350 /* 1351 * We must be careful here if the fast path is enabled. If we 1352 * have no waiters queued we cannot set owner to NULL here 1353 * because of: 1354 * 1355 * foo->lock->owner = NULL; 1356 * rtmutex_lock(foo->lock); <- fast path 1357 * free = atomic_dec_and_test(foo->refcnt); 1358 * rtmutex_unlock(foo->lock); <- fast path 1359 * if (free) 1360 * kfree(foo); 1361 * raw_spin_unlock(foo->lock->wait_lock); 1362 * 1363 * So for the fastpath enabled kernel: 1364 * 1365 * Nothing can set the waiters bit as long as we hold 1366 * lock->wait_lock. 
So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return false;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(wake_q, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return true; /* call rt_mutex_postunlock() */
}

/*
 * debug aware fast / slowpath lock, trylock, unlock
 *
 * The atomic acquire/release ops are compiled away, when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
		  int (*slowfn)(struct rt_mutex *lock, int state,
				struct hrtimer_sleeper *timeout,
				enum rtmutex_chainwalk chwalk))
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
}

static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
			struct hrtimer_sleeper *timeout,
			enum rtmutex_chainwalk chwalk,
			int (*slowfn)(struct rt_mutex *lock, int state,
				      struct hrtimer_sleeper *timeout,
				      enum rtmutex_chainwalk chwalk))
{
	if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
	    likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	return slowfn(lock, state, timeout, chwalk);
}

static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
		     int (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 1;

	return slowfn(lock);
}

/*
 * Performs the wakeup of the top waiter and re-enables preemption.
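 *
 * The expected calling pattern is a sketch of what rt_mutex_fastunlock()
 * below does:
 *
 *	DEFINE_WAKE_Q(wake_q);
 *
 *	if (rt_mutex_slowunlock(lock, &wake_q))
 *		rt_mutex_postunlock(&wake_q);
 *
 * The matching preempt_disable() was taken in mark_wakeup_next_waiter()
 * while the locks were still held; the wakeup and the preempt_enable()
 * happen here, after everything has been dropped.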
1446 */ 1447 void rt_mutex_postunlock(struct wake_q_head *wake_q) 1448 { 1449 wake_up_q(wake_q); 1450 1451 /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ 1452 preempt_enable(); 1453 } 1454 1455 static inline void 1456 rt_mutex_fastunlock(struct rt_mutex *lock, 1457 bool (*slowfn)(struct rt_mutex *lock, 1458 struct wake_q_head *wqh)) 1459 { 1460 DEFINE_WAKE_Q(wake_q); 1461 1462 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) 1463 return; 1464 1465 if (slowfn(lock, &wake_q)) 1466 rt_mutex_postunlock(&wake_q); 1467 } 1468 1469 static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) 1470 { 1471 might_sleep(); 1472 1473 mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); 1474 rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); 1475 } 1476 1477 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1478 /** 1479 * rt_mutex_lock_nested - lock a rt_mutex 1480 * 1481 * @lock: the rt_mutex to be locked 1482 * @subclass: the lockdep subclass 1483 */ 1484 void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) 1485 { 1486 __rt_mutex_lock(lock, subclass); 1487 } 1488 EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); 1489 1490 #else /* !CONFIG_DEBUG_LOCK_ALLOC */ 1491 1492 /** 1493 * rt_mutex_lock - lock a rt_mutex 1494 * 1495 * @lock: the rt_mutex to be locked 1496 */ 1497 void __sched rt_mutex_lock(struct rt_mutex *lock) 1498 { 1499 __rt_mutex_lock(lock, 0); 1500 } 1501 EXPORT_SYMBOL_GPL(rt_mutex_lock); 1502 #endif 1503 1504 /** 1505 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible 1506 * 1507 * @lock: the rt_mutex to be locked 1508 * 1509 * Returns: 1510 * 0 on success 1511 * -EINTR when interrupted by a signal 1512 */ 1513 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) 1514 { 1515 int ret; 1516 1517 might_sleep(); 1518 1519 mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); 1520 ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); 1521 if (ret) 1522 mutex_release(&lock->dep_map, 1, _RET_IP_); 1523 1524 return ret; 1525 } 1526 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); 1527 1528 /* 1529 * Futex variant, must not use fastpath. 1530 */ 1531 int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) 1532 { 1533 return rt_mutex_slowtrylock(lock); 1534 } 1535 1536 int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) 1537 { 1538 return __rt_mutex_slowtrylock(lock); 1539 } 1540 1541 /** 1542 * rt_mutex_timed_lock - lock a rt_mutex interruptible 1543 * the timeout structure is provided 1544 * by the caller 1545 * 1546 * @lock: the rt_mutex to be locked 1547 * @timeout: timeout structure or NULL (no timeout) 1548 * 1549 * Returns: 1550 * 0 on success 1551 * -EINTR when interrupted by a signal 1552 * -ETIMEDOUT when the timeout expired 1553 */ 1554 int 1555 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) 1556 { 1557 int ret; 1558 1559 might_sleep(); 1560 1561 mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); 1562 ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, 1563 RT_MUTEX_MIN_CHAINWALK, 1564 rt_mutex_slowlock); 1565 if (ret) 1566 mutex_release(&lock->dep_map, 1, _RET_IP_); 1567 1568 return ret; 1569 } 1570 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); 1571 1572 /** 1573 * rt_mutex_trylock - try to lock a rt_mutex 1574 * 1575 * @lock: the rt_mutex to be locked 1576 * 1577 * This function can only be called in thread context. It's safe to 1578 * call it from atomic regions, but not from hard interrupt or soft 1579 * interrupt context. 
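 *
 * Typical usage is the usual opportunistic pattern (sketch only, the
 * lock "my_lock" and the helpers are made up):
 *
 *	if (rt_mutex_trylock(&my_lock)) {
 *		do_quick_work();
 *		rt_mutex_unlock(&my_lock);
 *	} else {
 *		defer_or_fall_back();
 *	}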
1580 * 1581 * Returns 1 on success and 0 on contention 1582 */ 1583 int __sched rt_mutex_trylock(struct rt_mutex *lock) 1584 { 1585 int ret; 1586 1587 if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) 1588 return 0; 1589 1590 ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); 1591 if (ret) 1592 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); 1593 1594 return ret; 1595 } 1596 EXPORT_SYMBOL_GPL(rt_mutex_trylock); 1597 1598 /** 1599 * rt_mutex_unlock - unlock a rt_mutex 1600 * 1601 * @lock: the rt_mutex to be unlocked 1602 */ 1603 void __sched rt_mutex_unlock(struct rt_mutex *lock) 1604 { 1605 mutex_release(&lock->dep_map, 1, _RET_IP_); 1606 rt_mutex_fastunlock(lock, rt_mutex_slowunlock); 1607 } 1608 EXPORT_SYMBOL_GPL(rt_mutex_unlock); 1609 1610 /** 1611 * Futex variant, that since futex variants do not use the fast-path, can be 1612 * simple and will not need to retry. 1613 */ 1614 bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, 1615 struct wake_q_head *wake_q) 1616 { 1617 lockdep_assert_held(&lock->wait_lock); 1618 1619 debug_rt_mutex_unlock(lock); 1620 1621 if (!rt_mutex_has_waiters(lock)) { 1622 lock->owner = NULL; 1623 return false; /* done */ 1624 } 1625 1626 /* 1627 * We've already deboosted, mark_wakeup_next_waiter() will 1628 * retain preempt_disabled when we drop the wait_lock, to 1629 * avoid inversion prior to the wakeup. preempt_disable() 1630 * therein pairs with rt_mutex_postunlock(). 1631 */ 1632 mark_wakeup_next_waiter(wake_q, lock); 1633 1634 return true; /* call postunlock() */ 1635 } 1636 1637 void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) 1638 { 1639 DEFINE_WAKE_Q(wake_q); 1640 unsigned long flags; 1641 bool postunlock; 1642 1643 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1644 postunlock = __rt_mutex_futex_unlock(lock, &wake_q); 1645 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1646 1647 if (postunlock) 1648 rt_mutex_postunlock(&wake_q); 1649 } 1650 1651 /** 1652 * rt_mutex_destroy - mark a mutex unusable 1653 * @lock: the mutex to be destroyed 1654 * 1655 * This function marks the mutex uninitialized, and any subsequent 1656 * use of the mutex is forbidden. The mutex must not be locked when 1657 * this function is called. 1658 */ 1659 void rt_mutex_destroy(struct rt_mutex *lock) 1660 { 1661 WARN_ON(rt_mutex_is_locked(lock)); 1662 #ifdef CONFIG_DEBUG_RT_MUTEXES 1663 lock->magic = NULL; 1664 #endif 1665 } 1666 EXPORT_SYMBOL_GPL(rt_mutex_destroy); 1667 1668 /** 1669 * __rt_mutex_init - initialize the rt lock 1670 * 1671 * @lock: the rt lock to be initialized 1672 * 1673 * Initialize the rt lock to unlocked state. 1674 * 1675 * Initializing of a locked rt lock is not allowed 1676 */ 1677 void __rt_mutex_init(struct rt_mutex *lock, const char *name, 1678 struct lock_class_key *key) 1679 { 1680 lock->owner = NULL; 1681 raw_spin_lock_init(&lock->wait_lock); 1682 lock->waiters = RB_ROOT_CACHED; 1683 1684 if (name && key) 1685 debug_rt_mutex_init(lock, name, key); 1686 } 1687 EXPORT_SYMBOL_GPL(__rt_mutex_init); 1688 1689 /** 1690 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a 1691 * proxy owner 1692 * 1693 * @lock: the rt_mutex to be locked 1694 * @proxy_owner:the task to set as owner 1695 * 1696 * No locking. Caller has to do serializing itself 1697 * 1698 * Special API call for PI-futex support. This initializes the rtmutex and 1699 * assigns it to @proxy_owner. 
Concurrent operations on the rtmutex are not 1700 * possible at this point because the pi_state which contains the rtmutex 1701 * is not yet visible to other tasks. 1702 */ 1703 void rt_mutex_init_proxy_locked(struct rt_mutex *lock, 1704 struct task_struct *proxy_owner) 1705 { 1706 __rt_mutex_init(lock, NULL, NULL); 1707 debug_rt_mutex_proxy_lock(lock, proxy_owner); 1708 rt_mutex_set_owner(lock, proxy_owner); 1709 } 1710 1711 /** 1712 * rt_mutex_proxy_unlock - release a lock on behalf of owner 1713 * 1714 * @lock: the rt_mutex to be locked 1715 * 1716 * No locking. Caller has to do serializing itself 1717 * 1718 * Special API call for PI-futex support. This merrily cleans up the rtmutex 1719 * (debugging) state. Concurrent operations on this rt_mutex are not 1720 * possible because it belongs to the pi_state which is about to be freed 1721 * and it is not longer visible to other tasks. 1722 */ 1723 void rt_mutex_proxy_unlock(struct rt_mutex *lock, 1724 struct task_struct *proxy_owner) 1725 { 1726 debug_rt_mutex_proxy_unlock(lock); 1727 rt_mutex_set_owner(lock, NULL); 1728 } 1729 1730 /** 1731 * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task 1732 * @lock: the rt_mutex to take 1733 * @waiter: the pre-initialized rt_mutex_waiter 1734 * @task: the task to prepare 1735 * 1736 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock 1737 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. 1738 * 1739 * NOTE: does _NOT_ remove the @waiter on failure; must either call 1740 * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. 1741 * 1742 * Returns: 1743 * 0 - task blocked on lock 1744 * 1 - acquired the lock for task, caller should wake it up 1745 * <0 - error 1746 * 1747 * Special API call for PI-futex support. 1748 */ 1749 int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1750 struct rt_mutex_waiter *waiter, 1751 struct task_struct *task) 1752 { 1753 int ret; 1754 1755 lockdep_assert_held(&lock->wait_lock); 1756 1757 if (try_to_take_rt_mutex(lock, task, NULL)) 1758 return 1; 1759 1760 /* We enforce deadlock detection for futexes */ 1761 ret = task_blocks_on_rt_mutex(lock, waiter, task, 1762 RT_MUTEX_FULL_CHAINWALK); 1763 1764 if (ret && !rt_mutex_owner(lock)) { 1765 /* 1766 * Reset the return value. We might have 1767 * returned with -EDEADLK and the owner 1768 * released the lock while we were walking the 1769 * pi chain. Let the waiter sort it out. 1770 */ 1771 ret = 0; 1772 } 1773 1774 debug_rt_mutex_print_deadlock(waiter); 1775 1776 return ret; 1777 } 1778 1779 /** 1780 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task 1781 * @lock: the rt_mutex to take 1782 * @waiter: the pre-initialized rt_mutex_waiter 1783 * @task: the task to prepare 1784 * 1785 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock 1786 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. 1787 * 1788 * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter 1789 * on failure. 1790 * 1791 * Returns: 1792 * 0 - task blocked on lock 1793 * 1 - acquired the lock for task, caller should wake it up 1794 * <0 - error 1795 * 1796 * Special API call for PI-futex support. 
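 *
 * A rough sketch of the intended calling sequence (the real callers
 * live in the PI-futex code):
 *
 *	ret = rt_mutex_start_proxy_lock(lock, &waiter, task);
 *	if (ret == 1)
 *		... @task already owns @lock, just wake it ...
 *
 *	... later, from the context of @task itself ...
 *
 *	ret = rt_mutex_wait_proxy_lock(lock, to, &waiter);
 *	if (ret && !rt_mutex_cleanup_proxy_lock(lock, &waiter))
 *		ret = 0;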
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);
	ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
	if (unlikely(ret))
		remove_waiter(lock, waiter);
	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}

/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt lock to query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * itself.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		return NULL;

	return rt_mutex_top_waiter(lock)->task;
}

/**
 * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
 * @lock:	the rt_mutex we were woken on
 * @to:		the timeout, null if none. hrtimer should already have
 *		been started.
 * @waiter:	the pre-initialized rt_mutex_waiter
 *
 * Wait for the lock acquisition started on our behalf by
 * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
 * rt_mutex_cleanup_proxy_lock().
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT
 *
 * Special API call for PI-futex support
 */
int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
			     struct hrtimer_sleeper *to,
			     struct rt_mutex_waiter *waiter)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);
	/* sleep on the mutex */
	set_current_state(TASK_INTERRUPTIBLE);
	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}

/**
 * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
 * @lock:	the rt_mutex we were woken on
 * @waiter:	the pre-initialized rt_mutex_waiter
 *
 * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
 * rt_mutex_wait_proxy_lock().
 *
 * Unless we acquired the lock, we're still enqueued on the wait-list and can
 * in fact still be granted ownership until we're removed. Therefore we can
 * find we are in fact the owner and must disregard the
 * rt_mutex_wait_proxy_lock() failure.
 *
 * Returns:
 *  true  - did the cleanup, we are done.
 *  false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
 *          caller should disregard its return value.
 *
 * Special API call for PI-futex support
 */
bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
				 struct rt_mutex_waiter *waiter)
{
	bool cleanup = false;

	raw_spin_lock_irq(&lock->wait_lock);
	/*
	 * Do an unconditional try-lock, this deals with the lock stealing
	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
	 * sets a NULL owner.
	 *
	 * We're not interested in the return value, because the subsequent
	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
	 * we will own the lock and it will have removed the waiter. If we
	 * failed the trylock, we're still not owner and we need to remove
	 * ourselves.
1906 */ 1907 try_to_take_rt_mutex(lock, current, waiter); 1908 /* 1909 * Unless we're the owner; we're still enqueued on the wait_list. 1910 * So check if we became owner, if not, take us off the wait_list. 1911 */ 1912 if (rt_mutex_owner(lock) != current) { 1913 remove_waiter(lock, waiter); 1914 cleanup = true; 1915 } 1916 /* 1917 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might 1918 * have to fix that up. 1919 */ 1920 fixup_rt_mutex_waiters(lock); 1921 1922 raw_spin_unlock_irq(&lock->wait_lock); 1923 1924 return cleanup; 1925 } 1926
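
/*
 * Basic usage sketch of the API implemented above (illustrative only,
 * not part of the kernel; the "my_dev" structure is made up):
 *
 *	struct my_dev {
 *		struct rt_mutex lock;
 *		int counter;
 *	};
 *
 *	rt_mutex_init(&dev->lock);
 *
 *	rt_mutex_lock(&dev->lock);
 *	dev->counter++;
 *	rt_mutex_unlock(&dev->lock);
 *
 * While a low priority task holds dev->lock, any higher priority RT or
 * DL task that blocks in rt_mutex_lock() boosts the owner through the
 * PI chain walk implemented in this file, and the owner is deboosted
 * again in the unlock path once the lock is handed over.
 */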