// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 *
 * See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/debug.h>
#include <linux/timer.h>

#include "rtmutex_common.h"

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set the bit0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
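
/*
 * Illustrative sketch (not used by the implementation): how the owner task
 * pointer and the waiters bit share a single word. The helper names below
 * are made up for this example; the real accessors such as rt_mutex_owner()
 * live in rtmutex_common.h.
 *
 *	static struct task_struct *example_owner(struct rt_mutex *lock)
 *	{
 *		unsigned long val = (unsigned long)READ_ONCE(lock->owner);
 *
 *		return (struct task_struct *)(val & ~RT_MUTEX_HAS_WAITERS);
 *	}
 *
 *	static bool example_has_waiters(struct rt_mutex *lock)
 *	{
 *		unsigned long val = (unsigned long)READ_ONCE(lock->owner);
 *
 *		return val & RT_MUTEX_HAS_WAITERS;
 *	}
 */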

static void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	WRITE_ONCE(lock->owner, (struct task_struct *)val);
}

static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0	CPU 1		CPU2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *				rt_mutex_lock(l)
	 *				lock(l->lock)
	 *				l->owner = T1 | HAS_WAITERS;
	 *				enqueue(T3)
	 *				boost()
	 *				  unlock(l->lock)
	 *				block()
	 *		signal(->T2)	signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *				lock(l->lock)
	 *				dequeue(T3)
	 *				 ==> wait list is empty
	 *				deboost()
	 *				  unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l)) {
	 *		    l->owner = owner
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		      ==> l->owner = T1
	 *		  }
	 *				lock(l->lock)
	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
	 *				  if (wait_list_empty(l)) {
	 *				    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *				    l->owner = owner
	 *				      ==> l->owner = T1
	 *				  }
	 *
	 * With the check for the waiter bit in place T3 on CPU2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS)
		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
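
/*
 * Illustrative sketch of the uncontended fast path built on the helpers
 * above; it mirrors rt_mutex_fastlock()/rt_mutex_fastunlock() further down.
 * The function names are made up for this example.
 *
 *	static bool example_fast_lock(struct rt_mutex *lock)
 *	{
 *		return rt_mutex_cmpxchg_acquire(lock, NULL, current);
 *	}
 *
 *	static bool example_fast_unlock(struct rt_mutex *lock)
 *	{
 *		return rt_mutex_cmpxchg_release(lock, current, NULL);
 *	}
 *
 * On failure (owner != NULL or the waiters bit is set) the caller must fall
 * back to the slow path, which takes lock->wait_lock.
 */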

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg_relaxed(p, owner,
				 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
					unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
# define rt_mutex_cmpxchg_relaxed(l,c,n)	(0)
# define rt_mutex_cmpxchg_acquire(l,c,n)	(0)
# define rt_mutex_cmpxchg_release(l,c,n)	(0)

static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
					unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p)	\
	&(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }

static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
		     struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->deadline, right->deadline);

	return 0;
}

static inline int
rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
		      struct rt_mutex_waiter *right)
{
	if (left->prio != right->prio)
		return 0;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 0 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return left->deadline == right->deadline;

	return 1;
}
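
/*
 * Worked ordering example (values are hypothetical): with kernel-view
 * priorities a lower ->prio means more important, and deadline tasks
 * (prio == -1) always win, earliest deadline first. Given four waiters
 *
 *	A: prio = 10			(RT)
 *	B: prio = 20			(RT)
 *	C: prio = -1, deadline = 1000	(DL)
 *	D: prio = -1, deadline = 2000	(DL)
 *
 * rt_mutex_waiter_less() orders them C < D < A < B, so C becomes the
 * leftmost (top) waiter in the tree built by rt_mutex_enqueue() below.
 */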

static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &lock->waiters.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	bool leftmost = true;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&waiter->tree_entry, parent, link);
	rb_insert_color_cached(&waiter->tree_entry, &lock->waiters, leftmost);
}

static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	rb_erase_cached(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &task->pi_waiters.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	bool leftmost = true;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&waiter->pi_tree_entry, parent, link);
	rb_insert_color_cached(&waiter->pi_tree_entry, &task->pi_waiters, leftmost);
}

static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

static void rt_mutex_adjust_prio(struct task_struct *p)
{
	struct task_struct *pi_task = NULL;

	lockdep_assert_held(&p->pi_lock);

	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

	rt_mutex_setprio(p, pi_task);
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
					  enum rtmutex_chainwalk chwalk)
{
	/*
	 * This is just a wrapper function for the following call,
	 * because debug_rt_mutex_detect_deadlock() smells like a magic
	 * debug feature and I wanted to keep the cond function in the
	 * main source file along with the comments instead of having
	 * two of the same in the headers.
	 */
	return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}

/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;

static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}
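
/*
 * Illustrative scenario (task and lock names are hypothetical) for the
 * chain walk implemented below:
 *
 *	T1 (prio 10) blocks on L1, which is owned by T2
 *	T2 (prio 20) blocks on L2, which is owned by T3
 *	T3 (prio 30) is running
 *
 * Boosting T2 to prio 10 is not sufficient because T2 itself is blocked.
 * rt_mutex_adjust_prio_chain() therefore follows T2->pi_blocked_on to L2
 * and boosts T3 as well, one step per iteration, never holding more than
 * one task->pi_lock and one lock->wait_lock at a time.
 */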

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 * again:
 *	loop_sanity_check();
 * retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      enum rtmutex_chainwalk chwalk,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex *next_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
		raw_spin_unlock(&lock->wait_lock);
		ret = -EDEADLK;
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue[7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = get_task_struct(rt_mutex_owner(lock));
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);

	/*
	 * Update the waiter prio fields now that we're dequeued.
	 *
	 * These values can have changed through either:
	 *
	 *   sys_sched_set_scheduler() / sys_sched_setattr()
	 *
	 * or
	 *
	 *   DL CBS enforcement advancing the effective deadline.
	 *
	 * Even though pi_waiters also uses these fields, and that tree is only
	 * updated in [11], we can do this here, since we hold [L], which
	 * serializes all pi_waiters access and rb_erase() does not care about
	 * the values of the node being removed.
	 */
	waiter->prio = task->prio;
	waiter->deadline = task->dl.deadline;

	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
			wake_up_process(rt_mutex_top_waiter(lock)->task);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = get_task_struct(rt_mutex_owner(lock));
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
				struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		/*
		 * If waiter is not the highest priority waiter of
		 * @lock, give up.
		 */
		if (waiter != rt_mutex_top_waiter(lock))
			return 0;

		/*
		 * We can acquire the lock. Remove the waiter from the
		 * lock waiters tree.
		 */
		rt_mutex_dequeue(lock, waiter);

	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/*
			 * If @task->prio is greater than or equal to
			 * the top waiter priority (kernel view),
			 * @task lost.
			 */
			if (!rt_mutex_waiter_less(task_to_waiter(task),
						  rt_mutex_top_waiter(lock)))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

takeit:
	/* We got the lock. */
	debug_rt_mutex_lock(lock);

	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex *next_lock;
	int chain_walk = 0, res;

	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	waiter->task = task;
	waiter->lock = lock;
	waiter->prio = task->prio;
	waiter->deadline = task->dl.deadline;

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
				    struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_postunlock();
	 */
	preempt_disable();
	wake_q_add(wake_q, waiter->task);
	raw_spin_unlock(&current->pi_lock);
}

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex *next_lock;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	next_lock = waiter->lock;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
				   next_lock, NULL, task);
}

void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
	debug_rt_mutex_init_waiter(waiter);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
	RB_CLEAR_NODE(&waiter->tree_entry);
	waiter->task = NULL;
}

/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock:		the rt_mutex to take
 * @state:		the state the task should block in (TASK_INTERRUPTIBLE
 *			or TASK_UNINTERRUPTIBLE)
 * @timeout:		the pre-initialized and started timer, or NULL for none
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter)
{
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (likely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		raw_spin_unlock_irq(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}

static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
				     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	rt_mutex_print_deadlock(w);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  enum rtmutex_chainwalk chwalk)
{
	struct rt_mutex_waiter waiter;
	unsigned long flags;
	int ret = 0;

	rt_mutex_init_waiter(&waiter);

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout))
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);

	if (likely(!ret))
		/* sleep on the mutex */
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

	if (unlikely(ret)) {
		__set_current_state(TASK_RUNNING);
		remove_waiter(lock, &waiter);
		rt_mutex_handle_deadlock(ret, chwalk, &waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}

static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
{
	int ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = __rt_mutex_slowtrylock(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

/*
 * Slow path to release a rt-mutex.
 *
 * Return whether the current task needs to call rt_mutex_postunlock().
 */
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
					struct wake_q_head *wake_q)
{
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock); <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return false;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(wake_q, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return true; /* call rt_mutex_postunlock() */
}

/*
 * debug aware fast / slowpath lock,trylock,unlock
 *
 * The atomic acquire/release ops are compiled away, when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
		  int (*slowfn)(struct rt_mutex *lock, int state,
				struct hrtimer_sleeper *timeout,
				enum rtmutex_chainwalk chwalk))
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
}

static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
			struct hrtimer_sleeper *timeout,
			enum rtmutex_chainwalk chwalk,
			int (*slowfn)(struct rt_mutex *lock, int state,
				      struct hrtimer_sleeper *timeout,
				      enum rtmutex_chainwalk chwalk))
{
	if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
	    likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	return slowfn(lock, state, timeout, chwalk);
}

static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
		     int (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 1;

	return slowfn(lock);
}

/*
 * Performs the wakeup of the top-waiter and re-enables preemption.
 */
void rt_mutex_postunlock(struct wake_q_head *wake_q)
{
	wake_up_q(wake_q);

	/* Pairs with preempt_disable() in rt_mutex_slowunlock() */
	preempt_enable();
}

static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
		    bool (*slowfn)(struct rt_mutex *lock,
				   struct wake_q_head *wqh))
{
	DEFINE_WAKE_Q(wake_q);

	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
		return;

	if (slowfn(lock, &wake_q))
		rt_mutex_postunlock(&wake_q);
}

static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass)
{
	might_sleep();

	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
 * rt_mutex_lock_nested - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 * @subclass: the lockdep subclass
 */
void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
{
	__rt_mutex_lock(lock, subclass);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);

#else /* !CONFIG_DEBUG_LOCK_ALLOC */

/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	__rt_mutex_lock(lock, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
#endif

/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock:		the rt_mutex to be locked
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
	int ret;

	might_sleep();

	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
	if (ret)
		mutex_release(&lock->dep_map, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

/*
 * Futex variant, must not use fastpath.
 */
int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
{
	return rt_mutex_slowtrylock(lock);
}

int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
{
	return __rt_mutex_slowtrylock(lock);
}

/**
 * rt_mutex_timed_lock - lock a rt_mutex interruptible
 *			the timeout structure is provided
 *			by the caller
 *
 * @lock:		the rt_mutex to be locked
 * @timeout:		timeout structure or NULL (no timeout)
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 * -ETIMEDOUT	when the timeout expired
 */
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
{
	int ret;

	might_sleep();

	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				      RT_MUTEX_MIN_CHAINWALK,
				      rt_mutex_slowlock);
	if (ret)
		mutex_release(&lock->dep_map, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
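
/*
 * Minimal usage sketch of the sleeping-lock API above. my_mutex, my_work()
 * and do_something_locked() are made up for this example; rt_mutex_unlock()
 * is defined below.
 *
 *	static DEFINE_RT_MUTEX(my_mutex);
 *
 *	static void my_work(void)
 *	{
 *		rt_mutex_lock(&my_mutex);
 *		do_something_locked();
 *		rt_mutex_unlock(&my_mutex);
 *	}
 *
 * rt_mutex_lock_interruptible() is the variant to use when the caller must
 * react to signals; it returns -EINTR in that case.
 */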

/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock:	the rt_mutex to be locked
 *
 * This function can only be called in thread context. It's safe to
 * call it from atomic regions, but not from hard interrupt or soft
 * interrupt context.
 *
 * Returns 1 on success and 0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	int ret;

	if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
		return 0;

	ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
	if (ret)
		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);

/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	mutex_release(&lock->dep_map, _RET_IP_);
	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);

/**
 * Futex variant: since futex variants do not use the fast-path, this can be
 * simple and will not need to retry.
 */
bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
				     struct wake_q_head *wake_q)
{
	lockdep_assert_held(&lock->wait_lock);

	debug_rt_mutex_unlock(lock);

	if (!rt_mutex_has_waiters(lock)) {
		lock->owner = NULL;
		return false; /* done */
	}

	/*
	 * We've already deboosted, mark_wakeup_next_waiter() will
	 * retain preempt_disabled when we drop the wait_lock, to
	 * avoid inversion prior to the wakeup. preempt_disable()
	 * therein pairs with rt_mutex_postunlock().
	 */
	mark_wakeup_next_waiter(wake_q, lock);

	return true; /* call postunlock() */
}

void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
{
	DEFINE_WAKE_Q(wake_q);
	unsigned long flags;
	bool postunlock;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	if (postunlock)
		rt_mutex_postunlock(&wake_q);
}

/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	lock->magic = NULL;
#endif
}
EXPORT_SYMBOL_GPL(rt_mutex_destroy);

/**
 * __rt_mutex_init - initialize the rt lock
 *
 * @lock:	the rt lock to be initialized
 *
 * Initialize the rt lock to unlocked state.
 *
 * Initializing of a locked rt lock is not allowed
 */
void __rt_mutex_init(struct rt_mutex *lock, const char *name,
		     struct lock_class_key *key)
{
	lock->owner = NULL;
	raw_spin_lock_init(&lock->wait_lock);
	lock->waiters = RB_ROOT_CACHED;

	if (name && key)
		debug_rt_mutex_init(lock, name, key);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);
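
/*
 * Sketch of run-time initialization for an embedded rtmutex. struct my_obj
 * and my_obj_init() are made up for this example; rt_mutex_init() is the
 * usual wrapper (see include/linux/rtmutex.h) and ends up in
 * __rt_mutex_init() above.
 *
 *	struct my_obj {
 *		struct rt_mutex lock;
 *	};
 *
 *	static void my_obj_init(struct my_obj *obj)
 *	{
 *		rt_mutex_init(&obj->lock);
 *	}
 */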

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock:	the rt_mutex to be locked
 * @proxy_owner:the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 *
 * Special API call for PI-futex support. This initializes the rtmutex and
 * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
 * possible at this point because the pi_state which contains the rtmutex
 * is not yet visible to other tasks.
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				struct task_struct *proxy_owner)
{
	__rt_mutex_init(lock, NULL, NULL);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	rt_mutex_set_owner(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock:	the rt_mutex to be locked
 *
 * No locking. Caller has to do serializing itself
 *
 * Special API call for PI-futex support. This merrily cleans up the rtmutex
 * (debugging) state. Concurrent operations on this rt_mutex are not
 * possible because it belongs to the pi_state which is about to be freed
 * and it is no longer visible to other tasks.
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);
}

/**
 * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
 *
 * NOTE: does _NOT_ remove the @waiter on failure; must either call
 * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for PI-futex support.
 */
int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
				struct rt_mutex_waiter *waiter,
				struct task_struct *task)
{
	int ret;

	lockdep_assert_held(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL))
		return 1;

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task,
				      RT_MUTEX_FULL_CHAINWALK);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain. Let the waiter sort it out.
		 */
		ret = 0;
	}

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}

/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
 *
 * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
 * on failure.
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for PI-futex support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);
	ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
	if (unlikely(ret))
		remove_waiter(lock, waiter);
	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}

/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt lock to query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * itself.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		return NULL;

	return rt_mutex_top_waiter(lock)->task;
}

/**
 * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, null if none. hrtimer should already have
 *			been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Wait for the lock acquisition started on our behalf by
 * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
 * rt_mutex_cleanup_proxy_lock().
 *
 * Returns:
 *  0		- success
 * <0		- error, one of -EINTR, -ETIMEDOUT
 *
 * Special API call for PI-futex support
 */
int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
			     struct hrtimer_sleeper *to,
			     struct rt_mutex_waiter *waiter)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);
	/* sleep on the mutex */
	set_current_state(TASK_INTERRUPTIBLE);
	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}

/**
 * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
 * rt_mutex_wait_proxy_lock().
 *
 * Unless we acquired the lock, we're still enqueued on the wait-list and can
 * in fact still be granted ownership until we're removed. Therefore we can
 * find we are in fact the owner and must disregard the
 * rt_mutex_wait_proxy_lock() failure.
 *
 * Returns:
 *  true  - did the cleanup, we are done.
 *  false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
 *	    caller should disregard its return value.
 *
 * Special API call for PI-futex support
 */
bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
				 struct rt_mutex_waiter *waiter)
{
	bool cleanup = false;

	raw_spin_lock_irq(&lock->wait_lock);
	/*
	 * Do an unconditional try-lock, this deals with the lock stealing
	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
	 * sets a NULL owner.
	 *
	 * We're not interested in the return value, because the subsequent
	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
	 * we will own the lock and it will have removed the waiter. If we
	 * failed the trylock, we're still not owner and we need to remove
	 * ourselves.
	 */
	try_to_take_rt_mutex(lock, current, waiter);
	/*
	 * Unless we're the owner, we're still enqueued on the wait_list.
	 * So check if we became owner, if not, take us off the wait_list.
	 */
	if (rt_mutex_owner(lock) != current) {
		remove_waiter(lock, waiter);
		cleanup = true;
	}
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irq(&lock->wait_lock);

	return cleanup;
}
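
/*
 * Hedged sketch of how the proxy-lock primitives above are combined by the
 * PI-futex code (error handling trimmed; see futex_lock_pi() and the
 * requeue-PI paths in kernel/futex.c for the real callers):
 *
 *	raw_spin_lock_irq(&lock->wait_lock);
 *	ret = __rt_mutex_start_proxy_lock(lock, &waiter, task);
 *	raw_spin_unlock_irq(&lock->wait_lock);
 *	...
 *	ret = rt_mutex_wait_proxy_lock(lock, to, &waiter);
 *	if (ret && !rt_mutex_cleanup_proxy_lock(lock, &waiter))
 *		ret = 0;	(the lock was acquired after all)
 */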