/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 *  Copyright (C) 2006 Esben Nielsen
 *
 *  See Documentation/locking/rt-mutex-design.txt for details.
 */
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>

#include "rtmutex_common.h"

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner        bit0
 * NULL         0       lock is free (fast acquire possible)
 * NULL         1       lock is free and has waiters and the top waiter
 *                      is going to take the lock*
 * taskpointer  0       lock is held (fast release possible)
 * taskpointer  1       lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set the bit0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
47 */ 48 49 static void 50 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) 51 { 52 unsigned long val = (unsigned long)owner; 53 54 if (rt_mutex_has_waiters(lock)) 55 val |= RT_MUTEX_HAS_WAITERS; 56 57 lock->owner = (struct task_struct *)val; 58 } 59 60 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) 61 { 62 lock->owner = (struct task_struct *) 63 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 64 } 65 66 static void fixup_rt_mutex_waiters(struct rt_mutex *lock) 67 { 68 unsigned long owner, *p = (unsigned long *) &lock->owner; 69 70 if (rt_mutex_has_waiters(lock)) 71 return; 72 73 /* 74 * The rbtree has no waiters enqueued, now make sure that the 75 * lock->owner still has the waiters bit set, otherwise the 76 * following can happen: 77 * 78 * CPU 0 CPU 1 CPU2 79 * l->owner=T1 80 * rt_mutex_lock(l) 81 * lock(l->lock) 82 * l->owner = T1 | HAS_WAITERS; 83 * enqueue(T2) 84 * boost() 85 * unlock(l->lock) 86 * block() 87 * 88 * rt_mutex_lock(l) 89 * lock(l->lock) 90 * l->owner = T1 | HAS_WAITERS; 91 * enqueue(T3) 92 * boost() 93 * unlock(l->lock) 94 * block() 95 * signal(->T2) signal(->T3) 96 * lock(l->lock) 97 * dequeue(T2) 98 * deboost() 99 * unlock(l->lock) 100 * lock(l->lock) 101 * dequeue(T3) 102 * ==> wait list is empty 103 * deboost() 104 * unlock(l->lock) 105 * lock(l->lock) 106 * fixup_rt_mutex_waiters() 107 * if (wait_list_empty(l) { 108 * l->owner = owner 109 * owner = l->owner & ~HAS_WAITERS; 110 * ==> l->owner = T1 111 * } 112 * lock(l->lock) 113 * rt_mutex_unlock(l) fixup_rt_mutex_waiters() 114 * if (wait_list_empty(l) { 115 * owner = l->owner & ~HAS_WAITERS; 116 * cmpxchg(l->owner, T1, NULL) 117 * ===> Success (l->owner = NULL) 118 * 119 * l->owner = owner 120 * ==> l->owner = T1 121 * } 122 * 123 * With the check for the waiter bit in place T3 on CPU2 will not 124 * overwrite. All tasks fiddling with the waiters bit are 125 * serialized by l->lock, so nothing else can modify the waiters 126 * bit. 
If the bit is set then nothing can change l->owner either 127 * so the simple RMW is safe. The cmpxchg() will simply fail if it 128 * happens in the middle of the RMW because the waiters bit is 129 * still set. 130 */ 131 owner = READ_ONCE(*p); 132 if (owner & RT_MUTEX_HAS_WAITERS) 133 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); 134 } 135 136 /* 137 * We can speed up the acquire/release, if there's no debugging state to be 138 * set up. 139 */ 140 #ifndef CONFIG_DEBUG_RT_MUTEXES 141 # define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c) 142 # define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c) 143 # define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c) 144 145 /* 146 * Callers must hold the ->wait_lock -- which is the whole purpose as we force 147 * all future threads that attempt to [Rmw] the lock to the slowpath. As such 148 * relaxed semantics suffice. 149 */ 150 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 151 { 152 unsigned long owner, *p = (unsigned long *) &lock->owner; 153 154 do { 155 owner = *p; 156 } while (cmpxchg_relaxed(p, owner, 157 owner | RT_MUTEX_HAS_WAITERS) != owner); 158 } 159 160 /* 161 * Safe fastpath aware unlock: 162 * 1) Clear the waiters bit 163 * 2) Drop lock->wait_lock 164 * 3) Try to unlock the lock with cmpxchg 165 */ 166 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 167 unsigned long flags) 168 __releases(lock->wait_lock) 169 { 170 struct task_struct *owner = rt_mutex_owner(lock); 171 172 clear_rt_mutex_waiters(lock); 173 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 174 /* 175 * If a new waiter comes in between the unlock and the cmpxchg 176 * we have two situations: 177 * 178 * unlock(wait_lock); 179 * lock(wait_lock); 180 * cmpxchg(p, owner, 0) == owner 181 * mark_rt_mutex_waiters(lock); 182 * acquire(lock); 183 * or: 184 * 185 * unlock(wait_lock); 186 * lock(wait_lock); 187 * mark_rt_mutex_waiters(lock); 188 
* 189 * cmpxchg(p, owner, 0) != owner 190 * enqueue_waiter(); 191 * unlock(wait_lock); 192 * lock(wait_lock); 193 * wake waiter(); 194 * unlock(wait_lock); 195 * lock(wait_lock); 196 * acquire(lock); 197 */ 198 return rt_mutex_cmpxchg_release(lock, owner, NULL); 199 } 200 201 #else 202 # define rt_mutex_cmpxchg_relaxed(l,c,n) (0) 203 # define rt_mutex_cmpxchg_acquire(l,c,n) (0) 204 # define rt_mutex_cmpxchg_release(l,c,n) (0) 205 206 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 207 { 208 lock->owner = (struct task_struct *) 209 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 210 } 211 212 /* 213 * Simple slow path only version: lock->owner is protected by lock->wait_lock. 214 */ 215 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 216 unsigned long flags) 217 __releases(lock->wait_lock) 218 { 219 lock->owner = NULL; 220 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 221 return true; 222 } 223 #endif 224 225 static inline int 226 rt_mutex_waiter_less(struct rt_mutex_waiter *left, 227 struct rt_mutex_waiter *right) 228 { 229 if (left->prio < right->prio) 230 return 1; 231 232 /* 233 * If both waiters have dl_prio(), we check the deadlines of the 234 * associated tasks. 235 * If left waiter has a dl_prio(), and we didn't return 1 above, 236 * then right waiter has a dl_prio() too. 
237 */ 238 if (dl_prio(left->prio)) 239 return dl_time_before(left->task->dl.deadline, 240 right->task->dl.deadline); 241 242 return 0; 243 } 244 245 static void 246 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 247 { 248 struct rb_node **link = &lock->waiters.rb_node; 249 struct rb_node *parent = NULL; 250 struct rt_mutex_waiter *entry; 251 int leftmost = 1; 252 253 while (*link) { 254 parent = *link; 255 entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); 256 if (rt_mutex_waiter_less(waiter, entry)) { 257 link = &parent->rb_left; 258 } else { 259 link = &parent->rb_right; 260 leftmost = 0; 261 } 262 } 263 264 if (leftmost) 265 lock->waiters_leftmost = &waiter->tree_entry; 266 267 rb_link_node(&waiter->tree_entry, parent, link); 268 rb_insert_color(&waiter->tree_entry, &lock->waiters); 269 } 270 271 static void 272 rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 273 { 274 if (RB_EMPTY_NODE(&waiter->tree_entry)) 275 return; 276 277 if (lock->waiters_leftmost == &waiter->tree_entry) 278 lock->waiters_leftmost = rb_next(&waiter->tree_entry); 279 280 rb_erase(&waiter->tree_entry, &lock->waiters); 281 RB_CLEAR_NODE(&waiter->tree_entry); 282 } 283 284 static void 285 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 286 { 287 struct rb_node **link = &task->pi_waiters.rb_node; 288 struct rb_node *parent = NULL; 289 struct rt_mutex_waiter *entry; 290 int leftmost = 1; 291 292 while (*link) { 293 parent = *link; 294 entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); 295 if (rt_mutex_waiter_less(waiter, entry)) { 296 link = &parent->rb_left; 297 } else { 298 link = &parent->rb_right; 299 leftmost = 0; 300 } 301 } 302 303 if (leftmost) 304 task->pi_waiters_leftmost = &waiter->pi_tree_entry; 305 306 rb_link_node(&waiter->pi_tree_entry, parent, link); 307 rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters); 308 } 309 310 static void 311 rt_mutex_dequeue_pi(struct 
task_struct *task, struct rt_mutex_waiter *waiter) 312 { 313 if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) 314 return; 315 316 if (task->pi_waiters_leftmost == &waiter->pi_tree_entry) 317 task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry); 318 319 rb_erase(&waiter->pi_tree_entry, &task->pi_waiters); 320 RB_CLEAR_NODE(&waiter->pi_tree_entry); 321 } 322 323 /* 324 * Calculate task priority from the waiter tree priority 325 * 326 * Return task->normal_prio when the waiter tree is empty or when 327 * the waiter is not allowed to do priority boosting 328 */ 329 int rt_mutex_getprio(struct task_struct *task) 330 { 331 if (likely(!task_has_pi_waiters(task))) 332 return task->normal_prio; 333 334 return min(task_top_pi_waiter(task)->prio, 335 task->normal_prio); 336 } 337 338 struct task_struct *rt_mutex_get_top_task(struct task_struct *task) 339 { 340 if (likely(!task_has_pi_waiters(task))) 341 return NULL; 342 343 return task_top_pi_waiter(task)->task; 344 } 345 346 /* 347 * Called by sched_setscheduler() to get the priority which will be 348 * effective after the change. 349 */ 350 int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) 351 { 352 if (!task_has_pi_waiters(task)) 353 return newprio; 354 355 if (task_top_pi_waiter(task)->task->prio <= newprio) 356 return task_top_pi_waiter(task)->task->prio; 357 return newprio; 358 } 359 360 /* 361 * Adjust the priority of a task, after its pi_waiters got modified. 362 * 363 * This can be both boosting and unboosting. task->pi_lock must be held. 364 */ 365 static void __rt_mutex_adjust_prio(struct task_struct *task) 366 { 367 int prio = rt_mutex_getprio(task); 368 369 if (task->prio != prio || dl_prio(prio)) 370 rt_mutex_setprio(task, prio); 371 } 372 373 /* 374 * Adjust task priority (undo boosting). Called from the exit path of 375 * rt_mutex_slowunlock() and rt_mutex_slowlock(). 
376 * 377 * (Note: We do this outside of the protection of lock->wait_lock to 378 * allow the lock to be taken while or before we readjust the priority 379 * of task. We do not use the spin_xx_mutex() variants here as we are 380 * outside of the debug path.) 381 */ 382 void rt_mutex_adjust_prio(struct task_struct *task) 383 { 384 unsigned long flags; 385 386 raw_spin_lock_irqsave(&task->pi_lock, flags); 387 __rt_mutex_adjust_prio(task); 388 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 389 } 390 391 /* 392 * Deadlock detection is conditional: 393 * 394 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted 395 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. 396 * 397 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always 398 * conducted independent of the detect argument. 399 * 400 * If the waiter argument is NULL this indicates the deboost path and 401 * deadlock detection is disabled independent of the detect argument 402 * and the config settings. 403 */ 404 static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, 405 enum rtmutex_chainwalk chwalk) 406 { 407 /* 408 * This is just a wrapper function for the following call, 409 * because debug_rt_mutex_detect_deadlock() smells like a magic 410 * debug feature and I wanted to keep the cond function in the 411 * main source file along with the comments instead of having 412 * two of the same in the headers. 413 */ 414 return debug_rt_mutex_detect_deadlock(waiter, chwalk); 415 } 416 417 /* 418 * Max number of times we'll walk the boosting chain: 419 */ 420 int max_lock_depth = 1024; 421 422 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) 423 { 424 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; 425 } 426 427 /* 428 * Adjust the priority chain. Also used for deadlock detection. 429 * Decreases task's usage by one - may thus free the task. 
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter is gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step  Description                               Protected by
 *       function arguments:
 *       @task                                     [R]
 *       @orig_lock if != NULL                     @top_task is blocked on it
 *       @next_lock                                Unprotected. Cannot be
 *                                                 dereferenced. Only used for
 *                                                 comparison.
461 * @orig_waiter if != NULL @top_task is blocked on it 462 * @top_task current, or in case of proxy 463 * locking protected by calling 464 * code 465 * again: 466 * loop_sanity_check(); 467 * retry: 468 * [1] lock(task->pi_lock); [R] acquire [P] 469 * [2] waiter = task->pi_blocked_on; [P] 470 * [3] check_exit_conditions_1(); [P] 471 * [4] lock = waiter->lock; [P] 472 * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L] 473 * unlock(task->pi_lock); release [P] 474 * goto retry; 475 * } 476 * [6] check_exit_conditions_2(); [P] + [L] 477 * [7] requeue_lock_waiter(lock, waiter); [P] + [L] 478 * [8] unlock(task->pi_lock); release [P] 479 * put_task_struct(task); release [R] 480 * [9] check_exit_conditions_3(); [L] 481 * [10] task = owner(lock); [L] 482 * get_task_struct(task); [L] acquire [R] 483 * lock(task->pi_lock); [L] acquire [P] 484 * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L] 485 * [12] check_exit_conditions_4(); [P] + [L] 486 * [13] unlock(task->pi_lock); release [P] 487 * unlock(lock->wait_lock); release [L] 488 * goto again; 489 */ 490 static int rt_mutex_adjust_prio_chain(struct task_struct *task, 491 enum rtmutex_chainwalk chwalk, 492 struct rt_mutex *orig_lock, 493 struct rt_mutex *next_lock, 494 struct rt_mutex_waiter *orig_waiter, 495 struct task_struct *top_task) 496 { 497 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 498 struct rt_mutex_waiter *prerequeue_top_waiter; 499 int ret = 0, depth = 0; 500 struct rt_mutex *lock; 501 bool detect_deadlock; 502 bool requeue = true; 503 504 detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); 505 506 /* 507 * The (de)boosting is a step by step approach with a lot of 508 * pitfalls. We want this to be preemptible and we want hold a 509 * maximum of two locks per step. So we have to check 510 * carefully whether things change under us. 511 */ 512 again: 513 /* 514 * We limit the lock chain length for each invocation. 
515 */ 516 if (++depth > max_lock_depth) { 517 static int prev_max; 518 519 /* 520 * Print this only once. If the admin changes the limit, 521 * print a new message when reaching the limit again. 522 */ 523 if (prev_max != max_lock_depth) { 524 prev_max = max_lock_depth; 525 printk(KERN_WARNING "Maximum lock depth %d reached " 526 "task: %s (%d)\n", max_lock_depth, 527 top_task->comm, task_pid_nr(top_task)); 528 } 529 put_task_struct(task); 530 531 return -EDEADLK; 532 } 533 534 /* 535 * We are fully preemptible here and only hold the refcount on 536 * @task. So everything can have changed under us since the 537 * caller or our own code below (goto retry/again) dropped all 538 * locks. 539 */ 540 retry: 541 /* 542 * [1] Task cannot go away as we did a get_task() before ! 543 */ 544 raw_spin_lock_irq(&task->pi_lock); 545 546 /* 547 * [2] Get the waiter on which @task is blocked on. 548 */ 549 waiter = task->pi_blocked_on; 550 551 /* 552 * [3] check_exit_conditions_1() protected by task->pi_lock. 553 */ 554 555 /* 556 * Check whether the end of the boosting chain has been 557 * reached or the state of the chain has changed while we 558 * dropped the locks. 559 */ 560 if (!waiter) 561 goto out_unlock_pi; 562 563 /* 564 * Check the orig_waiter state. After we dropped the locks, 565 * the previous owner of the lock might have released the lock. 566 */ 567 if (orig_waiter && !rt_mutex_owner(orig_lock)) 568 goto out_unlock_pi; 569 570 /* 571 * We dropped all locks after taking a refcount on @task, so 572 * the task might have moved on in the lock chain or even left 573 * the chain completely and blocks now on an unrelated lock or 574 * on @orig_lock. 575 * 576 * We stored the lock on which @task was blocked in @next_lock, 577 * so we can detect the chain change. 578 */ 579 if (next_lock != waiter->lock) 580 goto out_unlock_pi; 581 582 /* 583 * Drop out, when the task has no waiters. Note, 584 * top_waiter can be NULL, when we are in the deboosting 585 * mode! 
586 */ 587 if (top_waiter) { 588 if (!task_has_pi_waiters(task)) 589 goto out_unlock_pi; 590 /* 591 * If deadlock detection is off, we stop here if we 592 * are not the top pi waiter of the task. If deadlock 593 * detection is enabled we continue, but stop the 594 * requeueing in the chain walk. 595 */ 596 if (top_waiter != task_top_pi_waiter(task)) { 597 if (!detect_deadlock) 598 goto out_unlock_pi; 599 else 600 requeue = false; 601 } 602 } 603 604 /* 605 * If the waiter priority is the same as the task priority 606 * then there is no further priority adjustment necessary. If 607 * deadlock detection is off, we stop the chain walk. If its 608 * enabled we continue, but stop the requeueing in the chain 609 * walk. 610 */ 611 if (waiter->prio == task->prio) { 612 if (!detect_deadlock) 613 goto out_unlock_pi; 614 else 615 requeue = false; 616 } 617 618 /* 619 * [4] Get the next lock 620 */ 621 lock = waiter->lock; 622 /* 623 * [5] We need to trylock here as we are holding task->pi_lock, 624 * which is the reverse lock order versus the other rtmutex 625 * operations. 626 */ 627 if (!raw_spin_trylock(&lock->wait_lock)) { 628 raw_spin_unlock_irq(&task->pi_lock); 629 cpu_relax(); 630 goto retry; 631 } 632 633 /* 634 * [6] check_exit_conditions_2() protected by task->pi_lock and 635 * lock->wait_lock. 636 * 637 * Deadlock detection. If the lock is the same as the original 638 * lock which caused us to walk the lock chain or if the 639 * current lock is owned by the task which initiated the chain 640 * walk, we detected a deadlock. 641 */ 642 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 643 debug_rt_mutex_deadlock(chwalk, orig_waiter, lock); 644 raw_spin_unlock(&lock->wait_lock); 645 ret = -EDEADLK; 646 goto out_unlock_pi; 647 } 648 649 /* 650 * If we just follow the lock chain for deadlock detection, no 651 * need to do all the requeue operations. 
To avoid a truckload 652 * of conditionals around the various places below, just do the 653 * minimum chain walk checks. 654 */ 655 if (!requeue) { 656 /* 657 * No requeue[7] here. Just release @task [8] 658 */ 659 raw_spin_unlock(&task->pi_lock); 660 put_task_struct(task); 661 662 /* 663 * [9] check_exit_conditions_3 protected by lock->wait_lock. 664 * If there is no owner of the lock, end of chain. 665 */ 666 if (!rt_mutex_owner(lock)) { 667 raw_spin_unlock_irq(&lock->wait_lock); 668 return 0; 669 } 670 671 /* [10] Grab the next task, i.e. owner of @lock */ 672 task = rt_mutex_owner(lock); 673 get_task_struct(task); 674 raw_spin_lock(&task->pi_lock); 675 676 /* 677 * No requeue [11] here. We just do deadlock detection. 678 * 679 * [12] Store whether owner is blocked 680 * itself. Decision is made after dropping the locks 681 */ 682 next_lock = task_blocked_on_lock(task); 683 /* 684 * Get the top waiter for the next iteration 685 */ 686 top_waiter = rt_mutex_top_waiter(lock); 687 688 /* [13] Drop locks */ 689 raw_spin_unlock(&task->pi_lock); 690 raw_spin_unlock_irq(&lock->wait_lock); 691 692 /* If owner is not blocked, end of chain. */ 693 if (!next_lock) 694 goto out_put_task; 695 goto again; 696 } 697 698 /* 699 * Store the current top waiter before doing the requeue 700 * operation on @lock. We need it for the boost/deboost 701 * decision below. 702 */ 703 prerequeue_top_waiter = rt_mutex_top_waiter(lock); 704 705 /* [7] Requeue the waiter in the lock waiter tree. */ 706 rt_mutex_dequeue(lock, waiter); 707 waiter->prio = task->prio; 708 rt_mutex_enqueue(lock, waiter); 709 710 /* [8] Release the task */ 711 raw_spin_unlock(&task->pi_lock); 712 put_task_struct(task); 713 714 /* 715 * [9] check_exit_conditions_3 protected by lock->wait_lock. 716 * 717 * We must abort the chain walk if there is no lock owner even 718 * in the dead lock detection case, as we have nothing to 719 * follow here. This is the end of the chain we are walking. 
720 */ 721 if (!rt_mutex_owner(lock)) { 722 /* 723 * If the requeue [7] above changed the top waiter, 724 * then we need to wake the new top waiter up to try 725 * to get the lock. 726 */ 727 if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) 728 wake_up_process(rt_mutex_top_waiter(lock)->task); 729 raw_spin_unlock_irq(&lock->wait_lock); 730 return 0; 731 } 732 733 /* [10] Grab the next task, i.e. the owner of @lock */ 734 task = rt_mutex_owner(lock); 735 get_task_struct(task); 736 raw_spin_lock(&task->pi_lock); 737 738 /* [11] requeue the pi waiters if necessary */ 739 if (waiter == rt_mutex_top_waiter(lock)) { 740 /* 741 * The waiter became the new top (highest priority) 742 * waiter on the lock. Replace the previous top waiter 743 * in the owner tasks pi waiters tree with this waiter 744 * and adjust the priority of the owner. 745 */ 746 rt_mutex_dequeue_pi(task, prerequeue_top_waiter); 747 rt_mutex_enqueue_pi(task, waiter); 748 __rt_mutex_adjust_prio(task); 749 750 } else if (prerequeue_top_waiter == waiter) { 751 /* 752 * The waiter was the top waiter on the lock, but is 753 * no longer the top prority waiter. Replace waiter in 754 * the owner tasks pi waiters tree with the new top 755 * (highest priority) waiter and adjust the priority 756 * of the owner. 757 * The new top waiter is stored in @waiter so that 758 * @waiter == @top_waiter evaluates to true below and 759 * we continue to deboost the rest of the chain. 760 */ 761 rt_mutex_dequeue_pi(task, waiter); 762 waiter = rt_mutex_top_waiter(lock); 763 rt_mutex_enqueue_pi(task, waiter); 764 __rt_mutex_adjust_prio(task); 765 } else { 766 /* 767 * Nothing changed. No need to do any priority 768 * adjustment. 769 */ 770 } 771 772 /* 773 * [12] check_exit_conditions_4() protected by task->pi_lock 774 * and lock->wait_lock. The actual decisions are made after we 775 * dropped the locks. 776 * 777 * Check whether the task which owns the current lock is pi 778 * blocked itself. 
If yes we store a pointer to the lock for 779 * the lock chain change detection above. After we dropped 780 * task->pi_lock next_lock cannot be dereferenced anymore. 781 */ 782 next_lock = task_blocked_on_lock(task); 783 /* 784 * Store the top waiter of @lock for the end of chain walk 785 * decision below. 786 */ 787 top_waiter = rt_mutex_top_waiter(lock); 788 789 /* [13] Drop the locks */ 790 raw_spin_unlock(&task->pi_lock); 791 raw_spin_unlock_irq(&lock->wait_lock); 792 793 /* 794 * Make the actual exit decisions [12], based on the stored 795 * values. 796 * 797 * We reached the end of the lock chain. Stop right here. No 798 * point to go back just to figure that out. 799 */ 800 if (!next_lock) 801 goto out_put_task; 802 803 /* 804 * If the current waiter is not the top waiter on the lock, 805 * then we can stop the chain walk here if we are not in full 806 * deadlock detection mode. 807 */ 808 if (!detect_deadlock && waiter != top_waiter) 809 goto out_put_task; 810 811 goto again; 812 813 out_unlock_pi: 814 raw_spin_unlock_irq(&task->pi_lock); 815 out_put_task: 816 put_task_struct(task); 817 818 return ret; 819 } 820 821 /* 822 * Try to take an rt-mutex 823 * 824 * Must be called with lock->wait_lock held and interrupts disabled 825 * 826 * @lock: The lock to be acquired. 827 * @task: The task which wants to acquire the lock 828 * @waiter: The waiter that is queued to the lock's wait tree if the 829 * callsite called task_blocked_on_lock(), otherwise NULL 830 */ 831 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, 832 struct rt_mutex_waiter *waiter) 833 { 834 /* 835 * Before testing whether we can acquire @lock, we set the 836 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all 837 * other tasks which try to modify @lock into the slow path 838 * and they serialize on @lock->wait_lock. 
839 * 840 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state 841 * as explained at the top of this file if and only if: 842 * 843 * - There is a lock owner. The caller must fixup the 844 * transient state if it does a trylock or leaves the lock 845 * function due to a signal or timeout. 846 * 847 * - @task acquires the lock and there are no other 848 * waiters. This is undone in rt_mutex_set_owner(@task) at 849 * the end of this function. 850 */ 851 mark_rt_mutex_waiters(lock); 852 853 /* 854 * If @lock has an owner, give up. 855 */ 856 if (rt_mutex_owner(lock)) 857 return 0; 858 859 /* 860 * If @waiter != NULL, @task has already enqueued the waiter 861 * into @lock waiter tree. If @waiter == NULL then this is a 862 * trylock attempt. 863 */ 864 if (waiter) { 865 /* 866 * If waiter is not the highest priority waiter of 867 * @lock, give up. 868 */ 869 if (waiter != rt_mutex_top_waiter(lock)) 870 return 0; 871 872 /* 873 * We can acquire the lock. Remove the waiter from the 874 * lock waiters tree. 875 */ 876 rt_mutex_dequeue(lock, waiter); 877 878 } else { 879 /* 880 * If the lock has waiters already we check whether @task is 881 * eligible to take over the lock. 882 * 883 * If there are no other waiters, @task can acquire 884 * the lock. @task->pi_blocked_on is NULL, so it does 885 * not need to be dequeued. 886 */ 887 if (rt_mutex_has_waiters(lock)) { 888 /* 889 * If @task->prio is greater than or equal to 890 * the top waiter priority (kernel view), 891 * @task lost. 892 */ 893 if (task->prio >= rt_mutex_top_waiter(lock)->prio) 894 return 0; 895 896 /* 897 * The current top waiter stays enqueued. We 898 * don't have to change anything in the lock 899 * waiters order. 900 */ 901 } else { 902 /* 903 * No waiters. Take the lock without the 904 * pi_lock dance.@task->pi_blocked_on is NULL 905 * and we have no waiters to enqueue in @task 906 * pi waiters tree. 907 */ 908 goto takeit; 909 } 910 } 911 912 /* 913 * Clear @task->pi_blocked_on. 
Requires protection by 914 * @task->pi_lock. Redundant operation for the @waiter == NULL 915 * case, but conditionals are more expensive than a redundant 916 * store. 917 */ 918 raw_spin_lock(&task->pi_lock); 919 task->pi_blocked_on = NULL; 920 /* 921 * Finish the lock acquisition. @task is the new owner. If 922 * other waiters exist we have to insert the highest priority 923 * waiter into @task->pi_waiters tree. 924 */ 925 if (rt_mutex_has_waiters(lock)) 926 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); 927 raw_spin_unlock(&task->pi_lock); 928 929 takeit: 930 /* We got the lock. */ 931 debug_rt_mutex_lock(lock); 932 933 /* 934 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there 935 * are still waiters or clears it. 936 */ 937 rt_mutex_set_owner(lock, task); 938 939 rt_mutex_deadlock_account_lock(lock, task); 940 941 return 1; 942 } 943 944 /* 945 * Task blocks on lock. 946 * 947 * Prepare waiter and propagate pi chain 948 * 949 * This must be called with lock->wait_lock held and interrupts disabled 950 */ 951 static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 952 struct rt_mutex_waiter *waiter, 953 struct task_struct *task, 954 enum rtmutex_chainwalk chwalk) 955 { 956 struct task_struct *owner = rt_mutex_owner(lock); 957 struct rt_mutex_waiter *top_waiter = waiter; 958 struct rt_mutex *next_lock; 959 int chain_walk = 0, res; 960 961 /* 962 * Early deadlock detection. We really don't want the task to 963 * enqueue on itself just to untangle the mess later. It's not 964 * only an optimization. We drop the locks, so another waiter 965 * can come in before the chain walk detects the deadlock. So 966 * the other will detect the deadlock and return -EDEADLOCK, 967 * which is wrong, as the other waiter is not in a deadlock 968 * situation. 
969 */ 970 if (owner == task) 971 return -EDEADLK; 972 973 raw_spin_lock(&task->pi_lock); 974 __rt_mutex_adjust_prio(task); 975 waiter->task = task; 976 waiter->lock = lock; 977 waiter->prio = task->prio; 978 979 /* Get the top priority waiter on the lock */ 980 if (rt_mutex_has_waiters(lock)) 981 top_waiter = rt_mutex_top_waiter(lock); 982 rt_mutex_enqueue(lock, waiter); 983 984 task->pi_blocked_on = waiter; 985 986 raw_spin_unlock(&task->pi_lock); 987 988 if (!owner) 989 return 0; 990 991 raw_spin_lock(&owner->pi_lock); 992 if (waiter == rt_mutex_top_waiter(lock)) { 993 rt_mutex_dequeue_pi(owner, top_waiter); 994 rt_mutex_enqueue_pi(owner, waiter); 995 996 __rt_mutex_adjust_prio(owner); 997 if (owner->pi_blocked_on) 998 chain_walk = 1; 999 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { 1000 chain_walk = 1; 1001 } 1002 1003 /* Store the lock on which owner is blocked or NULL */ 1004 next_lock = task_blocked_on_lock(owner); 1005 1006 raw_spin_unlock(&owner->pi_lock); 1007 /* 1008 * Even if full deadlock detection is on, if the owner is not 1009 * blocked itself, we can avoid finding this out in the chain 1010 * walk. 1011 */ 1012 if (!chain_walk || !next_lock) 1013 return 0; 1014 1015 /* 1016 * The owner can't disappear while holding a lock, 1017 * so the owner struct is protected by wait_lock. 1018 * Gets dropped in rt_mutex_adjust_prio_chain()! 1019 */ 1020 get_task_struct(owner); 1021 1022 raw_spin_unlock_irq(&lock->wait_lock); 1023 1024 res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, 1025 next_lock, waiter, task); 1026 1027 raw_spin_lock_irq(&lock->wait_lock); 1028 1029 return res; 1030 } 1031 1032 /* 1033 * Remove the top waiter from the current tasks pi waiter tree and 1034 * queue it up. 1035 * 1036 * Called with lock->wait_lock held and interrupts disabled. 
1037 */ 1038 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, 1039 struct rt_mutex *lock) 1040 { 1041 struct rt_mutex_waiter *waiter; 1042 1043 raw_spin_lock(¤t->pi_lock); 1044 1045 waiter = rt_mutex_top_waiter(lock); 1046 1047 /* 1048 * Remove it from current->pi_waiters. We do not adjust a 1049 * possible priority boost right now. We execute wakeup in the 1050 * boosted mode and go back to normal after releasing 1051 * lock->wait_lock. 1052 */ 1053 rt_mutex_dequeue_pi(current, waiter); 1054 1055 /* 1056 * As we are waking up the top waiter, and the waiter stays 1057 * queued on the lock until it gets the lock, this lock 1058 * obviously has waiters. Just set the bit here and this has 1059 * the added benefit of forcing all new tasks into the 1060 * slow path making sure no task of lower priority than 1061 * the top waiter can steal this lock. 1062 */ 1063 lock->owner = (void *) RT_MUTEX_HAS_WAITERS; 1064 1065 raw_spin_unlock(¤t->pi_lock); 1066 1067 wake_q_add(wake_q, waiter->task); 1068 } 1069 1070 /* 1071 * Remove a waiter from a lock and give up 1072 * 1073 * Must be called with lock->wait_lock held and interrupts disabled. I must 1074 * have just failed to try_to_take_rt_mutex(). 1075 */ 1076 static void remove_waiter(struct rt_mutex *lock, 1077 struct rt_mutex_waiter *waiter) 1078 { 1079 bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); 1080 struct task_struct *owner = rt_mutex_owner(lock); 1081 struct rt_mutex *next_lock; 1082 1083 raw_spin_lock(¤t->pi_lock); 1084 rt_mutex_dequeue(lock, waiter); 1085 current->pi_blocked_on = NULL; 1086 raw_spin_unlock(¤t->pi_lock); 1087 1088 /* 1089 * Only update priority if the waiter was the highest priority 1090 * waiter of the lock and there is an owner to update. 
1091 */ 1092 if (!owner || !is_top_waiter) 1093 return; 1094 1095 raw_spin_lock(&owner->pi_lock); 1096 1097 rt_mutex_dequeue_pi(owner, waiter); 1098 1099 if (rt_mutex_has_waiters(lock)) 1100 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); 1101 1102 __rt_mutex_adjust_prio(owner); 1103 1104 /* Store the lock on which owner is blocked or NULL */ 1105 next_lock = task_blocked_on_lock(owner); 1106 1107 raw_spin_unlock(&owner->pi_lock); 1108 1109 /* 1110 * Don't walk the chain, if the owner task is not blocked 1111 * itself. 1112 */ 1113 if (!next_lock) 1114 return; 1115 1116 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 1117 get_task_struct(owner); 1118 1119 raw_spin_unlock_irq(&lock->wait_lock); 1120 1121 rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, 1122 next_lock, NULL, current); 1123 1124 raw_spin_lock_irq(&lock->wait_lock); 1125 } 1126 1127 /* 1128 * Recheck the pi chain, in case we got a priority setting 1129 * 1130 * Called from sched_setscheduler 1131 */ 1132 void rt_mutex_adjust_pi(struct task_struct *task) 1133 { 1134 struct rt_mutex_waiter *waiter; 1135 struct rt_mutex *next_lock; 1136 unsigned long flags; 1137 1138 raw_spin_lock_irqsave(&task->pi_lock, flags); 1139 1140 waiter = task->pi_blocked_on; 1141 if (!waiter || (waiter->prio == task->prio && 1142 !dl_prio(task->prio))) { 1143 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 1144 return; 1145 } 1146 next_lock = waiter->lock; 1147 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 1148 1149 /* gets dropped in rt_mutex_adjust_prio_chain()! 
*/ 1150 get_task_struct(task); 1151 1152 rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, 1153 next_lock, NULL, task); 1154 } 1155 1156 /** 1157 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop 1158 * @lock: the rt_mutex to take 1159 * @state: the state the task should block in (TASK_INTERRUPTIBLE 1160 * or TASK_UNINTERRUPTIBLE) 1161 * @timeout: the pre-initialized and started timer, or NULL for none 1162 * @waiter: the pre-initialized rt_mutex_waiter 1163 * 1164 * Must be called with lock->wait_lock held and interrupts disabled 1165 */ 1166 static int __sched 1167 __rt_mutex_slowlock(struct rt_mutex *lock, int state, 1168 struct hrtimer_sleeper *timeout, 1169 struct rt_mutex_waiter *waiter) 1170 { 1171 int ret = 0; 1172 1173 for (;;) { 1174 /* Try to acquire the lock: */ 1175 if (try_to_take_rt_mutex(lock, current, waiter)) 1176 break; 1177 1178 /* 1179 * TASK_INTERRUPTIBLE checks for signals and 1180 * timeout. Ignored otherwise. 1181 */ 1182 if (unlikely(state == TASK_INTERRUPTIBLE)) { 1183 /* Signal pending? */ 1184 if (signal_pending(current)) 1185 ret = -EINTR; 1186 if (timeout && !timeout->task) 1187 ret = -ETIMEDOUT; 1188 if (ret) 1189 break; 1190 } 1191 1192 raw_spin_unlock_irq(&lock->wait_lock); 1193 1194 debug_rt_mutex_print_deadlock(waiter); 1195 1196 schedule(); 1197 1198 raw_spin_lock_irq(&lock->wait_lock); 1199 set_current_state(state); 1200 } 1201 1202 __set_current_state(TASK_RUNNING); 1203 return ret; 1204 } 1205 1206 static void rt_mutex_handle_deadlock(int res, int detect_deadlock, 1207 struct rt_mutex_waiter *w) 1208 { 1209 /* 1210 * If the result is not -EDEADLOCK or the caller requested 1211 * deadlock detection, nothing to do here. 1212 */ 1213 if (res != -EDEADLOCK || detect_deadlock) 1214 return; 1215 1216 /* 1217 * Yell lowdly and stop the task right here. 
1218 */ 1219 rt_mutex_print_deadlock(w); 1220 while (1) { 1221 set_current_state(TASK_INTERRUPTIBLE); 1222 schedule(); 1223 } 1224 } 1225 1226 /* 1227 * Slow path lock function: 1228 */ 1229 static int __sched 1230 rt_mutex_slowlock(struct rt_mutex *lock, int state, 1231 struct hrtimer_sleeper *timeout, 1232 enum rtmutex_chainwalk chwalk) 1233 { 1234 struct rt_mutex_waiter waiter; 1235 unsigned long flags; 1236 int ret = 0; 1237 1238 debug_rt_mutex_init_waiter(&waiter); 1239 RB_CLEAR_NODE(&waiter.pi_tree_entry); 1240 RB_CLEAR_NODE(&waiter.tree_entry); 1241 1242 /* 1243 * Technically we could use raw_spin_[un]lock_irq() here, but this can 1244 * be called in early boot if the cmpxchg() fast path is disabled 1245 * (debug, no architecture support). In this case we will acquire the 1246 * rtmutex with lock->wait_lock held. But we cannot unconditionally 1247 * enable interrupts in that early boot case. So we need to use the 1248 * irqsave/restore variants. 1249 */ 1250 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1251 1252 /* Try to acquire the lock again: */ 1253 if (try_to_take_rt_mutex(lock, current, NULL)) { 1254 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1255 return 0; 1256 } 1257 1258 set_current_state(state); 1259 1260 /* Setup the timer, when timeout != NULL */ 1261 if (unlikely(timeout)) 1262 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); 1263 1264 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); 1265 1266 if (likely(!ret)) 1267 /* sleep on the mutex */ 1268 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); 1269 1270 if (unlikely(ret)) { 1271 __set_current_state(TASK_RUNNING); 1272 if (rt_mutex_has_waiters(lock)) 1273 remove_waiter(lock, &waiter); 1274 rt_mutex_handle_deadlock(ret, chwalk, &waiter); 1275 } 1276 1277 /* 1278 * try_to_take_rt_mutex() sets the waiter bit 1279 * unconditionally. We might have to fix that up. 
1280 */ 1281 fixup_rt_mutex_waiters(lock); 1282 1283 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1284 1285 /* Remove pending timer: */ 1286 if (unlikely(timeout)) 1287 hrtimer_cancel(&timeout->timer); 1288 1289 debug_rt_mutex_free_waiter(&waiter); 1290 1291 return ret; 1292 } 1293 1294 /* 1295 * Slow path try-lock function: 1296 */ 1297 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) 1298 { 1299 unsigned long flags; 1300 int ret; 1301 1302 /* 1303 * If the lock already has an owner we fail to get the lock. 1304 * This can be done without taking the @lock->wait_lock as 1305 * it is only being read, and this is a trylock anyway. 1306 */ 1307 if (rt_mutex_owner(lock)) 1308 return 0; 1309 1310 /* 1311 * The mutex has currently no owner. Lock the wait lock and try to 1312 * acquire the lock. We use irqsave here to support early boot calls. 1313 */ 1314 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1315 1316 ret = try_to_take_rt_mutex(lock, current, NULL); 1317 1318 /* 1319 * try_to_take_rt_mutex() sets the lock waiters bit 1320 * unconditionally. Clean this up. 1321 */ 1322 fixup_rt_mutex_waiters(lock); 1323 1324 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1325 1326 return ret; 1327 } 1328 1329 /* 1330 * Slow path to release a rt-mutex. 1331 * Return whether the current task needs to undo a potential priority boosting. 1332 */ 1333 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, 1334 struct wake_q_head *wake_q) 1335 { 1336 unsigned long flags; 1337 1338 /* irqsave required to support early boot calls */ 1339 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1340 1341 debug_rt_mutex_unlock(lock); 1342 1343 rt_mutex_deadlock_account_unlock(current); 1344 1345 /* 1346 * We must be careful here if the fast path is enabled. 
If we 1347 * have no waiters queued we cannot set owner to NULL here 1348 * because of: 1349 * 1350 * foo->lock->owner = NULL; 1351 * rtmutex_lock(foo->lock); <- fast path 1352 * free = atomic_dec_and_test(foo->refcnt); 1353 * rtmutex_unlock(foo->lock); <- fast path 1354 * if (free) 1355 * kfree(foo); 1356 * raw_spin_unlock(foo->lock->wait_lock); 1357 * 1358 * So for the fastpath enabled kernel: 1359 * 1360 * Nothing can set the waiters bit as long as we hold 1361 * lock->wait_lock. So we do the following sequence: 1362 * 1363 * owner = rt_mutex_owner(lock); 1364 * clear_rt_mutex_waiters(lock); 1365 * raw_spin_unlock(&lock->wait_lock); 1366 * if (cmpxchg(&lock->owner, owner, 0) == owner) 1367 * return; 1368 * goto retry; 1369 * 1370 * The fastpath disabled variant is simple as all access to 1371 * lock->owner is serialized by lock->wait_lock: 1372 * 1373 * lock->owner = NULL; 1374 * raw_spin_unlock(&lock->wait_lock); 1375 */ 1376 while (!rt_mutex_has_waiters(lock)) { 1377 /* Drops lock->wait_lock ! */ 1378 if (unlock_rt_mutex_safe(lock, flags) == true) 1379 return false; 1380 /* Relock the rtmutex and try again */ 1381 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1382 } 1383 1384 /* 1385 * The wakeup next waiter path does not suffer from the above 1386 * race. See the comments there. 1387 * 1388 * Queue the next waiter for wakeup once we release the wait_lock. 1389 */ 1390 mark_wakeup_next_waiter(wake_q, lock); 1391 1392 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1393 1394 /* check PI boosting */ 1395 return true; 1396 } 1397 1398 /* 1399 * debug aware fast / slowpath lock,trylock,unlock 1400 * 1401 * The atomic acquire/release ops are compiled away, when either the 1402 * architecture does not support cmpxchg or when debugging is enabled. 
1403 */ 1404 static inline int 1405 rt_mutex_fastlock(struct rt_mutex *lock, int state, 1406 int (*slowfn)(struct rt_mutex *lock, int state, 1407 struct hrtimer_sleeper *timeout, 1408 enum rtmutex_chainwalk chwalk)) 1409 { 1410 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { 1411 rt_mutex_deadlock_account_lock(lock, current); 1412 return 0; 1413 } else 1414 return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); 1415 } 1416 1417 static inline int 1418 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, 1419 struct hrtimer_sleeper *timeout, 1420 enum rtmutex_chainwalk chwalk, 1421 int (*slowfn)(struct rt_mutex *lock, int state, 1422 struct hrtimer_sleeper *timeout, 1423 enum rtmutex_chainwalk chwalk)) 1424 { 1425 if (chwalk == RT_MUTEX_MIN_CHAINWALK && 1426 likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { 1427 rt_mutex_deadlock_account_lock(lock, current); 1428 return 0; 1429 } else 1430 return slowfn(lock, state, timeout, chwalk); 1431 } 1432 1433 static inline int 1434 rt_mutex_fasttrylock(struct rt_mutex *lock, 1435 int (*slowfn)(struct rt_mutex *lock)) 1436 { 1437 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { 1438 rt_mutex_deadlock_account_lock(lock, current); 1439 return 1; 1440 } 1441 return slowfn(lock); 1442 } 1443 1444 static inline void 1445 rt_mutex_fastunlock(struct rt_mutex *lock, 1446 bool (*slowfn)(struct rt_mutex *lock, 1447 struct wake_q_head *wqh)) 1448 { 1449 DEFINE_WAKE_Q(wake_q); 1450 1451 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { 1452 rt_mutex_deadlock_account_unlock(current); 1453 1454 } else { 1455 bool deboost = slowfn(lock, &wake_q); 1456 1457 wake_up_q(&wake_q); 1458 1459 /* Undo pi boosting if necessary: */ 1460 if (deboost) 1461 rt_mutex_adjust_prio(current); 1462 } 1463 } 1464 1465 /** 1466 * rt_mutex_lock - lock a rt_mutex 1467 * 1468 * @lock: the rt_mutex to be locked 1469 */ 1470 void __sched rt_mutex_lock(struct rt_mutex *lock) 1471 { 1472 might_sleep(); 1473 1474 
rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); 1475 } 1476 EXPORT_SYMBOL_GPL(rt_mutex_lock); 1477 1478 /** 1479 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible 1480 * 1481 * @lock: the rt_mutex to be locked 1482 * 1483 * Returns: 1484 * 0 on success 1485 * -EINTR when interrupted by a signal 1486 */ 1487 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) 1488 { 1489 might_sleep(); 1490 1491 return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); 1492 } 1493 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); 1494 1495 /* 1496 * Futex variant with full deadlock detection. 1497 */ 1498 int rt_mutex_timed_futex_lock(struct rt_mutex *lock, 1499 struct hrtimer_sleeper *timeout) 1500 { 1501 might_sleep(); 1502 1503 return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, 1504 RT_MUTEX_FULL_CHAINWALK, 1505 rt_mutex_slowlock); 1506 } 1507 1508 /** 1509 * rt_mutex_timed_lock - lock a rt_mutex interruptible 1510 * the timeout structure is provided 1511 * by the caller 1512 * 1513 * @lock: the rt_mutex to be locked 1514 * @timeout: timeout structure or NULL (no timeout) 1515 * 1516 * Returns: 1517 * 0 on success 1518 * -EINTR when interrupted by a signal 1519 * -ETIMEDOUT when the timeout expired 1520 */ 1521 int 1522 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) 1523 { 1524 might_sleep(); 1525 1526 return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, 1527 RT_MUTEX_MIN_CHAINWALK, 1528 rt_mutex_slowlock); 1529 } 1530 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); 1531 1532 /** 1533 * rt_mutex_trylock - try to lock a rt_mutex 1534 * 1535 * @lock: the rt_mutex to be locked 1536 * 1537 * This function can only be called in thread context. It's safe to 1538 * call it from atomic regions, but not from hard interrupt or soft 1539 * interrupt context. 
1540 * 1541 * Returns 1 on success and 0 on contention 1542 */ 1543 int __sched rt_mutex_trylock(struct rt_mutex *lock) 1544 { 1545 if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) 1546 return 0; 1547 1548 return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); 1549 } 1550 EXPORT_SYMBOL_GPL(rt_mutex_trylock); 1551 1552 /** 1553 * rt_mutex_unlock - unlock a rt_mutex 1554 * 1555 * @lock: the rt_mutex to be unlocked 1556 */ 1557 void __sched rt_mutex_unlock(struct rt_mutex *lock) 1558 { 1559 rt_mutex_fastunlock(lock, rt_mutex_slowunlock); 1560 } 1561 EXPORT_SYMBOL_GPL(rt_mutex_unlock); 1562 1563 /** 1564 * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock 1565 * @lock: the rt_mutex to be unlocked 1566 * 1567 * Returns: true/false indicating whether priority adjustment is 1568 * required or not. 1569 */ 1570 bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, 1571 struct wake_q_head *wqh) 1572 { 1573 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { 1574 rt_mutex_deadlock_account_unlock(current); 1575 return false; 1576 } 1577 return rt_mutex_slowunlock(lock, wqh); 1578 } 1579 1580 /** 1581 * rt_mutex_destroy - mark a mutex unusable 1582 * @lock: the mutex to be destroyed 1583 * 1584 * This function marks the mutex uninitialized, and any subsequent 1585 * use of the mutex is forbidden. The mutex must not be locked when 1586 * this function is called. 1587 */ 1588 void rt_mutex_destroy(struct rt_mutex *lock) 1589 { 1590 WARN_ON(rt_mutex_is_locked(lock)); 1591 #ifdef CONFIG_DEBUG_RT_MUTEXES 1592 lock->magic = NULL; 1593 #endif 1594 } 1595 1596 EXPORT_SYMBOL_GPL(rt_mutex_destroy); 1597 1598 /** 1599 * __rt_mutex_init - initialize the rt lock 1600 * 1601 * @lock: the rt lock to be initialized 1602 * 1603 * Initialize the rt lock to unlocked state. 
1604 * 1605 * Initializing of a locked rt lock is not allowed 1606 */ 1607 void __rt_mutex_init(struct rt_mutex *lock, const char *name) 1608 { 1609 lock->owner = NULL; 1610 raw_spin_lock_init(&lock->wait_lock); 1611 lock->waiters = RB_ROOT; 1612 lock->waiters_leftmost = NULL; 1613 1614 debug_rt_mutex_init(lock, name); 1615 } 1616 EXPORT_SYMBOL_GPL(__rt_mutex_init); 1617 1618 /** 1619 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a 1620 * proxy owner 1621 * 1622 * @lock: the rt_mutex to be locked 1623 * @proxy_owner:the task to set as owner 1624 * 1625 * No locking. Caller has to do serializing itself 1626 * 1627 * Special API call for PI-futex support. This initializes the rtmutex and 1628 * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not 1629 * possible at this point because the pi_state which contains the rtmutex 1630 * is not yet visible to other tasks. 1631 */ 1632 void rt_mutex_init_proxy_locked(struct rt_mutex *lock, 1633 struct task_struct *proxy_owner) 1634 { 1635 __rt_mutex_init(lock, NULL); 1636 debug_rt_mutex_proxy_lock(lock, proxy_owner); 1637 rt_mutex_set_owner(lock, proxy_owner); 1638 rt_mutex_deadlock_account_lock(lock, proxy_owner); 1639 } 1640 1641 /** 1642 * rt_mutex_proxy_unlock - release a lock on behalf of owner 1643 * 1644 * @lock: the rt_mutex to be locked 1645 * 1646 * No locking. Caller has to do serializing itself 1647 * 1648 * Special API call for PI-futex support. This merrily cleans up the rtmutex 1649 * (debugging) state. Concurrent operations on this rt_mutex are not 1650 * possible because it belongs to the pi_state which is about to be freed 1651 * and it is not longer visible to other tasks. 
1652 */ 1653 void rt_mutex_proxy_unlock(struct rt_mutex *lock, 1654 struct task_struct *proxy_owner) 1655 { 1656 debug_rt_mutex_proxy_unlock(lock); 1657 rt_mutex_set_owner(lock, NULL); 1658 rt_mutex_deadlock_account_unlock(proxy_owner); 1659 } 1660 1661 /** 1662 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task 1663 * @lock: the rt_mutex to take 1664 * @waiter: the pre-initialized rt_mutex_waiter 1665 * @task: the task to prepare 1666 * 1667 * Returns: 1668 * 0 - task blocked on lock 1669 * 1 - acquired the lock for task, caller should wake it up 1670 * <0 - error 1671 * 1672 * Special API call for FUTEX_REQUEUE_PI support. 1673 */ 1674 int rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1675 struct rt_mutex_waiter *waiter, 1676 struct task_struct *task) 1677 { 1678 int ret; 1679 1680 raw_spin_lock_irq(&lock->wait_lock); 1681 1682 if (try_to_take_rt_mutex(lock, task, NULL)) { 1683 raw_spin_unlock_irq(&lock->wait_lock); 1684 return 1; 1685 } 1686 1687 /* We enforce deadlock detection for futexes */ 1688 ret = task_blocks_on_rt_mutex(lock, waiter, task, 1689 RT_MUTEX_FULL_CHAINWALK); 1690 1691 if (ret && !rt_mutex_owner(lock)) { 1692 /* 1693 * Reset the return value. We might have 1694 * returned with -EDEADLK and the owner 1695 * released the lock while we were walking the 1696 * pi chain. Let the waiter sort it out. 1697 */ 1698 ret = 0; 1699 } 1700 1701 if (unlikely(ret)) 1702 remove_waiter(lock, waiter); 1703 1704 raw_spin_unlock_irq(&lock->wait_lock); 1705 1706 debug_rt_mutex_print_deadlock(waiter); 1707 1708 return ret; 1709 } 1710 1711 /** 1712 * rt_mutex_next_owner - return the next owner of the lock 1713 * 1714 * @lock: the rt lock query 1715 * 1716 * Returns the next owner of the lock or NULL 1717 * 1718 * Caller has to serialize against other accessors to the lock 1719 * itself. 
1720 * 1721 * Special API call for PI-futex support 1722 */ 1723 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) 1724 { 1725 if (!rt_mutex_has_waiters(lock)) 1726 return NULL; 1727 1728 return rt_mutex_top_waiter(lock)->task; 1729 } 1730 1731 /** 1732 * rt_mutex_finish_proxy_lock() - Complete lock acquisition 1733 * @lock: the rt_mutex we were woken on 1734 * @to: the timeout, null if none. hrtimer should already have 1735 * been started. 1736 * @waiter: the pre-initialized rt_mutex_waiter 1737 * 1738 * Complete the lock acquisition started our behalf by another thread. 1739 * 1740 * Returns: 1741 * 0 - success 1742 * <0 - error, one of -EINTR, -ETIMEDOUT 1743 * 1744 * Special API call for PI-futex requeue support 1745 */ 1746 int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, 1747 struct hrtimer_sleeper *to, 1748 struct rt_mutex_waiter *waiter) 1749 { 1750 int ret; 1751 1752 raw_spin_lock_irq(&lock->wait_lock); 1753 1754 set_current_state(TASK_INTERRUPTIBLE); 1755 1756 /* sleep on the mutex */ 1757 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); 1758 1759 if (unlikely(ret)) 1760 remove_waiter(lock, waiter); 1761 1762 /* 1763 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might 1764 * have to fix that up. 1765 */ 1766 fixup_rt_mutex_waiters(lock); 1767 1768 raw_spin_unlock_irq(&lock->wait_lock); 1769 1770 return ret; 1771 } 1772