// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <[email protected]>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <[email protected]>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *                    and Peter Morreale,
 * Adaptive Spinlocks simplification:
 * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <[email protected]>
 *
 * See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include "rtmutex_common.h"

#ifndef WW_RT
# define build_ww_mutex()       (false)
# define ww_container_of(rtm)   NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
                                        struct rt_mutex *lock,
                                        struct ww_acquire_ctx *ww_ctx)
{
        return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
                                            struct ww_acquire_ctx *ww_ctx)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
                                          struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
                                        struct rt_mutex_waiter *waiter,
                                        struct ww_acquire_ctx *ww_ctx)
{
        return 0;
}

#else
# define build_ww_mutex()       (true)
# define ww_container_of(rtm)   container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner        bit0
 * NULL         0       lock is free (fast acquire possible)
 * NULL         1       lock is free and has waiters and the top waiter
 *                      is going to take the lock*
 * taskpointer  0       lock is held (fast release possible)
 * taskpointer  1       lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit 0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
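 *
 * Purely as an illustration (not part of the implementation), the encoding
 * described above can be pictured as:
 *
 *      owner_field = (unsigned long)owner_task | (has_waiters ? 1UL : 0UL);
 *      owner_task  = (struct task_struct *)(owner_field & ~1UL);
 *
 * assuming RT_MUTEX_HAS_WAITERS is bit 0, which is what rt_mutex_owner()
 * and rt_mutex_set_owner() below effectively implement.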
 */

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
        unsigned long val = (unsigned long)owner;

        if (rt_mutex_has_waiters(lock))
                val |= RT_MUTEX_HAS_WAITERS;

        WRITE_ONCE(lock->owner, (struct task_struct *)val);
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        unsigned long owner, *p = (unsigned long *) &lock->owner;

        if (rt_mutex_has_waiters(lock))
                return;

        /*
         * The rbtree has no waiters enqueued, now make sure that the
         * lock->owner still has the waiters bit set, otherwise the
         * following can happen:
         *
         * CPU 0        CPU 1           CPU2
         * l->owner=T1
         *              rt_mutex_lock(l)
         *              lock(l->lock)
         *              l->owner = T1 | HAS_WAITERS;
         *              enqueue(T2)
         *              boost()
         *                unlock(l->lock)
         *              block()
         *
         *                              rt_mutex_lock(l)
         *                              lock(l->lock)
         *                              l->owner = T1 | HAS_WAITERS;
         *                              enqueue(T3)
         *                              boost()
         *                                unlock(l->lock)
         *                              block()
         *              signal(->T2)    signal(->T3)
         *              lock(l->lock)
         *              dequeue(T2)
         *              deboost()
         *                unlock(l->lock)
         *                              lock(l->lock)
         *                              dequeue(T3)
         *                               ==> wait list is empty
         *                              deboost()
         *                               unlock(l->lock)
         *              lock(l->lock)
         *              fixup_rt_mutex_waiters()
         *                if (wait_list_empty(l)) {
         *                  owner = l->owner & ~HAS_WAITERS;
         *                  l->owner = owner
         *                    ==> l->owner = T1
         *                }
         *                              lock(l->lock)
         * rt_mutex_unlock(l)           fixup_rt_mutex_waiters()
         *                                if (wait_list_empty(l)) {
         *                                  owner = l->owner & ~HAS_WAITERS;
         * cmpxchg(l->owner, T1, NULL)
         *  ===> Success (l->owner = NULL)
         *
         *                                  l->owner = owner
         *                                    ==> l->owner = T1
         *                                }
         *
         * With the check for the waiter bit in place T3 on CPU2 will not
         * overwrite. All tasks fiddling with the waiters bit are
         * serialized by l->lock, so nothing else can modify the waiters
         * bit. If the bit is set then nothing can change l->owner either
         * so the simple RMW is safe. The cmpxchg() will simply fail if it
         * happens in the middle of the RMW because the waiters bit is
         * still set.
         */
        owner = READ_ONCE(*p);
        if (owner & RT_MUTEX_HAS_WAITERS)
                WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return try_cmpxchg_release(&lock->owner, &old, new);
}

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
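 *
 * For illustration only: once the waiters bit is set, a concurrent fast
 * path attempt such as
 *
 *      rt_mutex_cmpxchg_acquire(lock, NULL, current)
 *
 * can no longer succeed, because lock->owner is no longer NULL (bit 0 is
 * set), so that task is forced into the slow path and serializes on
 * ->wait_lock.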
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        unsigned long owner, *p = (unsigned long *) &lock->owner;

        do {
                owner = *p;
        } while (cmpxchg_relaxed(p, owner,
                                 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
                                                 unsigned long flags)
        __releases(lock->wait_lock)
{
        struct task_struct *owner = rt_mutex_owner(lock);

        clear_rt_mutex_waiters(lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        /*
         * If a new waiter comes in between the unlock and the cmpxchg
         * we have two situations:
         *
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         * cmpxchg(p, owner, 0) == owner
         *                                      mark_rt_mutex_waiters(lock);
         *                                      acquire(lock);
         * or:
         *
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         *                                      mark_rt_mutex_waiters(lock);
         *
         * cmpxchg(p, owner, 0) != owner
         *                                      enqueue_waiter();
         *                                      unlock(wait_lock);
         * lock(wait_lock);
         * wake waiter();
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         *                                      acquire(lock);
         */
        return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return false;
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
                                                 unsigned long flags)
        __releases(lock->wait_lock)
{
        lock->owner = NULL;
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
        int prio = task->prio;

        if (!rt_prio(prio))
                return DEFAULT_PRIO;

        return prio;
}

static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
        waiter->prio = __waiter_prio(task);
        waiter->deadline = task->dl.deadline;
}

/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p)       \
        &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }

static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
                                                struct rt_mutex_waiter *right)
{
        if (left->prio < right->prio)
                return 1;

        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
         * If left waiter has a dl_prio(), and we didn't return 1 above,
         * then right waiter has a dl_prio() too.
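         *
         * Illustrative example: an RT waiter with prio 10 sorts before one
         * with prio 20 via the check above, while two deadline waiters
         * compare equal on prio and fall through to here, where the earlier
         * absolute deadline wins.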
         */
        if (dl_prio(left->prio))
                return dl_time_before(left->deadline, right->deadline);

        return 0;
}

static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
                                                 struct rt_mutex_waiter *right)
{
        if (left->prio != right->prio)
                return 0;

        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
         * If left waiter has a dl_prio(), and we didn't return 0 above,
         * then right waiter has a dl_prio() too.
         */
        if (dl_prio(left->prio))
                return left->deadline == right->deadline;

        return 1;
}

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
                                  struct rt_mutex_waiter *top_waiter)
{
        if (rt_mutex_waiter_less(waiter, top_waiter))
                return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
        /*
         * Note that RT tasks are excluded from same priority (lateral)
         * steals to prevent the introduction of an unbounded latency.
         */
        if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
                return false;

        return rt_mutex_waiter_equal(waiter, top_waiter);
#else
        return false;
#endif
}

#define __node_2_waiter(node) \
        rb_entry((node), struct rt_mutex_waiter, tree_entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
        struct rt_mutex_waiter *aw = __node_2_waiter(a);
        struct rt_mutex_waiter *bw = __node_2_waiter(b);

        if (rt_mutex_waiter_less(aw, bw))
                return 1;

        if (!build_ww_mutex())
                return 0;

        if (rt_mutex_waiter_less(bw, aw))
                return 0;

        /* NOTE: relies on waiter->ww_ctx being set before insertion */
        if (aw->ww_ctx) {
                if (!bw->ww_ctx)
                        return 1;

                return (signed long)(aw->ww_ctx->stamp -
                                     bw->ww_ctx->stamp) < 0;
        }

        return 0;
}

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
        rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
        if (RB_EMPTY_NODE(&waiter->tree_entry))
                return;

        rb_erase_cached(&waiter->tree_entry, &lock->waiters);
        RB_CLEAR_NODE(&waiter->tree_entry);
}

#define __node_2_pi_waiter(node) \
        rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)

static __always_inline bool
__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
        return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
        rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
        if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
                return;

        rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
        RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
{
        struct task_struct *pi_task = NULL;

        lockdep_assert_held(&p->pi_lock);

        if (task_has_pi_waiters(p))
                pi_task = task_top_pi_waiter(p)->task;

        rt_mutex_setprio(p, pi_task);
}

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
                                                struct rt_mutex_waiter *w)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) {
                if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                        WARN_ON_ONCE(wqh->rtlock_task);
                get_task_struct(w->task);
                wqh->rtlock_task = w->task;
        } else {
                wake_q_add(&wqh->head, w->task);
        }
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
                wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
                put_task_struct(wqh->rtlock_task);
                wqh->rtlock_task = NULL;
        }

        if (!wake_q_empty(&wqh->head))
                wake_up_q(&wqh->head);

        /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
        preempt_enable();
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
                              enum rtmutex_chainwalk chwalk)
{
        if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
                return waiter != NULL;
        return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
        return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:       the task owning the mutex (owner) for which a chain walk is
 *              probably needed
 * @chwalk:     do we have to carry out deadlock detection?
 * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
 *              things for a task that has just got its priority adjusted, and
 *              is waiting on a mutex)
 * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
 *              we dropped its pi_lock. Is never dereferenced, only used for
 *              comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *              its priority to the mutex owner (can be NULL in the case
 *              depicted above or if the top waiter has gone away and we are
 *              actually deboosting the owner)
 * @top_task:   the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step Description                             Protected by
 *      function arguments:
 *      @task                                   [R]
 *      @orig_lock if != NULL                   @top_task is blocked on it
 *      @next_lock                              Unprotected. Cannot be
 *                                              dereferenced. Only used for
 *                                              comparison.
 *      @orig_waiter if != NULL                 @top_task is blocked on it
 *      @top_task                               current, or in case of proxy
 *                                              locking protected by calling
 *                                              code
 *
 * again:
 *        loop_sanity_check();
 * retry:
 * [1]    lock(task->pi_lock);                  [R] acquire [P]
 * [2]    waiter = task->pi_blocked_on;         [P]
 * [3]    check_exit_conditions_1();            [P]
 * [4]    lock = waiter->lock;                  [P]
 * [5]    if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
 *          unlock(task->pi_lock);              release [P]
 *          goto retry;
 *        }
 * [6]    check_exit_conditions_2();            [P] + [L]
 * [7]    requeue_lock_waiter(lock, waiter);    [P] + [L]
 * [8]    unlock(task->pi_lock);                release [P]
 *        put_task_struct(task);                release [R]
 * [9]    check_exit_conditions_3();            [L]
 * [10]   task = owner(lock);                   [L]
 *        get_task_struct(task);                [L] acquire [R]
 *        lock(task->pi_lock);                  [L] acquire [P]
 * [11]   requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]   check_exit_conditions_4();            [P] + [L]
 * [13]   unlock(task->pi_lock);                release [P]
 *        unlock(lock->wait_lock);              release [L]
 *        goto again;
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
                                              enum rtmutex_chainwalk chwalk,
                                              struct rt_mutex_base *orig_lock,
                                              struct rt_mutex_base *next_lock,
                                              struct rt_mutex_waiter *orig_waiter,
                                              struct task_struct *top_task)
{
        struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
        struct rt_mutex_waiter *prerequeue_top_waiter;
        int ret = 0, depth = 0;
        struct rt_mutex_base *lock;
        bool detect_deadlock;
        bool requeue = true;

        detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

        /*
         * The (de)boosting is a step by step approach with a lot of
         * pitfalls. We want this to be preemptible and we want to hold a
         * maximum of two locks per step. So we have to check
         * carefully whether things change under us.
         */
 again:
        /*
         * We limit the lock chain length for each invocation.
         */
        if (++depth > max_lock_depth) {
                static int prev_max;

                /*
                 * Print this only once. If the admin changes the limit,
                 * print a new message when reaching the limit again.
                 */
                if (prev_max != max_lock_depth) {
                        prev_max = max_lock_depth;
                        printk(KERN_WARNING "Maximum lock depth %d reached "
                               "task: %s (%d)\n", max_lock_depth,
                               top_task->comm, task_pid_nr(top_task));
                }
                put_task_struct(task);

                return -EDEADLK;
        }

        /*
         * We are fully preemptible here and only hold the refcount on
         * @task. So everything can have changed under us since the
         * caller or our own code below (goto retry/again) dropped all
         * locks.
         */
 retry:
        /*
         * [1] Task cannot go away as we did a get_task() before !
         */
        raw_spin_lock_irq(&task->pi_lock);

        /*
         * [2] Get the waiter on which @task is blocked.
         */
        waiter = task->pi_blocked_on;

        /*
         * [3] check_exit_conditions_1() protected by task->pi_lock.
         */

        /*
         * Check whether the end of the boosting chain has been
         * reached or the state of the chain has changed while we
         * dropped the locks.
         */
        if (!waiter)
                goto out_unlock_pi;

        /*
         * Check the orig_waiter state. After we dropped the locks,
         * the previous owner of the lock might have released the lock.
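         * In that case @orig_lock has no owner anymore and the chain walk
         * started on its behalf is moot, so the check below terminates it.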
         */
        if (orig_waiter && !rt_mutex_owner(orig_lock))
                goto out_unlock_pi;

        /*
         * We dropped all locks after taking a refcount on @task, so
         * the task might have moved on in the lock chain or even left
         * the chain completely and blocks now on an unrelated lock or
         * on @orig_lock.
         *
         * We stored the lock on which @task was blocked in @next_lock,
         * so we can detect the chain change.
         */
        if (next_lock != waiter->lock)
                goto out_unlock_pi;

        /*
         * There could be 'spurious' loops in the lock graph due to ww_mutex,
         * consider:
         *
         *   P1: A, ww_A, ww_B
         *   P2: ww_B, ww_A
         *   P3: A
         *
         * P3 should not return -EDEADLK because it gets trapped in the cycle
         * created by P1 and P2 (which will resolve -- and runs into
         * max_lock_depth above). Therefore disable detect_deadlock such that
         * the below termination condition can trigger once all relevant tasks
         * are boosted.
         *
         * Even when we start with ww_mutex we can disable deadlock detection,
         * since we would suppress a ww_mutex induced deadlock at [6] anyway.
         * Suppressing it here however is not sufficient since we might still
         * hit [6] due to adjustment driven iteration.
         *
         * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
         * utterly fail to report it; lockdep should.
         */
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
                detect_deadlock = false;

        /*
         * Drop out, when the task has no waiters. Note,
         * top_waiter can be NULL, when we are in the deboosting
         * mode!
         */
        if (top_waiter) {
                if (!task_has_pi_waiters(task))
                        goto out_unlock_pi;
                /*
                 * If deadlock detection is off, we stop here if we
                 * are not the top pi waiter of the task. If deadlock
                 * detection is enabled we continue, but stop the
                 * requeueing in the chain walk.
                 */
                if (top_waiter != task_top_pi_waiter(task)) {
                        if (!detect_deadlock)
                                goto out_unlock_pi;
                        else
                                requeue = false;
                }
        }

        /*
         * If the waiter priority is the same as the task priority
         * then there is no further priority adjustment necessary. If
         * deadlock detection is off, we stop the chain walk. If it's
         * enabled we continue, but stop the requeueing in the chain
         * walk.
         */
        if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
                if (!detect_deadlock)
                        goto out_unlock_pi;
                else
                        requeue = false;
        }

        /*
         * [4] Get the next lock
         */
        lock = waiter->lock;
        /*
         * [5] We need to trylock here as we are holding task->pi_lock,
         * which is the reverse lock order versus the other rtmutex
         * operations.
         */
        if (!raw_spin_trylock(&lock->wait_lock)) {
                raw_spin_unlock_irq(&task->pi_lock);
                cpu_relax();
                goto retry;
        }

        /*
         * [6] check_exit_conditions_2() protected by task->pi_lock and
         * lock->wait_lock.
         *
         * Deadlock detection. If the lock is the same as the original
         * lock which caused us to walk the lock chain or if the
         * current lock is owned by the task which initiated the chain
         * walk, we detected a deadlock.
         */
        if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
                raw_spin_unlock(&lock->wait_lock);
                ret = -EDEADLK;
                goto out_unlock_pi;
        }

        /*
         * If we just follow the lock chain for deadlock detection, no
         * need to do all the requeue operations. To avoid a truckload
         * of conditionals around the various places below, just do the
         * minimum chain walk checks.
         */
        if (!requeue) {
                /*
                 * No requeue [7] here. Just release @task [8]
                 */
                raw_spin_unlock(&task->pi_lock);
                put_task_struct(task);

                /*
                 * [9] check_exit_conditions_3 protected by lock->wait_lock.
                 * If there is no owner of the lock, end of chain.
                 */
                if (!rt_mutex_owner(lock)) {
                        raw_spin_unlock_irq(&lock->wait_lock);
                        return 0;
                }

                /* [10] Grab the next task, i.e. owner of @lock */
                task = get_task_struct(rt_mutex_owner(lock));
                raw_spin_lock(&task->pi_lock);

                /*
                 * No requeue [11] here. We just do deadlock detection.
                 *
                 * [12] Store whether owner is blocked
                 * itself. Decision is made after dropping the locks
                 */
                next_lock = task_blocked_on_lock(task);
                /*
                 * Get the top waiter for the next iteration
                 */
                top_waiter = rt_mutex_top_waiter(lock);

                /* [13] Drop locks */
                raw_spin_unlock(&task->pi_lock);
                raw_spin_unlock_irq(&lock->wait_lock);

                /* If owner is not blocked, end of chain. */
                if (!next_lock)
                        goto out_put_task;
                goto again;
        }

        /*
         * Store the current top waiter before doing the requeue
         * operation on @lock. We need it for the boost/deboost
         * decision below.
         */
        prerequeue_top_waiter = rt_mutex_top_waiter(lock);

        /* [7] Requeue the waiter in the lock waiter tree. */
        rt_mutex_dequeue(lock, waiter);

        /*
         * Update the waiter prio fields now that we're dequeued.
         *
         * These values can have changed through either:
         *
         *   sys_sched_set_scheduler() / sys_sched_setattr()
         *
         * or
         *
         *   DL CBS enforcement advancing the effective deadline.
         *
         * Even though pi_waiters also uses these fields, and that tree is only
         * updated in [11], we can do this here, since we hold [L], which
         * serializes all pi_waiters access and rb_erase() does not care about
         * the values of the node being removed.
         */
        waiter_update_prio(waiter, task);

        rt_mutex_enqueue(lock, waiter);

        /* [8] Release the task */
        raw_spin_unlock(&task->pi_lock);
        put_task_struct(task);

        /*
         * [9] check_exit_conditions_3 protected by lock->wait_lock.
         *
         * We must abort the chain walk if there is no lock owner even
         * in the dead lock detection case, as we have nothing to
         * follow here. This is the end of the chain we are walking.
         */
        if (!rt_mutex_owner(lock)) {
                /*
                 * If the requeue [7] above changed the top waiter,
                 * then we need to wake the new top waiter up to try
                 * to get the lock.
                 */
                if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
                        wake_up_state(waiter->task, waiter->wake_state);
                raw_spin_unlock_irq(&lock->wait_lock);
                return 0;
        }

        /* [10] Grab the next task, i.e. the owner of @lock */
        task = get_task_struct(rt_mutex_owner(lock));
        raw_spin_lock(&task->pi_lock);

        /* [11] requeue the pi waiters if necessary */
        if (waiter == rt_mutex_top_waiter(lock)) {
                /*
                 * The waiter became the new top (highest priority)
                 * waiter on the lock. Replace the previous top waiter
                 * in the owner task's pi waiters tree with this waiter
                 * and adjust the priority of the owner.
                 */
                rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
                rt_mutex_enqueue_pi(task, waiter);
                rt_mutex_adjust_prio(task);

        } else if (prerequeue_top_waiter == waiter) {
                /*
                 * The waiter was the top waiter on the lock, but is
                 * no longer the top priority waiter. Replace waiter in
                 * the owner task's pi waiters tree with the new top
                 * (highest priority) waiter and adjust the priority
                 * of the owner.
                 * The new top waiter is stored in @waiter so that
                 * @waiter == @top_waiter evaluates to true below and
                 * we continue to deboost the rest of the chain.
                 */
                rt_mutex_dequeue_pi(task, waiter);
                waiter = rt_mutex_top_waiter(lock);
                rt_mutex_enqueue_pi(task, waiter);
                rt_mutex_adjust_prio(task);
        } else {
                /*
                 * Nothing changed. No need to do any priority
                 * adjustment.
                 */
        }

        /*
         * [12] check_exit_conditions_4() protected by task->pi_lock
         * and lock->wait_lock. The actual decisions are made after we
         * dropped the locks.
         *
         * Check whether the task which owns the current lock is pi
         * blocked itself. If yes we store a pointer to the lock for
         * the lock chain change detection above. After we dropped
         * task->pi_lock next_lock cannot be dereferenced anymore.
         */
        next_lock = task_blocked_on_lock(task);
        /*
         * Store the top waiter of @lock for the end of chain walk
         * decision below.
         */
        top_waiter = rt_mutex_top_waiter(lock);

        /* [13] Drop the locks */
        raw_spin_unlock(&task->pi_lock);
        raw_spin_unlock_irq(&lock->wait_lock);

        /*
         * Make the actual exit decisions [12], based on the stored
         * values.
         *
         * We reached the end of the lock chain. Stop right here. No
         * point to go back just to figure that out.
         */
        if (!next_lock)
                goto out_put_task;

        /*
         * If the current waiter is not the top waiter on the lock,
         * then we can stop the chain walk here if we are not in full
         * deadlock detection mode.
         */
        if (!detect_deadlock && waiter != top_waiter)
                goto out_put_task;

        goto again;

 out_unlock_pi:
        raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
        put_task_struct(task);

        return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *          callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
                     struct rt_mutex_waiter *waiter)
{
        lockdep_assert_held(&lock->wait_lock);

        /*
         * Before testing whether we can acquire @lock, we set the
         * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
         * other tasks which try to modify @lock into the slow path
         * and they serialize on @lock->wait_lock.
         *
         * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
         * as explained at the top of this file if and only if:
         *
         * - There is a lock owner. The caller must fixup the
         *   transient state if it does a trylock or leaves the lock
         *   function due to a signal or timeout.
         *
         * - @task acquires the lock and there are no other
         *   waiters. This is undone in rt_mutex_set_owner(@task) at
         *   the end of this function.
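         *
         * For illustration only: a failed trylock with an owner present
         * leaves lock->owner == owner | RT_MUTEX_HAS_WAITERS behind for a
         * moment; callers clean that up again via fixup_rt_mutex_waiters(),
         * see e.g. __rt_mutex_slowtrylock().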
         */
        mark_rt_mutex_waiters(lock);

        /*
         * If @lock has an owner, give up.
         */
        if (rt_mutex_owner(lock))
                return 0;

        /*
         * If @waiter != NULL, @task has already enqueued the waiter
         * into @lock waiter tree. If @waiter == NULL then this is a
         * trylock attempt.
         */
        if (waiter) {
                struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

                /*
                 * If waiter is the highest priority waiter of @lock,
                 * or allowed to steal it, take it over.
                 */
                if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
                        /*
                         * We can acquire the lock. Remove the waiter from the
                         * lock waiters tree.
                         */
                        rt_mutex_dequeue(lock, waiter);
                } else {
                        return 0;
                }
        } else {
                /*
                 * If the lock has waiters already we check whether @task is
                 * eligible to take over the lock.
                 *
                 * If there are no other waiters, @task can acquire
                 * the lock. @task->pi_blocked_on is NULL, so it does
                 * not need to be dequeued.
                 */
                if (rt_mutex_has_waiters(lock)) {
                        /* Check whether the trylock can steal it. */
                        if (!rt_mutex_steal(task_to_waiter(task),
                                            rt_mutex_top_waiter(lock)))
                                return 0;

                        /*
                         * The current top waiter stays enqueued. We
                         * don't have to change anything in the lock
                         * waiters order.
                         */
                } else {
                        /*
                         * No waiters. Take the lock without the
                         * pi_lock dance. @task->pi_blocked_on is NULL
                         * and we have no waiters to enqueue in @task
                         * pi waiters tree.
                         */
                        goto takeit;
                }
        }

        /*
         * Clear @task->pi_blocked_on. Requires protection by
         * @task->pi_lock. Redundant operation for the @waiter == NULL
         * case, but conditionals are more expensive than a redundant
         * store.
         */
        raw_spin_lock(&task->pi_lock);
        task->pi_blocked_on = NULL;
        /*
         * Finish the lock acquisition. @task is the new owner. If
         * other waiters exist we have to insert the highest priority
         * waiter into @task->pi_waiters tree.
         */
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
        raw_spin_unlock(&task->pi_lock);

 takeit:
        /*
         * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
         * are still waiters or clears it.
         */
        rt_mutex_set_owner(lock, task);

        return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
                                           struct rt_mutex_waiter *waiter,
                                           struct task_struct *task,
                                           struct ww_acquire_ctx *ww_ctx,
                                           enum rtmutex_chainwalk chwalk)
{
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex_waiter *top_waiter = waiter;
        struct rt_mutex_base *next_lock;
        int chain_walk = 0, res;

        lockdep_assert_held(&lock->wait_lock);

        /*
         * Early deadlock detection. We really don't want the task to
         * enqueue on itself just to untangle the mess later. It's not
         * only an optimization. We drop the locks, so another waiter
         * can come in before the chain walk detects the deadlock. So
         * the other will detect the deadlock and return -EDEADLOCK,
         * which is wrong, as the other waiter is not in a deadlock
         * situation.
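         *
         * The obvious illustration is a task calling rt_mutex_lock() twice
         * on the same lock: without this check it would enqueue behind
         * itself and block forever.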
         */
        if (owner == task)
                return -EDEADLK;

        raw_spin_lock(&task->pi_lock);
        waiter->task = task;
        waiter->lock = lock;
        waiter_update_prio(waiter, task);

        /* Get the top priority waiter on the lock */
        if (rt_mutex_has_waiters(lock))
                top_waiter = rt_mutex_top_waiter(lock);
        rt_mutex_enqueue(lock, waiter);

        task->pi_blocked_on = waiter;

        raw_spin_unlock(&task->pi_lock);

        if (build_ww_mutex() && ww_ctx) {
                struct rt_mutex *rtm;

                /* Check whether the waiter should back out immediately */
                rtm = container_of(lock, struct rt_mutex, rtmutex);
                res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
                if (res) {
                        raw_spin_lock(&task->pi_lock);
                        rt_mutex_dequeue(lock, waiter);
                        task->pi_blocked_on = NULL;
                        raw_spin_unlock(&task->pi_lock);
                        return res;
                }
        }

        if (!owner)
                return 0;

        raw_spin_lock(&owner->pi_lock);
        if (waiter == rt_mutex_top_waiter(lock)) {
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);

                rt_mutex_adjust_prio(owner);
                if (owner->pi_blocked_on)
                        chain_walk = 1;
        } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
                chain_walk = 1;
        }

        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);

        raw_spin_unlock(&owner->pi_lock);
        /*
         * Even if full deadlock detection is on, if the owner is not
         * blocked itself, we can avoid finding this out in the chain
         * walk.
         */
        if (!chain_walk || !next_lock)
                return 0;

        /*
         * The owner can't disappear while holding a lock,
         * so the owner struct is protected by wait_lock.
         * Gets dropped in rt_mutex_adjust_prio_chain()!
         */
        get_task_struct(owner);

        raw_spin_unlock_irq(&lock->wait_lock);

        res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
                                         next_lock, waiter, task);

        raw_spin_lock_irq(&lock->wait_lock);

        return res;
}

/*
 * Remove the top waiter from the current tasks pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
                                            struct rt_mutex_base *lock)
{
        struct rt_mutex_waiter *waiter;

        raw_spin_lock(&current->pi_lock);

        waiter = rt_mutex_top_waiter(lock);

        /*
         * Remove it from current->pi_waiters and deboost.
         *
         * We must in fact deboost here in order to ensure we call
         * rt_mutex_setprio() to update p->pi_top_task before the
         * task unblocks.
         */
        rt_mutex_dequeue_pi(current, waiter);
        rt_mutex_adjust_prio(current);

        /*
         * As we are waking up the top waiter, and the waiter stays
         * queued on the lock until it gets the lock, this lock
         * obviously has waiters. Just set the bit here and this has
         * the added benefit of forcing all new tasks into the
         * slow path making sure no task of lower priority than
         * the top waiter can steal this lock.
         */
        lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

        /*
         * We deboosted before waking the top waiter task such that we don't
         * run two tasks with the 'same' priority (and ensure the
         * p->pi_top_task pointer points to a blocked task). This however can
         * lead to priority inversion if we would get preempted after the
         * deboost but before waking our donor task, hence the preempt_disable()
         * before unlock.
         *
         * Pairs with preempt_enable() in rt_mutex_wake_up_q();
         */
        preempt_disable();
        rt_mutex_wake_q_add(wqh, waiter);
        raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
        int ret = try_to_take_rt_mutex(lock, current, NULL);

        /*
         * try_to_take_rt_mutex() sets the lock waiters bit
         * unconditionally. Clean this up.
         */
        fixup_rt_mutex_waiters(lock);

        return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
        unsigned long flags;
        int ret;

        /*
         * If the lock already has an owner we fail to get the lock.
         * This can be done without taking the @lock->wait_lock as
         * it is only being read, and this is a trylock anyway.
         */
        if (rt_mutex_owner(lock))
                return 0;

        /*
         * The mutex has currently no owner. Lock the wait lock and try to
         * acquire the lock. We use irqsave here to support early boot calls.
         */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);

        ret = __rt_mutex_slowtrylock(lock);

        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
        if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
                return 1;

        return rt_mutex_slowtrylock(lock);
}

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
        DEFINE_RT_WAKE_Q(wqh);
        unsigned long flags;

        /* irqsave required to support early boot calls */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);

        debug_rt_mutex_unlock(lock);

        /*
         * We must be careful here if the fast path is enabled. If we
         * have no waiters queued we cannot set owner to NULL here
         * because of:
         *
         * foo->lock->owner = NULL;
         * rtmutex_lock(foo->lock); <- fast path
         * free = atomic_dec_and_test(foo->refcnt);
         * rtmutex_unlock(foo->lock); <- fast path
         * if (free)
         *      kfree(foo);
         * raw_spin_unlock(foo->lock->wait_lock);
         *
         * So for the fastpath enabled kernel:
         *
         * Nothing can set the waiters bit as long as we hold
         * lock->wait_lock. So we do the following sequence:
         *
         *      owner = rt_mutex_owner(lock);
         *      clear_rt_mutex_waiters(lock);
         *      raw_spin_unlock(&lock->wait_lock);
         *      if (cmpxchg(&lock->owner, owner, 0) == owner)
         *              return;
         *      goto retry;
         *
         * The fastpath disabled variant is simple as all access to
         * lock->owner is serialized by lock->wait_lock:
         *
         *      lock->owner = NULL;
         *      raw_spin_unlock(&lock->wait_lock);
         */
        while (!rt_mutex_has_waiters(lock)) {
                /* Drops lock->wait_lock ! */
                if (unlock_rt_mutex_safe(lock, flags) == true)
                        return;
                /* Relock the rtmutex and try again */
                raw_spin_lock_irqsave(&lock->wait_lock, flags);
        }

        /*
         * The wakeup next waiter path does not suffer from the above
         * race. See the comments there.
         *
         * Queue the next waiter for wakeup once we release the wait_lock.
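         *
         * Illustrative ordering only: mark_wakeup_next_waiter() below queues
         * the waiter and disables preemption while wait_lock is still held;
         * the actual wakeup happens in rt_mutex_wake_up_q() after the unlock,
         * which also re-enables preemption.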
         */
        mark_wakeup_next_waiter(&wqh, lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
        if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
                return;

        rt_mutex_slowunlock(lock);
}

#ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
                                  struct rt_mutex_waiter *waiter,
                                  struct task_struct *owner)
{
        bool res = true;

        rcu_read_lock();
        for (;;) {
                /* If owner changed, trylock again. */
                if (owner != rt_mutex_owner(lock))
                        break;
                /*
                 * Ensure that @owner is dereferenced after checking that
                 * the lock owner still matches @owner. If that fails,
                 * @owner might point to freed memory. If it still matches,
                 * the rcu_read_lock() ensures the memory stays valid.
                 */
                barrier();
                /*
                 * Stop spinning when:
                 *  - the lock owner has been scheduled out
                 *  - current is no longer the top waiter
                 *  - current is requested to reschedule (redundant
                 *    for CONFIG_PREEMPT_RCU=y)
                 *  - the VCPU on which owner runs is preempted
                 */
                if (!owner->on_cpu || need_resched() ||
                    !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
                    vcpu_is_preempted(task_cpu(owner))) {
                        res = false;
                        break;
                }
                cpu_relax();
        }
        rcu_read_unlock();
        return res;
}
#else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
                                  struct rt_mutex_waiter *waiter,
                                  struct task_struct *owner)
{
        return false;
}
#endif

#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *      - rtmutex, futex on all kernels
 *      - mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
                                  struct rt_mutex_waiter *waiter)
{
        bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex_base *next_lock;

        lockdep_assert_held(&lock->wait_lock);

        raw_spin_lock(&current->pi_lock);
        rt_mutex_dequeue(lock, waiter);
        current->pi_blocked_on = NULL;
        raw_spin_unlock(&current->pi_lock);

        /*
         * Only update priority if the waiter was the highest priority
         * waiter of the lock and there is an owner to update.
         */
        if (!owner || !is_top_waiter)
                return;

        raw_spin_lock(&owner->pi_lock);

        rt_mutex_dequeue_pi(owner, waiter);

        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

        rt_mutex_adjust_prio(owner);

        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);

        raw_spin_unlock(&owner->pi_lock);

        /*
         * Don't walk the chain, if the owner task is not blocked
         * itself.
         */
        if (!next_lock)
                return;

        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(owner);

        raw_spin_unlock_irq(&lock->wait_lock);

        rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
                                   next_lock, NULL, current);

        raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:       the rt_mutex to take
 * @ww_ctx:     WW mutex context pointer
 * @state:      the state the task should block in (TASK_INTERRUPTIBLE
 *              or TASK_UNINTERRUPTIBLE)
 * @timeout:    the pre-initialized and started timer, or NULL for none
 * @waiter:     the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
                                           struct ww_acquire_ctx *ww_ctx,
                                           unsigned int state,
                                           struct hrtimer_sleeper *timeout,
                                           struct rt_mutex_waiter *waiter)
{
        struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
        struct task_struct *owner;
        int ret = 0;

        for (;;) {
                /* Try to acquire the lock: */
                if (try_to_take_rt_mutex(lock, current, waiter))
                        break;

                if (timeout && !timeout->task) {
                        ret = -ETIMEDOUT;
                        break;
                }
                if (signal_pending_state(state, current)) {
                        ret = -EINTR;
                        break;
                }

                if (build_ww_mutex() && ww_ctx) {
                        ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
                        if (ret)
                                break;
                }

                if (waiter == rt_mutex_top_waiter(lock))
                        owner = rt_mutex_owner(lock);
                else
                        owner = NULL;
                raw_spin_unlock_irq(&lock->wait_lock);

                if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
                        schedule();

                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(state);
        }

        __set_current_state(TASK_RUNNING);
        return ret;
}

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
                                             struct rt_mutex_waiter *w)
{
        /*
         * If the result is not -EDEADLOCK or the caller requested
         * deadlock detection, nothing to do here.
         */
        if (res != -EDEADLOCK || detect_deadlock)
                return;

        if (build_ww_mutex() && w->ww_ctx)
                return;

        /*
         * Yell loudly and stop the task right here.
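         *
         * Looping over set_current_state(TASK_INTERRUPTIBLE) and schedule()
         * below parks the task here indefinitely without consuming CPU time.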
         */
        WARN(1, "rtmutex deadlock detected\n");
        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
        }
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:       The rtmutex to block lock
 * @ww_ctx:     WW mutex context pointer
 * @state:      The task state for sleeping
 * @chwalk:     Indicator whether full or partial chainwalk is requested
 * @waiter:     Initialized waiter for blocking
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
                                       struct ww_acquire_ctx *ww_ctx,
                                       unsigned int state,
                                       enum rtmutex_chainwalk chwalk,
                                       struct rt_mutex_waiter *waiter)
{
        struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
        struct ww_mutex *ww = ww_container_of(rtm);
        int ret;

        lockdep_assert_held(&lock->wait_lock);

        /* Try to acquire the lock again: */
        if (try_to_take_rt_mutex(lock, current, NULL)) {
                if (build_ww_mutex() && ww_ctx) {
                        __ww_mutex_check_waiters(rtm, ww_ctx);
                        ww_mutex_lock_acquired(ww, ww_ctx);
                }
                return 0;
        }

        set_current_state(state);

        ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
        if (likely(!ret))
                ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);

        if (likely(!ret)) {
                /* acquired the lock */
                if (build_ww_mutex() && ww_ctx) {
                        if (!ww_ctx->is_wait_die)
                                __ww_mutex_check_waiters(rtm, ww_ctx);
                        ww_mutex_lock_acquired(ww, ww_ctx);
                }
        } else {
                __set_current_state(TASK_RUNNING);
                remove_waiter(lock, waiter);
                rt_mutex_handle_deadlock(ret, chwalk, waiter);
        }

        /*
         * try_to_take_rt_mutex() sets the waiter bit
         * unconditionally. We might have to fix that up.
         */
        fixup_rt_mutex_waiters(lock);
        return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
                                             struct ww_acquire_ctx *ww_ctx,
                                             unsigned int state)
{
        struct rt_mutex_waiter waiter;
        int ret;

        rt_mutex_init_waiter(&waiter);
        waiter.ww_ctx = ww_ctx;

        ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
                                  &waiter);

        debug_rt_mutex_free_waiter(&waiter);
        return ret;
}

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:       The rtmutex to block lock
 * @ww_ctx:     WW mutex context pointer
 * @state:      The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
                                     struct ww_acquire_ctx *ww_ctx,
                                     unsigned int state)
{
        unsigned long flags;
        int ret;

        /*
         * Technically we could use raw_spin_[un]lock_irq() here, but this can
         * be called in early boot if the cmpxchg() fast path is disabled
         * (debug, no architecture support). In this case we will acquire the
         * rtmutex with lock->wait_lock held. But we cannot unconditionally
         * enable interrupts in that early boot case. So we need to use the
         * irqsave/restore variants.
         */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);
        ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
                                           unsigned int state)
{
        if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
                return 0;

        return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */

#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:       The underlying RT mutex
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
        struct rt_mutex_waiter waiter;
        struct task_struct *owner;

        lockdep_assert_held(&lock->wait_lock);

        if (try_to_take_rt_mutex(lock, current, NULL))
                return;

        rt_mutex_init_rtlock_waiter(&waiter);

        /* Save current state and set state to TASK_RTLOCK_WAIT */
        current_save_and_set_rtlock_wait_state();

        task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);

        for (;;) {
                /* Try to acquire the lock again */
                if (try_to_take_rt_mutex(lock, current, &waiter))
                        break;

                if (&waiter == rt_mutex_top_waiter(lock))
                        owner = rt_mutex_owner(lock);
                else
                        owner = NULL;
                raw_spin_unlock_irq(&lock->wait_lock);

                if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
                        schedule_rtlock();

                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(TASK_RTLOCK_WAIT);
        }

        /* Restore the task state */
        current_restore_rtlock_saved_state();

        /*
         * try_to_take_rt_mutex() sets the waiter bit unconditionally.
         * We might have to fix that up:
         */
        fixup_rt_mutex_waiters(lock);
        debug_rt_mutex_free_waiter(&waiter);
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&lock->wait_lock, flags);
        rtlock_slowlock_locked(lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */