kernel/locking/rtmutex.c

   1 /*
   2  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
   3  *
   4  * started by Ingo Molnar and Thomas Gleixner.
   5  *
   6  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   7  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
   8  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
   9  *  Copyright (C) 2006 Esben Nielsen
  10  *
  11  *  See Documentation/locking/rt-mutex-design.txt for details.
  12  */
  13 #include <linux/spinlock.h>
  14 #include <linux/export.h>
  15 #include <linux/sched.h>
  16 #include <linux/sched/rt.h>
  17 #include <linux/sched/deadline.h>
  18 #include <linux/timer.h>
  19
  20 #include "rtmutex_common.h"
  21
  22 /*
  23  * lock->owner state tracking:
  24  *
  25  * lock->owner holds the task_struct pointer of the owner. Bit 0
  26  * is used to keep track of the "lock has waiters" state.
  27  *
  28  * owner        bit0
  29  * NULL         0       lock is free (fast acquire possible)
  30  * NULL         1       lock is free and has waiters and the top waiter
  31  *                              is going to take the lock*
  32  * taskpointer  0       lock is held (fast release possible)
  33  * taskpointer  1       lock is held and has waiters**
  34  *
  35  * The fast atomic compare exchange based acquire and release is only
  36  * possible when bit 0 of lock->owner is 0.
  37  *
  38  * (*) It also can be a transitional state when grabbing the lock
  39  * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
  40  * we need to set the bit0 before looking at the lock, and the owner may be
  41  * NULL in this small time, hence this can be a transitional state.
  42  *
  43  * (**) There is a small time when bit 0 is set but there are no
  44  * waiters. This can happen when grabbing the lock in the slow path.
  45  * To prevent a cmpxchg of the owner releasing the lock, we need to
  46  * set this bit before looking at the lock.
  47  */
  48
  49 static void
  50 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
  51 {
  52         unsigned long tagged = (unsigned long)owner;
  53
  54         /* Bit 0 of ->owner doubles as the "lock has waiters" flag. */
  55         tagged |= rt_mutex_has_waiters(lock) ? RT_MUTEX_HAS_WAITERS : 0;
  56
  57         lock->owner = (struct task_struct *)tagged;
  58 }
  59
  60 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
  61 {
  62         unsigned long owner = (unsigned long)lock->owner; /* pointer | bit 0 */
  63         lock->owner = (struct task_struct *)(owner & ~RT_MUTEX_HAS_WAITERS);
  64 }
  65
  66 /*
  67  * Undo a transient RT_MUTEX_HAS_WAITERS state: when no waiter is queued
  68  * on @lock, make sure bit 0 of lock->owner is clear again.
  69  * (Callers are expected to hold ->wait_lock; verify against call sites.)
  70  *
  71  * The bit is only written back when it is actually set. An unconditional
  72  * read-modify-write of lock->owner can race with the owner releasing the
  73  * lock through the cmpxchg fast path, which does not take ->wait_lock:
  74  *
  75  *   fixup                             owner
  76  *   owner = lock->owner & ~bit0;
  77  *                                     cmpxchg(lock->owner, owner, NULL)
  78  *   lock->owner = owner;              <- lock looks held again, forever
  79  *
  80  * With bit 0 set the fast path cmpxchg cannot succeed, so the store
  81  * below cannot overwrite a concurrent release.
  82  */
  83 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  84 {
  85         unsigned long owner, *p = (unsigned long *) &lock->owner;
  86
  87         if (rt_mutex_has_waiters(lock))
  88                 return;
  89
  90         /* One load only, so the test and the store use the same value. */
  91         owner = READ_ONCE(*p);
  92         if (owner & RT_MUTEX_HAS_WAITERS)
  93                 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
  94 }
  71
  72 /*
  73  * Without debug state to set up, acquire/release is a single cmpxchg on
  74  * lock->owner. True on success; note that @c is evaluated twice.
  75  */
  76 #ifndef CONFIG_DEBUG_RT_MUTEXES
  77 # define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&(l)->owner, (c), (n)) == (c))
  78 # define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&(l)->owner, (c), (n)) == (c))
  79 # define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&(l)->owner, (c), (n)) == (c))
  80
  81 /*
  82  * Callers hold ->wait_lock. Setting the bit pushes all later [Rmw] attempts
  83  * on the lock into the slowpath, so relaxed ordering suffices here.
  84  */
  85 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  86 {
  87         unsigned long old, *p = (unsigned long *) &lock->owner;
  88
  89         for (;;) {      /* retry until the value read is the one swapped */
  90                 old = *p;
  91                 if (cmpxchg_relaxed(p, old, old | RT_MUTEX_HAS_WAITERS) == old)
  92                         return;
  93         }
  94 }
  95
  96 /*
  97  * Safe fastpath aware unlock; returns true if the final cmpxchg succeeded:
  98  * 1) Clear the waiters bit
  99  * 2) Drop lock->wait_lock
 100  * 3) Try to unlock the lock with cmpxchg
 101  */
 102 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
 103                                         unsigned long flags)
 104         __releases(lock->wait_lock)
 105 {
 106         struct task_struct *owner = rt_mutex_owner(lock);
 107
 108         clear_rt_mutex_waiters(lock);
 109         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 110         /*
 111          * If a new waiter comes in between the unlock and the cmpxchg
 112          * (done without ->wait_lock held) we have two situations:
 113          *
 114          * unlock(wait_lock);
 115          *                                      lock(wait_lock);
 116          * cmpxchg(p, owner, 0) == owner
 117          *                                      mark_rt_mutex_waiters(lock);
 118          *                                      acquire(lock);
 119          * or:
 120          *
 121          * unlock(wait_lock);
 122          *                                      lock(wait_lock);
 123          *                                      mark_rt_mutex_waiters(lock);
 124          *
 125          * cmpxchg(p, owner, 0) != owner
 126          *                                      enqueue_waiter();
 127          *                                      unlock(wait_lock);
 128          * lock(wait_lock);
 129          * wake waiter();
 130          * unlock(wait_lock);
 131          *                                      lock(wait_lock);
 132          *                                      acquire(lock);
 133          */
 134         return rt_mutex_cmpxchg_release(lock, owner, NULL);
 135 }
 136
 137 #else
 138 # define rt_mutex_cmpxchg_relaxed(l,c,n)        (0)     /* constant 0: */
 139 # define rt_mutex_cmpxchg_acquire(l,c,n)        (0)     /* the cmpxchg */
 140 # define rt_mutex_cmpxchg_release(l,c,n)        (0)     /* never succeeds */
 141
 142 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 143 {
 144         unsigned long owner = (unsigned long)lock->owner; /* plain RMW */
 145         lock->owner = (struct task_struct *)(owner | RT_MUTEX_HAS_WAITERS);
 146 }
 147
 148 /*
 149  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 150  */
 151 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
 152                                         unsigned long flags)
 153         __releases(lock->wait_lock)
 154 {
 155         lock->owner = NULL;     /* written while ->wait_lock is still held */
 156         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 157         return true;            /* this variant always reports success */
 158 }
 159 #endif
 160
 161 static inline int
 162 rt_mutex_waiter_less(struct rt_mutex_waiter *left,
 163                      struct rt_mutex_waiter *right)
 164 {
 165         int lprio = left->prio;
 166
 167         /* A numerically lower prio value orders first. */
 168         if (lprio < right->prio)
 169                 return 1;
 170
 171         /*
 172          * Not lower than @right: only deadline waiters can still win.
 173          * If @left is dl_prio() then so is @right, compare deadlines.
 174          */
 175         if (!dl_prio(lprio))
 176                 return 0;
 177         return dl_time_before(left->task->dl.deadline,
 178                               right->task->dl.deadline);
 179 }
 180
 181 static void
 182 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
 183 {
 184         struct rb_node *node = &waiter->tree_entry, *up = NULL;
 185         struct rb_node **slot = &lock->waiters.rb_node;
 186         struct rt_mutex_waiter *cur;
 187         bool only_left = true;
 188
 189         while (*slot) {
 190                 up = *slot;
 191                 cur = rb_entry(up, struct rt_mutex_waiter, tree_entry);
 192                 if (rt_mutex_waiter_less(waiter, cur)) {
 193                         slot = &up->rb_left;
 194                         continue;
 195                 }
 196                 slot = &up->rb_right;
 197                 only_left = false;
 198         }
 199
 200         /* Only left turns taken: @waiter is the new leftmost node. */
 201         if (only_left)
 202                 lock->waiters_leftmost = node;
 203         rb_link_node(node, up, slot);
 204         rb_insert_color(node, &lock->waiters);
 205 }
 206
 207 static void
 208 rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
 209 {
 210         struct rb_node *node = &waiter->tree_entry;
 211
 212         if (RB_EMPTY_NODE(node))        /* node is not in the tree */
 213                 return;
 214         if (lock->waiters_leftmost == node)
 215                 lock->waiters_leftmost = rb_next(node);
 216         rb_erase(node, &lock->waiters);
 217         RB_CLEAR_NODE(node);
 218 }
 219
 220 static void
 221 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 222 {
 223         struct rb_node *node = &waiter->pi_tree_entry, *up = NULL;
 224         struct rb_node **slot = &task->pi_waiters.rb_node;
 225         struct rt_mutex_waiter *cur;
 226         bool only_left = true;
 227
 228         while (*slot) {
 229                 up = *slot;
 230                 cur = rb_entry(up, struct rt_mutex_waiter, pi_tree_entry);
 231                 if (rt_mutex_waiter_less(waiter, cur)) {
 232                         slot = &up->rb_left;
 233                         continue;
 234                 }
 235                 slot = &up->rb_right;
 236                 only_left = false;
 237         }
 238
 239         /* Only left turns taken: @waiter is the new leftmost node. */
 240         if (only_left)
 241                 task->pi_waiters_leftmost = node;
 242         rb_link_node(node, up, slot);
 243         rb_insert_color(node, &task->pi_waiters);
 244 }
 245
 246 static void
 247 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 248 {
 249         struct rb_node *node = &waiter->pi_tree_entry;
 250
 251         if (RB_EMPTY_NODE(node))        /* node is not in the tree */
 252                 return;
 253         if (task->pi_waiters_leftmost == node)
 254                 task->pi_waiters_leftmost = rb_next(node);
 255         rb_erase(node, &task->pi_waiters);
 256         RB_CLEAR_NODE(node);
 257 }
 258
 259 /*
 260  * Priority @task should have, given its PI waiters.
 261  *
 262  * Without PI waiters this is task->normal_prio; otherwise the smaller
 263  * value of normal_prio and the prio of the top PI waiter.
 264  */
 265 int rt_mutex_getprio(struct task_struct *task)
 266 {
 267         int prio = task->normal_prio;
 268
 269         if (unlikely(task_has_pi_waiters(task)))
 270                 prio = min(task_top_pi_waiter(task)->prio, prio);
 271         return prio;
 272 }
 273
 274 struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
 275 {
 276         /* The task behind the top PI waiter, or NULL without PI waiters. */
 277         if (unlikely(task_has_pi_waiters(task)))
 278                 return task_top_pi_waiter(task)->task;
 279         return NULL;
 280 }
 281
 282 /*
 283  * Priority that becomes effective when sched_setscheduler() moves @task
 284  * to @newprio: the top PI waiter's task prio wins if it is <= @newprio.
 285  */
 286 int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
 287 {
 288         int boost;
 289
 290         if (!task_has_pi_waiters(task))
 291                 return newprio;
 292         boost = task_top_pi_waiter(task)->task->prio;
 293         return boost <= newprio ? boost : newprio;
 294 }
 295
 296 /*
 297  * Map @policy to the policy matching the class that @prio belongs to.
 298  *
 299  * @policy is only kept when it already fits that class; otherwise the
 300  * default policy of the class is returned.
 301  */
 302 int rt_mutex_get_effective_policy(int policy, int prio)
 303 {
 304         /* Deadline prio: the only choice is SCHED_DEADLINE. */
 305         if (dl_prio(prio))
 306                 return SCHED_DEADLINE;
 307
 308         /* RT prio: keep SCHED_RR, everything else becomes SCHED_FIFO. */
 309         if (rt_prio(prio))
 310                 return policy == SCHED_RR ? SCHED_RR : SCHED_FIFO;
 311
 312         /* Fair prio: keep a fair policy, else fall back to SCHED_NORMAL. */
 313         if (policy == SCHED_NORMAL ||
 314             policy == SCHED_IDLE ||
 315             policy == SCHED_BATCH)
 316                 return policy;
 317
 318         return SCHED_NORMAL;
 319 }
 320
 321 /*
 322  * Get the effective rt priority for the current prio value:
 323  * MAX_RT_PRIO - 1 - @prio for an rt_prio() value, 0 otherwise.
 324  */
 325 int rt_mutex_get_effective_rt_prio(int prio)
 326 {
 327         if (rt_prio(prio))
 328                 return MAX_RT_PRIO - 1 - prio;
 329         return 0;
 330 }
 331
 332 /*
 333  * Re-evaluate @task's priority after its pi_waiters tree was modified;
 334  * this may boost or unboost. The caller must hold task->pi_lock.
 335  */
 336 static void __rt_mutex_adjust_prio(struct task_struct *task)
 337 {
 338         int target = rt_mutex_getprio(task);
 339
 340         /* Deadline targets are always pushed, other prios only on change. */
 341         if (dl_prio(target) || task->prio != target)
 342                 rt_mutex_setprio(task, target);
 343 }
 344
 345 /*
 346  * Recompute @task's priority (undo boosting) under task->pi_lock. Used on
 347  * the exit path of rt_mutex_slowunlock() and rt_mutex_slowlock().
 348  *
 349  * (Note: this deliberately runs outside of lock->wait_lock, so the lock
 350  * can be taken while or before the priority is readjusted. The
 351  * spin_xx_mutex() variants are not used as this is outside of the
 352  * debug path.)
 353  */
 354 void rt_mutex_adjust_prio(struct task_struct *task)
 355 {
 356         unsigned long irqflags;
 357
 358         raw_spin_lock_irqsave(&task->pi_lock, irqflags);
 359         __rt_mutex_adjust_prio(task);
 360         raw_spin_unlock_irqrestore(&task->pi_lock, irqflags);
 361 }
 362
 363 /*
 364  * Tell whether a chain walk has to perform deadlock detection:
 365  *
 366  * - CONFIG_DEBUG_RT_MUTEXES=n: only when @chwalk is
 367  *   RT_MUTEX_FULL_CHAINWALK.
 368  *
 369  * - CONFIG_DEBUG_RT_MUTEXES=y: always, independent of @chwalk.
 370  *
 371  * - @waiter == NULL: this indicates the deboost path; detection is
 372  *   disabled independent of @chwalk and of the config settings.
 373  *
 374  * The decision itself is made by debug_rt_mutex_detect_deadlock().
 375  */
 376 static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
 377                                           enum rtmutex_chainwalk chwalk)
 378 {
 379         bool detect;
 380
 381         /*
 382          * Thin wrapper, kept in the main source file so the rules above
 383          * are documented once instead of in each header variant.
 384          */
 385         detect = debug_rt_mutex_detect_deadlock(waiter, chwalk);
 386         return detect;
 387 }
 388
 389 /*
 390  * Max chain walk steps per invocation; beyond that we return -EDEADLK:
 391  */
 392 int max_lock_depth = 1024;
 393
 394 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
 395 {
 396         return !p->pi_blocked_on ? NULL : p->pi_blocked_on->lock;
 397 }
 398
 399 /*
 400  * Adjust the priority chain. Also used for deadlock detection.
 401  * Decreases task's usage by one - may thus free the task.
 402  *
 403  * @task:       the task owning the mutex (owner) for which a chain walk is
 404  *              probably needed
 405  * @chwalk:     do we have to carry out deadlock detection?
 406  * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
 407  *              things for a task that has just got its priority adjusted, and
 408  *              is waiting on a mutex)
 409  * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
 410  *              we dropped its pi_lock. Is never dereferenced, only used for
 411  *              comparison to detect lock chain changes.
 412  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 413  *              its priority to the mutex owner (can be NULL in the case
 414  *              depicted above or if the top waiter has gone away and we are
 415  *              actually deboosting the owner)
 416  * @top_task:   the current top waiter
 417  *
 418  * Returns 0 or -EDEADLK.
 419  *
 420  * Chain walk basics and protection scope
 421  *
 422  * [R] refcount on task
 423  * [P] task->pi_lock held
 424  * [L] rtmutex->wait_lock held
 425  *
 426  * Step Description                             Protected by
 427  *      function arguments:
 428  *      @task                                   [R]
 429  *      @orig_lock if != NULL                   @top_task is blocked on it
 430  *      @next_lock                              Unprotected. Cannot be
 431  *                                              dereferenced. Only used for
 432  *                                              comparison.
 433  *      @orig_waiter if != NULL                 @top_task is blocked on it
 434  *      @top_task                               current, or in case of proxy
 435  *                                              locking protected by calling
 436  *                                              code
 437  *      again:
 438  *        loop_sanity_check();
 439  *      retry:
 440  * [1]    lock(task->pi_lock);                  [R] acquire [P]
 441  * [2]    waiter = task->pi_blocked_on;         [P]
 442  * [3]    check_exit_conditions_1();            [P]
 443  * [4]    lock = waiter->lock;                  [P]
 444  * [5]    if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
 445  *          unlock(task->pi_lock);              release [P]
 446  *          goto retry;
 447  *        }
 448  * [6]    check_exit_conditions_2();            [P] + [L]
 449  * [7]    requeue_lock_waiter(lock, waiter);    [P] + [L]
 450  * [8]    unlock(task->pi_lock);                release [P]
 451  *        put_task_struct(task);                release [R]
 452  * [9]    check_exit_conditions_3();            [L]
 453  * [10]   task = owner(lock);                   [L]
 454  *        get_task_struct(task);                [L] acquire [R]
 455  *        lock(task->pi_lock);                  [L] acquire [P]
 456  * [11]   requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 457  * [12]   check_exit_conditions_4();            [P] + [L]
 458  * [13]   unlock(task->pi_lock);                release [P]
 459  *        unlock(lock->wait_lock);              release [L]
 460  *        goto again;
 461  */
 462 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 463                                       enum rtmutex_chainwalk chwalk,
 464                                       struct rt_mutex *orig_lock,
 465                                       struct rt_mutex *next_lock,
 466                                       struct rt_mutex_waiter *orig_waiter,
 467                                       struct task_struct *top_task)
 468 {
 469         struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
 470         struct rt_mutex_waiter *prerequeue_top_waiter;
 471         int ret = 0, depth = 0; /* depth counts passes through "again" */
 472         struct rt_mutex *lock;
 473         bool detect_deadlock;
 474         bool requeue = true;    /* false: follow the chain, no requeueing */
 475
 476         detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
 477
 478         /*
 479          * The (de)boosting is a step by step approach with a lot of
 480          * pitfalls. We want this to be preemptible and we want to hold a
 481          * maximum of two locks per step. So we have to check
 482          * carefully whether things change under us.
 483          */
 484  again:
 485         /*
 486          * We limit the lock chain length for each invocation.
 487          */
 488         if (++depth > max_lock_depth) {
 489                 static int prev_max;
 490
 491                 /*
 492                  * Print this only once. If the admin changes the limit,
 493                  * print a new message when reaching the limit again.
 494                  */
 495                 if (prev_max != max_lock_depth) {
 496                         prev_max = max_lock_depth;
 497                         printk(KERN_WARNING "Maximum lock depth %d reached "
 498                                "task: %s (%d)\n", max_lock_depth,
 499                                top_task->comm, task_pid_nr(top_task));
 500                 }
 501                 put_task_struct(task);
 502
 503                 return -EDEADLK;
 504         }
 505
 506         /*
 507          * We are fully preemptible here and only hold the refcount on
 508          * @task. So everything can have changed under us since the
 509          * caller or our own code below (goto retry/again) dropped all
 510          * locks.
 511          */
 512  retry:
 513         /*
 514          * [1] Task cannot go away as we did a get_task() before!
 515          */
 516         raw_spin_lock_irq(&task->pi_lock);
 517
 518         /*
 519          * [2] Get the waiter on which @task is blocked on.
 520          */
 521         waiter = task->pi_blocked_on;
 522
 523         /*
 524          * [3] check_exit_conditions_1() protected by task->pi_lock.
 525          */
 526
 527         /*
 528          * Check whether the end of the boosting chain has been
 529          * reached or the state of the chain has changed while we
 530          * dropped the locks.
 531          */
 532         if (!waiter)
 533                 goto out_unlock_pi;
 534
 535         /*
 536          * Check the orig_waiter state. After we dropped the locks,
 537          * the previous owner of the lock might have released the lock.
 538          */
 539         if (orig_waiter && !rt_mutex_owner(orig_lock))
 540                 goto out_unlock_pi;
 541
 542         /*
 543          * We dropped all locks after taking a refcount on @task, so
 544          * the task might have moved on in the lock chain or even left
 545          * the chain completely and blocks now on an unrelated lock or
 546          * on @orig_lock.
 547          *
 548          * We stored the lock on which @task was blocked in @next_lock,
 549          * so we can detect the chain change.
 550          */
 551         if (next_lock != waiter->lock)
 552                 goto out_unlock_pi;
 553
 554         /*
 555          * Drop out, when the task has no waiters. Note,
 556          * top_waiter can be NULL, when we are in the deboosting
 557          * mode!
 558          */
 559         if (top_waiter) {
 560                 if (!task_has_pi_waiters(task))
 561                         goto out_unlock_pi;
 562                 /*
 563                  * If deadlock detection is off, we stop here if we
 564                  * are not the top pi waiter of the task. If deadlock
 565                  * detection is enabled we continue, but stop the
 566                  * requeueing in the chain walk.
 567                  */
 568                 if (top_waiter != task_top_pi_waiter(task)) {
 569                         if (!detect_deadlock)
 570                                 goto out_unlock_pi;
 571                         else
 572                                 requeue = false;
 573                 }
 574         }
 575
 576         /*
 577          * If the waiter priority is the same as the task priority
 578          * then there is no further priority adjustment necessary.  If
 579          * deadlock detection is off, we stop the chain walk. If it's
 580          * enabled we continue, but stop the requeueing in the chain
 581          * walk.
 582          */
 583         if (waiter->prio == task->prio) {
 584                 if (!detect_deadlock)
 585                         goto out_unlock_pi;
 586                 else
 587                         requeue = false;
 588         }
 589
 590         /*
 591          * [4] Get the next lock
 592          */
 593         lock = waiter->lock;
 594         /*
 595          * [5] We need to trylock here as we are holding task->pi_lock,
 596          * which is the reverse lock order versus the other rtmutex
 597          * operations.
 598          */
 599         if (!raw_spin_trylock(&lock->wait_lock)) {
 600                 raw_spin_unlock_irq(&task->pi_lock);
 601                 cpu_relax();
 602                 goto retry;
 603         }
 604
 605         /*
 606          * [6] check_exit_conditions_2() protected by task->pi_lock and
 607          * lock->wait_lock.
 608          *
 609          * Deadlock detection. If the lock is the same as the original
 610          * lock which caused us to walk the lock chain or if the
 611          * current lock is owned by the task which initiated the chain
 612          * walk, we detected a deadlock.
 613          */
 614         if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
 615                 debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
 616                 raw_spin_unlock(&lock->wait_lock);
 617                 ret = -EDEADLK;
 618                 goto out_unlock_pi;
 619         }
 620
 621         /*
 622          * If we just follow the lock chain for deadlock detection, no
 623          * need to do all the requeue operations. To avoid a truckload
 624          * of conditionals around the various places below, just do the
 625          * minimum chain walk checks.
 626          */
 627         if (!requeue) {
 628                 /*
 629                  * No requeue[7] here. Just release @task [8]
 630                  */
 631                 raw_spin_unlock(&task->pi_lock);
 632                 put_task_struct(task);
 633
 634                 /*
 635                  * [9] check_exit_conditions_3 protected by lock->wait_lock.
 636                  * If there is no owner of the lock, end of chain.
 637                  */
 638                 if (!rt_mutex_owner(lock)) {
 639                         raw_spin_unlock_irq(&lock->wait_lock);
 640                         return 0;
 641                 }
 642
 643                 /* [10] Grab the next task, i.e. owner of @lock */
 644                 task = rt_mutex_owner(lock);
 645                 get_task_struct(task);
 646                 raw_spin_lock(&task->pi_lock);
 647
 648                 /*
 649                  * No requeue [11] here. We just do deadlock detection.
 650                  *
 651                  * [12] Store whether owner is blocked
 652                  * itself. Decision is made after dropping the locks
 653                  */
 654                 next_lock = task_blocked_on_lock(task);
 655                 /*
 656                  * Get the top waiter for the next iteration
 657                  */
 658                 top_waiter = rt_mutex_top_waiter(lock);
 659
 660                 /* [13] Drop locks */
 661                 raw_spin_unlock(&task->pi_lock);
 662                 raw_spin_unlock_irq(&lock->wait_lock);
 663
 664                 /* If owner is not blocked, end of chain. */
 665                 if (!next_lock)
 666                         goto out_put_task;
 667                 goto again;
 668         }
 669
 670         /*
 671          * Store the current top waiter before doing the requeue
 672          * operation on @lock. We need it for the boost/deboost
 673          * decision below.
 674          */
 675         prerequeue_top_waiter = rt_mutex_top_waiter(lock);
 676
 677         /* [7] Requeue the waiter in the lock waiter tree. */
 678         rt_mutex_dequeue(lock, waiter);
 679         waiter->prio = task->prio;
 680         rt_mutex_enqueue(lock, waiter);
 681
 682         /* [8] Release the task */
 683         raw_spin_unlock(&task->pi_lock);
 684         put_task_struct(task);
 685
 686         /*
 687          * [9] check_exit_conditions_3 protected by lock->wait_lock.
 688          *
 689          * We must abort the chain walk if there is no lock owner even
 690          * in the deadlock detection case, as we have nothing to
 691          * follow here. This is the end of the chain we are walking.
 692          */
 693         if (!rt_mutex_owner(lock)) {
 694                 /*
 695                  * If the requeue [7] above changed the top waiter,
 696                  * then we need to wake the new top waiter up to try
 697                  * to get the lock.
 698                  */
 699                 if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
 700                         wake_up_process(rt_mutex_top_waiter(lock)->task);
 701                 raw_spin_unlock_irq(&lock->wait_lock);
 702                 return 0;
 703         }
 704
 705         /* [10] Grab the next task, i.e. the owner of @lock */
 706         task = rt_mutex_owner(lock);
 707         get_task_struct(task);
 708         raw_spin_lock(&task->pi_lock);
 709
 710         /* [11] requeue the pi waiters if necessary */
 711         if (waiter == rt_mutex_top_waiter(lock)) {
 712                 /*
 713                  * The waiter became the new top (highest priority)
 714                  * waiter on the lock. Replace the previous top waiter
 715                  * in the owner tasks pi waiters tree with this waiter
 716                  * and adjust the priority of the owner.
 717                  */
 718                 rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
 719                 rt_mutex_enqueue_pi(task, waiter);
 720                 __rt_mutex_adjust_prio(task);
 721
 722         } else if (prerequeue_top_waiter == waiter) {
 723                 /*
 724                  * The waiter was the top waiter on the lock, but is
 725                  * no longer the top priority waiter. Replace waiter in
 726                  * the owner tasks pi waiters tree with the new top
 727                  * (highest priority) waiter and adjust the priority
 728                  * of the owner.
 729                  * The new top waiter is stored in @waiter so that
 730                  * @waiter == @top_waiter evaluates to true below and
 731                  * we continue to deboost the rest of the chain.
 732                  */
 733                 rt_mutex_dequeue_pi(task, waiter);
 734                 waiter = rt_mutex_top_waiter(lock);
 735                 rt_mutex_enqueue_pi(task, waiter);
 736                 __rt_mutex_adjust_prio(task);
 737         } else {
 738                 /*
 739                  * Nothing changed. No need to do any priority
 740                  * adjustment.
 741                  */
 742         }
 743
 744         /*
 745          * [12] check_exit_conditions_4() protected by task->pi_lock
 746          * and lock->wait_lock. The actual decisions are made after we
 747          * dropped the locks.
 748          *
 749          * Check whether the task which owns the current lock is pi
 750          * blocked itself. If yes we store a pointer to the lock for
 751          * the lock chain change detection above. After we dropped
 752          * task->pi_lock next_lock cannot be dereferenced anymore.
 753          */
 754         next_lock = task_blocked_on_lock(task);
 755         /*
 756          * Store the top waiter of @lock for the end of chain walk
 757          * decision below.
 758          */
 759         top_waiter = rt_mutex_top_waiter(lock);
 760
 761         /* [13] Drop the locks */
 762         raw_spin_unlock(&task->pi_lock);
 763         raw_spin_unlock_irq(&lock->wait_lock);
 764
 765         /*
 766          * Make the actual exit decisions [12], based on the stored
 767          * values.
 768          *
 769          * We reached the end of the lock chain. Stop right here. No
 770          * point to go back just to figure that out.
 771          */
 772         if (!next_lock)
 773                 goto out_put_task;
 774
 775         /*
 776          * If the current waiter is not the top waiter on the lock,
 777          * then we can stop the chain walk here if we are not in full
 778          * deadlock detection mode.
 779          */
 780         if (!detect_deadlock && waiter != top_waiter)
 781                 goto out_put_task;
 782
 783         goto again;
 784
 785  out_unlock_pi:
 786         raw_spin_unlock_irq(&task->pi_lock);
 787  out_put_task:
 788         put_task_struct(task);
 789
 790         return ret;
 791 }
 792
 793 /*
 794  * Try to take an rt-mutex
 795  *
 796  * Must be called with lock->wait_lock held and interrupts disabled
 797  *
 798  * @lock:   The lock to be acquired.
 799  * @task:   The task which wants to acquire the lock
 800  * @waiter: The waiter that is queued to the lock's wait tree if the
 801  *          callsite called task_blocked_on_lock(), otherwise NULL
 802  */
 803 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 804                                 struct rt_mutex_waiter *waiter)
 805 {
 806         /*
 807          * Before testing whether we can acquire @lock, we set the
 808          * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
 809          * other tasks which try to modify @lock into the slow path
 810          * and they serialize on @lock->wait_lock.
 811          *
 812          * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
 813          * as explained at the top of this file if and only if:
 814          *
 815          * - There is a lock owner. The caller must fixup the
 816          *   transient state if it does a trylock or leaves the lock
 817          *   function due to a signal or timeout.
 818          *
 819          * - @task acquires the lock and there are no other
 820          *   waiters. This is undone in rt_mutex_set_owner(@task) at
 821          *   the end of this function.
 822          */
 823         mark_rt_mutex_waiters(lock);
 824
 825         /*
 826          * If @lock has an owner, give up.
 827          */
 828         if (rt_mutex_owner(lock))
 829                 return 0;
 830
 831         /*
 832          * If @waiter != NULL, @task has already enqueued the waiter
 833          * into @lock waiter tree. If @waiter == NULL then this is a
 834          * trylock attempt.
 835          */
 836         if (waiter) {
 837                 /*
 838                  * If waiter is not the highest priority waiter of
 839                  * @lock, give up.
 840                  */
 841                 if (waiter != rt_mutex_top_waiter(lock))
 842                         return 0;
 843
 844                 /*
 845                  * We can acquire the lock. Remove the waiter from the
 846                  * lock waiters tree.
 847                  */
 848                 rt_mutex_dequeue(lock, waiter);
 849
 850         } else {
 851                 /*
 852                  * If the lock has waiters already we check whether @task is
 853                  * eligible to take over the lock.
 854                  *
 855                  * If there are no other waiters, @task can acquire
 856                  * the lock.  @task->pi_blocked_on is NULL, so it does
 857                  * not need to be dequeued.
 858                  */
 859                 if (rt_mutex_has_waiters(lock)) {
 860                         /*
 861                          * If @task->prio is greater than or equal to
 862                          * the top waiter priority (kernel view),
 863                          * @task lost.
 864                          */
 865                         if (task->prio >= rt_mutex_top_waiter(lock)->prio)
 866                                 return 0;
 867
 868                         /*
 869                          * The current top waiter stays enqueued. We
 870                          * don't have to change anything in the lock
 871                          * waiters order.
 872                          */
 873                 } else {
 874                         /*
 875                          * No waiters. Take the lock without the
 876                          * pi_lock dance.@task->pi_blocked_on is NULL
 877                          * and we have no waiters to enqueue in @task
 878                          * pi waiters tree.
 879                          */
 880                         goto takeit;
 881                 }
 882         }
 883
 884         /*
 885          * Clear @task->pi_blocked_on. Requires protection by
 886          * @task->pi_lock. Redundant operation for the @waiter == NULL
 887          * case, but conditionals are more expensive than a redundant
 888          * store.
 889          */
 890         raw_spin_lock(&task->pi_lock);
 891         task->pi_blocked_on = NULL;
 892         /*
 893          * Finish the lock acquisition. @task is the new owner. If
 894          * other waiters exist we have to insert the highest priority
 895          * waiter into @task->pi_waiters tree.
 896          */
 897         if (rt_mutex_has_waiters(lock))
 898                 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
 899         raw_spin_unlock(&task->pi_lock);
 900
 901 takeit:
 902         /* We got the lock. */
 903         debug_rt_mutex_lock(lock);
 904
 905         /*
 906          * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
 907          * are still waiters or clears it.
 908          */
 909         rt_mutex_set_owner(lock, task);
 910
 911         rt_mutex_deadlock_account_lock(lock, task);
 912
 913         return 1;
 914 }
 915
 916 /*
 917  * Task blocks on lock.
 918  *
 919  * Prepare waiter and propagate pi chain
 920  *
 921  * This must be called with lock->wait_lock held and interrupts disabled
 922  */
 923 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 924                                    struct rt_mutex_waiter *waiter,
 925                                    struct task_struct *task,
 926                                    enum rtmutex_chainwalk chwalk)
 927 {
 928         struct task_struct *owner = rt_mutex_owner(lock);
 929         struct rt_mutex_waiter *top_waiter = waiter;
 930         struct rt_mutex *next_lock;
 931         int chain_walk = 0, res;
 932
 933         /*
 934          * Early deadlock detection. We really don't want the task to
 935          * enqueue on itself just to untangle the mess later. It's not
 936          * only an optimization. We drop the locks, so another waiter
 937          * can come in before the chain walk detects the deadlock. So
 938          * the other will detect the deadlock and return -EDEADLOCK,
 939          * which is wrong, as the other waiter is not in a deadlock
 940          * situation.
 941          */
 942         if (owner == task)
 943                 return -EDEADLK;
 944
 945         raw_spin_lock(&task->pi_lock);
 946         __rt_mutex_adjust_prio(task);
 947         waiter->task = task;
 948         waiter->lock = lock;
 949         waiter->prio = task->prio;
 950
 951         /* Get the top priority waiter on the lock */
 952         if (rt_mutex_has_waiters(lock))
 953                 top_waiter = rt_mutex_top_waiter(lock);
 954         rt_mutex_enqueue(lock, waiter);
 955
 956         task->pi_blocked_on = waiter;
 957
 958         raw_spin_unlock(&task->pi_lock);
 959
 960         if (!owner)
 961                 return 0;
 962
 963         raw_spin_lock(&owner->pi_lock);
 964         if (waiter == rt_mutex_top_waiter(lock)) {
 965                 rt_mutex_dequeue_pi(owner, top_waiter);
 966                 rt_mutex_enqueue_pi(owner, waiter);
 967
 968                 __rt_mutex_adjust_prio(owner);
 969                 if (owner->pi_blocked_on)
 970                         chain_walk = 1;
 971         } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
 972                 chain_walk = 1;
 973         }
 974
 975         /* Store the lock on which owner is blocked or NULL */
 976         next_lock = task_blocked_on_lock(owner);
 977
 978         raw_spin_unlock(&owner->pi_lock);
 979         /*
 980          * Even if full deadlock detection is on, if the owner is not
 981          * blocked itself, we can avoid finding this out in the chain
 982          * walk.
 983          */
 984         if (!chain_walk || !next_lock)
 985                 return 0;
 986
 987         /*
 988          * The owner can't disappear while holding a lock,
 989          * so the owner struct is protected by wait_lock.
 990          * Gets dropped in rt_mutex_adjust_prio_chain()!
 991          */
 992         get_task_struct(owner);
 993
 994         raw_spin_unlock_irq(&lock->wait_lock);
 995
 996         res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
 997                                          next_lock, waiter, task);
 998
 999         raw_spin_lock_irq(&lock->wait_lock);
1000
1001         return res;
1002 }
1003
1004 /*
1005  * Remove the top waiter from the current tasks pi waiter tree and
1006  * queue it up.
1007  *
1008  * Called with lock->wait_lock held and interrupts disabled.
1009  */
1010 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
1011                                     struct rt_mutex *lock)
1012 {
1013         struct rt_mutex_waiter *waiter;
1014
1015         raw_spin_lock(&current->pi_lock);
1016
1017         waiter = rt_mutex_top_waiter(lock);
1018
1019         /*
1020          * Remove it from current->pi_waiters. We do not adjust a
1021          * possible priority boost right now. We execute wakeup in the
1022          * boosted mode and go back to normal after releasing
1023          * lock->wait_lock.
1024          */
1025         rt_mutex_dequeue_pi(current, waiter);
1026
1027         /*
1028          * As we are waking up the top waiter, and the waiter stays
1029          * queued on the lock until it gets the lock, this lock
1030          * obviously has waiters. Just set the bit here and this has
1031          * the added benefit of forcing all new tasks into the
1032          * slow path making sure no task of lower priority than
1033          * the top waiter can steal this lock.
1034          */
1035         lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1036
1037         raw_spin_unlock(&current->pi_lock);
1038
1039         wake_q_add(wake_q, waiter->task);
1040 }
1041
1042 /*
1043  * Remove a waiter from a lock and give up
1044  *
1045  * Must be called with lock->wait_lock held and interrupts disabled. I must
1046  * have just failed to try_to_take_rt_mutex().
1047  */
1048 static void remove_waiter(struct rt_mutex *lock,
1049                           struct rt_mutex_waiter *waiter)
1050 {
1051         bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
1052         struct task_struct *owner = rt_mutex_owner(lock);
1053         struct rt_mutex *next_lock;
1054
1055         raw_spin_lock(&current->pi_lock);
1056         rt_mutex_dequeue(lock, waiter);
1057         current->pi_blocked_on = NULL;
1058         raw_spin_unlock(&current->pi_lock);
1059
1060         /*
1061          * Only update priority if the waiter was the highest priority
1062          * waiter of the lock and there is an owner to update.
1063          */
1064         if (!owner || !is_top_waiter)
1065                 return;
1066
1067         raw_spin_lock(&owner->pi_lock);
1068
1069         rt_mutex_dequeue_pi(owner, waiter);
1070
1071         if (rt_mutex_has_waiters(lock))
1072                 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1073
1074         __rt_mutex_adjust_prio(owner);
1075
1076         /* Store the lock on which owner is blocked or NULL */
1077         next_lock = task_blocked_on_lock(owner);
1078
1079         raw_spin_unlock(&owner->pi_lock);
1080
1081         /*
1082          * Don't walk the chain, if the owner task is not blocked
1083          * itself.
1084          */
1085         if (!next_lock)
1086                 return;
1087
1088         /* gets dropped in rt_mutex_adjust_prio_chain()! */
1089         get_task_struct(owner);
1090
1091         raw_spin_unlock_irq(&lock->wait_lock);
1092
1093         rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1094                                    next_lock, NULL, current);
1095
1096         raw_spin_lock_irq(&lock->wait_lock);
1097 }
1098
1099 /*
1100  * Recheck the pi chain, in case we got a priority setting
1101  *
1102  * Called from sched_setscheduler
1103  */
1104 void rt_mutex_adjust_pi(struct task_struct *task)
1105 {
1106         struct rt_mutex_waiter *waiter;
1107         struct rt_mutex *next_lock;
1108         unsigned long flags;
1109
1110         raw_spin_lock_irqsave(&task->pi_lock, flags);
1111
1112         waiter = task->pi_blocked_on;
1113         if (!waiter || (waiter->prio == task->prio &&
1114                         !dl_prio(task->prio))) {
1115                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
1116                 return;
1117         }
1118         next_lock = waiter->lock;
1119         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
1120
1121         /* gets dropped in rt_mutex_adjust_prio_chain()! */
1122         get_task_struct(task);
1123
1124         rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
1125                                    next_lock, NULL, task);
1126 }
1127
1128 /**
1129  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
1130  * @lock:                the rt_mutex to take
1131  * @state:               the state the task should block in (TASK_INTERRUPTIBLE
1132  *                       or TASK_UNINTERRUPTIBLE)
1133  * @timeout:             the pre-initialized and started timer, or NULL for none
1134  * @waiter:              the pre-initialized rt_mutex_waiter
1135  *
1136  * Must be called with lock->wait_lock held and interrupts disabled
1137  */
1138 static int __sched
1139 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
1140                     struct hrtimer_sleeper *timeout,
1141                     struct rt_mutex_waiter *waiter)
1142 {
1143         int ret = 0;
1144
1145         for (;;) {
1146                 /* Try to acquire the lock: */
1147                 if (try_to_take_rt_mutex(lock, current, waiter))
1148                         break;
1149
1150                 /*
1151                  * TASK_INTERRUPTIBLE checks for signals and
1152                  * timeout. Ignored otherwise.
1153                  */
1154                 if (unlikely(state == TASK_INTERRUPTIBLE)) {
1155                         /* Signal pending? */
1156                         if (signal_pending(current))
1157                                 ret = -EINTR;
1158                         if (timeout && !timeout->task)
1159                                 ret = -ETIMEDOUT;
1160                         if (ret)
1161                                 break;
1162                 }
1163
1164                 raw_spin_unlock_irq(&lock->wait_lock);
1165
1166                 debug_rt_mutex_print_deadlock(waiter);
1167
1168                 schedule();
1169
1170                 raw_spin_lock_irq(&lock->wait_lock);
1171                 set_current_state(state);
1172         }
1173
1174         __set_current_state(TASK_RUNNING);
1175         return ret;
1176 }
1177
1178 static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
1179                                      struct rt_mutex_waiter *w)
1180 {
1181         /*
1182          * If the result is not -EDEADLOCK or the caller requested
1183          * deadlock detection, nothing to do here.
1184          */
1185         if (res != -EDEADLOCK || detect_deadlock)
1186                 return;
1187
1188         /*
1189          * Yell lowdly and stop the task right here.
1190          */
1191         rt_mutex_print_deadlock(w);
1192         while (1) {
1193                 set_current_state(TASK_INTERRUPTIBLE);
1194                 schedule();
1195         }
1196 }
1197
1198 /*
1199  * Slow path lock function:
1200  */
1201 static int __sched
1202 rt_mutex_slowlock(struct rt_mutex *lock, int state,
1203                   struct hrtimer_sleeper *timeout,
1204                   enum rtmutex_chainwalk chwalk)
1205 {
1206         struct rt_mutex_waiter waiter;
1207         unsigned long flags;
1208         int ret = 0;
1209
1210         debug_rt_mutex_init_waiter(&waiter);
1211         RB_CLEAR_NODE(&waiter.pi_tree_entry);
1212         RB_CLEAR_NODE(&waiter.tree_entry);
1213
1214         /*
1215          * Technically we could use raw_spin_[un]lock_irq() here, but this can
1216          * be called in early boot if the cmpxchg() fast path is disabled
1217          * (debug, no architecture support). In this case we will acquire the
1218          * rtmutex with lock->wait_lock held. But we cannot unconditionally
1219          * enable interrupts in that early boot case. So we need to use the
1220          * irqsave/restore variants.
1221          */
1222         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1223
1224         /* Try to acquire the lock again: */
1225         if (try_to_take_rt_mutex(lock, current, NULL)) {
1226                 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1227                 return 0;
1228         }
1229
1230         set_current_state(state);
1231
1232         /* Setup the timer, when timeout != NULL */
1233         if (unlikely(timeout))
1234                 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1235
1236         ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
1237
1238         if (likely(!ret))
1239                 /* sleep on the mutex */
1240                 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
1241
1242         if (unlikely(ret)) {
1243                 __set_current_state(TASK_RUNNING);
1244                 if (rt_mutex_has_waiters(lock))
1245                         remove_waiter(lock, &waiter);
1246                 rt_mutex_handle_deadlock(ret, chwalk, &waiter);
1247         }
1248
1249         /*
1250          * try_to_take_rt_mutex() sets the waiter bit
1251          * unconditionally. We might have to fix that up.
1252          */
1253         fixup_rt_mutex_waiters(lock);
1254
1255         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1256
1257         /* Remove pending timer: */
1258         if (unlikely(timeout))
1259                 hrtimer_cancel(&timeout->timer);
1260
1261         debug_rt_mutex_free_waiter(&waiter);
1262
1263         return ret;
1264 }
1265
1266 /*
1267  * Slow path try-lock function:
1268  */
1269 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
1270 {
1271         unsigned long flags;
1272         int ret;
1273
1274         /*
1275          * If the lock already has an owner we fail to get the lock.
1276          * This can be done without taking the @lock->wait_lock as
1277          * it is only being read, and this is a trylock anyway.
1278          */
1279         if (rt_mutex_owner(lock))
1280                 return 0;
1281
1282         /*
1283          * The mutex has currently no owner. Lock the wait lock and try to
1284          * acquire the lock. We use irqsave here to support early boot calls.
1285          */
1286         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1287
1288         ret = try_to_take_rt_mutex(lock, current, NULL);
1289
1290         /*
1291          * try_to_take_rt_mutex() sets the lock waiters bit
1292          * unconditionally. Clean this up.
1293          */
1294         fixup_rt_mutex_waiters(lock);
1295
1296         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1297
1298         return ret;
1299 }
1300
1301 /*
1302  * Slow path to release a rt-mutex.
1303  * Return whether the current task needs to undo a potential priority boosting.
1304  */
1305 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
1306                                         struct wake_q_head *wake_q)
1307 {
1308         unsigned long flags;
1309
1310         /* irqsave required to support early boot calls */
1311         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1312
1313         debug_rt_mutex_unlock(lock);
1314
1315         rt_mutex_deadlock_account_unlock(current);
1316
1317         /*
1318          * We must be careful here if the fast path is enabled. If we
1319          * have no waiters queued we cannot set owner to NULL here
1320          * because of:
1321          *
1322          * foo->lock->owner = NULL;
1323          *                      rtmutex_lock(foo->lock);   <- fast path
1324          *                      free = atomic_dec_and_test(foo->refcnt);
1325          *                      rtmutex_unlock(foo->lock); <- fast path
1326          *                      if (free)
1327          *                              kfree(foo);
1328          * raw_spin_unlock(foo->lock->wait_lock);
1329          *
1330          * So for the fastpath enabled kernel:
1331          *
1332          * Nothing can set the waiters bit as long as we hold
1333          * lock->wait_lock. So we do the following sequence:
1334          *
1335          *      owner = rt_mutex_owner(lock);
1336          *      clear_rt_mutex_waiters(lock);
1337          *      raw_spin_unlock(&lock->wait_lock);
1338          *      if (cmpxchg(&lock->owner, owner, 0) == owner)
1339          *              return;
1340          *      goto retry;
1341          *
1342          * The fastpath disabled variant is simple as all access to
1343          * lock->owner is serialized by lock->wait_lock:
1344          *
1345          *      lock->owner = NULL;
1346          *      raw_spin_unlock(&lock->wait_lock);
1347          */
1348         while (!rt_mutex_has_waiters(lock)) {
1349                 /* Drops lock->wait_lock ! */
1350                 if (unlock_rt_mutex_safe(lock, flags) == true)
1351                         return false;
1352                 /* Relock the rtmutex and try again */
1353                 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1354         }
1355
1356         /*
1357          * The wakeup next waiter path does not suffer from the above
1358          * race. See the comments there.
1359          *
1360          * Queue the next waiter for wakeup once we release the wait_lock.
1361          */
1362         mark_wakeup_next_waiter(wake_q, lock);
1363
1364         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1365
1366         /* check PI boosting */
1367         return true;
1368 }
1369
1370 /*
1371  * debug aware fast / slowpath lock,trylock,unlock
1372  *
1373  * The atomic acquire/release ops are compiled away, when either the
1374  * architecture does not support cmpxchg or when debugging is enabled.
1375  */
1376 static inline int
1377 rt_mutex_fastlock(struct rt_mutex *lock, int state,
1378                   int (*slowfn)(struct rt_mutex *lock, int state,
1379                                 struct hrtimer_sleeper *timeout,
1380                                 enum rtmutex_chainwalk chwalk))
1381 {
1382         if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
1383                 rt_mutex_deadlock_account_lock(lock, current);
1384                 return 0;
1385         } else
1386                 return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
1387 }
1388
1389 static inline int
1390 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
1391                         struct hrtimer_sleeper *timeout,
1392                         enum rtmutex_chainwalk chwalk,
1393                         int (*slowfn)(struct rt_mutex *lock, int state,
1394                                       struct hrtimer_sleeper *timeout,
1395                                       enum rtmutex_chainwalk chwalk))
1396 {
1397         if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
1398             likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
1399                 rt_mutex_deadlock_account_lock(lock, current);
1400                 return 0;
1401         } else
1402                 return slowfn(lock, state, timeout, chwalk);
1403 }
1404
1405 static inline int
1406 rt_mutex_fasttrylock(struct rt_mutex *lock,
1407                      int (*slowfn)(struct rt_mutex *lock))
1408 {
1409         if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
1410                 rt_mutex_deadlock_account_lock(lock, current);
1411                 return 1;
1412         }
1413         return slowfn(lock);
1414 }
1415
1416 static inline void
1417 rt_mutex_fastunlock(struct rt_mutex *lock,
1418                     bool (*slowfn)(struct rt_mutex *lock,
1419                                    struct wake_q_head *wqh))
1420 {
1421         WAKE_Q(wake_q);
1422
1423         if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
1424                 rt_mutex_deadlock_account_unlock(current);
1425
1426         } else {
1427                 bool deboost = slowfn(lock, &wake_q);
1428
1429                 wake_up_q(&wake_q);
1430
1431                 /* Undo pi boosting if necessary: */
1432                 if (deboost)
1433                         rt_mutex_adjust_prio(current);
1434         }
1435 }
1436
1437 /**
1438  * rt_mutex_lock - lock a rt_mutex
1439  *
1440  * @lock: the rt_mutex to be locked
1441  */
1442 void __sched rt_mutex_lock(struct rt_mutex *lock)
1443 {
1444         might_sleep();
1445
1446         rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
1447 }
1448 EXPORT_SYMBOL_GPL(rt_mutex_lock);
1449
1450 /**
1451  * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
1452  *
1453  * @lock:               the rt_mutex to be locked
1454  *
1455  * Returns:
1456  *  0           on success
1457  * -EINTR       when interrupted by a signal
1458  */
1459 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
1460 {
1461         might_sleep();
1462
1463         return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
1464 }
1465 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
1466
1467 /*
1468  * Futex variant with full deadlock detection.
1469  */
1470 int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
1471                               struct hrtimer_sleeper *timeout)
1472 {
1473         might_sleep();
1474
1475         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
1476                                        RT_MUTEX_FULL_CHAINWALK,
1477                                        rt_mutex_slowlock);
1478 }
1479
1480 /**
1481  * rt_mutex_timed_lock - lock a rt_mutex interruptible
1482  *                      the timeout structure is provided
1483  *                      by the caller
1484  *
1485  * @lock:               the rt_mutex to be locked
1486  * @timeout:            timeout structure or NULL (no timeout)
1487  *
1488  * Returns:
1489  *  0           on success
1490  * -EINTR       when interrupted by a signal
1491  * -ETIMEDOUT   when the timeout expired
1492  */
1493 int
1494 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
1495 {
1496         might_sleep();
1497
1498         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
1499                                        RT_MUTEX_MIN_CHAINWALK,
1500                                        rt_mutex_slowlock);
1501 }
1502 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
1503
1504 /**
1505  * rt_mutex_trylock - try to lock a rt_mutex
1506  *
1507  * @lock:       the rt_mutex to be locked
1508  *
1509  * This function can only be called in thread context. It's safe to
1510  * call it from atomic regions, but not from hard interrupt or soft
1511  * interrupt context.
1512  *
1513  * Returns 1 on success and 0 on contention
1514  */
1515 int __sched rt_mutex_trylock(struct rt_mutex *lock)
1516 {
1517         if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
1518                 return 0;
1519
1520         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
1521 }
1522 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
1523
1524 /**
1525  * rt_mutex_unlock - unlock a rt_mutex
1526  *
1527  * @lock: the rt_mutex to be unlocked
1528  */
1529 void __sched rt_mutex_unlock(struct rt_mutex *lock)
1530 {
1531         rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
1532 }
1533 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
1534
1535 /**
1536  * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
1537  * @lock: the rt_mutex to be unlocked
1538  *
1539  * Returns: true/false indicating whether priority adjustment is
1540  * required or not.
1541  */
1542 bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
1543                                    struct wake_q_head *wqh)
1544 {
1545         if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
1546                 rt_mutex_deadlock_account_unlock(current);
1547                 return false;
1548         }
1549         return rt_mutex_slowunlock(lock, wqh);
1550 }
1551
/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called; a locked mutex triggers a warning.
 *
 * The magic field is cleared only with CONFIG_DEBUG_RT_MUTEXES.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
        WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
        lock->magic = NULL;
#endif
}

EXPORT_SYMBOL_GPL(rt_mutex_destroy);
1569
1570 /**
1571  * __rt_mutex_init - initialize the rt lock
1572  *
1573  * @lock: the rt lock to be initialized
1574  *
1575  * Initialize the rt lock to unlocked state.
1576  *
1577  * Initializing of a locked rt lock is not allowed
1578  */
1579 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
1580 {
1581         lock->owner = NULL;
1582         raw_spin_lock_init(&lock->wait_lock);
1583         lock->waiters = RB_ROOT;
1584         lock->waiters_leftmost = NULL;
1585
1586         debug_rt_mutex_init(lock, name);
1587 }
1588 EXPORT_SYMBOL_GPL(__rt_mutex_init);
1589
1590 /**
1591  * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
1592  *                              proxy owner
1593  *
1594  * @lock:       the rt_mutex to be locked
1595  * @proxy_owner:the task to set as owner
1596  *
1597  * No locking. Caller has to do serializing itself
1598  * Special API call for PI-futex support
1599  */
1600 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
1601                                 struct task_struct *proxy_owner)
1602 {
1603         __rt_mutex_init(lock, NULL);
1604         debug_rt_mutex_proxy_lock(lock, proxy_owner);
1605         rt_mutex_set_owner(lock, proxy_owner);
1606         rt_mutex_deadlock_account_lock(lock, proxy_owner);
1607 }
1608
1609 /**
1610  * rt_mutex_proxy_unlock - release a lock on behalf of owner
1611  *
1612  * @lock:       the rt_mutex to be locked
1613  *
1614  * No locking. Caller has to do serializing itself
1615  * Special API call for PI-futex support
1616  */
1617 void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1618                            struct task_struct *proxy_owner)
1619 {
1620         debug_rt_mutex_proxy_unlock(lock);
1621         rt_mutex_set_owner(lock, NULL);
1622         rt_mutex_deadlock_account_unlock(proxy_owner);
1623 }
1624
1625 /**
1626  * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1627  * @lock:               the rt_mutex to take
1628  * @waiter:             the pre-initialized rt_mutex_waiter
1629  * @task:               the task to prepare
1630  *
1631  * Returns:
1632  *  0 - task blocked on lock
1633  *  1 - acquired the lock for task, caller should wake it up
1634  * <0 - error
1635  *
1636  * Special API call for FUTEX_REQUEUE_PI support.
1637  */
1638 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1639                               struct rt_mutex_waiter *waiter,
1640                               struct task_struct *task)
1641 {
1642         int ret;
1643
1644         raw_spin_lock_irq(&lock->wait_lock);
1645
1646         if (try_to_take_rt_mutex(lock, task, NULL)) {
1647                 raw_spin_unlock_irq(&lock->wait_lock);
1648                 return 1;
1649         }
1650
1651         /* We enforce deadlock detection for futexes */
1652         ret = task_blocks_on_rt_mutex(lock, waiter, task,
1653                                       RT_MUTEX_FULL_CHAINWALK);
1654
1655         if (ret && !rt_mutex_owner(lock)) {
1656                 /*
1657                  * Reset the return value. We might have
1658                  * returned with -EDEADLK and the owner
1659                  * released the lock while we were walking the
1660                  * pi chain.  Let the waiter sort it out.
1661                  */
1662                 ret = 0;
1663         }
1664
1665         if (unlikely(ret))
1666                 remove_waiter(lock, waiter);
1667
1668         raw_spin_unlock_irq(&lock->wait_lock);
1669
1670         debug_rt_mutex_print_deadlock(waiter);
1671
1672         return ret;
1673 }
1674
1675 /**
1676  * rt_mutex_next_owner - return the next owner of the lock
1677  *
1678  * @lock: the rt lock query
1679  *
1680  * Returns the next owner of the lock or NULL
1681  *
1682  * Caller has to serialize against other accessors to the lock
1683  * itself.
1684  *
1685  * Special API call for PI-futex support
1686  */
1687 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1688 {
1689         if (!rt_mutex_has_waiters(lock))
1690                 return NULL;
1691
1692         return rt_mutex_top_waiter(lock)->task;
1693 }
1694
1695 /**
1696  * rt_mutex_finish_proxy_lock() - Complete lock acquisition
1697  * @lock:               the rt_mutex we were woken on
1698  * @to:                 the timeout, null if none. hrtimer should already have
1699  *                      been started.
1700  * @waiter:             the pre-initialized rt_mutex_waiter
1701  *
1702  * Complete the lock acquisition started our behalf by another thread.
1703  *
1704  * Returns:
1705  *  0 - success
1706  * <0 - error, one of -EINTR, -ETIMEDOUT
1707  *
1708  * Special API call for PI-futex requeue support
1709  */
1710 int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1711                                struct hrtimer_sleeper *to,
1712                                struct rt_mutex_waiter *waiter)
1713 {
1714         int ret;
1715
1716         raw_spin_lock_irq(&lock->wait_lock);
1717
1718         set_current_state(TASK_INTERRUPTIBLE);
1719
1720         /* sleep on the mutex */
1721         ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1722
1723         if (unlikely(ret))
1724                 remove_waiter(lock, waiter);
1725
1726         /*
1727          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1728          * have to fix that up.
1729          */
1730         fixup_rt_mutex_waiters(lock);
1731
1732         raw_spin_unlock_irq(&lock->wait_lock);
1733
1734         return ret;
1735 }