locking/ww_mutex: Optimize ww-mutexes by waking at most one waiter for backoff when acquiring the lock

The wait list is sorted by stamp order, and the only waiting task that may
have to back off is the first waiter with a context.

The regular slow path does not have to wake any other tasks at all, since
all other waiters that would have to back off were either woken up when
the waiter was added to the list, or detected the condition before they
added themselves.

Median timings taken of a contention-heavy GPU workload:

Without this series:

  real    0m59.900s
  user    0m7.516s
  sys     2m16.076s

With changes up to and including this patch:

  real    0m52.946s
  user    0m7.272s
  sys     1m55.964s

Signed-off-by: Nicolai Hähnle <Nicolai.Haehnle@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1482346000-9927-9-git-send-email-nhaehnle@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Nicolai Hähnle 2016-12-21 19:46:36 +01:00 committed by Ingo Molnar
parent 200b187440
commit 659cf9f582

View File

@ -288,6 +288,36 @@ __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
(a->stamp != b->stamp || a > b);
}
/*
* Wake up any waiters that may have to back off when the lock is held by the
* given context.
*
* Due to the invariants on the wait list, this can only affect the first
* waiter with a context.
*
* The current task must not be on the wait list.
*/
static void __sched
__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
{
struct mutex_waiter *cur;
lockdep_assert_held(&lock->wait_lock);
list_for_each_entry(cur, &lock->wait_list, list) {
if (!cur->ww_ctx)
continue;
if (cur->ww_ctx->acquired > 0 &&
__ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
debug_mutex_wake_waiter(lock, cur);
wake_up_process(cur->task);
}
break;
}
}
/*
* After acquiring lock with fastpath or when we lost out in contested
* slowpath, set ctx and wake up any waiters so they can recheck.
@ -297,7 +327,6 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
unsigned long flags;
struct mutex_waiter *cur;
ww_mutex_lock_acquired(lock, ctx);
@ -323,16 +352,15 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
* so they can see the new lock->ctx.
*/
spin_lock_mutex(&lock->base.wait_lock, flags);
list_for_each_entry(cur, &lock->base.wait_list, list) {
debug_mutex_wake_waiter(&lock->base, cur);
wake_up_process(cur->task);
}
__ww_mutex_wakeup_for_backoff(&lock->base, ctx);
spin_unlock_mutex(&lock->base.wait_lock, flags);
}
/*
* After acquiring lock in the slowpath set ctx and wake up any
* waiters so they can recheck.
* After acquiring lock in the slowpath set ctx.
*
* Unlike for the fast path, the caller ensures that waiters are woken up where
* necessary.
*
* Callers must hold the mutex wait_lock.
*/
@ -340,19 +368,8 @@ static __always_inline void
ww_mutex_set_context_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
struct mutex_waiter *cur;
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
/*
* Give any possible sleeping processes the chance to wake up,
* so they can recheck if they have to back off.
*/
list_for_each_entry(cur, &lock->base.wait_list, list) {
debug_mutex_wake_waiter(&lock->base, cur);
wake_up_process(cur->task);
}
}
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@ -719,8 +736,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
/*
* After waiting to acquire the wait_lock, try again.
*/
if (__mutex_trylock(lock))
if (__mutex_trylock(lock)) {
if (use_ww_ctx && ww_ctx)
__ww_mutex_wakeup_for_backoff(lock, ww_ctx);
goto skip_wait;
}
debug_mutex_lock_common(lock, &waiter);
debug_mutex_add_waiter(lock, &waiter, current);