locking/rwsem: Implement lock handoff to prevent lock starvation
Because of writer lock stealing, it is possible that a constant stream of
incoming writers will cause a waiting writer or reader to wait indefinitely
leading to lock starvation.

This patch implements a lock handoff mechanism to disable lock stealing and
force lock handoff to the first waiter or waiters (for readers) in the queue
after at least a 4ms waiting period unless it is a RT writer task which
doesn't need to wait. The waiting period is used to avoid discouraging lock
stealing too much to affect performance.

The setting and clearing of the handoff bit is serialized by the wait_lock.
So racing is not possible.

A rwsem microbenchmark was run for 5 seconds on a 2-socket 40-core 80-thread
Skylake system with a v5.1 based kernel and 240 write_lock threads with 5us
sleep critical section.

Before the patch, the min/mean/max numbers of locking operations for the
locking threads were 1/7,792/173,696. After the patch, the figures became
5,842/6,542/7,458. It can be seen that the rwsem became much more fair,
though there was a drop of about 16% in the mean locking operations done
which was a tradeoff of having better fairness.

Making the waiter set the handoff bit right after the first wakeup can
impact performance especially with a mixed reader/writer workload. With
the same microbenchmark with short critical section and equal number of
reader and writer threads (40/40), the reader/writer locking operation
counts with the current patch were:

  40 readers, Iterations Min/Mean/Max = 1,793/1,794/1,796
  40 writers, Iterations Min/Mean/Max = 1,793/34,956/86,081

By making waiter set handoff bit immediately after wakeup:

  40 readers, Iterations Min/Mean/Max = 43/44/46
  40 writers, Iterations Min/Mean/Max = 43/1,263/3,191

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: huang ying <huang.ying.caritas@gmail.com>
Link: https://lkml.kernel.org/r/20190520205918.22251-8-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
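For orientation, a minimal sketch of the count-word bits and the handoff wait
threshold that this patch introduces. It paraphrases the definitions added to
kernel/locking/rwsem.c (shown in the diff below); the comments and the 4ms
arithmetic note are mine, not part of the patch:

	/* Sketch only: rwsem count-word layout after this patch. */
	#define RWSEM_WRITER_LOCKED	(1UL << 0)	/* Bit 0: a writer holds the lock */
	#define RWSEM_FLAG_WAITERS	(1UL << 1)	/* Bit 1: waiters are queued */
	#define RWSEM_FLAG_HANDOFF	(1UL << 2)	/* Bit 2: lock handoff requested (new) */
	#define RWSEM_READER_SHIFT	8		/* Bits 8 and up: reader count */

	/*
	 * Minimum time a waiter must spend in the wait queue before it may
	 * request a handoff: HZ/250 jiffies rounded up, i.e. about 4ms
	 * (1 jiffy at HZ=250, 4 jiffies at HZ=1000).
	 */
	#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)

A waiter that has queued for longer than RWSEM_WAIT_TIMEOUT (or an RT writer,
which skips the wait) sets RWSEM_FLAG_HANDOFF under wait_lock; while that bit
is set, lock stealing and optimistic spinning are disabled so the lock is
handed to the waiter(s) at the head of the queue.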
Parent: 3f6d517a3e
Commit: 4f23dbc1e6
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -61,5 +61,7 @@ LOCK_EVENT(rwsem_opt_fail)	/* # of failed opt-spinnings		*/
 LOCK_EVENT(rwsem_rlock)		/* # of read locks acquired		*/
 LOCK_EVENT(rwsem_rlock_fast)	/* # of fast read locks acquired	*/
 LOCK_EVENT(rwsem_rlock_fail)	/* # of failed read lock acquisitions	*/
+LOCK_EVENT(rwsem_rlock_handoff)	/* # of read lock handoffs		*/
 LOCK_EVENT(rwsem_wlock)		/* # of write locks acquired		*/
 LOCK_EVENT(rwsem_wlock_fail)	/* # of failed write lock acquisitions	*/
+LOCK_EVENT(rwsem_wlock_handoff)	/* # of write lock handoffs		*/
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -10,8 +10,9 @@
  * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
  * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
  *
- * Rwsem count bit fields re-definition and rwsem rearchitecture
- * by Waiman Long <longman@redhat.com>.
+ * Rwsem count bit fields re-definition and rwsem rearchitecture by
+ * Waiman Long <longman@redhat.com> and
+ * Peter Zijlstra <peterz@infradead.org>.
  */
 
 #include <linux/types.h>
@@ -74,20 +75,33 @@
  *
  * Bit  0    - writer locked bit
  * Bit  1    - waiters present bit
- * Bits 2-7  - reserved
+ * Bit  2    - lock handoff bit
+ * Bits 3-7  - reserved
  * Bits 8-X  - 24-bit (32-bit) or 56-bit reader count
  *
  * atomic_long_fetch_add() is used to obtain reader lock, whereas
  * atomic_long_cmpxchg() will be used to obtain writer lock.
+ *
+ * There are three places where the lock handoff bit may be set or cleared.
+ * 1) rwsem_mark_wake() for readers.
+ * 2) rwsem_try_write_lock() for writers.
+ * 3) Error path of rwsem_down_write_slowpath().
+ *
+ * For all the above cases, wait_lock will be held. A writer must also
+ * be the first one in the wait_list to be eligible for setting the handoff
+ * bit. So concurrent setting/clearing of handoff bit is not possible.
  */
 #define RWSEM_WRITER_LOCKED	(1UL << 0)
 #define RWSEM_FLAG_WAITERS	(1UL << 1)
+#define RWSEM_FLAG_HANDOFF	(1UL << 2)
 
 #define RWSEM_READER_SHIFT	8
 #define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
 #define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
 #define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
 #define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
-#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS)
+#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
+				 RWSEM_FLAG_HANDOFF)
 
 /*
  * All writes to owner are protected by WRITE_ONCE() to make sure that
@@ -216,7 +230,10 @@ struct rwsem_waiter {
 	struct list_head list;
 	struct task_struct *task;
 	enum rwsem_waiter_type type;
+	unsigned long timeout;
 };
+#define rwsem_first_waiter(sem) \
+	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
 
 enum rwsem_wake_type {
 	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
@@ -224,6 +241,19 @@ enum rwsem_wake_type {
 	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
 };
 
+enum writer_wait_state {
+	WRITER_NOT_FIRST,	/* Writer is not first in wait list */
+	WRITER_FIRST,		/* Writer is first in wait list     */
+	WRITER_HANDOFF		/* Writer is first & handoff needed */
+};
+
+/*
+ * The typical HZ value is either 250 or 1000. So set the minimum waiting
+ * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
+ * queue before initiating the handoff protocol.
+ */
+#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)
+
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
@@ -244,11 +274,13 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 	long oldcount, woken = 0, adjustment = 0;
 	struct list_head wlist;
 
+	lockdep_assert_held(&sem->wait_lock);
+
 	/*
 	 * Take a peek at the queue head waiter such that we can determine
 	 * the wakeup(s) to perform.
 	 */
-	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
+	waiter = rwsem_first_waiter(sem);
 
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
@@ -275,7 +307,18 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 		adjustment = RWSEM_READER_BIAS;
 		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
 		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
-			atomic_long_sub(adjustment, &sem->count);
+			/*
+			 * When we've been waiting "too" long (for writers
+			 * to give up the lock), request a HANDOFF to
+			 * force the issue.
+			 */
+			if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
+			    time_after(jiffies, waiter->timeout)) {
+				adjustment -= RWSEM_FLAG_HANDOFF;
+				lockevent_inc(rwsem_rlock_handoff);
+			}
+
+			atomic_long_add(-adjustment, &sem->count);
 			return;
 		}
 		/*
@@ -317,6 +360,13 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 		adjustment -= RWSEM_FLAG_WAITERS;
 	}
 
+	/*
+	 * When we've woken a reader, we no longer need to force writers
+	 * to give up the lock and we can clear HANDOFF.
+	 */
+	if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
+		adjustment -= RWSEM_FLAG_HANDOFF;
+
 	if (adjustment)
 		atomic_long_add(adjustment, &sem->count);
 
@@ -346,23 +396,48 @@
  * This function must be called with the sem->wait_lock held to prevent
  * race conditions between checking the rwsem wait list and setting the
  * sem->count accordingly.
+ *
+ * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
+ * bit is set or the lock is acquired with handoff bit cleared.
  */
-static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
+static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem,
+					enum writer_wait_state wstate)
 {
 	long new;
 
-	if (count & RWSEM_LOCK_MASK)
-		return false;
-
-	new = count + RWSEM_WRITER_LOCKED -
-	      (list_is_singular(&sem->wait_list) ? RWSEM_FLAG_WAITERS : 0);
-
-	if (atomic_long_try_cmpxchg_acquire(&sem->count, &count, new)) {
-		rwsem_set_owner(sem);
-		return true;
-	}
-
-	return false;
+	lockdep_assert_held(&sem->wait_lock);
+
+	do {
+		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
+
+		if (has_handoff && wstate == WRITER_NOT_FIRST)
+			return false;
+
+		new = count;
+
+		if (count & RWSEM_LOCK_MASK) {
+			if (has_handoff || (wstate != WRITER_HANDOFF))
+				return false;
+
+			new |= RWSEM_FLAG_HANDOFF;
+		} else {
+			new |= RWSEM_WRITER_LOCKED;
+			new &= ~RWSEM_FLAG_HANDOFF;
+
+			if (list_is_singular(&sem->wait_list))
+				new &= ~RWSEM_FLAG_WAITERS;
+		}
+	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
+
+	/*
+	 * We have either acquired the lock with handoff bit cleared or
+	 * set the handoff bit.
+	 */
+	if (new & RWSEM_FLAG_HANDOFF)
+		return false;
+
+	rwsem_set_owner(sem);
+	return true;
 }
 
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
@@ -373,9 +448,9 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 {
 	long count = atomic_long_read(&sem->count);
 
-	while (!(count & RWSEM_LOCK_MASK)) {
+	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
 		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
-					count + RWSEM_WRITER_LOCKED)) {
+						    count | RWSEM_WRITER_LOCKED)) {
 			rwsem_set_owner(sem);
 			lockevent_inc(rwsem_opt_wlock);
 			return true;
@@ -456,6 +531,11 @@ static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem)
 
 	rcu_read_lock();
 	for (;;) {
+		if (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF) {
+			state = OWNER_NONSPINNABLE;
+			break;
+		}
+
 		tmp = READ_ONCE(sem->owner);
 		if (tmp != owner) {
 			state = rwsem_owner_state((unsigned long)tmp);
@@ -553,16 +633,18 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
 
 	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_READ;
+	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
 
 	raw_spin_lock_irq(&sem->wait_lock);
 	if (list_empty(&sem->wait_list)) {
 		/*
 		 * In case the wait queue is empty and the lock isn't owned
-		 * by a writer, this reader can exit the slowpath and return
-		 * immediately as its RWSEM_READER_BIAS has already been
-		 * set in the count.
+		 * by a writer or has the handoff bit set, this reader can
+		 * exit the slowpath and return immediately as its
+		 * RWSEM_READER_BIAS has already been set in the count.
 		 */
-		if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
+		if (!(atomic_long_read(&sem->count) &
+		     (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
 			raw_spin_unlock_irq(&sem->wait_lock);
 			rwsem_set_reader_owned(sem);
 			lockevent_inc(rwsem_rlock_fast);
@@ -609,8 +691,10 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
 	return sem;
 out_nolock:
 	list_del(&waiter.list);
-	if (list_empty(&sem->wait_list))
-		atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
+	if (list_empty(&sem->wait_list)) {
+		atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
+				   &sem->count);
+	}
 	raw_spin_unlock_irq(&sem->wait_lock);
 	__set_current_state(TASK_RUNNING);
 	lockevent_inc(rwsem_rlock_fail);
@@ -624,7 +708,7 @@ static struct rw_semaphore *
 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 {
 	long count;
-	bool waiting = true; /* any queued threads before us */
+	enum writer_wait_state wstate;
 	struct rwsem_waiter waiter;
 	struct rw_semaphore *ret = sem;
 	DEFINE_WAKE_Q(wake_q);
@@ -639,66 +723,95 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 	 */
 	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
+	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
 
 	raw_spin_lock_irq(&sem->wait_lock);
 
 	/* account for this before adding a new element to the list */
-	if (list_empty(&sem->wait_list))
-		waiting = false;
+	wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
 
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we're now waiting on the lock */
-	if (waiting) {
+	if (wstate == WRITER_NOT_FIRST) {
 		count = atomic_long_read(&sem->count);
 
 		/*
-		 * If there were already threads queued before us and there are
-		 * no active writers and some readers, the lock must be read
-		 * owned; so we try to wake any read locks that were queued ahead
-		 * of us.
+		 * If there were already threads queued before us and:
+		 *  1) there are no no active locks, wake the front
+		 *     queued process(es) as the handoff bit might be set.
+		 *  2) there are no active writers and some readers, the lock
+		 *     must be read owned; so we try to wake any read lock
+		 *     waiters that were queued ahead of us.
 		 */
-		if (!(count & RWSEM_WRITER_MASK) &&
-		     (count & RWSEM_READER_MASK)) {
-			rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
-			/*
-			 * The wakeup is normally called _after_ the wait_lock
-			 * is released, but given that we are proactively waking
-			 * readers we can deal with the wake_q overhead as it is
-			 * similar to releasing and taking the wait_lock again
-			 * for attempting rwsem_try_write_lock().
-			 */
-			wake_up_q(&wake_q);
-
-			/*
-			 * Reinitialize wake_q after use.
-			 */
-			wake_q_init(&wake_q);
-		}
-
+		if (count & RWSEM_WRITER_MASK)
+			goto wait;
+
+		rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
+					? RWSEM_WAKE_READERS
+					: RWSEM_WAKE_ANY, &wake_q);
+
+		/*
+		 * The wakeup is normally called _after_ the wait_lock
+		 * is released, but given that we are proactively waking
+		 * readers we can deal with the wake_q overhead as it is
+		 * similar to releasing and taking the wait_lock again
+		 * for attempting rwsem_try_write_lock().
+		 */
+		wake_up_q(&wake_q);
+
+		/* We need wake_q again below, reinitialize */
+		wake_q_init(&wake_q);
 	} else {
 		count = atomic_long_add_return(RWSEM_FLAG_WAITERS, &sem->count);
 	}
 
+wait:
 	/* wait until we successfully acquire the lock */
 	set_current_state(state);
 	while (true) {
-		if (rwsem_try_write_lock(count, sem))
+		if (rwsem_try_write_lock(count, sem, wstate))
 			break;
 
 		raw_spin_unlock_irq(&sem->wait_lock);
 
 		/* Block until there are no active lockers. */
-		do {
+		for (;;) {
 			if (signal_pending_state(state, current))
 				goto out_nolock;
 
 			schedule();
 			lockevent_inc(rwsem_sleep_writer);
 			set_current_state(state);
+			/*
+			 * If HANDOFF bit is set, unconditionally do
+			 * a trylock.
+			 */
+			if (wstate == WRITER_HANDOFF)
+				break;
+
+			if ((wstate == WRITER_NOT_FIRST) &&
+			    (rwsem_first_waiter(sem) == &waiter))
+				wstate = WRITER_FIRST;
+
 			count = atomic_long_read(&sem->count);
-		} while (count & RWSEM_LOCK_MASK);
+			if (!(count & RWSEM_LOCK_MASK))
+				break;
+
+			/*
+			 * The setting of the handoff bit is deferred
+			 * until rwsem_try_write_lock() is called.
+			 */
+			if ((wstate == WRITER_FIRST) && (rt_task(current) ||
+			    time_after(jiffies, waiter.timeout))) {
+				wstate = WRITER_HANDOFF;
+				lockevent_inc(rwsem_wlock_handoff);
+				break;
+			}
+		}
 
 		raw_spin_lock_irq(&sem->wait_lock);
+		count = atomic_long_read(&sem->count);
 	}
 	__set_current_state(TASK_RUNNING);
 	list_del(&waiter.list);
@@ -711,6 +824,10 @@ out_nolock:
 	__set_current_state(TASK_RUNNING);
 	raw_spin_lock_irq(&sem->wait_lock);
 	list_del(&waiter.list);
+
+	if (unlikely(wstate == WRITER_HANDOFF))
+		atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);
+
 	if (list_empty(&sem->wait_list))
 		atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
 	else
@@ -726,7 +843,7 @@ out_nolock:
  * handle waking up a waiter on the semaphore
  * - up_read/up_write has decremented the active part of count if we come here
  */
-static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
 {
 	unsigned long flags;
 	DEFINE_WAKE_Q(wake_q);
@@ -859,7 +976,7 @@ inline void __up_read(struct rw_semaphore *sem)
 	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
 	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
 		      RWSEM_FLAG_WAITERS))
-		rwsem_wake(sem);
+		rwsem_wake(sem, tmp);
 }
 
 /*
@@ -873,7 +990,7 @@ static inline void __up_write(struct rw_semaphore *sem)
 	rwsem_clear_owner(sem);
 	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
 	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
-		rwsem_wake(sem);
+		rwsem_wake(sem, tmp);
 }
 
 /*