2022-11-28 11:11:13 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2022-11-26 17:59:16 +08:00
|
|
|
#include <linux/bug.h>
|
|
|
|
#include <linux/compiler.h>
|
2022-11-28 11:11:13 +08:00
|
|
|
#include <linux/export.h>
|
2022-11-26 17:59:16 +08:00
|
|
|
#include <linux/percpu.h>
|
|
|
|
#include <linux/smp.h>
|
2022-11-28 11:11:13 +08:00
|
|
|
#include <asm/qspinlock.h>
|
|
|
|
|
2022-11-26 17:59:16 +08:00
|
|
|
#define MAX_NODES 4
|
|
|
|
|
|
|
|
struct qnode {
|
|
|
|
struct qnode *next;
|
|
|
|
struct qspinlock *lock;
|
|
|
|
u8 locked; /* 1 if lock acquired */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct qnodes {
|
|
|
|
int count;
|
|
|
|
struct qnode nodes[MAX_NODES];
|
|
|
|
};
|
|
|
|
|
2022-11-26 17:59:19 +08:00
|
|
|
/* Tuning parameters */
|
|
|
|
static int steal_spins __read_mostly = (1 << 5);
|
|
|
|
static bool maybe_stealers __read_mostly = true;
|
2022-11-26 17:59:20 +08:00
|
|
|
static int head_spins __read_mostly = (1 << 8);
|
2022-11-26 17:59:19 +08:00
|
|
|
|
2022-11-26 17:59:16 +08:00
|
|
|
/* Per-CPU pool of queue nodes used by the queued lock slow path. */
static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
|
|
|
|
|
2022-11-26 17:59:19 +08:00
|
|
|
static __always_inline int get_steal_spins(void)
|
|
|
|
{
|
|
|
|
return steal_spins;
|
|
|
|
}
|
|
|
|
|
2022-11-26 17:59:20 +08:00
|
|
|
static __always_inline int get_head_spins(void)
|
|
|
|
{
|
|
|
|
return head_spins;
|
|
|
|
}
|
|
|
|
|
2022-11-26 17:59:18 +08:00
|
|
|
static inline u32 encode_tail_cpu(int cpu)
|
2022-11-26 17:59:16 +08:00
|
|
|
{
|
|
|
|
return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
|
|
|
|
}
|
|
|
|
|
2022-11-26 17:59:18 +08:00
|
|
|
/*
 * Inverse of encode_tail_cpu(): shift the tail field of @val down and
 * remove the +1 bias.
 *
 * Bits of @val below the tail field are discarded by the shift.
 * NOTE(review): this assumes no lock-word bits sit above the tail field.
 */
static inline int decode_tail_cpu(u32 val)
{
	u32 biased_cpu = val >> _Q_TAIL_CPU_OFFSET;

	return biased_cpu - 1;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to acquire the lock if it was not already locked. If the tail matches
|
|
|
|
* mytail then clear it, otherwise leave it unchnaged. Return previous value.
|
|
|
|
*
|
|
|
|
* This is used by the head of the queue to acquire the lock and clean up
|
|
|
|
* its tail if it was the last one queued.
|
|
|
|
*/
|
2022-11-26 17:59:19 +08:00
|
|
|
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
|
2022-11-26 17:59:16 +08:00
|
|
|
{
|
2022-11-26 17:59:21 +08:00
|
|
|
u32 newval = queued_spin_encode_locked_val();
|
2022-11-26 17:59:18 +08:00
|
|
|
u32 prev, tmp;
|
|
|
|
|
|
|
|
asm volatile(
|
2022-11-26 17:59:19 +08:00
|
|
|
"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n"
|
|
|
|
/* This test is necessary if there could be stealers */
|
|
|
|
" andi. %1,%0,%5 \n"
|
|
|
|
" bne 3f \n"
|
|
|
|
/* Test whether the lock tail == mytail */
|
|
|
|
" and %1,%0,%6 \n"
|
2022-11-26 17:59:18 +08:00
|
|
|
" cmpw 0,%1,%3 \n"
|
|
|
|
/* Merge the new locked value */
|
|
|
|
" or %1,%1,%4 \n"
|
|
|
|
" bne 2f \n"
|
|
|
|
/* If the lock tail matched, then clear it, otherwise leave it. */
|
2022-11-26 17:59:19 +08:00
|
|
|
" andc %1,%1,%6 \n"
|
2022-11-26 17:59:18 +08:00
|
|
|
"2: stwcx. %1,0,%2 \n"
|
|
|
|
" bne- 1b \n"
|
|
|
|
"\t" PPC_ACQUIRE_BARRIER " \n"
|
|
|
|
"3: \n"
|
|
|
|
: "=&r" (prev), "=&r" (tmp)
|
|
|
|
: "r" (&lock->val), "r"(tail), "r" (newval),
|
2022-11-26 17:59:19 +08:00
|
|
|
"i" (_Q_LOCKED_VAL),
|
2022-11-26 17:59:18 +08:00
|
|
|
"r" (_Q_TAIL_CPU_MASK),
|
|
|
|
"i" (IS_ENABLED(CONFIG_PPC64))
|
|
|
|
: "cr0", "memory");
|
|
|
|
|
|
|
|
return prev;
|
2022-11-26 17:59:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Publish our tail, replacing previous tail. Return previous value.
|
|
|
|
*
|
|
|
|
* This provides a release barrier for publishing node, this pairs with the
|
|
|
|
* acquire barrier in get_tail_qnode() when the next CPU finds this tail
|
|
|
|
* value.
|
|
|
|
*/
|
2022-11-26 17:59:18 +08:00
|
|
|
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
|
2022-11-26 17:59:16 +08:00
|
|
|
{
|
2022-11-26 17:59:18 +08:00
|
|
|
u32 prev, tmp;
|
|
|
|
|
|
|
|
asm volatile(
|
|
|
|
"\t" PPC_RELEASE_BARRIER " \n"
|
|
|
|
"1: lwarx %0,0,%2 # publish_tail_cpu \n"
|
|
|
|
" andc %1,%0,%4 \n"
|
|
|
|
" or %1,%1,%3 \n"
|
|
|
|
" stwcx. %1,0,%2 \n"
|
|
|
|
" bne- 1b \n"
|
|
|
|
: "=&r" (prev), "=&r"(tmp)
|
|
|
|
: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
|
|
|
|
: "cr0", "memory");
|
|
|
|
|
|
|
|
return prev;
|
2022-11-26 17:59:16 +08:00
|
|
|
}
|
|
|
|
|
2022-11-26 17:59:20 +08:00
|
|
|
/*
 * Atomically OR the must-queue bit into the lock word. No memory barrier
 * is issued.
 *
 * Return: the value that was stored. The OR is applied to the loaded
 * register in place, so this is the lock word with _Q_MUST_Q_VAL already
 * set, not the previous value.
 */
static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 updated;

	asm volatile(
"1: lwarx %0,0,%1 # set_mustq \n"
" or %0,%0,%2 \n"
" stwcx. %0,0,%1 \n"
" bne- 1b \n"
	: "=&r" (updated)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return updated;
}
|
|
|
|
|
2022-11-26 17:59:18 +08:00
|
|
|
/*
 * Find the queue node of the previous tail.
 *
 * @val is a lock word whose tail field names the CPU that queued before
 * us. That CPU's node pool is scanned from index 0 for the first node
 * queued on @lock.
 *
 * Not finding one is a bug: a published tail must have a node.
 */
static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
{
	struct qnodes *pool = per_cpu_ptr(&qnodes, decode_tail_cpu(val));
	int i = 0;

	/*
	 * The caller saw a previous tail in the value returned by its own
	 * tail publication and branched on it (the control dependency).
	 * Upgrade that to an acquire so the reads of the other CPU's qnodes
	 * below are ordered after the release barrier that CPU issued in
	 * publish_tail_cpu().
	 */
	smp_acquire__after_ctrl_dep();

	while (i < MAX_NODES) {
		struct qnode *candidate = &pool->nodes[i++];

		if (candidate->lock == lock)
			return candidate;
	}

	BUG();
}
|
|
|
|
|
2022-11-26 17:59:19 +08:00
|
|
|
static inline bool try_to_steal_lock(struct qspinlock *lock)
|
|
|
|
{
|
|
|
|
int iters = 0;
|
|
|
|
|
|
|
|
if (!steal_spins)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Attempt to steal the lock */
|
|
|
|
do {
|
|
|
|
u32 val = READ_ONCE(lock->val);
|
|
|
|
|
2022-11-26 17:59:20 +08:00
|
|
|
if (val & _Q_MUST_Q_VAL)
|
|
|
|
break;
|
|
|
|
|
2022-11-26 17:59:19 +08:00
|
|
|
if (unlikely(!(val & _Q_LOCKED_VAL))) {
|
|
|
|
if (__queued_spin_trylock_steal(lock))
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
cpu_relax();
|
|
|
|
}
|
|
|
|
|
|
|
|
iters++;
|
|
|
|
} while (iters < get_steal_spins());
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2022-11-26 17:59:16 +08:00
|
|
|
static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
|
2022-11-28 11:11:13 +08:00
|
|
|
{
|
2022-11-26 17:59:16 +08:00
|
|
|
struct qnodes *qnodesp;
|
|
|
|
struct qnode *next, *node;
|
2022-11-26 17:59:18 +08:00
|
|
|
u32 val, old, tail;
|
2022-11-26 17:59:20 +08:00
|
|
|
bool mustq = false;
|
2022-11-26 17:59:16 +08:00
|
|
|
int idx;
|
2022-11-26 17:59:20 +08:00
|
|
|
int iters = 0;
|
2022-11-26 17:59:16 +08:00
|
|
|
|
|
|
|
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
|
|
|
|
|
|
|
|
qnodesp = this_cpu_ptr(&qnodes);
|
|
|
|
if (unlikely(qnodesp->count >= MAX_NODES)) {
|
|
|
|
while (!queued_spin_trylock(lock))
|
|
|
|
cpu_relax();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
idx = qnodesp->count++;
|
|
|
|
/*
|
|
|
|
* Ensure that we increment the head node->count before initialising
|
|
|
|
* the actual node. If the compiler is kind enough to reorder these
|
|
|
|
* stores, then an IRQ could overwrite our assignments.
|
|
|
|
*/
|
|
|
|
barrier();
|
|
|
|
node = &qnodesp->nodes[idx];
|
|
|
|
node->next = NULL;
|
|
|
|
node->lock = lock;
|
|
|
|
node->locked = 0;
|
|
|
|
|
|
|
|
tail = encode_tail_cpu(smp_processor_id());
|
|
|
|
|
|
|
|
old = publish_tail_cpu(lock, tail);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there was a previous node; link it and wait until reaching the
|
|
|
|
* head of the waitqueue.
|
|
|
|
*/
|
|
|
|
if (old & _Q_TAIL_CPU_MASK) {
|
|
|
|
struct qnode *prev = get_tail_qnode(lock, old);
|
|
|
|
|
|
|
|
/* Link @node into the waitqueue. */
|
|
|
|
WRITE_ONCE(prev->next, node);
|
|
|
|
|
|
|
|
/* Wait for mcs node lock to be released */
|
|
|
|
while (!node->locked)
|
|
|
|
cpu_relax();
|
|
|
|
|
|
|
|
smp_rmb(); /* acquire barrier for the mcs lock */
|
|
|
|
}
|
|
|
|
|
2022-11-26 17:59:19 +08:00
|
|
|
again:
|
2022-11-26 17:59:16 +08:00
|
|
|
/* We're at the head of the waitqueue, wait for the lock. */
|
|
|
|
for (;;) {
|
2022-11-26 17:59:18 +08:00
|
|
|
val = READ_ONCE(lock->val);
|
2022-11-26 17:59:16 +08:00
|
|
|
if (!(val & _Q_LOCKED_VAL))
|
|
|
|
break;
|
|
|
|
|
|
|
|
cpu_relax();
|
2022-11-26 17:59:20 +08:00
|
|
|
if (!maybe_stealers)
|
|
|
|
continue;
|
|
|
|
iters++;
|
|
|
|
|
|
|
|
if (!mustq && iters >= get_head_spins()) {
|
|
|
|
mustq = true;
|
|
|
|
set_mustq(lock);
|
|
|
|
val |= _Q_MUST_Q_VAL;
|
|
|
|
}
|
2022-11-26 17:59:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* If we're the last queued, must clean up the tail. */
|
2022-11-26 17:59:19 +08:00
|
|
|
old = trylock_clean_tail(lock, tail);
|
|
|
|
if (unlikely(old & _Q_LOCKED_VAL)) {
|
|
|
|
BUG_ON(!maybe_stealers);
|
|
|
|
goto again; /* Can only be true if maybe_stealers. */
|
|
|
|
}
|
|
|
|
|
2022-11-26 17:59:16 +08:00
|
|
|
if ((old & _Q_TAIL_CPU_MASK) == tail)
|
2022-11-26 17:59:19 +08:00
|
|
|
goto release; /* We were the tail, no next. */
|
2022-11-26 17:59:16 +08:00
|
|
|
|
|
|
|
/* There is a next, must wait for node->next != NULL (MCS protocol) */
|
|
|
|
while (!(next = READ_ONCE(node->next)))
|
2022-11-28 11:11:13 +08:00
|
|
|
cpu_relax();
|
2022-11-26 17:59:16 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Unlock the next mcs waiter node. Release barrier is not required
|
|
|
|
* here because the acquirer is only accessing the lock word, and
|
|
|
|
* the acquire barrier we took the lock with orders that update vs
|
|
|
|
* this store to locked. The corresponding barrier is the smp_rmb()
|
|
|
|
* acquire barrier for mcs lock, above.
|
|
|
|
*/
|
|
|
|
WRITE_ONCE(next->locked, 1);
|
|
|
|
|
|
|
|
release:
|
|
|
|
qnodesp->count--; /* release the node */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Contended-lock entry point: first try to steal the lock, and only join
 * the wait queue if that fails.
 */
void queued_spin_lock_slowpath(struct qspinlock *lock)
{
	if (!try_to_steal_lock(lock))
		queued_spin_lock_mcs_queue(lock);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
|
|
|
|
|
|
|
|
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/* Paravirt spinlock setup hook; there is nothing to initialise here. */
void pv_spinlocks_init(void)
{
}
#endif
|
2022-11-26 17:59:19 +08:00
|
|
|
|
|
|
|
#include <linux/debugfs.h>
|
|
|
|
/*
 * debugfs write handler for the steal_spins tunable.
 *
 * The queued slow path treats !maybe_stealers as a guarantee that nobody
 * can take the lock from under the queue head, so maybe_stealers must be
 * true for as long as any CPU could still be stealing:
 *
 *  - enabling stealing (0 -> non-zero): set maybe_stealers first, wait for
 *    queue head waiters that may have sampled the old value to go away,
 *    and only then allow stealing;
 *  - disabling stealing (non-zero -> 0): stop new stealers first, wait for
 *    existing ones to go away, and only then clear maybe_stealers.
 *
 * NOTE(review): waiting with synchronize_rcu() relies on the lock slow
 * paths running as RCU read-side critical sections; that is not visible in
 * this file.
 *
 * Updates are serialised by a function-local mutex. Always returns 0.
 */
static int steal_spins_set(void *data, u64 val)
{
	static DEFINE_MUTEX(update_lock);
	bool enabling, disabling;

	mutex_lock(&update_lock);

	enabling = val && !steal_spins;
	disabling = !val && steal_spins;

	if (enabling) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
	}

	steal_spins = val;

	if (disabling) {
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	}

	mutex_unlock(&update_lock);

	return 0;
}
|
|
|
|
|
|
|
|
static int steal_spins_get(void *data, u64 *val)
|
|
|
|
{
|
|
|
|
*val = steal_spins;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
|
|
|
|
|
2022-11-26 17:59:20 +08:00
|
|
|
/*
 * debugfs write handler: store @val into the head_spins tunable.
 * The u64 is narrowed to int by the assignment. Always returns 0.
 */
static int head_spins_set(void *data, u64 val)
{
	head_spins = val;
	return 0;
}
|
|
|
|
|
|
|
|
static int head_spins_get(void *data, u64 *val)
|
|
|
|
{
|
|
|
|
*val = head_spins;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
|
|
|
|
|
2022-11-26 17:59:19 +08:00
|
|
|
static __init int spinlock_debugfs_init(void)
|
|
|
|
{
|
|
|
|
debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
|
2022-11-26 17:59:20 +08:00
|
|
|
debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
|
2022-11-26 17:59:19 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
device_initcall(spinlock_debugfs_init);
|