2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-29 15:43:59 +08:00
linux-next/arch/arm/include/asm/cmpxchg.h
Will Deacon c32ffce0f6 ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics
After a bunch of benchmarking on the interaction between dmb and pldw,
it turns out that issuing the pldw *after* the dmb instruction can
give modest performance gains (~3% atomic_add_return improvement on a
dual A15).

This patch adds prefetchw invocations to our barriered atomic operations
including cmpxchg, test_and_xxx and futexes.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-02-25 11:30:20 +00:00

289 lines
6.3 KiB
C

#ifndef __ASM_ARM_CMPXCHG_H
#define __ASM_ARM_CMPXCHG_H
#include <linux/irqflags.h>
#include <linux/prefetch.h>
#include <asm/barrier.h>
#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
/*
* On the StrongARM, "swp" is terminally broken since it bypasses the
* cache totally. This means that the cache becomes inconsistent, and,
* since we use normal loads/stores as well, this is really bad.
* Typically, this causes oopsen in filp_close, but could have other,
* more disastrous effects. There are two work-arounds:
* 1. Disable interrupts and emulate the atomic swap
* 2. Clean the cache, perform atomic swap, flush the cache
*
* We choose (1) since its the "easiest" to achieve here and is not
* dependent on the processor type.
*
* NOTE that this solution won't work on an SMP system, so explcitly
* forbid it here.
*/
#define swp_is_buggy
#endif
static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
{
extern void __bad_xchg(volatile void *, int);
unsigned long ret;
#ifdef swp_is_buggy
unsigned long flags;
#endif
#if __LINUX_ARM_ARCH__ >= 6
unsigned int tmp;
#endif
smp_mb();
prefetchw((const void *)ptr);
switch (size) {
#if __LINUX_ARM_ARCH__ >= 6
case 1:
asm volatile("@ __xchg1\n"
"1: ldrexb %0, [%3]\n"
" strexb %1, %2, [%3]\n"
" teq %1, #0\n"
" bne 1b"
: "=&r" (ret), "=&r" (tmp)
: "r" (x), "r" (ptr)
: "memory", "cc");
break;
case 4:
asm volatile("@ __xchg4\n"
"1: ldrex %0, [%3]\n"
" strex %1, %2, [%3]\n"
" teq %1, #0\n"
" bne 1b"
: "=&r" (ret), "=&r" (tmp)
: "r" (x), "r" (ptr)
: "memory", "cc");
break;
#elif defined(swp_is_buggy)
#ifdef CONFIG_SMP
#error SMP is not supported on this platform
#endif
case 1:
raw_local_irq_save(flags);
ret = *(volatile unsigned char *)ptr;
*(volatile unsigned char *)ptr = x;
raw_local_irq_restore(flags);
break;
case 4:
raw_local_irq_save(flags);
ret = *(volatile unsigned long *)ptr;
*(volatile unsigned long *)ptr = x;
raw_local_irq_restore(flags);
break;
#else
case 1:
asm volatile("@ __xchg1\n"
" swpb %0, %1, [%2]"
: "=&r" (ret)
: "r" (x), "r" (ptr)
: "memory", "cc");
break;
case 4:
asm volatile("@ __xchg4\n"
" swp %0, %1, [%2]"
: "=&r" (ret)
: "r" (x), "r" (ptr)
: "memory", "cc");
break;
#endif
default:
__bad_xchg(ptr, size), ret = 0;
break;
}
smp_mb();
return ret;
}
#define xchg(ptr,x) \
((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
#include <asm-generic/cmpxchg-local.h>
#if __LINUX_ARM_ARCH__ < 6
/* min ARCH < ARMv6 */
#ifdef CONFIG_SMP
#error "SMP is not supported on this platform"
#endif
/*
* cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
* them available.
*/
#define cmpxchg_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
(unsigned long)(n), sizeof(*(ptr))))
#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
#ifndef CONFIG_SMP
#include <asm-generic/cmpxchg.h>
#endif
#else /* min ARCH >= ARMv6 */
extern void __bad_cmpxchg(volatile void *ptr, int size);
/*
* cmpxchg only support 32-bits operands on ARMv6.
*/
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
unsigned long oldval, res;
prefetchw((const void *)ptr);
switch (size) {
#ifndef CONFIG_CPU_V6 /* min ARCH >= ARMv6K */
case 1:
do {
asm volatile("@ __cmpxchg1\n"
" ldrexb %1, [%2]\n"
" mov %0, #0\n"
" teq %1, %3\n"
" strexbeq %0, %4, [%2]\n"
: "=&r" (res), "=&r" (oldval)
: "r" (ptr), "Ir" (old), "r" (new)
: "memory", "cc");
} while (res);
break;
case 2:
do {
asm volatile("@ __cmpxchg1\n"
" ldrexh %1, [%2]\n"
" mov %0, #0\n"
" teq %1, %3\n"
" strexheq %0, %4, [%2]\n"
: "=&r" (res), "=&r" (oldval)
: "r" (ptr), "Ir" (old), "r" (new)
: "memory", "cc");
} while (res);
break;
#endif
case 4:
do {
asm volatile("@ __cmpxchg4\n"
" ldrex %1, [%2]\n"
" mov %0, #0\n"
" teq %1, %3\n"
" strexeq %0, %4, [%2]\n"
: "=&r" (res), "=&r" (oldval)
: "r" (ptr), "Ir" (old), "r" (new)
: "memory", "cc");
} while (res);
break;
default:
__bad_cmpxchg(ptr, size);
oldval = 0;
}
return oldval;
}
static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
unsigned long ret;
smp_mb();
ret = __cmpxchg(ptr, old, new, size);
smp_mb();
return ret;
}
#define cmpxchg(ptr,o,n) \
((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \
(unsigned long)(o), \
(unsigned long)(n), \
sizeof(*(ptr))))
static inline unsigned long __cmpxchg_local(volatile void *ptr,
unsigned long old,
unsigned long new, int size)
{
unsigned long ret;
switch (size) {
#ifdef CONFIG_CPU_V6 /* min ARCH == ARMv6 */
case 1:
case 2:
ret = __cmpxchg_local_generic(ptr, old, new, size);
break;
#endif
default:
ret = __cmpxchg(ptr, old, new, size);
}
return ret;
}
static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
unsigned long long old,
unsigned long long new)
{
unsigned long long oldval;
unsigned long res;
prefetchw(ptr);
__asm__ __volatile__(
"1: ldrexd %1, %H1, [%3]\n"
" teq %1, %4\n"
" teqeq %H1, %H4\n"
" bne 2f\n"
" strexd %0, %5, %H5, [%3]\n"
" teq %0, #0\n"
" bne 1b\n"
"2:"
: "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
: "r" (ptr), "r" (old), "r" (new)
: "cc");
return oldval;
}
static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr,
unsigned long long old,
unsigned long long new)
{
unsigned long long ret;
smp_mb();
ret = __cmpxchg64(ptr, old, new);
smp_mb();
return ret;
}
#define cmpxchg_local(ptr,o,n) \
((__typeof__(*(ptr)))__cmpxchg_local((ptr), \
(unsigned long)(o), \
(unsigned long)(n), \
sizeof(*(ptr))))
#define cmpxchg64(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \
(unsigned long long)(o), \
(unsigned long long)(n)))
#define cmpxchg64_relaxed(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64((ptr), \
(unsigned long long)(o), \
(unsigned long long)(n)))
#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
#endif /* __LINUX_ARM_ARCH__ >= 6 */
#endif /* __ASM_ARM_CMPXCHG_H */