mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 04:18:39 +08:00
riscv: select DCACHE_WORD_ACCESS for efficient unaligned access HW
DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string comparisons in the vfs layer. This patch implements support for load_unaligned_zeropad in much the same way as has been done for arm64. Here is the test program and steps: $ cat tt.c #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #define ITERATIONS 1000000 #define PATH "123456781234567812345678123456781" int main(void) { unsigned long i; struct stat buf; for (i = 0; i < ITERATIONS; i++) stat(PATH, &buf); return 0; } $ gcc -O2 tt.c $ touch 123456781234567812345678123456781 $ time ./a.out Per my test on T-HEAD C910 platforms, the above test performance is improved by about 7.5%. Signed-off-by: Jisheng Zhang <jszhang@kernel.org> Link: https://lore.kernel.org/r/20231225044207.3821-3-jszhang@kernel.org Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
parent
b6da6cbe13
commit
d0fdc20b04
@ -654,6 +654,7 @@ config RISCV_MISALIGNED
|
||||
config RISCV_EFFICIENT_UNALIGNED_ACCESS
|
||||
bool "Assume the CPU supports fast unaligned memory accesses"
|
||||
depends on NONPORTABLE
|
||||
select DCACHE_WORD_ACCESS if MMU
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
help
|
||||
Say Y here if you want the kernel to assume that the CPU supports
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define EX_TYPE_FIXUP 1
|
||||
#define EX_TYPE_BPF 2
|
||||
#define EX_TYPE_UACCESS_ERR_ZERO 3
|
||||
#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
@ -47,6 +48,11 @@
|
||||
#define EX_DATA_REG_ZERO_SHIFT 5
|
||||
#define EX_DATA_REG_ZERO GENMASK(9, 5)
|
||||
|
||||
#define EX_DATA_REG_DATA_SHIFT 0
|
||||
#define EX_DATA_REG_DATA GENMASK(4, 0)
|
||||
#define EX_DATA_REG_ADDR_SHIFT 5
|
||||
#define EX_DATA_REG_ADDR GENMASK(9, 5)
|
||||
|
||||
#define EX_DATA_REG(reg, gpr) \
|
||||
"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
|
||||
|
||||
@ -62,6 +68,15 @@
|
||||
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
|
||||
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
|
||||
|
||||
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
|
||||
__DEFINE_ASM_GPR_NUMS \
|
||||
__ASM_EXTABLE_RAW(#insn, #fixup, \
|
||||
__stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \
|
||||
"(" \
|
||||
EX_DATA_REG(DATA, data) " | " \
|
||||
EX_DATA_REG(ADDR, addr) \
|
||||
")")
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#else /* CONFIG_MMU */
|
||||
|
@ -9,6 +9,7 @@
|
||||
#define _ASM_RISCV_WORD_AT_A_TIME_H
|
||||
|
||||
|
||||
#include <asm/asm-extable.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
struct word_at_a_time {
|
||||
@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask)
|
||||
/* The mask we created is directly usable as a bytemask */
|
||||
#define zero_bytemask(mask) (mask)
|
||||
|
||||
#ifdef CONFIG_DCACHE_WORD_ACCESS
|
||||
|
||||
/*
|
||||
* Load an unaligned word from kernel space.
|
||||
*
|
||||
* In the (very unlikely) case of the word being a page-crosser
|
||||
* and the next page not being mapped, take the exception and
|
||||
* return zeroes in the non-existing part.
|
||||
*/
|
||||
static inline unsigned long load_unaligned_zeropad(const void *addr)
|
||||
{
|
||||
unsigned long ret;
|
||||
|
||||
/* Load word from unaligned pointer addr */
|
||||
asm(
|
||||
"1: " REG_L " %0, %2\n"
|
||||
"2:\n"
|
||||
_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
|
||||
: "=&r" (ret)
|
||||
: "r" (addr), "m" (*(unsigned long *)addr));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_DCACHE_WORD_ACCESS */
|
||||
|
||||
#endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
|
||||
|
@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
|
||||
{
|
||||
if (unlikely(!offset || offset > MAX_REG_OFFSET))
|
||||
return 0;
|
||||
|
||||
return *(unsigned long *)((unsigned long)regs + offset);
|
||||
}
|
||||
|
||||
static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
|
||||
unsigned long val)
|
||||
{
|
||||
@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
|
||||
int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
|
||||
unsigned long data, addr, offset;
|
||||
|
||||
addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
|
||||
|
||||
offset = addr & 0x7UL;
|
||||
addr &= ~0x7UL;
|
||||
|
||||
data = *(unsigned long *)addr >> (offset * 8);
|
||||
|
||||
regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
|
||||
|
||||
regs->epc = get_ex_fixup(ex);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool fixup_exception(struct pt_regs *regs)
|
||||
{
|
||||
const struct exception_table_entry *ex;
|
||||
@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
|
||||
return ex_handler_bpf(ex, regs);
|
||||
case EX_TYPE_UACCESS_ERR_ZERO:
|
||||
return ex_handler_uaccess_err_zero(ex, regs);
|
||||
case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
|
||||
return ex_handler_load_unaligned_zeropad(ex, regs);
|
||||
}
|
||||
|
||||
BUG();
|
||||
|
Loading…
Reference in New Issue
Block a user