linux/arch/riscv/kernel/stacktrace.c
Anton Blanchard 5d5fc33ce5
riscv: Improve exception and system call latency
Many CPUs implement return address branch prediction as a stack. The
RISCV architecture refers to this as a return address stack (RAS). If
this gets corrupted then the CPU will mispredict at least one but
potentally many function returns.

There are two issues with the current RISCV exception code:

- We are using the alternate link stack (x5/t0) for the indirect branch
  which makes the hardware think this is a function return. This will
  corrupt the RAS.

- We modify the return address of handle_exception to point to
  ret_from_exception. This will also corrupt the RAS.

Testing the null system call latency before and after the patch:

Visionfive2 (StarFive JH7110 / U74)
baseline: 189.87 ns
patched:  176.76 ns

Lichee pi 4a (T-Head TH1520 / C910)
baseline: 666.58 ns
patched:  636.90 ns

Just over 7% on the U74 and just over 4% on the C910.

Signed-off-by: Anton Blanchard <antonb@tenstorrent.com>
Signed-off-by: Cyril Bur <cyrilbur@tenstorrent.com>
Tested-by: Jisheng Zhang <jszhang@kernel.org>
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20240607061335.2197383-1-cyrilbur@tenstorrent.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2024-07-26 05:50:45 -07:00

165 lines
3.7 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008 ARM Limited
* Copyright (C) 2014 Regents of the University of California
*/
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <linux/ftrace.h>
#include <asm/stacktrace.h>
#ifdef CONFIG_FRAME_POINTER
extern asmlinkage void handle_exception(void);
static inline int fp_is_valid(unsigned long fp, unsigned long sp)
{
unsigned long low, high;
low = sp + sizeof(struct stackframe);
high = ALIGN(sp, THREAD_SIZE);
return !(fp < low || fp > high || fp & 0x07);
}
void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
int graph_idx = 0;
int level = 0;
if (regs) {
fp = frame_pointer(regs);
sp = user_stack_pointer(regs);
pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
fp = (unsigned long)__builtin_frame_address(0);
sp = current_stack_pointer;
pc = (unsigned long)walk_stackframe;
level = -1;
} else {
/* task blocked in __switch_to */
fp = task->thread.s[0];
sp = task->thread.sp;
pc = task->thread.ra;
}
for (;;) {
struct stackframe *frame;
if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc))))
break;
if (unlikely(!fp_is_valid(fp, sp)))
break;
/* Unwind stack frame */
frame = (struct stackframe *)fp - 1;
sp = fp;
if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) {
/* We hit function where ra is not saved on the stack */
fp = frame->ra;
pc = regs->ra;
} else {
fp = frame->fp;
pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
&frame->ra);
if (pc == (unsigned long)handle_exception) {
if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
break;
pc = ((struct pt_regs *)sp)->epc;
fp = ((struct pt_regs *)sp)->s0;
}
}
}
}
#else /* !CONFIG_FRAME_POINTER */
void notrace walk_stackframe(struct task_struct *task,
struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long sp, pc;
unsigned long *ksp;
if (regs) {
sp = user_stack_pointer(regs);
pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
sp = current_stack_pointer;
pc = (unsigned long)walk_stackframe;
} else {
/* task blocked in __switch_to */
sp = task->thread.sp;
pc = task->thread.ra;
}
if (unlikely(sp & 0x7))
return;
ksp = (unsigned long *)sp;
while (!kstack_end(ksp)) {
if (__kernel_text_address(pc) && unlikely(!fn(arg, pc)))
break;
pc = READ_ONCE_NOCHECK(*ksp++) - 0x4;
}
}
#endif /* CONFIG_FRAME_POINTER */
static bool print_trace_address(void *arg, unsigned long pc)
{
const char *loglvl = arg;
print_ip_sym(loglvl, pc);
return true;
}
noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
const char *loglvl)
{
walk_stackframe(task, regs, print_trace_address, (void *)loglvl);
}
void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
{
pr_cont("%sCall Trace:\n", loglvl);
dump_backtrace(NULL, task, loglvl);
}
static bool save_wchan(void *arg, unsigned long pc)
{
if (!in_sched_functions(pc)) {
unsigned long *p = arg;
*p = pc;
return false;
}
return true;
}
unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
if (!try_get_task_stack(task))
return 0;
walk_stackframe(task, NULL, save_wchan, &pc);
put_task_stack(task);
return pc;
}
noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
{
walk_stackframe(task, regs, consume_entry, cookie);
}