mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-05 21:35:04 +08:00
c6d3cd32fd
When CONFIG_FUNCTION_GRAPH_TRACER is selected and the function graph tracer is in use, unwind_frame() may erroneously associate a traced function with an incorrect return address. This can happen when starting an unwind from a pt_regs, or when unwinding across an exception boundary. This can be seen when recording with perf while the function graph tracer is in use. For example: | # echo function_graph > /sys/kernel/debug/tracing/current_tracer | # perf record -g -e raw_syscalls:sys_enter:k /bin/true | # perf report ... reports the callchain erroneously as: | el0t_64_sync | el0t_64_sync_handler | el0_svc_common.constprop.0 | perf_callchain | get_perf_callchain | syscall_trace_enter | syscall_trace_enter ... whereas when the function graph tracer is not in use, it reports: | el0t_64_sync | el0t_64_sync_handler | el0_svc | do_el0_svc | el0_svc_common.constprop.0 | syscall_trace_enter | syscall_trace_enter The underlying problem is that ftrace_graph_get_ret_stack() takes an index offset from the most recent entry added to the fgraph return stack. We start an unwind at offset 0, and increment the offset each time we encounter a rewritten return address (i.e. when we see `return_to_handler`). This is broken in two cases: 1) Between creating a pt_regs and starting the unwind, function calls may place entries on the stack, leaving an arbitrary offset which we can only determine by performing a full unwind from the caller of the unwind code (and relying on none of the unwind code being instrumented). This can result in erroneous entries being reported in a backtrace recorded by perf or kfence when the function graph tracer is in use. Currently show_regs() is unaffected as dump_backtrace() performs an initial unwind. 2) When unwinding across an exception boundary (whether continuing an unwind or starting a new unwind from regs), we currently always skip the LR of the interrupted context. 
Where this was live and contained a rewritten address, we won't consume the corresponding fgraph ret stack entry, leaving subsequent entries off-by-one. This can result in erroneous entries being reported in a backtrace performed by any in-kernel unwinder when that backtrace crosses an exception boundary, with entries after the boundary being reported incorrectly. This includes perf, kfence, show_regs(), panic(), etc. To fix this, we need to be able to uniquely identify each rewritten return address such that we can map this back to the original return address. We can use HAVE_FUNCTION_GRAPH_RET_ADDR_PTR to associate each rewritten return address with a unique location on the stack. As the return address is passed in the LR (and so is not guaranteed a unique location in memory), we use the FP upon entry to the function (i.e. the address of the caller's frame record) as the return address pointer. Any nested call will have a different FP value as the caller must create its own frame record and update FP to point to this. Since ftrace_graph_ret_addr() requires the return address with the PAC stripped, the stripping of the PAC is moved before the fixup of the rewritten address. As we would unconditionally strip the PAC, moving this earlier is not harmful, and we can avoid a redundant strip in the return address fixup code. I've tested this with the perf case above, the ftrace selftests, and a number of ad-hoc unwinder tests. The tests all pass, and I have seen no unexpected behaviour as a result of this change. I've tested with pointer authentication under QEMU TCG where magic-sysrq+l correctly recovers the original return addresses. Note that this doesn't fix the issue of skipping a live LR at an exception boundary, which is a more general problem and requires more substantial rework. Were we to consume the LR in all cases this would result in warnings where the interrupted context's LR contains `return_to_handler`, but the FP has been altered, e.g. 
| func: | <--- ftrace entry ---> // logs FP & LR, rewrites LR | STP FP, LR, [SP, #-16]! | MOV FP, SP | <--- INTERRUPT ---> ... as ftrace_graph_get_ret_stack() will not find a matching entry, triggering the WARN_ON_ONCE() in unwind_frame(). Link: https://lore.kernel.org/r/20211025164925.GB2001@C02TD0UTHF1T.local Link: https://lore.kernel.org/r/20211027132529.30027-1-mark.rutland@arm.com Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com> Cc: Mark Brown <broonie@kernel.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Will Deacon <will@kernel.org> Reviewed-by: Mark Brown <broonie@kernel.org> Link: https://lore.kernel.org/r/20211029162245.39761-1-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org>
120 lines
3.3 KiB
C
120 lines
3.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* arch/arm64/include/asm/ftrace.h
|
|
*
|
|
* Copyright (C) 2013 Linaro Limited
|
|
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
|
*/
|
|
#ifndef __ASM_FTRACE_H
|
|
#define __ASM_FTRACE_H
|
|
|
|
#include <asm/insn.h>
|
|
|
|
#define HAVE_FUNCTION_GRAPH_FP_TEST
|
|
|
|
/*
|
|
* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
|
|
* "return address pointer" which can be used to uniquely identify a return
|
|
* address which has been overwritten.
|
|
*
|
|
* On arm64 we use the address of the caller's frame record, which remains the
|
|
* same for the lifetime of the instrumented function, unlike the return
|
|
* address in the LR.
|
|
*/
|
|
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
|
|
#define ARCH_SUPPORTS_FTRACE_OPS 1
|
|
#else
|
|
#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount))
|
|
#endif
|
|
|
|
/* The BL at the callsite's adjusted rec->ip */
|
|
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
|
|
|
|
#define FTRACE_PLT_IDX 0
|
|
#define FTRACE_REGS_PLT_IDX 1
|
|
#define NR_FTRACE_PLTS 2
|
|
|
|
/*
|
|
* Currently, gcc tends to save the link register after the local variables
|
|
* on the stack. This causes the max stack tracer to report the function
|
|
* frame sizes for the wrong functions. By defining
|
|
* ARCH_FTRACE_SHIFT_STACK_TRACER, it will tell the stack tracer to expect
|
|
* to find the return address on the stack after the local variables have
|
|
* been set up.
|
|
*
|
|
* Note, this may change in the future, and we will need to deal with that
|
|
* if it were to happen.
|
|
*/
|
|
#define ARCH_FTRACE_SHIFT_STACK_TRACER 1
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/compat.h>
|
|
|
|
extern void _mcount(unsigned long);
|
|
extern void *return_address(unsigned int);
|
|
|
|
/*
 * Architecture-specific ftrace data. The structure is intentionally left
 * empty on arm64.
 * NOTE(review): presumably embedded in each dynamic ftrace record by the
 * core ftrace code -- confirm against the generic ftrace headers.
 */
struct dyn_arch_ftrace {
|
|
/* No extra data needed for arm64 */
|
|
};
|
|
|
|
extern unsigned long ftrace_graph_call;
|
|
|
|
extern void return_to_handler(void);
|
|
|
|
static inline unsigned long ftrace_call_adjust(unsigned long addr)
|
|
{
|
|
/*
|
|
* Adjust addr to point at the BL in the callsite.
|
|
* See ftrace_init_nop() for the callsite sequence.
|
|
*/
|
|
if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
|
|
return addr + AARCH64_INSN_SIZE;
|
|
/*
|
|
* addr is the address of the mcount call instruction.
|
|
* recordmcount does the necessary offset calculation.
|
|
*/
|
|
return addr;
|
|
}
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
|
|
struct dyn_ftrace;
|
|
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
|
|
#define ftrace_init_nop ftrace_init_nop
|
|
#endif
|
|
|
|
#define ftrace_return_address(n) return_address(n)
|
|
|
|
/*
|
|
* Because AArch32 mode does not share the same syscall table with AArch64,
|
|
* tracing compat syscalls may result in reporting bogus syscalls or even
|
|
* hang-up, so just do not trace them.
|
|
* See kernel/trace/trace_syscalls.c
|
|
*
|
|
* x86 code says:
|
|
* If the user really wants these, then they should use the
|
|
* raw syscall tracepoints with filtering.
|
|
*/
|
|
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
|
|
/*
 * Report whether the syscall being traced should be treated as a compat
 * syscall.
 *
 * @regs: register state of the syscall; not consulted here, the answer
 *        comes solely from is_compat_task()
 *
 * Returns the result of is_compat_task().
 */
static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
{
	bool compat = is_compat_task();

	return compat;
}
|
|
|
|
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
|
|
|
|
/*
 * Compare a syscall function's symbol name against a syscall name.
 *
 * @sym:  symbol name of the syscall function, expected to carry the
 *        "__arm64_" prefix
 * @name: syscall name to compare against
 *
 * Returns true when the part of @sym following the prefix-length offset is
 * equal to @name, false otherwise (including when @sym is shorter than the
 * prefix).
 */
static inline bool arch_syscall_match_sym_name(const char *sym,
					       const char *name)
{
	/*
	 * Since all syscall functions have __arm64_ prefix, we must skip it.
	 * However, as we described above, we decided to ignore compat
	 * syscalls, so we don't care about __arm64_compat_ prefix here.
	 */
	const unsigned int prefix_len = sizeof("__arm64_") - 1;
	unsigned int i;

	/*
	 * A symbol shorter than the prefix can never match; bail out rather
	 * than stepping past its terminating NUL.
	 */
	for (i = 0; i < prefix_len; i++) {
		if (sym[i] == '\0')
			return false;
	}

	return !strcmp(sym + prefix_len, name);
}
|
|
#endif /* ifndef __ASSEMBLY__ */
|
|
|
|
#endif /* __ASM_FTRACE_H */
|