linux/arch/x86/kernel/ftrace_32.S
Mark Rutland fee86a4ed5 ftrace: selftest: remove broken trace_direct_tramp
The ftrace selftest code has a trace_direct_tramp() function which it
uses as a direct call trampoline. This happens to work on x86, since the
direct call's return address is in the usual place, and can be returned
to via a RET, but in general the calling convention for direct calls is
different from regular function calls, and requires a trampoline written
in assembly.

On s390, regular function calls place the return address in %r14, and an
ftrace patch-site in an instrumented function places the trampoline's
return address (which is within the instrumented function) in %r0,
preserving the original %r14 value in-place. As a regular C function
will return to the address in %r14, using a C function as the trampoline
results in the trampoline returning to the caller of the instrumented
function, skipping the body of the instrumented function.

Note that the s390 issue is not detected by the ftrace selftest code, as
the instrumented function is trivial, and returning back into the caller
happens to be equivalent.

On arm64, regular function calls place the return address in x30, and
an ftrace patch-site in an instrumented function saves this into x9
and places the trampoline's return address (within the instrumented
function) in x30. A regular C function will return to the address in
x30, but will not restore x9 into x30. Consequently, using a C function
as the trampoline results in returning to the trampoline's return
address having corrupted x30, such that when the instrumented function
returns, it will return back into itself.

To avoid future issues in this area, remove the trace_direct_tramp()
function, and require that each architecture with direct calls provides
a stub trampoline, named ftrace_stub_direct_tramp. This can be written
to handle the architecture's trampoline calling convention, and in
future could be used elsewhere (e.g. in the ftrace ops sample, to
measure the overhead of direct calls), so we may as well always build it
in.

Link: https://lkml.kernel.org/r/20230321140424.345218-8-revest@chromium.org

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Florent Revest <revest@chromium.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2023-03-21 13:59:29 -04:00

199 lines
4.0 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2017 Steven Rostedt, VMware Inc.
*/
#include <linux/linkage.h>
#include <asm/page_types.h>
#include <asm/segment.h>
#include <asm/export.h>
#include <asm/ftrace.h>
#include <asm/nospec-branch.h>
#include <asm/frame.h>
#include <asm/asm-offsets.h>
/*
* MCOUNT_FRAME is added to the word offset that ftrace_caller uses to load
* the function ip from the stack: with CONFIG_FRAME_POINTER one extra word
* (a pushed %ebp) lies between the saved registers and the function ip.
*/
#ifdef CONFIG_FRAME_POINTER
# define MCOUNT_FRAME 1 /* using frame = true */
#else
# define MCOUNT_FRAME 0 /* using frame = false */
#endif
/*
* __fentry__ - stub whose body is a single RET, so a call to it returns
* straight to the caller without touching any register or the stack.
* The symbol is exported with EXPORT_SYMBOL.
*/
SYM_FUNC_START(__fentry__)
RET
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
/*
* ftrace_caller - save the scratch registers and call the tracer without
* a pt_regs.
*
* Stack on entry, lowest address first: function-ip, parent-ip.
*
* Arguments set up for the call at ftrace_call:
*   %eax   = function ip - MCOUNT_INSN_SIZE
*   %edx   = parent ip
*   %ecx   = function_trace_op
*   (%esp) = 0, i.e. a NULL regs pointer
*
* %eax, %ecx and %edx are restored afterwards and execution falls through
* to .Lftrace_ret.
*/
SYM_CODE_START(ftrace_caller)
#ifdef CONFIG_FRAME_POINTER
/*
* Frame pointers are of ip followed by bp.
* Since fentry is an immediate jump, we are left with
* parent-ip, function-ip. We need to add a frame with
* parent-ip followed by ebp.
*/
pushl 4(%esp) /* parent ip */
pushl %ebp
movl %esp, %ebp
/* Two words were pushed, so the function ip is now at 2*4(%esp) */
pushl 2*4(%esp) /* function ip */
/* For mcount, the function ip is directly above */
pushl %ebp
movl %esp, %ebp
#endif
/* Save the registers that are loaded with the call arguments below */
pushl %eax
pushl %ecx
pushl %edx
pushl $0 /* Pass NULL as regs pointer */
/*
* Four words were just pushed, so 4*4(%esp) is the first slot above them:
* the %ebp pushed last above (frame pointer case) or the function ip
* (no frame pointer case).
*/
#ifdef CONFIG_FRAME_POINTER
/* Load parent ebp into edx */
movl 4*4(%esp), %edx
#else
/* There's no frame pointer, load the appropriate stack addr instead */
lea 4*4(%esp), %edx
#endif
/* MCOUNT_FRAME skips the extra %ebp word in the frame pointer case */
movl (MCOUNT_FRAME+4)*4(%esp), %eax /* load the function ip */
/*
* Get the parent ip: in both cases it is stored one word above the
* address currently held in edx.
*/
movl 4(%edx), %edx /* edx = parent ip */
movl function_trace_op, %ecx
/* Step back from the return address by MCOUNT_INSN_SIZE bytes */
subl $MCOUNT_INSN_SIZE, %eax
/*
* NOTE(review): ftrace_call is a global label on the call instruction
* itself, presumably so that the call target can be rewritten at runtime.
* Confirm against the code that references ftrace_call.
*/
.globl ftrace_call
ftrace_call:
call ftrace_stub
addl $4, %esp /* skip NULL pointer */
popl %edx
popl %ecx
popl %eax
#ifdef CONFIG_FRAME_POINTER
/* Undo the two frames built on entry, in reverse order */
popl %ebp
addl $4,%esp /* skip function ip */
popl %ebp /* this is the orig bp */
addl $4, %esp /* skip parent ip */
#endif
/*
* Common exit path: ftrace_regs_caller also jumps here once it has
* restored its registers. The stack again holds function-ip, parent-ip.
*/
.Lftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* NOTE(review): global label on the jmp instruction; presumably a second
* runtime patch site, which is why the jump must not be relaxed (see the
* comment on ftrace_stub below). Confirm against its users.
*/
.globl ftrace_graph_call
ftrace_graph_call:
jmp ftrace_stub
#endif
/* This is weak to keep gas from relaxing the jumps */
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
/* Default target of the call and jmp above: simply return */
RET
SYM_CODE_END(ftrace_caller)
/*
* ftrace_regs_caller - build a pt_regs image on the stack and call the
* tracer with a pointer to it.
*
* Arguments set up for the call at ftrace_regs_call:
*   %eax   = saved ip - MCOUNT_INSN_SIZE
*   %edx   = parent ip
*   %ecx   = function_trace_op
*   (%esp) = pointer to the pt_regs image
*
* On the way out the general purpose registers and the ip are reloaded
* from the pt_regs image, then control continues at .Lftrace_ret.
*/
SYM_CODE_START(ftrace_regs_caller)
/*
* We're here from an mcount/fentry CALL, and the stack frame looks like:
*
* <previous context>
* RET-IP
*
* The purpose of this function is to call out in an emulated INT3
* environment with a stack frame like:
*
* <previous context>
* gap / RET-IP
* gap
* gap
* gap
* pt_regs
*
* We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
*/
subl $3*4, %esp # RET-IP + 3 gaps
pushl %ss # ss
pushl %esp # points at ss
/* ss, three gaps and RET-IP (5 words) separate that slot from <previous context> */
addl $5*4, (%esp) # make it point at <previous context>
pushfl # flags
pushl $__KERNEL_CS # cs
/* cs, flags, sp, ss and three gaps (7 words) now sit below RET-IP */
pushl 7*4(%esp) # ip <- RET-IP
pushl $0 # orig_eax
/* Segment registers */
pushl %gs
pushl %fs
pushl %es
pushl %ds
/* General purpose registers; ebx is pushed last and ends up at (%esp) */
pushl %eax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx
/* NOTE(review): ENCODE_FRAME_POINTER is defined outside this file; see <asm/frame.h> */
ENCODE_FRAME_POINTER
movl PT_EIP(%esp), %eax # 1st argument: IP
subl $MCOUNT_INSN_SIZE, %eax
/*
* 17 pushed words plus the 3 gaps put RET-IP at 20*4(%esp), so 21*4(%esp)
* is the first word of <previous context>.
*/
movl 21*4(%esp), %edx # 2nd argument: parent ip
movl function_trace_op, %ecx # 3rd argument: function_trace_op
pushl %esp # 4th argument: pt_regs
SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
call ftrace_stub
addl $4, %esp # skip 4th argument
/*
* eax = saved sp, which was set up above to point at <previous context>;
* -4(%eax) is therefore the slot directly below <previous context>.
*/
/* place IP below the new SP */
movl PT_OLDESP(%esp), %eax
movl PT_EIP(%esp), %ecx
movl %ecx, -4(%eax)
/* place EAX below that */
movl PT_EAX(%esp), %ecx
movl %ecx, -8(%eax)
/* Reload the general purpose registers, in reverse push order */
popl %ebx
popl %ecx
popl %edx
popl %esi
popl %edi
popl %ebp
/*
* Switch to the two words stored above: the pop reloads eax and leaves
* esp pointing at the IP, ready for the RET reached via .Lftrace_ret.
*/
lea -8(%eax), %esp
popl %eax
jmp .Lftrace_ret
SYM_CODE_END(ftrace_regs_caller)
/*
* ftrace_stub_direct_tramp - stub trampoline that does no work of its own:
* it invokes CALL_DEPTH_ACCOUNT and then returns.
*
* NOTE(review): CALL_DEPTH_ACCOUNT is defined outside this file
* (presumably in <asm/nospec-branch.h>); its expansion is not visible here.
*/
SYM_FUNC_START(ftrace_stub_direct_tramp)
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(ftrace_stub_direct_tramp)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* ftrace_graph_caller - call prepare_ftrace_return() for the traced function.
*
* NOTE(review): assumes the stack on entry holds function-ip, parent-ip
* (lowest address first), as it does at .Lftrace_ret - confirm against the
* code that redirects ftrace_graph_call here.
*
* Arguments passed (register order as in ftrace_regs_caller):
*   %eax = function ip - MCOUNT_INSN_SIZE
*   %edx = address of the stack slot holding the parent ip
*   %ecx = 0
*
* %eax, %ecx and %edx are preserved across the call.
*/
SYM_CODE_START(ftrace_graph_caller)
	/* Save the three registers used for the arguments */
	pushl	%eax
	pushl	%ecx
	pushl	%edx

	/* Even with frame pointers, fentry doesn't have one here: pass 0 */
	xorl	%ecx, %ecx

	/* Three saved words + function ip lie below the parent ip slot */
	lea	4*4(%esp), %edx

	/* Function ip is just above the three saved words; step back to the call */
	movl	3*4(%esp), %eax
	subl	$MCOUNT_INSN_SIZE, %eax

	call	prepare_ftrace_return

	/* Restore in reverse order of the pushes */
	popl	%edx
	popl	%ecx
	popl	%eax
	RET
SYM_CODE_END(ftrace_graph_caller)
/*
* return_to_handler - call ftrace_return_to_handler() and jump to the
* address it returns.
*
* %eax and %edx are saved before the call and restored after it, so they
* reach the jump target unchanged; %ecx is overwritten with the target.
* NOTE(review): presumably %eax/%edx are preserved because they carry the
* return value of the function that just finished, and the address
* returned is that function's original return address - confirm against
* ftrace_return_to_handler().
*/
.globl return_to_handler
return_to_handler:
pushl %eax
pushl %edx
/* Argument to ftrace_return_to_handler: 0 */
movl $0, %eax
call ftrace_return_to_handler
/* Move the result out of eax before eax is restored */
movl %eax, %ecx
popl %edx
popl %eax
/* Indirect jump through ecx using the JMP_NOSPEC macro */
JMP_NOSPEC ecx
#endif