linux/arch/riscv/kernel/stacktrace.c

164 lines
3.7 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008 ARM Limited
* Copyright (C) 2014 Regents of the University of California
*/
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <linux/ftrace.h>
#include <asm/stacktrace.h>
#ifdef CONFIG_FRAME_POINTER
riscv: stacktrace: Make walk_stackframe cross pt_regs frame The current walk_stackframe with FRAME_POINTER would stop unwinding at ret_from_exception: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [<ffffffe0002038c8>] walk_stackframe+0x0/0xee [<ffffffe000aecf48>] show_stack+0x32/0x4a [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e [<ffffffe000af1648>] dump_stack+0x14/0x1c [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138 [<ffffffe000239aec>] __might_sleep+0x10/0x18 [<ffffffe000afe3fe>] down_read+0x22/0xa4 [<ffffffe000207588>] do_page_fault+0xb0/0x2fe [<ffffffe000201b80>] ret_from_exception+0x0/0xc The optimization would help walk_stackframe cross the pt_regs frame and get more backtrace of debug info: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [<ffffffe0002038c8>] walk_stackframe+0x0/0xee [<ffffffe000aecf48>] show_stack+0x32/0x4a [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e [<ffffffe000af1648>] dump_stack+0x14/0x1c [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138 [<ffffffe000239aec>] __might_sleep+0x10/0x18 [<ffffffe000afe3fe>] down_read+0x22/0xa4 [<ffffffe000207588>] do_page_fault+0xb0/0x2fe [<ffffffe000201b80>] ret_from_exception+0x0/0xc [<ffffffe000613c06>] riscv_intc_irq+0x1a/0x72 [<ffffffe000201b80>] ret_from_exception+0x0/0xc [<ffffffe00033f44a>] vma_link+0x54/0x160 [<ffffffe000341d7a>] mmap_region+0x2cc/0x4d0 [<ffffffe000342256>] do_mmap+0x2d8/0x3ac [<ffffffe000326318>] vm_mmap_pgoff+0x70/0xb8 [<ffffffe00032638a>] vm_mmap+0x2a/0x36 [<ffffffe0003cfdde>] elf_map+0x72/0x84 [<ffffffe0003d05f8>] load_elf_binary+0x69a/0xec8 [<ffffffe000376240>] bprm_execve+0x246/0x53a [<ffffffe00037786c>] kernel_execve+0xe8/0x124 [<ffffffe000aecdf2>] run_init_process+0xfa/0x10c [<ffffffe000aece16>] try_to_run_init_process+0x12/0x3c [<ffffffe000afa920>] kernel_init+0xb4/0xf8 [<ffffffe000201b80>] ret_from_exception+0x0/0xc Here is the error injection test code for the above output: drivers/irqchip/irq-riscv-intc.c: static asmlinkage void riscv_intc_irq(struct pt_regs *regs) { unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; + u32 tmp; __get_user(tmp, (u32 *)0); Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Guo Ren <guoren@kernel.org> Link: https://lore.kernel.org/r/20221109064937.3643993-3-guoren@kernel.org [Palmer: use SYM_CODE_*] Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-11-09 14:49:37 +08:00
extern asmlinkage void ret_from_exception(void);
riscv: stacktrace: fixed walk_stackframe() If the load access fault occures in a leaf function (with CONFIG_FRAME_POINTER=y), when wrong stack trace will be displayed: [<ffffffff804853c2>] regmap_mmio_read32le+0xe/0x1c ---[ end trace 0000000000000000 ]--- Registers dump: ra 0xffffffff80485758 <regmap_mmio_read+36> sp 0xffffffc80200b9a0 fp 0xffffffc80200b9b0 pc 0xffffffff804853ba <regmap_mmio_read32le+6> Stack dump: 0xffffffc80200b9a0: 0xffffffc80200b9e0 0xffffffc80200b9e0 0xffffffc80200b9b0: 0xffffffff8116d7e8 0x0000000000000100 0xffffffc80200b9c0: 0xffffffd8055b9400 0xffffffd8055b9400 0xffffffc80200b9d0: 0xffffffc80200b9f0 0xffffffff8047c526 0xffffffc80200b9e0: 0xffffffc80200ba30 0xffffffff8047fe9a The assembler dump of the function preambula: add sp,sp,-16 sd s0,8(sp) add s0,sp,16 In the fist stack frame, where ra is not stored on the stack we can observe: 0(sp) 8(sp) .---------------------------------------------. sp->| frame->fp | frame->ra (saved fp) | |---------------------------------------------| fp->| .... | .... | |---------------------------------------------| | | | and in the code check is performed: if (regs && (regs->epc == pc) && (frame->fp & 0x7)) I see no reason to check frame->fp value at all, because it is can be uninitialized value on the stack. A better way is to check frame->ra to be an address on the stack. After the stacktrace shows as expect: [<ffffffff804853c2>] regmap_mmio_read32le+0xe/0x1c [<ffffffff80485758>] regmap_mmio_read+0x24/0x52 [<ffffffff8047c526>] _regmap_bus_reg_read+0x1a/0x22 [<ffffffff8047fe9a>] _regmap_read+0x5c/0xea [<ffffffff80480376>] _regmap_update_bits+0x76/0xc0 ... ---[ end trace 0000000000000000 ]--- As pointed by Samuel Holland it is incorrect to remove check of the stackframe entirely. Changes since v2 [2]: - Add accidentally forgotten curly brace Changes since v1 [1]: - Instead of just dropping frame->fp check, replace it with validation of frame->ra, which should be a stack address. - Move frame pointer validation into the separate function. [1] https://lore.kernel.org/linux-riscv/20240426072701.6463-1-dev.mbstr@gmail.com/ [2] https://lore.kernel.org/linux-riscv/20240521131314.48895-1-dev.mbstr@gmail.com/ Fixes: f766f77a74f5 ("riscv/stacktrace: Fix stack output without ra on the stack top") Signed-off-by: Matthew Bystrin <dev.mbstr@gmail.com> Reviewed-by: Samuel Holland <samuel.holland@sifive.com> Link: https://lore.kernel.org/r/20240521191727.62012-1-dev.mbstr@gmail.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2024-05-22 03:13:13 +08:00
static inline int fp_is_valid(unsigned long fp, unsigned long sp)
{
unsigned long low, high;
low = sp + sizeof(struct stackframe);
high = ALIGN(sp, THREAD_SIZE);
return !(fp < low || fp > high || fp & 0x07);
}
void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
riscv: eliminate unreliable __builtin_frame_address(1) I tried different pieces of code which uses __builtin_frame_address(1) (with both gcc version 7.5.0 and 10.3.0) to verify whether it works as expected on riscv64. The result is negative. What the compiler had generated is as below: 31 fp = (unsigned long)__builtin_frame_address(1); 0xffffffff80006024 <+200>: ld s1,0(s0) It takes '0(s0)' as the address of frame 1 (caller), but the actual address should be '-16(s0)'. | ... | <-+ +-----------------+ | | return address | | | previous fp | | | saved registers | | | local variables | | $fp --> | ... | | +-----------------+ | | return address | | | previous fp --------+ | saved registers | $sp --> | local variables | +-----------------+ This leads the kernel can not dump the full stack trace on riscv. [ 7.222126][ T1] Call Trace: [ 7.222804][ T1] [<ffffffff80006058>] dump_backtrace+0x2c/0x3a This problem is not exposed on most riscv builds just because the '0(s0)' occasionally is the address frame 2 (caller's caller), if only ra and fp are stored in frame 1 (caller). | ... | <-+ +-----------------+ | | return address | | $fp --> | previous fp | | +-----------------+ | | return address | | | previous fp --------+ | saved registers | $sp --> | local variables | +-----------------+ This could be a *bug* of gcc that should be fixed. But as noted in gcc manual "Calling this function with a nonzero argument can have unpredictable effects, including crashing the calling program.", let's remove the '__builtin_frame_address(1)' in backtrace code. With this fix now it can show full stack trace: [ 10.444838][ T1] Call Trace: [ 10.446199][ T1] [<ffffffff8000606c>] dump_backtrace+0x2c/0x3a [ 10.447711][ T1] [<ffffffff800060ac>] show_stack+0x32/0x3e [ 10.448710][ T1] [<ffffffff80a005c0>] dump_stack_lvl+0x58/0x7a [ 10.449941][ T1] [<ffffffff80a005f6>] dump_stack+0x14/0x1c [ 10.450929][ T1] [<ffffffff804c04ee>] ubsan_epilogue+0x10/0x5a [ 10.451869][ T1] [<ffffffff804c092e>] __ubsan_handle_load_invalid_value+0x6c/0x78 [ 10.453049][ T1] [<ffffffff8018f834>] __pagevec_release+0x62/0x64 [ 10.455476][ T1] [<ffffffff80190830>] truncate_inode_pages_range+0x132/0x5be [ 10.456798][ T1] [<ffffffff80190ce0>] truncate_inode_pages+0x24/0x30 [ 10.457853][ T1] [<ffffffff8045bb04>] kill_bdev+0x32/0x3c ... Signed-off-by: Changbin Du <changbin.du@gmail.com> Fixes: eac2f3059e02 ("riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-01-17 23:44:33 +08:00
int level = 0;
if (regs) {
fp = frame_pointer(regs);
sp = user_stack_pointer(regs);
pc = instruction_pointer(regs);
riscv: stacktrace: Fix NULL pointer dereference When CONFIG_FRAME_POINTER=y, calling dump_stack() can always trigger NULL pointer dereference panic similar as below: [ 0.396060] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5+ #47 [ 0.396692] Hardware name: riscv-virtio,qemu (DT) [ 0.397176] Call Trace: [ 0.398191] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000960 [ 0.399487] Oops [#1] [ 0.399739] Modules linked in: [ 0.400135] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5+ #47 [ 0.400570] Hardware name: riscv-virtio,qemu (DT) [ 0.400926] epc : walk_stackframe+0xc4/0xdc [ 0.401291] ra : dump_backtrace+0x30/0x38 [ 0.401630] epc : ffffffff80004922 ra : ffffffff8000496a sp : ffffffe000f3bd00 [ 0.402115] gp : ffffffff80cfdcb8 tp : ffffffe000f30000 t0 : ffffffff80d0b0cf [ 0.402602] t1 : ffffffff80d0b0c0 t2 : 0000000000000000 s0 : ffffffe000f3bd60 [ 0.403071] s1 : ffffffff808bc2e8 a0 : 0000000000001000 a1 : 0000000000000000 [ 0.403448] a2 : ffffffff803d7088 a3 : ffffffff808bc2e8 a4 : 6131725dbc24d400 [ 0.403820] a5 : 0000000000001000 a6 : 0000000000000002 a7 : ffffffffffffffff [ 0.404226] s2 : 0000000000000000 s3 : 0000000000000000 s4 : 0000000000000000 [ 0.404634] s5 : ffffffff803d7088 s6 : ffffffff808bc2e8 s7 : ffffffff80630650 [ 0.405085] s8 : ffffffff80912a80 s9 : 0000000000000008 s10: ffffffff804000fc [ 0.405388] s11: 0000000000000000 t3 : 0000000000000043 t4 : ffffffffffffffff [ 0.405616] t5 : 000000000000003d t6 : ffffffe000f3baa8 [ 0.405793] status: 0000000000000100 badaddr: 0000000000000960 cause: 000000000000000d [ 0.406135] [<ffffffff80004922>] walk_stackframe+0xc4/0xdc [ 0.407032] [<ffffffff8000496a>] dump_backtrace+0x30/0x38 [ 0.407797] [<ffffffff803d7100>] show_stack+0x40/0x4c [ 0.408234] [<ffffffff803d9e5c>] dump_stack+0x90/0xb6 [ 0.409019] [<ffffffff8040423e>] ptdump_init+0x20/0xc4 [ 0.409681] [<ffffffff800015b6>] do_one_initcall+0x4c/0x226 [ 0.410110] [<ffffffff80401094>] kernel_init_freeable+0x1f4/0x258 [ 0.410562] [<ffffffff803dba88>] kernel_init+0x22/0x148 [ 0.410959] [<ffffffff800029e2>] ret_from_exception+0x0/0x14 [ 0.412241] ---[ end trace b2ab92c901b96251 ]--- [ 0.413099] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b The reason is the task is NULL when we finally call walk_stackframe() the NULL is passed from __dump_stack(): |static void __dump_stack(void) |{ | dump_stack_print_info(KERN_DEFAULT); | show_stack(NULL, NULL, KERN_DEFAULT); |} Fix this issue by checking "task == NULL" case in walk_stackframe(). Fixes: eac2f3059e02 ("riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled") Signed-off-by: Jisheng Zhang <jszhang@kernel.org> Reviewed-by: Atish Patra <atish.patra@wdc.com> Tested-by: Wende Tan <twd2.me@gmail.com> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
2021-07-16 21:40:51 +08:00
} else if (task == NULL || task == current) {
riscv: eliminate unreliable __builtin_frame_address(1) I tried different pieces of code which uses __builtin_frame_address(1) (with both gcc version 7.5.0 and 10.3.0) to verify whether it works as expected on riscv64. The result is negative. What the compiler had generated is as below: 31 fp = (unsigned long)__builtin_frame_address(1); 0xffffffff80006024 <+200>: ld s1,0(s0) It takes '0(s0)' as the address of frame 1 (caller), but the actual address should be '-16(s0)'. | ... | <-+ +-----------------+ | | return address | | | previous fp | | | saved registers | | | local variables | | $fp --> | ... | | +-----------------+ | | return address | | | previous fp --------+ | saved registers | $sp --> | local variables | +-----------------+ This leads the kernel can not dump the full stack trace on riscv. [ 7.222126][ T1] Call Trace: [ 7.222804][ T1] [<ffffffff80006058>] dump_backtrace+0x2c/0x3a This problem is not exposed on most riscv builds just because the '0(s0)' occasionally is the address frame 2 (caller's caller), if only ra and fp are stored in frame 1 (caller). | ... | <-+ +-----------------+ | | return address | | $fp --> | previous fp | | +-----------------+ | | return address | | | previous fp --------+ | saved registers | $sp --> | local variables | +-----------------+ This could be a *bug* of gcc that should be fixed. But as noted in gcc manual "Calling this function with a nonzero argument can have unpredictable effects, including crashing the calling program.", let's remove the '__builtin_frame_address(1)' in backtrace code. With this fix now it can show full stack trace: [ 10.444838][ T1] Call Trace: [ 10.446199][ T1] [<ffffffff8000606c>] dump_backtrace+0x2c/0x3a [ 10.447711][ T1] [<ffffffff800060ac>] show_stack+0x32/0x3e [ 10.448710][ T1] [<ffffffff80a005c0>] dump_stack_lvl+0x58/0x7a [ 10.449941][ T1] [<ffffffff80a005f6>] dump_stack+0x14/0x1c [ 10.450929][ T1] [<ffffffff804c04ee>] ubsan_epilogue+0x10/0x5a [ 10.451869][ T1] [<ffffffff804c092e>] __ubsan_handle_load_invalid_value+0x6c/0x78 [ 10.453049][ T1] [<ffffffff8018f834>] __pagevec_release+0x62/0x64 [ 10.455476][ T1] [<ffffffff80190830>] truncate_inode_pages_range+0x132/0x5be [ 10.456798][ T1] [<ffffffff80190ce0>] truncate_inode_pages+0x24/0x30 [ 10.457853][ T1] [<ffffffff8045bb04>] kill_bdev+0x32/0x3c ... Signed-off-by: Changbin Du <changbin.du@gmail.com> Fixes: eac2f3059e02 ("riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-01-17 23:44:33 +08:00
fp = (unsigned long)__builtin_frame_address(0);
sp = current_stack_pointer;
riscv: eliminate unreliable __builtin_frame_address(1) I tried different pieces of code which uses __builtin_frame_address(1) (with both gcc version 7.5.0 and 10.3.0) to verify whether it works as expected on riscv64. The result is negative. What the compiler had generated is as below: 31 fp = (unsigned long)__builtin_frame_address(1); 0xffffffff80006024 <+200>: ld s1,0(s0) It takes '0(s0)' as the address of frame 1 (caller), but the actual address should be '-16(s0)'. | ... | <-+ +-----------------+ | | return address | | | previous fp | | | saved registers | | | local variables | | $fp --> | ... | | +-----------------+ | | return address | | | previous fp --------+ | saved registers | $sp --> | local variables | +-----------------+ This leads the kernel can not dump the full stack trace on riscv. [ 7.222126][ T1] Call Trace: [ 7.222804][ T1] [<ffffffff80006058>] dump_backtrace+0x2c/0x3a This problem is not exposed on most riscv builds just because the '0(s0)' occasionally is the address frame 2 (caller's caller), if only ra and fp are stored in frame 1 (caller). | ... | <-+ +-----------------+ | | return address | | $fp --> | previous fp | | +-----------------+ | | return address | | | previous fp --------+ | saved registers | $sp --> | local variables | +-----------------+ This could be a *bug* of gcc that should be fixed. But as noted in gcc manual "Calling this function with a nonzero argument can have unpredictable effects, including crashing the calling program.", let's remove the '__builtin_frame_address(1)' in backtrace code. With this fix now it can show full stack trace: [ 10.444838][ T1] Call Trace: [ 10.446199][ T1] [<ffffffff8000606c>] dump_backtrace+0x2c/0x3a [ 10.447711][ T1] [<ffffffff800060ac>] show_stack+0x32/0x3e [ 10.448710][ T1] [<ffffffff80a005c0>] dump_stack_lvl+0x58/0x7a [ 10.449941][ T1] [<ffffffff80a005f6>] dump_stack+0x14/0x1c [ 10.450929][ T1] [<ffffffff804c04ee>] ubsan_epilogue+0x10/0x5a [ 10.451869][ T1] [<ffffffff804c092e>] __ubsan_handle_load_invalid_value+0x6c/0x78 [ 10.453049][ T1] [<ffffffff8018f834>] __pagevec_release+0x62/0x64 [ 10.455476][ T1] [<ffffffff80190830>] truncate_inode_pages_range+0x132/0x5be [ 10.456798][ T1] [<ffffffff80190ce0>] truncate_inode_pages+0x24/0x30 [ 10.457853][ T1] [<ffffffff8045bb04>] kill_bdev+0x32/0x3c ... Signed-off-by: Changbin Du <changbin.du@gmail.com> Fixes: eac2f3059e02 ("riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-01-17 23:44:33 +08:00
pc = (unsigned long)walk_stackframe;
riscv: stacktrace: Fix missing the first frame When running kfence_test, I found some testcases failed like this: # test_out_of_bounds_read: EXPECTATION FAILED at mm/kfence/kfence_test.c:346 Expected report_matches(&expect) to be true, but is false not ok 1 - test_out_of_bounds_read The corresponding call-trace is: BUG: KFENCE: out-of-bounds read in kunit_try_run_case+0x38/0x84 Out-of-bounds read at 0x(____ptrval____) (32B right of kfence-#10): kunit_try_run_case+0x38/0x84 kunit_generic_run_threadfn_adapter+0x12/0x1e kthread+0xc8/0xde ret_from_exception+0x0/0xc The kfence_test using the first frame of call trace to check whether the testcase is succeed or not. Commit 6a00ef449370 ("riscv: eliminate unreliable __builtin_frame_address(1)") skip first frame for all case, which results the kfence_test failed. Indeed, we only need to skip the first frame for case (task==NULL || task==current). With this patch, the call-trace will be: BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0x88/0x19e Out-of-bounds read at 0x(____ptrval____) (1B left of kfence-#7): test_out_of_bounds_read+0x88/0x19e kunit_try_run_case+0x38/0x84 kunit_generic_run_threadfn_adapter+0x12/0x1e kthread+0xc8/0xde ret_from_exception+0x0/0xc Fixes: 6a00ef449370 ("riscv: eliminate unreliable __builtin_frame_address(1)") Signed-off-by: Liu Shixin <liushixin2@huawei.com> Tested-by: Samuel Holland <samuel@sholland.org> Link: https://lore.kernel.org/r/20221207025038.1022045-1-liushixin2@huawei.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-12-07 10:50:38 +08:00
level = -1;
} else {
/* task blocked in __switch_to */
fp = task->thread.s[0];
sp = task->thread.sp;
pc = task->thread.ra;
}
for (;;) {
struct stackframe *frame;
riscv: stacktrace: Fix missing the first frame When running kfence_test, I found some testcases failed like this: # test_out_of_bounds_read: EXPECTATION FAILED at mm/kfence/kfence_test.c:346 Expected report_matches(&expect) to be true, but is false not ok 1 - test_out_of_bounds_read The corresponding call-trace is: BUG: KFENCE: out-of-bounds read in kunit_try_run_case+0x38/0x84 Out-of-bounds read at 0x(____ptrval____) (32B right of kfence-#10): kunit_try_run_case+0x38/0x84 kunit_generic_run_threadfn_adapter+0x12/0x1e kthread+0xc8/0xde ret_from_exception+0x0/0xc The kfence_test using the first frame of call trace to check whether the testcase is succeed or not. Commit 6a00ef449370 ("riscv: eliminate unreliable __builtin_frame_address(1)") skip first frame for all case, which results the kfence_test failed. Indeed, we only need to skip the first frame for case (task==NULL || task==current). With this patch, the call-trace will be: BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0x88/0x19e Out-of-bounds read at 0x(____ptrval____) (1B left of kfence-#7): test_out_of_bounds_read+0x88/0x19e kunit_try_run_case+0x38/0x84 kunit_generic_run_threadfn_adapter+0x12/0x1e kthread+0xc8/0xde ret_from_exception+0x0/0xc Fixes: 6a00ef449370 ("riscv: eliminate unreliable __builtin_frame_address(1)") Signed-off-by: Liu Shixin <liushixin2@huawei.com> Tested-by: Samuel Holland <samuel@sholland.org> Link: https://lore.kernel.org/r/20221207025038.1022045-1-liushixin2@huawei.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-12-07 10:50:38 +08:00
if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc))))
break;
riscv: stacktrace: fixed walk_stackframe() If the load access fault occures in a leaf function (with CONFIG_FRAME_POINTER=y), when wrong stack trace will be displayed: [<ffffffff804853c2>] regmap_mmio_read32le+0xe/0x1c ---[ end trace 0000000000000000 ]--- Registers dump: ra 0xffffffff80485758 <regmap_mmio_read+36> sp 0xffffffc80200b9a0 fp 0xffffffc80200b9b0 pc 0xffffffff804853ba <regmap_mmio_read32le+6> Stack dump: 0xffffffc80200b9a0: 0xffffffc80200b9e0 0xffffffc80200b9e0 0xffffffc80200b9b0: 0xffffffff8116d7e8 0x0000000000000100 0xffffffc80200b9c0: 0xffffffd8055b9400 0xffffffd8055b9400 0xffffffc80200b9d0: 0xffffffc80200b9f0 0xffffffff8047c526 0xffffffc80200b9e0: 0xffffffc80200ba30 0xffffffff8047fe9a The assembler dump of the function preambula: add sp,sp,-16 sd s0,8(sp) add s0,sp,16 In the fist stack frame, where ra is not stored on the stack we can observe: 0(sp) 8(sp) .---------------------------------------------. sp->| frame->fp | frame->ra (saved fp) | |---------------------------------------------| fp->| .... | .... | |---------------------------------------------| | | | and in the code check is performed: if (regs && (regs->epc == pc) && (frame->fp & 0x7)) I see no reason to check frame->fp value at all, because it is can be uninitialized value on the stack. A better way is to check frame->ra to be an address on the stack. After the stacktrace shows as expect: [<ffffffff804853c2>] regmap_mmio_read32le+0xe/0x1c [<ffffffff80485758>] regmap_mmio_read+0x24/0x52 [<ffffffff8047c526>] _regmap_bus_reg_read+0x1a/0x22 [<ffffffff8047fe9a>] _regmap_read+0x5c/0xea [<ffffffff80480376>] _regmap_update_bits+0x76/0xc0 ... ---[ end trace 0000000000000000 ]--- As pointed by Samuel Holland it is incorrect to remove check of the stackframe entirely. Changes since v2 [2]: - Add accidentally forgotten curly brace Changes since v1 [1]: - Instead of just dropping frame->fp check, replace it with validation of frame->ra, which should be a stack address. - Move frame pointer validation into the separate function. [1] https://lore.kernel.org/linux-riscv/20240426072701.6463-1-dev.mbstr@gmail.com/ [2] https://lore.kernel.org/linux-riscv/20240521131314.48895-1-dev.mbstr@gmail.com/ Fixes: f766f77a74f5 ("riscv/stacktrace: Fix stack output without ra on the stack top") Signed-off-by: Matthew Bystrin <dev.mbstr@gmail.com> Reviewed-by: Samuel Holland <samuel.holland@sifive.com> Link: https://lore.kernel.org/r/20240521191727.62012-1-dev.mbstr@gmail.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2024-05-22 03:13:13 +08:00
if (unlikely(!fp_is_valid(fp, sp)))
break;
riscv: stacktrace: fixed walk_stackframe() If the load access fault occures in a leaf function (with CONFIG_FRAME_POINTER=y), when wrong stack trace will be displayed: [<ffffffff804853c2>] regmap_mmio_read32le+0xe/0x1c ---[ end trace 0000000000000000 ]--- Registers dump: ra 0xffffffff80485758 <regmap_mmio_read+36> sp 0xffffffc80200b9a0 fp 0xffffffc80200b9b0 pc 0xffffffff804853ba <regmap_mmio_read32le+6> Stack dump: 0xffffffc80200b9a0: 0xffffffc80200b9e0 0xffffffc80200b9e0 0xffffffc80200b9b0: 0xffffffff8116d7e8 0x0000000000000100 0xffffffc80200b9c0: 0xffffffd8055b9400 0xffffffd8055b9400 0xffffffc80200b9d0: 0xffffffc80200b9f0 0xffffffff8047c526 0xffffffc80200b9e0: 0xffffffc80200ba30 0xffffffff8047fe9a The assembler dump of the function preambula: add sp,sp,-16 sd s0,8(sp) add s0,sp,16 In the fist stack frame, where ra is not stored on the stack we can observe: 0(sp) 8(sp) .---------------------------------------------. sp->| frame->fp | frame->ra (saved fp) | |---------------------------------------------| fp->| .... | .... | |---------------------------------------------| | | | and in the code check is performed: if (regs && (regs->epc == pc) && (frame->fp & 0x7)) I see no reason to check frame->fp value at all, because it is can be uninitialized value on the stack. A better way is to check frame->ra to be an address on the stack. After the stacktrace shows as expect: [<ffffffff804853c2>] regmap_mmio_read32le+0xe/0x1c [<ffffffff80485758>] regmap_mmio_read+0x24/0x52 [<ffffffff8047c526>] _regmap_bus_reg_read+0x1a/0x22 [<ffffffff8047fe9a>] _regmap_read+0x5c/0xea [<ffffffff80480376>] _regmap_update_bits+0x76/0xc0 ... ---[ end trace 0000000000000000 ]--- As pointed by Samuel Holland it is incorrect to remove check of the stackframe entirely. Changes since v2 [2]: - Add accidentally forgotten curly brace Changes since v1 [1]: - Instead of just dropping frame->fp check, replace it with validation of frame->ra, which should be a stack address. - Move frame pointer validation into the separate function. [1] https://lore.kernel.org/linux-riscv/20240426072701.6463-1-dev.mbstr@gmail.com/ [2] https://lore.kernel.org/linux-riscv/20240521131314.48895-1-dev.mbstr@gmail.com/ Fixes: f766f77a74f5 ("riscv/stacktrace: Fix stack output without ra on the stack top") Signed-off-by: Matthew Bystrin <dev.mbstr@gmail.com> Reviewed-by: Samuel Holland <samuel.holland@sifive.com> Link: https://lore.kernel.org/r/20240521191727.62012-1-dev.mbstr@gmail.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2024-05-22 03:13:13 +08:00
/* Unwind stack frame */
frame = (struct stackframe *)fp - 1;
sp = fp;
riscv: stacktrace: fixed walk_stackframe() If the load access fault occures in a leaf function (with CONFIG_FRAME_POINTER=y), when wrong stack trace will be displayed: [<ffffffff804853c2>] regmap_mmio_read32le+0xe/0x1c ---[ end trace 0000000000000000 ]--- Registers dump: ra 0xffffffff80485758 <regmap_mmio_read+36> sp 0xffffffc80200b9a0 fp 0xffffffc80200b9b0 pc 0xffffffff804853ba <regmap_mmio_read32le+6> Stack dump: 0xffffffc80200b9a0: 0xffffffc80200b9e0 0xffffffc80200b9e0 0xffffffc80200b9b0: 0xffffffff8116d7e8 0x0000000000000100 0xffffffc80200b9c0: 0xffffffd8055b9400 0xffffffd8055b9400 0xffffffc80200b9d0: 0xffffffc80200b9f0 0xffffffff8047c526 0xffffffc80200b9e0: 0xffffffc80200ba30 0xffffffff8047fe9a The assembler dump of the function preambula: add sp,sp,-16 sd s0,8(sp) add s0,sp,16 In the fist stack frame, where ra is not stored on the stack we can observe: 0(sp) 8(sp) .---------------------------------------------. sp->| frame->fp | frame->ra (saved fp) | |---------------------------------------------| fp->| .... | .... | |---------------------------------------------| | | | and in the code check is performed: if (regs && (regs->epc == pc) && (frame->fp & 0x7)) I see no reason to check frame->fp value at all, because it is can be uninitialized value on the stack. A better way is to check frame->ra to be an address on the stack. After the stacktrace shows as expect: [<ffffffff804853c2>] regmap_mmio_read32le+0xe/0x1c [<ffffffff80485758>] regmap_mmio_read+0x24/0x52 [<ffffffff8047c526>] _regmap_bus_reg_read+0x1a/0x22 [<ffffffff8047fe9a>] _regmap_read+0x5c/0xea [<ffffffff80480376>] _regmap_update_bits+0x76/0xc0 ... ---[ end trace 0000000000000000 ]--- As pointed by Samuel Holland it is incorrect to remove check of the stackframe entirely. Changes since v2 [2]: - Add accidentally forgotten curly brace Changes since v1 [1]: - Instead of just dropping frame->fp check, replace it with validation of frame->ra, which should be a stack address. - Move frame pointer validation into the separate function. [1] https://lore.kernel.org/linux-riscv/20240426072701.6463-1-dev.mbstr@gmail.com/ [2] https://lore.kernel.org/linux-riscv/20240521131314.48895-1-dev.mbstr@gmail.com/ Fixes: f766f77a74f5 ("riscv/stacktrace: Fix stack output without ra on the stack top") Signed-off-by: Matthew Bystrin <dev.mbstr@gmail.com> Reviewed-by: Samuel Holland <samuel.holland@sifive.com> Link: https://lore.kernel.org/r/20240521191727.62012-1-dev.mbstr@gmail.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2024-05-22 03:13:13 +08:00
if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) {
/* We hit function where ra is not saved on the stack */
fp = frame->ra;
pc = regs->ra;
} else {
fp = frame->fp;
pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
&frame->ra);
riscv: stacktrace: Make walk_stackframe cross pt_regs frame The current walk_stackframe with FRAME_POINTER would stop unwinding at ret_from_exception: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [<ffffffe0002038c8>] walk_stackframe+0x0/0xee [<ffffffe000aecf48>] show_stack+0x32/0x4a [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e [<ffffffe000af1648>] dump_stack+0x14/0x1c [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138 [<ffffffe000239aec>] __might_sleep+0x10/0x18 [<ffffffe000afe3fe>] down_read+0x22/0xa4 [<ffffffe000207588>] do_page_fault+0xb0/0x2fe [<ffffffe000201b80>] ret_from_exception+0x0/0xc The optimization would help walk_stackframe cross the pt_regs frame and get more backtrace of debug info: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [<ffffffe0002038c8>] walk_stackframe+0x0/0xee [<ffffffe000aecf48>] show_stack+0x32/0x4a [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e [<ffffffe000af1648>] dump_stack+0x14/0x1c [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138 [<ffffffe000239aec>] __might_sleep+0x10/0x18 [<ffffffe000afe3fe>] down_read+0x22/0xa4 [<ffffffe000207588>] do_page_fault+0xb0/0x2fe [<ffffffe000201b80>] ret_from_exception+0x0/0xc [<ffffffe000613c06>] riscv_intc_irq+0x1a/0x72 [<ffffffe000201b80>] ret_from_exception+0x0/0xc [<ffffffe00033f44a>] vma_link+0x54/0x160 [<ffffffe000341d7a>] mmap_region+0x2cc/0x4d0 [<ffffffe000342256>] do_mmap+0x2d8/0x3ac [<ffffffe000326318>] vm_mmap_pgoff+0x70/0xb8 [<ffffffe00032638a>] vm_mmap+0x2a/0x36 [<ffffffe0003cfdde>] elf_map+0x72/0x84 [<ffffffe0003d05f8>] load_elf_binary+0x69a/0xec8 [<ffffffe000376240>] bprm_execve+0x246/0x53a [<ffffffe00037786c>] kernel_execve+0xe8/0x124 [<ffffffe000aecdf2>] run_init_process+0xfa/0x10c [<ffffffe000aece16>] try_to_run_init_process+0x12/0x3c [<ffffffe000afa920>] kernel_init+0xb4/0xf8 [<ffffffe000201b80>] ret_from_exception+0x0/0xc Here is the error injection test code for the above output: drivers/irqchip/irq-riscv-intc.c: static asmlinkage void riscv_intc_irq(struct pt_regs *regs) { unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; + u32 tmp; __get_user(tmp, (u32 *)0); Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Guo Ren <guoren@kernel.org> Link: https://lore.kernel.org/r/20221109064937.3643993-3-guoren@kernel.org [Palmer: use SYM_CODE_*] Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-11-09 14:49:37 +08:00
if (pc == (unsigned long)ret_from_exception) {
if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
break;
pc = ((struct pt_regs *)sp)->epc;
fp = ((struct pt_regs *)sp)->s0;
}
}
}
}
#else /* !CONFIG_FRAME_POINTER */
void notrace walk_stackframe(struct task_struct *task,
struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long sp, pc;
unsigned long *ksp;
if (regs) {
sp = user_stack_pointer(regs);
pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
sp = current_stack_pointer;
pc = (unsigned long)walk_stackframe;
} else {
/* task blocked in __switch_to */
sp = task->thread.sp;
pc = task->thread.ra;
}
if (unlikely(sp & 0x7))
return;
ksp = (unsigned long *)sp;
while (!kstack_end(ksp)) {
if (__kernel_text_address(pc) && unlikely(!fn(arg, pc)))
break;
riscv: Use READ_ONCE_NOCHECK in imprecise unwinding stack mode When CONFIG_FRAME_POINTER is unset, the stack unwinding function walk_stackframe randomly reads the stack and then, when KASAN is enabled, it can lead to the following backtrace: [ 0.000000] ================================================================== [ 0.000000] BUG: KASAN: stack-out-of-bounds in walk_stackframe+0xa6/0x11a [ 0.000000] Read of size 8 at addr ffffffff81807c40 by task swapper/0 [ 0.000000] [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.2.0-12919-g24203e6db61f #43 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] Call Trace: [ 0.000000] [<ffffffff80007ba8>] walk_stackframe+0x0/0x11a [ 0.000000] [<ffffffff80099ecc>] init_param_lock+0x26/0x2a [ 0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a [ 0.000000] [<ffffffff80c49c80>] dump_stack_lvl+0x22/0x36 [ 0.000000] [<ffffffff80c3783e>] print_report+0x198/0x4a8 [ 0.000000] [<ffffffff80099ecc>] init_param_lock+0x26/0x2a [ 0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a [ 0.000000] [<ffffffff8015f68a>] kasan_report+0x9a/0xc8 [ 0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a [ 0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a [ 0.000000] [<ffffffff8006e99c>] desc_make_final+0x80/0x84 [ 0.000000] [<ffffffff8009a04e>] stack_trace_save+0x88/0xa6 [ 0.000000] [<ffffffff80099fc2>] filter_irq_stacks+0x72/0x76 [ 0.000000] [<ffffffff8006b95e>] devkmsg_read+0x32a/0x32e [ 0.000000] [<ffffffff8015ec16>] kasan_save_stack+0x28/0x52 [ 0.000000] [<ffffffff8006e998>] desc_make_final+0x7c/0x84 [ 0.000000] [<ffffffff8009a04a>] stack_trace_save+0x84/0xa6 [ 0.000000] [<ffffffff8015ec52>] kasan_set_track+0x12/0x20 [ 0.000000] [<ffffffff8015f22e>] __kasan_slab_alloc+0x58/0x5e [ 0.000000] [<ffffffff8015e7ea>] __kmem_cache_create+0x21e/0x39a [ 0.000000] [<ffffffff80e133ac>] create_boot_cache+0x70/0x9c [ 0.000000] [<ffffffff80e17ab2>] kmem_cache_init+0x6c/0x11e [ 0.000000] [<ffffffff80e00fd6>] mm_init+0xd8/0xfe [ 0.000000] [<ffffffff80e011d8>] start_kernel+0x190/0x3ca [ 0.000000] [ 0.000000] The buggy address belongs to stack of task swapper/0 [ 0.000000] and is located at offset 0 in frame: [ 0.000000] stack_trace_save+0x0/0xa6 [ 0.000000] [ 0.000000] This frame has 1 object: [ 0.000000] [32, 56) 'c' [ 0.000000] [ 0.000000] The buggy address belongs to the physical page: [ 0.000000] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x81a07 [ 0.000000] flags: 0x1000(reserved|zone=0) [ 0.000000] raw: 0000000000001000 ff600003f1e3d150 ff600003f1e3d150 0000000000000000 [ 0.000000] raw: 0000000000000000 0000000000000000 00000001ffffffff [ 0.000000] page dumped because: kasan: bad access detected [ 0.000000] [ 0.000000] Memory state around the buggy address: [ 0.000000] ffffffff81807b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ffffffff81807b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] >ffffffff81807c00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 f3 [ 0.000000] ^ [ 0.000000] ffffffff81807c80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ffffffff81807d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ================================================================== Fix that by using READ_ONCE_NOCHECK when reading the stack in imprecise mode. Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly") Reported-by: Chathura Rajapaksha <chathura.abeyrathne.lk@gmail.com> Link: https://lore.kernel.org/all/CAD7mqryDQCYyJ1gAmtMm8SASMWAQ4i103ptTb0f6Oda=tPY2=A@mail.gmail.com/ Suggested-by: Dmitry Vyukov <dvyukov@google.com> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> Link: https://lore.kernel.org/r/20230308091639.602024-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2023-03-08 17:16:39 +08:00
pc = READ_ONCE_NOCHECK(*ksp++) - 0x4;
}
}
#endif /* CONFIG_FRAME_POINTER */
static bool print_trace_address(void *arg, unsigned long pc)
{
const char *loglvl = arg;
print_ip_sym(loglvl, pc);
return true;
}
noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
const char *loglvl)
{
walk_stackframe(task, regs, print_trace_address, (void *)loglvl);
}
void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
{
pr_cont("%sCall Trace:\n", loglvl);
dump_backtrace(NULL, task, loglvl);
}
static bool save_wchan(void *arg, unsigned long pc)
{
if (!in_sched_functions(pc)) {
unsigned long *p = arg;
*p = pc;
return false;
}
return true;
}
unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
if (!try_get_task_stack(task))
return 0;
walk_stackframe(task, NULL, save_wchan, &pc);
put_task_stack(task);
return pc;
}
noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
{
walk_stackframe(task, regs, consume_entry, cookie);
}