2019-05-29 22:18:00 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2017-07-11 09:04:30 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2012 Regents of the University of California
|
|
|
|
* Copyright (C) 2017 SiFive
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/linkage.h>
|
|
|
|
|
|
|
|
#include <asm/asm.h>
|
|
|
|
#include <asm/csr.h>
|
|
|
|
#include <asm/unistd.h>
|
|
|
|
#include <asm/thread_info.h>
|
|
|
|
#include <asm/asm-offsets.h>
|
2021-03-22 22:26:05 +08:00
|
|
|
#include <asm/errata_list.h>
|
2017-07-11 09:04:30 +08:00
|
|
|
|
2020-02-28 03:15:03 +08:00
|
|
|
#if !IS_ENABLED(CONFIG_PREEMPTION)
/*
 * The resume_kernel label is only defined when CONFIG_PREEMPTION is
 * enabled.  Without it, alias resume_kernel to restore_all so that the
 * "return to kernel mode" branch in ret_from_exception goes straight to
 * the register restore.
 */
.set resume_kernel, restore_all
#endif
|
2017-07-11 09:04:30 +08:00
|
|
|
|
2020-02-28 03:15:03 +08:00
|
|
|
/*
 * handle_exception - common trap entry for interrupts, exceptions and
 * system calls.
 *
 * On entry CSR_SCRATCH holds the kernel tp if the trap came from userspace
 * (it is written just before returning to userspace, see resume_userspace),
 * or 0 if the trap came from the kernel.  The code builds a pt_regs frame
 * on the kernel stack and then dispatches on CSR_CAUSE.
 *
 * Register roles at dispatch time, after the frame has been saved:
 *   sp = pointer to the saved pt_regs
 *   s1 = CSR_STATUS as read at entry (value before SR_SUM/SR_FS are cleared)
 *   s2 = CSR_EPC
 *   s3 = CSR_TVAL
 *   s4 = CSR_CAUSE
 *   s5 = CSR_SCRATCH (tp of the interrupted context)
 */
ENTRY(handle_exception)
	/*
	 * If coming from userspace, preserve the user thread pointer and load
	 * the kernel thread pointer.  If we came from the kernel, the scratch
	 * register will contain 0, and we should continue on the current TP.
	 */
	csrrw tp, CSR_SCRATCH, tp
	bnez tp, _save_context

_restore_kernel_tpsp:
	/*
	 * Trap from the kernel: the swap above left 0 in tp and the kernel tp
	 * in the scratch register, so read tp back and record the current sp
	 * as the kernel stack pointer.
	 */
	csrr tp, CSR_SCRATCH
	REG_S sp, TASK_TI_KERNEL_SP(tp)

#ifdef CONFIG_VMAP_STACK
	/*
	 * Stack overflow check, done before anything is stored on the stack.
	 * Kernel stacks are aligned to double their size, so the THREAD_SHIFT
	 * bit of any in-bounds sp is zero; an overflow of less than one stack
	 * size flips it.  Test that bit on the sp the pt_regs frame would
	 * occupy.  sp itself is used as the scratch register for the test and
	 * is reloaded from thread_info afterwards.
	 */
	addi sp, sp, -(PT_SIZE_ON_STACK)
	srli sp, sp, THREAD_SHIFT
	andi sp, sp, 0x1
	bnez sp, handle_kernel_stack_overflow
	REG_L sp, TASK_TI_KERNEL_SP(tp)
#endif

_save_context:
	/*
	 * Stash the sp of the interrupted context (user or kernel) in
	 * thread_info, switch to the kernel stack and allocate a pt_regs
	 * frame.  x2 (sp) and x4 (tp) are not stored here; they are written
	 * to PT_SP/PT_TP further down via s0 and s5.
	 */
	REG_S sp, TASK_TI_USER_SP(tp)
	REG_L sp, TASK_TI_KERNEL_SP(tp)
	addi sp, sp, -(PT_SIZE_ON_STACK)
	REG_S x1, PT_RA(sp)
	REG_S x3, PT_GP(sp)
	REG_S x5, PT_T0(sp)
	REG_S x6, PT_T1(sp)
	REG_S x7, PT_T2(sp)
	REG_S x8, PT_S0(sp)
	REG_S x9, PT_S1(sp)
	REG_S x10, PT_A0(sp)
	REG_S x11, PT_A1(sp)
	REG_S x12, PT_A2(sp)
	REG_S x13, PT_A3(sp)
	REG_S x14, PT_A4(sp)
	REG_S x15, PT_A5(sp)
	REG_S x16, PT_A6(sp)
	REG_S x17, PT_A7(sp)
	REG_S x18, PT_S2(sp)
	REG_S x19, PT_S3(sp)
	REG_S x20, PT_S4(sp)
	REG_S x21, PT_S5(sp)
	REG_S x22, PT_S6(sp)
	REG_S x23, PT_S7(sp)
	REG_S x24, PT_S8(sp)
	REG_S x25, PT_S9(sp)
	REG_S x26, PT_S10(sp)
	REG_S x27, PT_S11(sp)
	REG_S x28, PT_T3(sp)
	REG_S x29, PT_T4(sp)
	REG_S x30, PT_T5(sp)
	REG_S x31, PT_T6(sp)

	/*
	 * Disable user-mode memory access as it should only be set in the
	 * actual user copy routines.
	 *
	 * Disable the FPU to detect illegal usage of floating point in kernel
	 * space.
	 */
	li t0, SR_SUM | SR_FS

	/*
	 * Read the trap CSRs into s0-s5 and store them in the frame.  csrrc
	 * returns the old status in s1 while clearing the bits in t0.
	 */
	REG_L s0, TASK_TI_USER_SP(tp)
	csrrc s1, CSR_STATUS, t0
	csrr s2, CSR_EPC
	csrr s3, CSR_TVAL
	csrr s4, CSR_CAUSE
	csrr s5, CSR_SCRATCH
	REG_S s0, PT_SP(sp)
	REG_S s1, PT_STATUS(sp)
	REG_S s2, PT_EPC(sp)
	REG_S s3, PT_BADADDR(sp)
	REG_S s4, PT_CAUSE(sp)
	REG_S s5, PT_TP(sp)

	/*
	 * Set the scratch register to 0, so that if a recursive exception
	 * occurs, the exception vector knows it came from the kernel
	 */
	csrw CSR_SCRATCH, x0

	/* Load the global pointer */
.option push
.option norelax
	la gp, __global_pointer$
.option pop

#ifdef CONFIG_TRACE_IRQFLAGS
	call trace_hardirqs_off
#endif

#ifdef CONFIG_CONTEXT_TRACKING
	/* If previous state is in user mode, call context_tracking_user_exit. */
	li a0, SR_PP
	and a0, s1, a0
	bnez a0, skip_context_tracking
	call context_tracking_user_exit
skip_context_tracking:
#endif

	/*
	 * MSB of cause differentiates between
	 * interrupts and exceptions
	 */
	bge s4, zero, 1f

	/* The interrupt handler returns straight to ret_from_exception. */
	la ra, ret_from_exception

	/* Handle interrupts */
	move a0, sp /* pt_regs */
	la a1, generic_handle_arch_irq
	jr a1
1:
	/*
	 * Exceptions run with interrupts enabled or disabled depending on the
	 * state of SR_PIE in m/sstatus.
	 */
	andi t0, s1, SR_PIE
	beqz t0, 1f
	/* kprobes, entered via ebreak, must have interrupts disabled. */
	li t0, EXC_BREAKPOINT
	beq s4, t0, 1f
#ifdef CONFIG_TRACE_IRQFLAGS
	call trace_hardirqs_on
#endif
	csrs CSR_STATUS, SR_IE

1:
	/* Set after the optional call above, which overwrites ra. */
	la ra, ret_from_exception
	/* Handle syscalls */
	li t0, EXC_SYSCALL
	beq s4, t0, handle_syscall

	/* Handle other exceptions */
	slli t0, s4, RISCV_LGPTR
	la t1, excp_vect_table
	la t2, excp_vect_table_end
	move a0, sp /* pt_regs */
	add t0, t1, t0
	/* Check if exception code lies within bounds */
	bgeu t0, t2, 1f
	REG_L t0, 0(t0)
	jr t0
1:
	tail do_trap_unknown

handle_syscall:
#ifdef CONFIG_RISCV_M_MODE
	/*
	 * When running in M-Mode (no MMU config), MPIE does not get set.
	 * As a result, we need to force enable interrupts here because
	 * handle_exception did not set SR_IE as it always sees SR_PIE
	 * being cleared.
	 */
	csrs CSR_STATUS, SR_IE
#endif
#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING)
	/*
	 * Recover a0 - a7 for system calls: the tracing calls made above may
	 * have overwritten the argument registers.
	 */
	REG_L a0, PT_A0(sp)
	REG_L a1, PT_A1(sp)
	REG_L a2, PT_A2(sp)
	REG_L a3, PT_A3(sp)
	REG_L a4, PT_A4(sp)
	REG_L a5, PT_A5(sp)
	REG_L a6, PT_A6(sp)
	REG_L a7, PT_A7(sp)
#endif
	/* save the initial A0 value (needed in signal handlers) */
	REG_S a0, PT_ORIG_A0(sp)
	/*
	 * Advance SEPC to avoid executing the original
	 * scall instruction on sret
	 */
	addi s2, s2, 0x4
	REG_S s2, PT_EPC(sp)
	/* Trace syscalls, but only if requested by the user. */
	REG_L t0, TASK_TI_FLAGS(tp)
	andi t0, t0, _TIF_SYSCALL_WORK
	bnez t0, handle_syscall_trace_enter
check_syscall_nr:
	/* Check to make sure we don't jump to a bogus syscall number. */
	li t0, __NR_syscalls
	la s0, sys_ni_syscall
	/*
	 * Syscall number held in a7.
	 * If syscall number is above allowed value, redirect to ni_syscall.
	 */
	bgeu a7, t0, 1f
	/* Call syscall */
	la s0, sys_call_table
	slli t0, a7, RISCV_LGPTR
	add s0, s0, t0
	REG_L s0, 0(s0)
1:
	/* The call links ra to ret_from_syscall, the next instruction. */
	jalr s0

ret_from_syscall:
	/* Set user a0 to kernel a0 */
	REG_S a0, PT_A0(sp)
	/*
	 * We didn't execute the actual syscall.
	 * Seccomp already set return value for the current task pt_regs.
	 * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
	 */
ret_from_syscall_rejected:
	/* Trace syscalls, but only if requested by the user. */
	REG_L t0, TASK_TI_FLAGS(tp)
	andi t0, t0, _TIF_SYSCALL_WORK
	bnez t0, handle_syscall_trace_exit

ret_from_exception:
	/*
	 * Common return path: disable interrupts, then choose the kernel or
	 * user resume path from the previous-privilege bits of the saved
	 * status.
	 */
	REG_L s0, PT_STATUS(sp)
	csrc CSR_STATUS, SR_IE
#ifdef CONFIG_TRACE_IRQFLAGS
	call trace_hardirqs_off
#endif
#ifdef CONFIG_RISCV_M_MODE
	/* the MPP value is too large to be used as an immediate arg for addi */
	li t0, SR_MPP
	and s0, s0, t0
#else
	andi s0, s0, SR_SPP
#endif
	bnez s0, resume_kernel

resume_userspace:
	/* Interrupts must be disabled here so flags are checked atomically */
	REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */
	andi s1, s0, _TIF_WORK_MASK
	bnez s1, work_pending

#ifdef CONFIG_CONTEXT_TRACKING
	call context_tracking_user_enter
#endif

	/* Save unwound kernel stack pointer in thread_info */
	addi s0, sp, PT_SIZE_ON_STACK
	REG_S s0, TASK_TI_KERNEL_SP(tp)

	/*
	 * Save TP into the scratch register, so we can find the kernel data
	 * structures again.
	 */
	csrw CSR_SCRATCH, tp

restore_all:
#ifdef CONFIG_TRACE_IRQFLAGS
	/*
	 * Report to the irq-flags tracer the interrupt state that the saved
	 * SR_PIE bit will produce after the trap return.
	 */
	REG_L s1, PT_STATUS(sp)
	andi t0, s1, SR_PIE
	beqz t0, 1f
	call trace_hardirqs_on
	j 2f
1:
	call trace_hardirqs_off
2:
#endif
	REG_L a0, PT_STATUS(sp)
	/*
	 * The current load reservation is effectively part of the processor's
	 * state, in the sense that load reservations cannot be shared between
	 * different hart contexts.  We can't actually save and restore a load
	 * reservation, so instead here we clear any existing reservation --
	 * it's always legal for implementations to clear load reservations at
	 * any point (as long as the forward progress guarantee is kept, but
	 * we'll ignore that here).
	 *
	 * Dangling load reservations can be the result of taking a trap in the
	 * middle of an LR/SC sequence, but can also be the result of a taken
	 * forward branch around an SC -- which is how we implement CAS.  As a
	 * result we need to clear reservations between the last CAS and the
	 * jump back to the new context.  While it is unlikely the store
	 * completes, implementations are allowed to expand reservations to be
	 * arbitrarily large.
	 */
	REG_L a2, PT_EPC(sp)
	REG_SC x0, a2, PT_EPC(sp)

	csrw CSR_STATUS, a0
	csrw CSR_EPC, a2

	/* Restore the general registers; sp (x2) goes last as it is the base. */
	REG_L x1, PT_RA(sp)
	REG_L x3, PT_GP(sp)
	REG_L x4, PT_TP(sp)
	REG_L x5, PT_T0(sp)
	REG_L x6, PT_T1(sp)
	REG_L x7, PT_T2(sp)
	REG_L x8, PT_S0(sp)
	REG_L x9, PT_S1(sp)
	REG_L x10, PT_A0(sp)
	REG_L x11, PT_A1(sp)
	REG_L x12, PT_A2(sp)
	REG_L x13, PT_A3(sp)
	REG_L x14, PT_A4(sp)
	REG_L x15, PT_A5(sp)
	REG_L x16, PT_A6(sp)
	REG_L x17, PT_A7(sp)
	REG_L x18, PT_S2(sp)
	REG_L x19, PT_S3(sp)
	REG_L x20, PT_S4(sp)
	REG_L x21, PT_S5(sp)
	REG_L x22, PT_S6(sp)
	REG_L x23, PT_S7(sp)
	REG_L x24, PT_S8(sp)
	REG_L x25, PT_S9(sp)
	REG_L x26, PT_S10(sp)
	REG_L x27, PT_S11(sp)
	REG_L x28, PT_T3(sp)
	REG_L x29, PT_T4(sp)
	REG_L x30, PT_T5(sp)
	REG_L x31, PT_T6(sp)

	REG_L x2, PT_SP(sp)

#ifdef CONFIG_RISCV_M_MODE
	mret
#else
	sret
#endif

#if IS_ENABLED(CONFIG_PREEMPTION)
resume_kernel:
	/*
	 * Returning to kernel mode: call preempt_schedule_irq only when the
	 * preempt count is zero and _TIF_NEED_RESCHED is set.
	 */
	REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
	bnez s0, restore_all
	REG_L s0, TASK_TI_FLAGS(tp)
	andi s0, s0, _TIF_NEED_RESCHED
	beqz s0, restore_all
	call preempt_schedule_irq
	j restore_all
#endif

work_pending:
	/* Enter slow path for supplementary processing */
	la ra, ret_from_exception
	andi s1, s0, _TIF_NEED_RESCHED
	bnez s1, work_resched
work_notifysig:
	/* Handle pending signals and notify-resume requests */
	csrs CSR_STATUS, SR_IE /* Enable interrupts for do_notify_resume() */
	move a0, sp /* pt_regs */
	move a1, s0 /* current_thread_info->flags */
	tail do_notify_resume
work_resched:
	tail schedule

/* Slow paths for ptrace. */
handle_syscall_trace_enter:
	move a0, sp
	call do_syscall_trace_enter
	/*
	 * A non-zero return means the syscall was rejected.  Keep the result
	 * in t0 while a0-a7 are reloaded from pt_regs, so a rejected syscall
	 * leaves the syscall number in a7 intact (it may have to be
	 * restarted).
	 */
	move t0, a0
	REG_L a0, PT_A0(sp)
	REG_L a1, PT_A1(sp)
	REG_L a2, PT_A2(sp)
	REG_L a3, PT_A3(sp)
	REG_L a4, PT_A4(sp)
	REG_L a5, PT_A5(sp)
	REG_L a6, PT_A6(sp)
	REG_L a7, PT_A7(sp)
	bnez t0, ret_from_syscall_rejected
	j check_syscall_nr
handle_syscall_trace_exit:
	move a0, sp
	call do_syscall_trace_exit
	j ret_from_exception

#ifdef CONFIG_VMAP_STACK
/*
 * Kernel stack overflow path, reached from the check in
 * _restore_kernel_tpsp.  It calls get_overflow_stack to obtain the per-cpu
 * overflow stack, saves the complete trap context there and tail-calls
 * handle_bad_stack with a0 = pt_regs.
 *
 * NOTE(review): shadow_stack is a single symbol used as a temporary stack
 * for the get_overflow_stack call; nothing in this file serialises harts
 * that overflow at the same time -- confirm how concurrent use is handled.
 */
handle_kernel_stack_overflow:
	la sp, shadow_stack
	addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE

	/* save caller register to shadow stack */
	addi sp, sp, -(PT_SIZE_ON_STACK)
	REG_S x1, PT_RA(sp)
	REG_S x5, PT_T0(sp)
	REG_S x6, PT_T1(sp)
	REG_S x7, PT_T2(sp)
	REG_S x10, PT_A0(sp)
	REG_S x11, PT_A1(sp)
	REG_S x12, PT_A2(sp)
	REG_S x13, PT_A3(sp)
	REG_S x14, PT_A4(sp)
	REG_S x15, PT_A5(sp)
	REG_S x16, PT_A6(sp)
	REG_S x17, PT_A7(sp)
	REG_S x28, PT_T3(sp)
	REG_S x29, PT_T4(sp)
	REG_S x30, PT_T5(sp)
	REG_S x31, PT_T6(sp)

	/* get_overflow_stack returns to restore_caller_reg. */
	la ra, restore_caller_reg
	tail get_overflow_stack

restore_caller_reg:
	/* save per-cpu overflow stack (value returned in a0) just below sp */
	REG_S a0, -8(sp)
	/* restore caller register from shadow_stack */
	REG_L x1, PT_RA(sp)
	REG_L x5, PT_T0(sp)
	REG_L x6, PT_T1(sp)
	REG_L x7, PT_T2(sp)
	REG_L x10, PT_A0(sp)
	REG_L x11, PT_A1(sp)
	REG_L x12, PT_A2(sp)
	REG_L x13, PT_A3(sp)
	REG_L x14, PT_A4(sp)
	REG_L x15, PT_A5(sp)
	REG_L x16, PT_A6(sp)
	REG_L x17, PT_A7(sp)
	REG_L x28, PT_T3(sp)
	REG_L x29, PT_T4(sp)
	REG_L x30, PT_T5(sp)
	REG_L x31, PT_T6(sp)

	/* load per-cpu overflow stack */
	REG_L sp, -8(sp)
	addi sp, sp, -(PT_SIZE_ON_STACK)

	/* save context to overflow stack */
	REG_S x1, PT_RA(sp)
	REG_S x3, PT_GP(sp)
	REG_S x5, PT_T0(sp)
	REG_S x6, PT_T1(sp)
	REG_S x7, PT_T2(sp)
	REG_S x8, PT_S0(sp)
	REG_S x9, PT_S1(sp)
	REG_S x10, PT_A0(sp)
	REG_S x11, PT_A1(sp)
	REG_S x12, PT_A2(sp)
	REG_S x13, PT_A3(sp)
	REG_S x14, PT_A4(sp)
	REG_S x15, PT_A5(sp)
	REG_S x16, PT_A6(sp)
	REG_S x17, PT_A7(sp)
	REG_S x18, PT_S2(sp)
	REG_S x19, PT_S3(sp)
	REG_S x20, PT_S4(sp)
	REG_S x21, PT_S5(sp)
	REG_S x22, PT_S6(sp)
	REG_S x23, PT_S7(sp)
	REG_S x24, PT_S8(sp)
	REG_S x25, PT_S9(sp)
	REG_S x26, PT_S10(sp)
	REG_S x27, PT_S11(sp)
	REG_S x28, PT_T3(sp)
	REG_S x29, PT_T4(sp)
	REG_S x30, PT_T5(sp)
	REG_S x31, PT_T6(sp)

	/*
	 * PT_SP is the overflowed sp that _restore_kernel_tpsp stored in
	 * TASK_TI_KERNEL_SP before running the check.
	 */
	REG_L s0, TASK_TI_KERNEL_SP(tp)
	csrr s1, CSR_STATUS
	csrr s2, CSR_EPC
	csrr s3, CSR_TVAL
	csrr s4, CSR_CAUSE
	csrr s5, CSR_SCRATCH
	REG_S s0, PT_SP(sp)
	REG_S s1, PT_STATUS(sp)
	REG_S s2, PT_EPC(sp)
	REG_S s3, PT_BADADDR(sp)
	REG_S s4, PT_CAUSE(sp)
	REG_S s5, PT_TP(sp)
	move a0, sp
	tail handle_bad_stack
#endif

END(handle_exception)
|
|
|
|
|
|
|
|
/*
 * ret_from_fork - tail-calls schedule_tail with ra pointing at
 * ret_from_exception, so schedule_tail returns into the common exception
 * return path.  a0 is passed through unchanged as schedule_tail's argument.
 */
ENTRY(ret_from_fork)
	la ra, ret_from_exception
	tail schedule_tail
ENDPROC(ret_from_fork)
|
|
|
|
|
|
|
|
/*
 * ret_from_kernel_thread - calls schedule_tail, then jumps to the thread
 * function.
 *
 * In:  s0 = fn, s1 = arg
 * ra is set to ret_from_exception before the jump, so a return from fn
 * lands in the common exception return path.
 */
ENTRY(ret_from_kernel_thread)
	call schedule_tail
	/* Call fn(arg) */
	la ra, ret_from_exception
	move a0, s1
	jr s0
ENDPROC(ret_from_kernel_thread)
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Integer register context switch
 * The callee-saved registers must be saved and restored.
 *
 *   a0: previous task_struct (must be preserved across the switch)
 *   a1: next task_struct
 *
 * The value of a0 and a1 must be preserved by this function, as that's how
 * arguments are passed to schedule_tail.
 *
 * Clobbers a3 and a4, which are used as base pointers into prev and next.
 * Returns through the ra loaded from the next task.
 */
ENTRY(__switch_to)
	/* Save context into prev->thread */
	/*
	 * a3/a4 = prev/next + TASK_THREAD_RA; the TASK_THREAD_*_RA constants
	 * are used as offsets from those bases (presumably defined relative
	 * to TASK_THREAD_RA -- confirm in asm-offsets).
	 */
	li a4, TASK_THREAD_RA
	add a3, a0, a4
	add a4, a1, a4
	REG_S ra, TASK_THREAD_RA_RA(a3)
	REG_S sp, TASK_THREAD_SP_RA(a3)
	REG_S s0, TASK_THREAD_S0_RA(a3)
	REG_S s1, TASK_THREAD_S1_RA(a3)
	REG_S s2, TASK_THREAD_S2_RA(a3)
	REG_S s3, TASK_THREAD_S3_RA(a3)
	REG_S s4, TASK_THREAD_S4_RA(a3)
	REG_S s5, TASK_THREAD_S5_RA(a3)
	REG_S s6, TASK_THREAD_S6_RA(a3)
	REG_S s7, TASK_THREAD_S7_RA(a3)
	REG_S s8, TASK_THREAD_S8_RA(a3)
	REG_S s9, TASK_THREAD_S9_RA(a3)
	REG_S s10, TASK_THREAD_S10_RA(a3)
	REG_S s11, TASK_THREAD_S11_RA(a3)
	/* Restore context from next->thread */
	REG_L ra, TASK_THREAD_RA_RA(a4)
	REG_L sp, TASK_THREAD_SP_RA(a4)
	REG_L s0, TASK_THREAD_S0_RA(a4)
	REG_L s1, TASK_THREAD_S1_RA(a4)
	REG_L s2, TASK_THREAD_S2_RA(a4)
	REG_L s3, TASK_THREAD_S3_RA(a4)
	REG_L s4, TASK_THREAD_S4_RA(a4)
	REG_L s5, TASK_THREAD_S5_RA(a4)
	REG_L s6, TASK_THREAD_S6_RA(a4)
	REG_L s7, TASK_THREAD_S7_RA(a4)
	REG_L s8, TASK_THREAD_S8_RA(a4)
	REG_L s9, TASK_THREAD_S9_RA(a4)
	REG_L s10, TASK_THREAD_S10_RA(a4)
	REG_L s11, TASK_THREAD_S11_RA(a4)
	/* The offset of thread_info in task_struct is zero. */
	move tp, a1
	ret
ENDPROC(__switch_to)
|
|
|
|
|
2019-10-28 20:10:41 +08:00
|
|
|
#ifndef CONFIG_MMU
/*
 * With no MMU configured, the do_page_fault entries in the table below
 * resolve to do_trap_unknown instead.
 */
#define do_page_fault do_trap_unknown
#endif

	.section ".rodata"
	.align LGREG
	/*
	 * Exception vector table
	 *
	 * One pointer-sized slot per exception code.  handle_exception indexes
	 * it with (cause << RISCV_LGPTR) and falls back to do_trap_unknown for
	 * codes at or beyond excp_vect_table_end.  The number in each comment
	 * is the slot index (assuming each ALT_* macro emits exactly one
	 * pointer -- confirm in asm/errata_list.h).
	 */
ENTRY(excp_vect_table)
	RISCV_PTR do_trap_insn_misaligned	/* 0 */
	ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault)	/* 1 */
	RISCV_PTR do_trap_insn_illegal		/* 2 */
	RISCV_PTR do_trap_break			/* 3 */
	RISCV_PTR do_trap_load_misaligned	/* 4 */
	RISCV_PTR do_trap_load_fault		/* 5 */
	RISCV_PTR do_trap_store_misaligned	/* 6 */
	RISCV_PTR do_trap_store_fault		/* 7 */
	RISCV_PTR do_trap_ecall_u /* 8: system call, gets intercepted */
	RISCV_PTR do_trap_ecall_s		/* 9 */
	RISCV_PTR do_trap_unknown		/* 10 */
	RISCV_PTR do_trap_ecall_m		/* 11 */
	/* 12: instruction page fault */
	ALT_PAGE_FAULT(RISCV_PTR do_page_fault)
	RISCV_PTR do_page_fault   /* 13: load page fault */
	RISCV_PTR do_trap_unknown		/* 14 */
	RISCV_PTR do_page_fault   /* 15: store page fault */
excp_vect_table_end:
END(excp_vect_table)
|
2019-10-28 20:10:41 +08:00
|
|
|
|
|
|
|
#ifndef CONFIG_MMU
/*
 * __user_rt_sigreturn - issues the rt_sigreturn system call: loads the
 * syscall number into a7 and executes scall.  Only built when CONFIG_MMU
 * is not set.
 *
 * NOTE(review): no section directive follows the .section ".rodata" used
 * for excp_vect_table, so this code appears to be assembled into .rodata
 * -- confirm that is intended.
 */
ENTRY(__user_rt_sigreturn)
	li a7, __NR_rt_sigreturn
	scall
END(__user_rt_sigreturn)
#endif
|