mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-12 15:24:00 +08:00
Merge branch 'for-next/scs' into for-next/core
Support for Clang's Shadow Call Stack in the kernel (Sami Tolvanen and Will Deacon) * for-next/scs: arm64: entry-ftrace.S: Update comment to indicate that x18 is live scs: Move DEFINE_SCS macro into core code scs: Remove references to asm/scs.h from core code scs: Move scs_overflow_check() out of architecture code arm64: scs: Use 'scs_sp' register alias for x18 scs: Move accounting into alloc/free functions arm64: scs: Store absolute SCS stack pointer value in thread_info efi/libstub: Disable Shadow Call Stack arm64: scs: Add shadow stacks for SDEI arm64: Implement Shadow Call Stack arm64: Disable SCS for hypervisor code arm64: vdso: Disable Shadow Call Stack arm64: efi: Restore register x18 if it was corrupted arm64: Preserve register x18 when CPU is suspended arm64: Reserve register x18 from general allocation with SCS scs: Disable when function graph tracing is enabled scs: Add support for stack usage debugging scs: Add page accounting for shadow call stack allocations scs: Add support for Clang's Shadow Call Stack (SCS)
This commit is contained in:
commit
082af5ec50
6
Makefile
6
Makefile
@ -866,6 +866,12 @@ ifdef CONFIG_LIVEPATCH
|
||||
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
|
||||
endif
|
||||
|
||||
ifdef CONFIG_SHADOW_CALL_STACK
|
||||
CC_FLAGS_SCS := -fsanitize=shadow-call-stack
|
||||
KBUILD_CFLAGS += $(CC_FLAGS_SCS)
|
||||
export CC_FLAGS_SCS
|
||||
endif
|
||||
|
||||
# arch Makefile may override CC so keep this after arch Makefile is included
|
||||
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
|
||||
|
||||
|
25
arch/Kconfig
25
arch/Kconfig
@ -533,6 +533,31 @@ config STACKPROTECTOR_STRONG
|
||||
about 20% of all kernel functions, which increases the kernel code
|
||||
size by about 2%.
|
||||
|
||||
config ARCH_SUPPORTS_SHADOW_CALL_STACK
|
||||
bool
|
||||
help
|
||||
An architecture should select this if it supports Clang's Shadow
|
||||
Call Stack and implements runtime support for shadow stack
|
||||
switching.
|
||||
|
||||
config SHADOW_CALL_STACK
|
||||
bool "Clang Shadow Call Stack"
|
||||
depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
|
||||
depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
|
||||
help
|
||||
This option enables Clang's Shadow Call Stack, which uses a
|
||||
shadow stack to protect function return addresses from being
|
||||
overwritten by an attacker. More information can be found in
|
||||
Clang's documentation:
|
||||
|
||||
https://clang.llvm.org/docs/ShadowCallStack.html
|
||||
|
||||
Note that security guarantees in the kernel differ from the
|
||||
ones documented for user space. The kernel must store addresses
|
||||
of shadow stacks in memory, which means an attacker capable of
|
||||
reading and writing arbitrary memory may be able to locate them
|
||||
and hijack control flow by modifying the stacks.
|
||||
|
||||
config HAVE_ARCH_WITHIN_STACK_FRAMES
|
||||
bool
|
||||
help
|
||||
|
@ -68,6 +68,7 @@ config ARM64
|
||||
select ARCH_USE_QUEUED_SPINLOCKS
|
||||
select ARCH_USE_SYM_ANNOTATIONS
|
||||
select ARCH_SUPPORTS_MEMORY_FAILURE
|
||||
select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
|
||||
select ARCH_SUPPORTS_ATOMIC_RMW
|
||||
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
|
||||
select ARCH_SUPPORTS_NUMA_BALANCING
|
||||
@ -1026,6 +1027,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
|
||||
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
|
||||
def_bool y if PGTABLE_LEVELS > 2
|
||||
|
||||
# Supported by clang >= 7.0
|
||||
config CC_HAVE_SHADOW_CALL_STACK
|
||||
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
---help---
|
||||
|
@ -87,6 +87,10 @@ endif
|
||||
|
||||
KBUILD_CFLAGS += $(branch-prot-flags-y)
|
||||
|
||||
ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
|
||||
KBUILD_CFLAGS += -ffixed-x18
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
|
||||
KBUILD_CPPFLAGS += -mbig-endian
|
||||
CHECKFLAGS += -D__AARCH64EB__
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/sysreg.h>
|
||||
|
||||
#define __hyp_text __section(.hyp.text) notrace
|
||||
#define __hyp_text __section(.hyp.text) notrace __noscs
|
||||
|
||||
#define read_sysreg_elx(r,nvh,vh) \
|
||||
({ \
|
||||
|
29
arch/arm64/include/asm/scs.h
Normal file
29
arch/arm64/include/asm/scs.h
Normal file
@ -0,0 +1,29 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_SCS_H
|
||||
#define _ASM_SCS_H
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
scs_sp .req x18
|
||||
|
||||
.macro scs_load tsk, tmp
|
||||
ldr scs_sp, [\tsk, #TSK_TI_SCS_SP]
|
||||
.endm
|
||||
|
||||
.macro scs_save tsk, tmp
|
||||
str scs_sp, [\tsk, #TSK_TI_SCS_SP]
|
||||
.endm
|
||||
#else
|
||||
.macro scs_load tsk, tmp
|
||||
.endm
|
||||
|
||||
.macro scs_save tsk, tmp
|
||||
.endm
|
||||
#endif /* CONFIG_SHADOW_CALL_STACK */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_SCS_H */
|
@ -2,7 +2,7 @@
|
||||
#ifndef __ASM_SUSPEND_H
|
||||
#define __ASM_SUSPEND_H
|
||||
|
||||
#define NR_CTX_REGS 12
|
||||
#define NR_CTX_REGS 13
|
||||
#define NR_CALLEE_SAVED_REGS 12
|
||||
|
||||
/*
|
||||
|
@ -41,6 +41,10 @@ struct thread_info {
|
||||
#endif
|
||||
} preempt;
|
||||
};
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
void *scs_base;
|
||||
void *scs_sp;
|
||||
#endif
|
||||
};
|
||||
|
||||
#define thread_saved_pc(tsk) \
|
||||
@ -100,11 +104,20 @@ void arch_release_task_struct(struct task_struct *tsk);
|
||||
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
|
||||
_TIF_SYSCALL_EMU)
|
||||
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
#define INIT_SCS \
|
||||
.scs_base = init_shadow_call_stack, \
|
||||
.scs_sp = init_shadow_call_stack,
|
||||
#else
|
||||
#define INIT_SCS
|
||||
#endif
|
||||
|
||||
#define INIT_THREAD_INFO(tsk) \
|
||||
{ \
|
||||
.flags = _TIF_FOREIGN_FPSTATE, \
|
||||
.preempt_count = INIT_PREEMPT_COUNT, \
|
||||
.addr_limit = KERNEL_DS, \
|
||||
INIT_SCS \
|
||||
}
|
||||
|
||||
#endif /* __ASM_THREAD_INFO_H */
|
||||
|
@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
|
||||
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
|
||||
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
|
||||
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
|
||||
obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
|
||||
|
||||
obj-y += vdso/ probes/
|
||||
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
|
||||
|
@ -33,6 +33,10 @@ int main(void)
|
||||
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
|
||||
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
|
||||
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
|
||||
#endif
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base));
|
||||
DEFINE(TSK_TI_SCS_SP, offsetof(struct task_struct, thread_info.scs_sp));
|
||||
#endif
|
||||
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
|
||||
#ifdef CONFIG_STACKPROTECTOR
|
||||
|
@ -34,5 +34,14 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
|
||||
ldp x29, x30, [sp], #32
|
||||
b.ne 0f
|
||||
ret
|
||||
0: b efi_handle_corrupted_x18 // tail call
|
||||
0:
|
||||
/*
|
||||
* With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
|
||||
* shadow stack pointer, which we need to restore before returning to
|
||||
* potentially instrumented code. This is safe because the wrapper is
|
||||
* called with preemption disabled and a separate shadow stack is used
|
||||
* for interrupts.
|
||||
*/
|
||||
mov x18, x2
|
||||
b efi_handle_corrupted_x18 // tail call
|
||||
SYM_FUNC_END(__efi_rt_asm_wrapper)
|
||||
|
@ -23,8 +23,9 @@
|
||||
*
|
||||
* ... where <entry> is either ftrace_caller or ftrace_regs_caller.
|
||||
*
|
||||
* Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
|
||||
* live, and x9-x18 are safe to clobber.
|
||||
* Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are
|
||||
* live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to
|
||||
* clobber.
|
||||
*
|
||||
* We save the callsite's context into a pt_regs before invoking any ftrace
|
||||
* callbacks. So that we can get a sensible backtrace, we create a stack record
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/scs.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/asm-uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
@ -179,6 +180,8 @@ alternative_cb_end
|
||||
apply_ssbd 1, x22, x23
|
||||
|
||||
ptrauth_keys_install_kernel tsk, x20, x22, x23
|
||||
|
||||
scs_load tsk, x20
|
||||
.else
|
||||
add x21, sp, #S_FRAME_SIZE
|
||||
get_current_task tsk
|
||||
@ -343,6 +346,8 @@ alternative_else_nop_endif
|
||||
msr cntkctl_el1, x1
|
||||
4:
|
||||
#endif
|
||||
scs_save tsk, x0
|
||||
|
||||
/* No kernel C function calls after this as user keys are set. */
|
||||
ptrauth_keys_install_user tsk, x0, x1, x2
|
||||
|
||||
@ -388,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
|
||||
|
||||
.macro irq_stack_entry
|
||||
mov x19, sp // preserve the original sp
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
mov x24, scs_sp // preserve the original shadow stack
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Compare sp with the base of the task stack.
|
||||
@ -405,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
|
||||
|
||||
/* switch to the irq stack */
|
||||
mov sp, x26
|
||||
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
/* also switch to the irq shadow stack */
|
||||
adr_this_cpu scs_sp, irq_shadow_call_stack, x26
|
||||
#endif
|
||||
|
||||
9998:
|
||||
.endm
|
||||
|
||||
/*
|
||||
* x19 should be preserved between irq_stack_entry and
|
||||
* irq_stack_exit.
|
||||
* The callee-saved regs (x19-x29) should be preserved between
|
||||
* irq_stack_entry and irq_stack_exit, but note that kernel_entry
|
||||
* uses x20-x23 to store data for later use.
|
||||
*/
|
||||
.macro irq_stack_exit
|
||||
mov sp, x19
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
mov scs_sp, x24
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/* GPRs used by entry code */
|
||||
@ -902,6 +920,8 @@ SYM_FUNC_START(cpu_switch_to)
|
||||
mov sp, x9
|
||||
msr sp_el0, x1
|
||||
ptrauth_keys_install_kernel x1, x8, x9, x10
|
||||
scs_save x0, x8
|
||||
scs_load x1, x8
|
||||
ret
|
||||
SYM_FUNC_END(cpu_switch_to)
|
||||
NOKPROBE(cpu_switch_to)
|
||||
@ -1030,13 +1050,16 @@ SYM_CODE_START(__sdei_asm_handler)
|
||||
|
||||
mov x19, x1
|
||||
|
||||
#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
|
||||
ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
/*
|
||||
* entry.S may have been using sp as a scratch register, find whether
|
||||
* this is a normal or critical event and switch to the appropriate
|
||||
* stack for this CPU.
|
||||
*/
|
||||
ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
|
||||
cbnz w4, 1f
|
||||
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
|
||||
b 2f
|
||||
@ -1046,6 +1069,15 @@ SYM_CODE_START(__sdei_asm_handler)
|
||||
mov sp, x5
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
/* Use a separate shadow call stack for normal and critical events */
|
||||
cbnz w4, 3f
|
||||
adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6
|
||||
b 4f
|
||||
3: adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6
|
||||
4:
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We may have interrupted userspace, or a guest, or exit-from or
|
||||
* return-to either of these. We can't trust sp_el0, restore it.
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <asm/pgtable-hwdef.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/scs.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/sysreg.h>
|
||||
#include <asm/thread_info.h>
|
||||
@ -433,6 +434,10 @@ SYM_FUNC_START_LOCAL(__primary_switched)
|
||||
stp xzr, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
adr_l scs_sp, init_shadow_call_stack // Set shadow call stack
|
||||
#endif
|
||||
|
||||
str_l x21, __fdt_pointer, x5 // Save FDT pointer
|
||||
|
||||
ldr_l x4, kimage_vaddr // Save the offset between
|
||||
@ -745,6 +750,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
|
||||
ldr x2, [x0, #CPU_BOOT_TASK]
|
||||
cbz x2, __secondary_too_slow
|
||||
msr sp_el0, x2
|
||||
scs_load x2, x3
|
||||
mov x29, #0
|
||||
mov x30, #0
|
||||
|
||||
|
16
arch/arm64/kernel/scs.c
Normal file
16
arch/arm64/kernel/scs.c
Normal file
@ -0,0 +1,16 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Shadow Call Stack support.
|
||||
*
|
||||
* Copyright (C) 2019 Google LLC
|
||||
*/
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/scs.h>
|
||||
|
||||
DEFINE_SCS(irq_shadow_call_stack);
|
||||
|
||||
#ifdef CONFIG_ARM_SDE_INTERFACE
|
||||
DEFINE_SCS(sdei_shadow_call_stack_normal);
|
||||
DEFINE_SCS(sdei_shadow_call_stack_critical);
|
||||
#endif
|
@ -29,7 +29,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
|
||||
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
|
||||
ccflags-y += -DDISABLE_BRANCH_PROFILING
|
||||
|
||||
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
|
||||
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
|
||||
KBUILD_CFLAGS += $(DISABLE_LTO)
|
||||
KASAN_SANITIZE := n
|
||||
UBSAN_SANITIZE := n
|
||||
|
@ -58,6 +58,8 @@
|
||||
* cpu_do_suspend - save CPU registers context
|
||||
*
|
||||
* x0: virtual address of context pointer
|
||||
*
|
||||
* This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
|
||||
*/
|
||||
SYM_FUNC_START(cpu_do_suspend)
|
||||
mrs x2, tpidr_el0
|
||||
@ -82,6 +84,11 @@ alternative_endif
|
||||
stp x8, x9, [x0, #48]
|
||||
stp x10, x11, [x0, #64]
|
||||
stp x12, x13, [x0, #80]
|
||||
/*
|
||||
* Save x18 as it may be used as a platform register, e.g. by shadow
|
||||
* call stack.
|
||||
*/
|
||||
str x18, [x0, #96]
|
||||
ret
|
||||
SYM_FUNC_END(cpu_do_suspend)
|
||||
|
||||
@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume)
|
||||
ldp x9, x10, [x0, #48]
|
||||
ldp x11, x12, [x0, #64]
|
||||
ldp x13, x14, [x0, #80]
|
||||
/*
|
||||
* Restore x18, as it may be used as a platform register, and clear
|
||||
* the buffer to minimize the risk of exposure when used for shadow
|
||||
* call stack.
|
||||
*/
|
||||
ldr x18, [x0, #96]
|
||||
str xzr, [x0, #96]
|
||||
msr tpidr_el0, x2
|
||||
msr tpidrro_el0, x3
|
||||
msr contextidr_el1, x4
|
||||
|
@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
|
||||
"Node %d AnonPages: %8lu kB\n"
|
||||
"Node %d Shmem: %8lu kB\n"
|
||||
"Node %d KernelStack: %8lu kB\n"
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
"Node %d ShadowCallStack:%8lu kB\n"
|
||||
#endif
|
||||
"Node %d PageTables: %8lu kB\n"
|
||||
"Node %d NFS_Unstable: %8lu kB\n"
|
||||
"Node %d Bounce: %8lu kB\n"
|
||||
@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
|
||||
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
|
||||
nid, K(i.sharedram),
|
||||
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
|
||||
#endif
|
||||
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
|
||||
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
|
||||
|
@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
|
||||
$(call cc-option,-fno-stack-protector) \
|
||||
-D__DISABLE_EXPORTS
|
||||
|
||||
# remove SCS flags from all objects in this directory
|
||||
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
|
||||
|
||||
GCOV_PROFILE := n
|
||||
KASAN_SANITIZE := n
|
||||
UBSAN_SANITIZE := n
|
||||
|
@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
|
||||
show_val_kb(m, "SUnreclaim: ", sunreclaim);
|
||||
seq_printf(m, "KernelStack: %8lu kB\n",
|
||||
global_zone_page_state(NR_KERNEL_STACK_KB));
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
seq_printf(m, "ShadowCallStack:%8lu kB\n",
|
||||
global_zone_page_state(NR_KERNEL_SCS_KB));
|
||||
#endif
|
||||
show_val_kb(m, "PageTables: ",
|
||||
global_zone_page_state(NR_PAGETABLE));
|
||||
|
||||
|
@ -42,3 +42,7 @@
|
||||
* compilers, like ICC.
|
||||
*/
|
||||
#define barrier() __asm__ __volatile__("" : : : "memory")
|
||||
|
||||
#if __has_feature(shadow_call_stack)
|
||||
# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
|
||||
#endif
|
||||
|
@ -193,6 +193,10 @@ struct ftrace_likely_data {
|
||||
# define randomized_struct_fields_end
|
||||
#endif
|
||||
|
||||
#ifndef __noscs
|
||||
# define __noscs
|
||||
#endif
|
||||
|
||||
#ifndef asm_volatile_goto
|
||||
#define asm_volatile_goto(x...) asm goto(x)
|
||||
#endif
|
||||
|
@ -156,6 +156,9 @@ enum zone_stat_item {
|
||||
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
|
||||
NR_PAGETABLE, /* used for pagetables */
|
||||
NR_KERNEL_STACK_KB, /* measured in KiB */
|
||||
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
|
||||
NR_KERNEL_SCS_KB, /* measured in KiB */
|
||||
#endif
|
||||
/* Second 128 byte cacheline */
|
||||
NR_BOUNCE,
|
||||
#if IS_ENABLED(CONFIG_ZSMALLOC)
|
||||
|
72
include/linux/scs.h
Normal file
72
include/linux/scs.h
Normal file
@ -0,0 +1,72 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Shadow Call Stack support.
|
||||
*
|
||||
* Copyright (C) 2019 Google LLC
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_SCS_H
|
||||
#define _LINUX_SCS_H
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/poison.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
|
||||
/*
|
||||
* In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
|
||||
* architecture) provided ~40% safety margin on stack usage while keeping
|
||||
* memory allocation overhead reasonable.
|
||||
*/
|
||||
#define SCS_SIZE SZ_1K
|
||||
#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
|
||||
|
||||
/* An illegal pointer value to mark the end of the shadow stack. */
|
||||
#define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA)
|
||||
|
||||
/*
 * Allocate a static per-CPU shadow stack.
 *
 * Expands to a DEFINE_PER_CPU() of an unsigned long array that is
 * SCS_SIZE bytes large. The expansion deliberately carries no trailing
 * semicolon; users terminate the statement themselves, e.g.
 * "DEFINE_SCS(irq_shadow_call_stack);".
 */
#define DEFINE_SCS(name)						\
	DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)
|
||||
|
||||
#define task_scs(tsk) (task_thread_info(tsk)->scs_base)
|
||||
#define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp)
|
||||
|
||||
void scs_init(void);
|
||||
int scs_prepare(struct task_struct *tsk, int node);
|
||||
void scs_release(struct task_struct *tsk);
|
||||
|
||||
static inline void scs_task_reset(struct task_struct *tsk)
|
||||
{
|
||||
/*
|
||||
* Reset the shadow stack to the base address in case the task
|
||||
* is reused.
|
||||
*/
|
||||
task_scs_sp(tsk) = task_scs(tsk);
|
||||
}
|
||||
|
||||
static inline unsigned long *__scs_magic(void *s)
|
||||
{
|
||||
return (unsigned long *)(s + SCS_SIZE) - 1;
|
||||
}
|
||||
|
||||
static inline bool task_scs_end_corrupted(struct task_struct *tsk)
|
||||
{
|
||||
unsigned long *magic = __scs_magic(task_scs(tsk));
|
||||
unsigned long sz = task_scs_sp(tsk) - task_scs(tsk);
|
||||
|
||||
return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
|
||||
}
|
||||
|
||||
#else /* CONFIG_SHADOW_CALL_STACK */
|
||||
|
||||
static inline void scs_init(void) {}
|
||||
static inline void scs_task_reset(struct task_struct *tsk) {}
|
||||
static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
|
||||
static inline void scs_release(struct task_struct *tsk) {}
|
||||
static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; }
|
||||
|
||||
#endif /* CONFIG_SHADOW_CALL_STACK */
|
||||
|
||||
#endif /* _LINUX_SCS_H */
|
@ -11,6 +11,7 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/numa.h>
|
||||
#include <linux/scs.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
@ -50,6 +51,13 @@ static struct sighand_struct init_sighand = {
|
||||
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
|
||||
__init_task_data = {
|
||||
[(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Set up the first task table, touch at your own risk!. Base=0,
|
||||
* limit=0x1fffff (=2MB)
|
||||
|
@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
|
||||
obj-$(CONFIG_IRQ_WORK) += irq_work.o
|
||||
obj-$(CONFIG_CPU_PM) += cpu_pm.o
|
||||
obj-$(CONFIG_BPF) += bpf/
|
||||
obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
|
||||
|
||||
obj-$(CONFIG_PERF_EVENTS) += events/
|
||||
|
||||
|
@ -94,6 +94,7 @@
|
||||
#include <linux/thread_info.h>
|
||||
#include <linux/stackleak.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/scs.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk)
|
||||
|
||||
void free_task(struct task_struct *tsk)
|
||||
{
|
||||
scs_release(tsk);
|
||||
|
||||
#ifndef CONFIG_THREAD_INFO_IN_TASK
|
||||
/*
|
||||
* The task is finally done with both the stack and thread_info,
|
||||
@ -840,6 +843,8 @@ void __init fork_init(void)
|
||||
NULL, free_vm_stack_cache);
|
||||
#endif
|
||||
|
||||
scs_init();
|
||||
|
||||
lockdep_init_task(&init_task);
|
||||
uprobes_init();
|
||||
}
|
||||
@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
|
||||
if (err)
|
||||
goto free_stack;
|
||||
|
||||
err = scs_prepare(tsk, node);
|
||||
if (err)
|
||||
goto free_stack;
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* We must handle setting up seccomp filters once we're under
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include <linux/kcov.h>
|
||||
#include <linux/scs.h>
|
||||
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/tlb.h>
|
||||
@ -3877,6 +3878,9 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
|
||||
#ifdef CONFIG_SCHED_STACK_END_CHECK
|
||||
if (task_stack_end_corrupted(prev))
|
||||
panic("corrupted stack end detected inside scheduler\n");
|
||||
|
||||
if (task_scs_end_corrupted(prev))
|
||||
panic("corrupted shadow stack detected inside scheduler\n");
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
|
||||
@ -6040,6 +6044,7 @@ void init_idle(struct task_struct *idle, int cpu)
|
||||
idle->se.exec_start = sched_clock();
|
||||
idle->flags |= PF_IDLE;
|
||||
|
||||
scs_task_reset(idle);
|
||||
kasan_unpoison_task_stack(idle);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
104
kernel/scs.c
Normal file
104
kernel/scs.c
Normal file
@ -0,0 +1,104 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Shadow Call Stack support.
|
||||
*
|
||||
* Copyright (C) 2019 Google LLC
|
||||
*/
|
||||
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/scs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmstat.h>
|
||||
|
||||
static struct kmem_cache *scs_cache;
|
||||
|
||||
static void __scs_account(void *s, int account)
|
||||
{
|
||||
struct page *scs_page = virt_to_page(s);
|
||||
|
||||
mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB,
|
||||
account * (SCS_SIZE / SZ_1K));
|
||||
}
|
||||
|
||||
static void *scs_alloc(int node)
|
||||
{
|
||||
void *s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
|
||||
|
||||
if (!s)
|
||||
return NULL;
|
||||
|
||||
*__scs_magic(s) = SCS_END_MAGIC;
|
||||
|
||||
/*
|
||||
* Poison the allocation to catch unintentional accesses to
|
||||
* the shadow stack when KASAN is enabled.
|
||||
*/
|
||||
kasan_poison_object_data(scs_cache, s);
|
||||
__scs_account(s, 1);
|
||||
return s;
|
||||
}
|
||||
|
||||
static void scs_free(void *s)
|
||||
{
|
||||
__scs_account(s, -1);
|
||||
kasan_unpoison_object_data(scs_cache, s);
|
||||
kmem_cache_free(scs_cache, s);
|
||||
}
|
||||
|
||||
/*
 * Create the slab cache ("scs_cache") from which per-task shadow stacks
 * of SCS_SIZE bytes are allocated. No alignment, flags or constructor
 * are requested.
 *
 * NOTE(review): the result of kmem_cache_create() is not checked here.
 */
void __init scs_init(void)
{
	scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL);
}
|
||||
|
||||
int scs_prepare(struct task_struct *tsk, int node)
|
||||
{
|
||||
void *s = scs_alloc(node);
|
||||
|
||||
if (!s)
|
||||
return -ENOMEM;
|
||||
|
||||
task_scs(tsk) = task_scs_sp(tsk) = s;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void scs_check_usage(struct task_struct *tsk)
|
||||
{
|
||||
static unsigned long highest;
|
||||
|
||||
unsigned long *p, prev, curr = highest, used = 0;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
|
||||
return;
|
||||
|
||||
for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
|
||||
if (!READ_ONCE_NOCHECK(*p))
|
||||
break;
|
||||
used++;
|
||||
}
|
||||
|
||||
while (used > curr) {
|
||||
prev = cmpxchg_relaxed(&highest, curr, used);
|
||||
|
||||
if (prev == curr) {
|
||||
pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
|
||||
tsk->comm, task_pid_nr(tsk), used);
|
||||
break;
|
||||
}
|
||||
|
||||
curr = prev;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Release the shadow stack of @tsk, if it has one.
 *
 * Before the stack is freed, a warning is emitted if its end marker or
 * saved stack pointer indicate corruption, and the usage statistics are
 * updated. A task without a shadow stack is left untouched.
 */
void scs_release(struct task_struct *tsk)
{
	void *stack = task_scs(tsk);

	if (stack) {
		WARN(task_scs_end_corrupted(tsk),
		     "corrupted shadow stack detected when freeing task\n");
		scs_check_usage(tsk);
		scs_free(stack);
	}
}
|
@ -5411,6 +5411,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
|
||||
" managed:%lukB"
|
||||
" mlocked:%lukB"
|
||||
" kernel_stack:%lukB"
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
" shadow_call_stack:%lukB"
|
||||
#endif
|
||||
" pagetables:%lukB"
|
||||
" bounce:%lukB"
|
||||
" free_pcp:%lukB"
|
||||
@ -5433,6 +5436,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
|
||||
K(zone_managed_pages(zone)),
|
||||
K(zone_page_state(zone, NR_MLOCK)),
|
||||
zone_page_state(zone, NR_KERNEL_STACK_KB),
|
||||
#ifdef CONFIG_SHADOW_CALL_STACK
|
||||
zone_page_state(zone, NR_KERNEL_SCS_KB),
|
||||
#endif
|
||||
K(zone_page_state(zone, NR_PAGETABLE)),
|
||||
K(zone_page_state(zone, NR_BOUNCE)),
|
||||
K(free_pcp),
|
||||
|
@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
|
||||
"nr_mlock",
|
||||
"nr_page_table_pages",
|
||||
"nr_kernel_stack",
|
||||
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
|
||||
"nr_shadow_call_stack",
|
||||
#endif
|
||||
"nr_bounce",
|
||||
#if IS_ENABLED(CONFIG_ZSMALLOC)
|
||||
"nr_zspages",
|
||||
|
Loading…
Reference in New Issue
Block a user