linux/kernel/stackleak.c

// SPDX-License-Identifier: GPL-2.0
/*
 * This code fills the used part of the kernel stack with a poison value
 * before returning to userspace. It's part of the STACKLEAK feature
 * ported from grsecurity/PaX.
 *
 * Author: Alexander Popov <alex.popov@linux.com>
 *
 * STACKLEAK reduces the information which kernel stack leak bugs can
 * reveal and blocks some uninitialized stack variable attacks.
 */

#include <linux/stackleak.h>

#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
#include <linux/jump_label.h>
#include <linux/sysctl.h>

static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);

int stack_erasing_sysctl(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = 0;
	int state = !static_branch_unlikely(&stack_erasing_bypass);
	int prev_state = state;

	table->data = &state;
	table->maxlen = sizeof(int);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	state = !!state;
	if (ret || !write || state == prev_state)
		return ret;

	if (state)
		static_branch_disable(&stack_erasing_bypass);
	else
		static_branch_enable(&stack_erasing_bypass);

	pr_warn("stackleak: kernel stack erasing is %s\n",
					state ? "enabled" : "disabled");
	return ret;
}

#define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
#else
#define skip_erasing()	false
#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */

asmlinkage void stackleak_erase(void)
{
	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
	unsigned long kstack_ptr = current->lowest_stack;
	unsigned long boundary = (unsigned long)end_of_stack(current);
	unsigned int poison_count = 0;
	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);

	if (skip_erasing())
		return;

	/* Check that 'lowest_stack' value is sane */
	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
		kstack_ptr = boundary;

	/* Search for the poison value in the kernel stack */
	while (kstack_ptr > boundary && poison_count <= depth) {
		if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
			poison_count++;
		else
			poison_count = 0;

		kstack_ptr -= sizeof(unsigned long);
	}

	/*
	 * One 'long int' at the bottom of the thread stack is reserved and
	 * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
	 */
	if (kstack_ptr == boundary)
		kstack_ptr += sizeof(unsigned long);

#ifdef CONFIG_STACKLEAK_METRICS
	current->prev_lowest_stack = kstack_ptr;
#endif

	/*
	 * Now write the poison value to the kernel stack. Start from
	 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
	 * the stack pointer doesn't change when we write poison.
	 */
	if (on_thread_stack())
		boundary = current_stack_pointer;
	else
		boundary = current_top_of_stack();

	while (kstack_ptr < boundary) {
		*(unsigned long *)kstack_ptr = STACKLEAK_POISON;
		kstack_ptr += sizeof(unsigned long);
	}

	/* Reset the 'lowest_stack' value for the next syscall */
	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
}

void __used stackleak_track_stack(void)
{
	/*
	 * N.B. stackleak_erase() fills the kernel stack with the poison value,
	 * which has the register width. That code assumes that the value
	 * of 'lowest_stack' is aligned on the register width boundary.
	 *
	 * That is true for x86 and x86_64 because of the kernel stack
	 * alignment on these platforms (for details, see 'cc_stack_align' in
	 * arch/x86/Makefile). Take care of that when you port STACKLEAK to
	 * new platforms.
	 */
	unsigned long sp = (unsigned long)&sp;

	/*
	 * Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than
	 * STACKLEAK_SEARCH_DEPTH makes the poison search in
	 * stackleak_erase() unreliable. Let's prevent that.
	 */
	BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);

	if (sp < current->lowest_stack &&
	    sp >= (unsigned long)task_stack_page(current) +
						sizeof(unsigned long)) {
		current->lowest_stack = sp;
	}
}
EXPORT_SYMBOL(stackleak_track_stack);
x86/entry: Add STACKLEAK erasing the kernel stack at the end of syscalls The STACKLEAK feature (initially developed by PaX Team) has the following benefits: 1. Reduces the information that can be revealed through kernel stack leak bugs. The idea of erasing the thread stack at the end of syscalls is similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel crypto, which all comply with FDP_RIP.2 (Full Residual Information Protection) of the Common Criteria standard. 2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712, CVE-2010-2963). That kind of bugs should be killed by improving C compilers in future, which might take a long time. This commit introduces the code filling the used part of the kernel stack with a poison value before returning to userspace. Full STACKLEAK feature also contains the gcc plugin which comes in a separate commit. The STACKLEAK feature is ported from grsecurity/PaX. More information at: https://grsecurity.net/ https://pax.grsecurity.net/ This code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on our understanding of the code. Changes or omissions from the original code are ours and don't reflect the original grsecurity/PaX code. Performance impact: Hardware: Intel Core i7-4770, 16 GB RAM Test #1: building the Linux kernel on a single core 0.91% slowdown Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P 4.2% slowdown So the STACKLEAK description in Kconfig includes: "The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary and you are advised to test this feature on your expected workload before deploying it". Signed-off-by: Alexander Popov <alex.popov@linux.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Kees Cook <keescook@chromium.org> 2018-08-17 06:16:58 +08:00			`// SPDX-License-Identifier: GPL-2.0`
			`/*`
			`* This code fills the used part of the kernel stack with a poison value`
			`* before returning to userspace. It's part of the STACKLEAK feature`
			`* ported from grsecurity/PaX.`
			`*`
			`* Author: Alexander Popov <alex.popov@linux.com>`
			`*`
			`* STACKLEAK reduces the information which kernel stack leak bugs can`
			`* reveal and blocks some uninitialized stack variable attacks.`
			`*/`

			`#include <linux/stackleak.h>`

stackleak: Allow runtime disabling of kernel stack erasing Introduce CONFIG_STACKLEAK_RUNTIME_DISABLE option, which provides 'stack_erasing' sysctl. It can be used in runtime to control kernel stack erasing for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK. Suggested-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Alexander Popov <alex.popov@linux.com> Tested-by: Laura Abbott <labbott@redhat.com> Signed-off-by: Kees Cook <keescook@chromium.org> 2018-08-17 06:17:03 +08:00			`#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE`
			`#include <linux/jump_label.h>`
			`#include <linux/sysctl.h>`

			`static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);`

			`int stack_erasing_sysctl(struct ctl_table *table, int write,`
			`void __user buffer, size_t lenp, loff_t *ppos)`
			`{`
			`int ret = 0;`
			`int state = !static_branch_unlikely(&stack_erasing_bypass);`
			`int prev_state = state;`

			`table->data = &state;`
			`table->maxlen = sizeof(int);`
			`ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);`
			`state = !!state;`
			`if (ret \|\| !write \|\| state == prev_state)`
			`return ret;`

			`if (state)`
			`static_branch_disable(&stack_erasing_bypass);`
			`else`
			`static_branch_enable(&stack_erasing_bypass);`

			`pr_warn("stackleak: kernel stack erasing is %s\n",`
			`state ? "enabled" : "disabled");`
			`return ret;`
			`}`

			`#define skip_erasing() static_branch_unlikely(&stack_erasing_bypass)`
			`#else`
			`#define skip_erasing() false`
			`#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */`

x86/entry: Add STACKLEAK erasing the kernel stack at the end of syscalls The STACKLEAK feature (initially developed by PaX Team) has the following benefits: 1. Reduces the information that can be revealed through kernel stack leak bugs. The idea of erasing the thread stack at the end of syscalls is similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel crypto, which all comply with FDP_RIP.2 (Full Residual Information Protection) of the Common Criteria standard. 2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712, CVE-2010-2963). That kind of bugs should be killed by improving C compilers in future, which might take a long time. This commit introduces the code filling the used part of the kernel stack with a poison value before returning to userspace. Full STACKLEAK feature also contains the gcc plugin which comes in a separate commit. The STACKLEAK feature is ported from grsecurity/PaX. More information at: https://grsecurity.net/ https://pax.grsecurity.net/ This code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on our understanding of the code. Changes or omissions from the original code are ours and don't reflect the original grsecurity/PaX code. Performance impact: Hardware: Intel Core i7-4770, 16 GB RAM Test #1: building the Linux kernel on a single core 0.91% slowdown Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P 4.2% slowdown So the STACKLEAK description in Kconfig includes: "The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary and you are advised to test this feature on your expected workload before deploying it". Signed-off-by: Alexander Popov <alex.popov@linux.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Kees Cook <keescook@chromium.org> 2018-08-17 06:16:58 +08:00			`asmlinkage void stackleak_erase(void)`
			`{`
			`/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */`
			`unsigned long kstack_ptr = current->lowest_stack;`
			`unsigned long boundary = (unsigned long)end_of_stack(current);`
			`unsigned int poison_count = 0;`
			`const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);`

stackleak: Allow runtime disabling of kernel stack erasing Introduce CONFIG_STACKLEAK_RUNTIME_DISABLE option, which provides 'stack_erasing' sysctl. It can be used in runtime to control kernel stack erasing for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK. Suggested-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Alexander Popov <alex.popov@linux.com> Tested-by: Laura Abbott <labbott@redhat.com> Signed-off-by: Kees Cook <keescook@chromium.org> 2018-08-17 06:17:03 +08:00			`if (skip_erasing())`
			`return;`

x86/entry: Add STACKLEAK erasing the kernel stack at the end of syscalls The STACKLEAK feature (initially developed by PaX Team) has the following benefits: 1. Reduces the information that can be revealed through kernel stack leak bugs. The idea of erasing the thread stack at the end of syscalls is similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel crypto, which all comply with FDP_RIP.2 (Full Residual Information Protection) of the Common Criteria standard. 2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712, CVE-2010-2963). That kind of bugs should be killed by improving C compilers in future, which might take a long time. This commit introduces the code filling the used part of the kernel stack with a poison value before returning to userspace. Full STACKLEAK feature also contains the gcc plugin which comes in a separate commit. The STACKLEAK feature is ported from grsecurity/PaX. More information at: https://grsecurity.net/ https://pax.grsecurity.net/ This code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on our understanding of the code. Changes or omissions from the original code are ours and don't reflect the original grsecurity/PaX code. Performance impact: Hardware: Intel Core i7-4770, 16 GB RAM Test #1: building the Linux kernel on a single core 0.91% slowdown Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P 4.2% slowdown So the STACKLEAK description in Kconfig includes: "The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary and you are advised to test this feature on your expected workload before deploying it". Signed-off-by: Alexander Popov <alex.popov@linux.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Kees Cook <keescook@chromium.org> 2018-08-17 06:16:58 +08:00			`/* Check that 'lowest_stack' value is sane */`
			`if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))`
			`kstack_ptr = boundary;`

			`/* Search for the poison value in the kernel stack */`
			`while (kstack_ptr > boundary && poison_count <= depth) {`
			`if ((unsigned long )kstack_ptr == STACKLEAK_POISON)`
			`poison_count++;`
			`else`
			`poison_count = 0;`

			`kstack_ptr -= sizeof(unsigned long);`
			`}`

			`/*`
			`* One 'long int' at the bottom of the thread stack is reserved and`
			`* should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).`
			`*/`
			`if (kstack_ptr == boundary)`
			`kstack_ptr += sizeof(unsigned long);`

fs/proc: Show STACKLEAK metrics in the /proc file system Introduce CONFIG_STACKLEAK_METRICS providing STACKLEAK information about tasks via the /proc file system. In particular, /proc/<pid>/stack_depth shows the maximum kernel stack consumption for the current and previous syscalls. Although this information is not precise, it can be useful for estimating the STACKLEAK performance impact for your workloads. Suggested-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Alexander Popov <alex.popov@linux.com> Tested-by: Laura Abbott <labbott@redhat.com> Signed-off-by: Kees Cook <keescook@chromium.org> 2018-08-17 06:17:01 +08:00			`#ifdef CONFIG_STACKLEAK_METRICS`
			`current->prev_lowest_stack = kstack_ptr;`
			`#endif`

x86/entry: Add STACKLEAK erasing the kernel stack at the end of syscalls The STACKLEAK feature (initially developed by PaX Team) has the following benefits: 1. Reduces the information that can be revealed through kernel stack leak bugs. The idea of erasing the thread stack at the end of syscalls is similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel crypto, which all comply with FDP_RIP.2 (Full Residual Information Protection) of the Common Criteria standard. 2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712, CVE-2010-2963). That kind of bugs should be killed by improving C compilers in future, which might take a long time. This commit introduces the code filling the used part of the kernel stack with a poison value before returning to userspace. Full STACKLEAK feature also contains the gcc plugin which comes in a separate commit. The STACKLEAK feature is ported from grsecurity/PaX. More information at: https://grsecurity.net/ https://pax.grsecurity.net/ This code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on our understanding of the code. Changes or omissions from the original code are ours and don't reflect the original grsecurity/PaX code. Performance impact: Hardware: Intel Core i7-4770, 16 GB RAM Test #1: building the Linux kernel on a single core 0.91% slowdown Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P 4.2% slowdown So the STACKLEAK description in Kconfig includes: "The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary and you are advised to test this feature on your expected workload before deploying it". Signed-off-by: Alexander Popov <alex.popov@linux.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Kees Cook <keescook@chromium.org> 2018-08-17 06:16:58 +08:00			`/*`
			`* Now write the poison value to the kernel stack. Start from`
			`* 'kstack_ptr' and move up till the new 'boundary'. We assume that`
			`* the stack pointer doesn't change when we write poison.`
			`*/`
			`if (on_thread_stack())`
			`boundary = current_stack_pointer;`
			`else`
			`boundary = current_top_of_stack();`

			`while (kstack_ptr < boundary) {`
			`(unsigned long )kstack_ptr = STACKLEAK_POISON;`
			`kstack_ptr += sizeof(unsigned long);`
			`}`

			`/* Reset the 'lowest_stack' value for the next syscall */`
			`current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;`
			`}`

gcc-plugins: Add STACKLEAK plugin for tracking the kernel stack The STACKLEAK feature erases the kernel stack before returning from syscalls. That reduces the information which kernel stack leak bugs can reveal and blocks some uninitialized stack variable attacks. This commit introduces the STACKLEAK gcc plugin. It is needed for tracking the lowest border of the kernel stack, which is important for the code erasing the used part of the kernel stack at the end of syscalls (comes in a separate commit). The STACKLEAK feature is ported from grsecurity/PaX. More information at: https://grsecurity.net/ https://pax.grsecurity.net/ This code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on our understanding of the code. Changes or omissions from the original code are ours and don't reflect the original grsecurity/PaX code. Signed-off-by: Alexander Popov <alex.popov@linux.com> Tested-by: Laura Abbott <labbott@redhat.com> Signed-off-by: Kees Cook <keescook@chromium.org> 2018-08-17 06:16:59 +08:00			`void __used stackleak_track_stack(void)`
			`{`
			`/*`
			`* N.B. stackleak_erase() fills the kernel stack with the poison value,`
			`* which has the register width. That code assumes that the value`
			`* of 'lowest_stack' is aligned on the register width boundary.`
			`*`
			`* That is true for x86 and x86_64 because of the kernel stack`
			`* alignment on these platforms (for details, see 'cc_stack_align' in`
			`* arch/x86/Makefile). Take care of that when you port STACKLEAK to`
			`* new platforms.`
			`*/`
			`unsigned long sp = (unsigned long)&sp;`

			`/*`
			`* Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than`
			`* STACKLEAK_SEARCH_DEPTH makes the poison search in`
			`* stackleak_erase() unreliable. Let's prevent that.`
			`*/`
			`BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);`

			`if (sp < current->lowest_stack &&`
			`sp >= (unsigned long)task_stack_page(current) +`
			`sizeof(unsigned long)) {`
			`current->lowest_stack = sp;`
			`}`
			`}`
			`EXPORT_SYMBOL(stackleak_track_stack);`