mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-23 20:24:12 +08:00
mm: Hardened usercopy
This is the start of porting PAX_USERCOPY into the mainline kernel. This is the first set of features, controlled by CONFIG_HARDENED_USERCOPY. The work is based on code by PaX Team and Brad Spengler, and an earlier port from Casey Schaufler. Additional non-slab page tests are from Rik van Riel.

This patch contains the logic for validating several conditions when performing copy_to_user() and copy_from_user() on the kernel object being copied to/from:
- address range doesn't wrap around
- address range isn't NULL or zero-allocated (with a non-zero copy size)
- if on the slab allocator:
  - copy size must be less than or equal to object size (when the check is implemented in the allocator, which appears in subsequent patches)
- otherwise, object must not span page allocations (excepting Reserved and CMA ranges)
- if on the stack:
  - object must not extend before/after the current process stack
  - object must be contained by a valid stack frame (when there is arch/build support for identifying stack frames)
- object must not overlap with kernel text

Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
0f60a8efe4
commit
f5509cc18d
@ -155,6 +155,18 @@ void kfree(const void *);
|
||||
void kzfree(const void *);
|
||||
size_t ksize(const void *);
|
||||
|
||||
#ifndef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
/*
 * The allocator offers no object-size validation, so this fallback
 * never reports a problem: it always returns NULL.
 */
static inline const char *__check_heap_object(const void *ptr,
					      unsigned long n,
					      struct page *page)
{
	return NULL;
}
#else
/*
 * Implemented by the heap allocator.  Callers treat a NULL return as
 * "range is acceptable" and any other return as a string naming the
 * offending object.
 */
const char *__check_heap_object(const void *ptr, unsigned long n,
				struct page *page);
#endif
|
||||
|
||||
/*
|
||||
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
|
||||
* alignment larger than the alignment of a 64-bit integer.
|
||||
|
@ -155,6 +155,21 @@ static inline int arch_within_stack_frames(const void * const stack,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HARDENED_USERCOPY
/* Out-of-line implementation of the checks (see mm/usercopy.c). */
extern void __check_object_size(const void *ptr, unsigned long n,
				bool to_user);

/*
 * check_object_size - validate a kernel object ahead of a userspace copy
 * @ptr:     start of the kernel object being copied to/from
 * @n:       number of bytes that will be copied
 * @to_user: true when the copy direction is kernel -> userspace
 *
 * Thin inline front-end that hands everything to __check_object_size().
 */
static inline void check_object_size(const void *ptr, unsigned long n,
				     bool to_user)
{
	__check_object_size(ptr, n, to_user);
}
#else
/* Hardened usercopy is not configured: the check is an empty inline. */
static inline void check_object_size(const void *ptr, unsigned long n,
				     bool to_user)
{
}
#endif /* CONFIG_HARDENED_USERCOPY */
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _LINUX_THREAD_INFO_H */
|
||||
|
@ -21,6 +21,9 @@ KCOV_INSTRUMENT_memcontrol.o := n
|
||||
KCOV_INSTRUMENT_mmzone.o := n
|
||||
KCOV_INSTRUMENT_vmstat.o := n
|
||||
|
||||
# Since __builtin_frame_address does work as used, disable the warning.
|
||||
CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
|
||||
|
||||
mmu-y := nommu.o
|
||||
mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
|
||||
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
|
||||
@ -99,3 +102,4 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
|
||||
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
|
||||
obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
|
||||
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
|
||||
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
|
||||
|
268
mm/usercopy.c
Normal file
268
mm/usercopy.c
Normal file
@ -0,0 +1,268 @@
|
||||
/*
|
||||
* This implements the various checks for CONFIG_HARDENED_USERCOPY*,
|
||||
* which are designed to protect kernel memory from needless exposure
|
||||
* and overwrite under many unintended conditions. This code is based
|
||||
* on PAX_USERCOPY, which is:
|
||||
*
|
||||
* Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
|
||||
* Security Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
*/
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/sections.h>
|
||||
|
||||
/* Possible classifications of an object relative to the task stack. */
enum {
	BAD_STACK	= -1,	/* invalid stack position or bad stack frame */
	NOT_STACK	= 0,	/* not on the stack at all */
	GOOD_FRAME	= 1,	/* fully within a valid stack frame */
	GOOD_STACK	= 2,	/* fully on the stack, frame not checkable */
};
|
||||
|
||||
/*
|
||||
* Checks if a given pointer and length is contained by the current
|
||||
* stack frame (if possible).
|
||||
*
|
||||
* Returns:
|
||||
* NOT_STACK: not at all on the stack
|
||||
* GOOD_FRAME: fully within a valid stack frame
|
||||
* GOOD_STACK: fully on the stack (when can't do frame-checking)
|
||||
* BAD_STACK: error condition (invalid stack position or bad stack frame)
|
||||
*/
|
||||
static noinline int check_stack_object(const void *obj, unsigned long len)
|
||||
{
|
||||
const void * const stack = task_stack_page(current);
|
||||
const void * const stackend = stack + THREAD_SIZE;
|
||||
int ret;
|
||||
|
||||
/* Object is not on the stack at all. */
|
||||
if (obj + len <= stack || stackend <= obj)
|
||||
return NOT_STACK;
|
||||
|
||||
/*
|
||||
* Reject: object partially overlaps the stack (passing the
|
||||
* the check above means at least one end is within the stack,
|
||||
* so if this check fails, the other end is outside the stack).
|
||||
*/
|
||||
if (obj < stack || stackend < obj + len)
|
||||
return BAD_STACK;
|
||||
|
||||
/* Check if object is safely within a valid frame. */
|
||||
ret = arch_within_stack_frames(stack, stackend, obj, len);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return GOOD_STACK;
|
||||
}
|
||||
|
||||
static void report_usercopy(const void *ptr, unsigned long len,
|
||||
bool to_user, const char *type)
|
||||
{
|
||||
pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n",
|
||||
to_user ? "exposure" : "overwrite",
|
||||
to_user ? "from" : "to", ptr, type ? : "unknown", len);
|
||||
/*
|
||||
* For greater effect, it would be nice to do do_group_exit(),
|
||||
* but BUG() actually hooks all the lock-breaking and per-arch
|
||||
* Oops code, so that is used here instead.
|
||||
*/
|
||||
BUG();
|
||||
}
|
||||
|
||||
/*
 * Returns true if any portion of [ptr, ptr + n) overlaps with [low, high).
 *
 * Both ranges are half-open: the end address is the first byte *not*
 * covered.  Two ranges that merely touch (one ends exactly where the
 * other begins) therefore share no byte and do not overlap.
 *
 * @ptr:  start of the range being checked
 * @n:    length of the range being checked, in bytes
 * @low:  first address of the reference range
 * @high: first address past the reference range
 */
static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
		     unsigned long high)
{
	unsigned long check_low = (uintptr_t)ptr;
	unsigned long check_high = check_low + n;

	/*
	 * Does not overlap if entirely above or entirely below.  Since
	 * check_high is exclusive, a range ending exactly at 'low' lies
	 * entirely below it, hence '<=' rather than '<'.
	 */
	if (check_low >= high || check_high <= low)
		return false;

	return true;
}
|
||||
|
||||
/* Is this address range in the kernel text area? */
|
||||
static inline const char *check_kernel_text_object(const void *ptr,
|
||||
unsigned long n)
|
||||
{
|
||||
unsigned long textlow = (unsigned long)_stext;
|
||||
unsigned long texthigh = (unsigned long)_etext;
|
||||
unsigned long textlow_linear, texthigh_linear;
|
||||
|
||||
if (overlaps(ptr, n, textlow, texthigh))
|
||||
return "<kernel text>";
|
||||
|
||||
/*
|
||||
* Some architectures have virtual memory mappings with a secondary
|
||||
* mapping of the kernel text, i.e. there is more than one virtual
|
||||
* kernel address that points to the kernel image. It is usually
|
||||
* when there is a separate linear physical memory mapping, in that
|
||||
* __pa() is not just the reverse of __va(). This can be detected
|
||||
* and checked:
|
||||
*/
|
||||
textlow_linear = (unsigned long)__va(__pa(textlow));
|
||||
/* No different mapping: we're done. */
|
||||
if (textlow_linear == textlow)
|
||||
return NULL;
|
||||
|
||||
/* Check the secondary mapping... */
|
||||
texthigh_linear = (unsigned long)__va(__pa(texthigh));
|
||||
if (overlaps(ptr, n, textlow_linear, texthigh_linear))
|
||||
return "<linear kernel text>";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Rejects address ranges that cannot describe a real object.
 *
 * @ptr: start of the range
 * @n:   length of the range in bytes
 *
 * Returns NULL when the range looks sane, otherwise a short string
 * naming the problem.
 */
static inline const char *check_bogus_address(const void *ptr, unsigned long n)
{
	const unsigned long start = (unsigned long)ptr;

	/*
	 * Reject if object wraps past end of memory.  The test is done
	 * on unsigned integers, where wrap-around is well defined;
	 * overflowing pointer arithmetic is undefined and a compiler is
	 * entitled to assume "ptr + n < ptr" can never be true.
	 */
	if (start + n < start)
		return "<wrapped address>";

	/* Reject if NULL or ZERO-allocation. */
	if (ZERO_OR_NULL_PTR(ptr))
		return "<null>";

	return NULL;
}
|
||||
|
||||
static inline const char *check_heap_object(const void *ptr, unsigned long n,
|
||||
bool to_user)
|
||||
{
|
||||
struct page *page, *endpage;
|
||||
const void *end = ptr + n - 1;
|
||||
bool is_reserved, is_cma;
|
||||
|
||||
/*
|
||||
* Some architectures (arm64) return true for virt_addr_valid() on
|
||||
* vmalloced addresses. Work around this by checking for vmalloc
|
||||
* first.
|
||||
*/
|
||||
if (is_vmalloc_addr(ptr))
|
||||
return NULL;
|
||||
|
||||
if (!virt_addr_valid(ptr))
|
||||
return NULL;
|
||||
|
||||
page = virt_to_head_page(ptr);
|
||||
|
||||
/* Check slab allocator for flags and size. */
|
||||
if (PageSlab(page))
|
||||
return __check_heap_object(ptr, n, page);
|
||||
|
||||
/*
|
||||
* Sometimes the kernel data regions are not marked Reserved (see
|
||||
* check below). And sometimes [_sdata,_edata) does not cover
|
||||
* rodata and/or bss, so check each range explicitly.
|
||||
*/
|
||||
|
||||
/* Allow reads of kernel rodata region (if not marked as Reserved). */
|
||||
if (ptr >= (const void *)__start_rodata &&
|
||||
end <= (const void *)__end_rodata) {
|
||||
if (!to_user)
|
||||
return "<rodata>";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Allow kernel data region (if not marked as Reserved). */
|
||||
if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
|
||||
return NULL;
|
||||
|
||||
/* Allow kernel bss region (if not marked as Reserved). */
|
||||
if (ptr >= (const void *)__bss_start &&
|
||||
end <= (const void *)__bss_stop)
|
||||
return NULL;
|
||||
|
||||
/* Is the object wholly within one base page? */
|
||||
if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
|
||||
((unsigned long)end & (unsigned long)PAGE_MASK)))
|
||||
return NULL;
|
||||
|
||||
/* Allow if start and end are inside the same compound page. */
|
||||
endpage = virt_to_head_page(end);
|
||||
if (likely(endpage == page))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Reject if range is entirely either Reserved (i.e. special or
|
||||
* device memory), or CMA. Otherwise, reject since the object spans
|
||||
* several independently allocated pages.
|
||||
*/
|
||||
is_reserved = PageReserved(page);
|
||||
is_cma = is_migrate_cma_page(page);
|
||||
if (!is_reserved && !is_cma)
|
||||
goto reject;
|
||||
|
||||
for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
|
||||
page = virt_to_head_page(ptr);
|
||||
if (is_reserved && !PageReserved(page))
|
||||
goto reject;
|
||||
if (is_cma && !is_migrate_cma_page(page))
|
||||
goto reject;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
reject:
|
||||
return "<spans multiple pages>";
|
||||
}
|
||||
|
||||
/*
|
||||
* Validates that the given object is:
|
||||
* - not bogus address
|
||||
* - known-safe heap or stack object
|
||||
* - not in kernel text
|
||||
*/
|
||||
void __check_object_size(const void *ptr, unsigned long n, bool to_user)
|
||||
{
|
||||
const char *err;
|
||||
|
||||
/* Skip all tests if size is zero. */
|
||||
if (!n)
|
||||
return;
|
||||
|
||||
/* Check for invalid addresses. */
|
||||
err = check_bogus_address(ptr, n);
|
||||
if (err)
|
||||
goto report;
|
||||
|
||||
/* Check for bad heap object. */
|
||||
err = check_heap_object(ptr, n, to_user);
|
||||
if (err)
|
||||
goto report;
|
||||
|
||||
/* Check for bad stack object. */
|
||||
switch (check_stack_object(ptr, n)) {
|
||||
case NOT_STACK:
|
||||
/* Object is not touching the current process stack. */
|
||||
break;
|
||||
case GOOD_FRAME:
|
||||
case GOOD_STACK:
|
||||
/*
|
||||
* Object is either in the correct frame (when it
|
||||
* is possible to check) or just generally on the
|
||||
* process stack (when frame checking not available).
|
||||
*/
|
||||
return;
|
||||
default:
|
||||
err = "<process stack>";
|
||||
goto report;
|
||||
}
|
||||
|
||||
/* Check for object in kernel to avoid text exposure. */
|
||||
err = check_kernel_text_object(ptr, n);
|
||||
if (!err)
|
||||
return;
|
||||
|
||||
report:
|
||||
report_usercopy(ptr, n, to_user, err);
|
||||
}
|
||||
EXPORT_SYMBOL(__check_object_size);
|
@ -118,6 +118,34 @@ config LSM_MMAP_MIN_ADDR
|
||||
this low address space will need the permission specific to the
|
||||
systems running LSM.
|
||||
|
||||
config HAVE_HARDENED_USERCOPY_ALLOCATOR
|
||||
bool
|
||||
help
|
||||
The heap allocator implements __check_heap_object() for
|
||||
validating memory ranges against heap object sizes in
|
||||
support of CONFIG_HARDENED_USERCOPY.
|
||||
|
||||
config HAVE_ARCH_HARDENED_USERCOPY
|
||||
bool
|
||||
help
|
||||
The architecture supports CONFIG_HARDENED_USERCOPY by
|
||||
calling check_object_size() just before performing the
|
||||
userspace copies in the low level implementation of
|
||||
copy_to_user() and copy_from_user().
|
||||
|
||||
config HARDENED_USERCOPY
|
||||
bool "Harden memory copies between kernel and userspace"
|
||||
depends on HAVE_ARCH_HARDENED_USERCOPY
|
||||
select BUG
|
||||
help
|
||||
This option checks for obviously wrong memory regions when
|
||||
copying memory to/from the kernel (via copy_to_user() and
|
||||
copy_from_user() functions) by rejecting memory ranges that
|
||||
are larger than the specified heap object, span multiple
|
||||
separately allocated pages, are not on the process stack,
|
||||
or are part of the kernel text. This kills entire classes
|
||||
of heap overflow exploits and similar kernel memory exposures.
|
||||
|
||||
source security/selinux/Kconfig
|
||||
source security/smack/Kconfig
|
||||
source security/tomoyo/Kconfig
|
||||
|
Loading…
Reference in New Issue
Block a user