mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-25 13:14:07 +08:00
c33c794828
Convert all instances of direct pte_t* dereferencing to instead use ptep_get() helper. This means that by default, the accesses change from a C dereference to a READ_ONCE(). This is technically the correct thing to do since where pgtables are modified by HW (for access/dirty) they are volatile and therefore we should always ensure READ_ONCE() semantics. But more importantly, by always using the helper, it can be overridden by the architecture to fully encapsulate the contents of the pte. Arch code is deliberately not converted, as the arch code knows best. It is intended that arch code (arm64) will override the default with its own implementation that can (e.g.) hide certain bits from the core code, or determine young/dirty status by mixing in state from another source. Conversion was done using Coccinelle: ---- // $ make coccicheck \ // COCCI=ptepget.cocci \ // SPFLAGS="--include-headers" \ // MODE=patch virtual patch @ depends on patch @ pte_t *v; @@ - *v + ptep_get(v) ---- Then reviewed and hand-edited to avoid multiple unnecessary calls to ptep_get(), instead opting to store the result of a single call in a variable, where it is correct to do so. This aims to negate any cost of READ_ONCE() and will benefit arch-overrides that may be more complex. Included is a fix for an issue in an earlier version of this patch that was pointed out by kernel test robot. The issue arose because config MMU=n elides definition of the ptep helper functions, including ptep_get(). HUGETLB_PAGE=n configs still define a simple huge_ptep_clear_flush() for linking purposes, which dereferences the ptep. So when both configs are disabled, this caused a build error because ptep_get() is not defined. Fix by continuing to do a direct dereference when MMU=n. This is safe because for this config the arch code cannot be trying to virtualize the ptes because none of the ptep helpers are defined. Link: https://lkml.kernel.org/r/20230612151545.3317766-4-ryan.roberts@arm.com Reported-by: kernel test robot <lkp@intel.com> Link: https://lore.kernel.org/oe-kbuild-all/202305120142.yXsNEo6H-lkp@intel.com/ Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Potapenko <glider@google.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andrey Konovalov <andreyknvl@gmail.com> Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com> Cc: Christian Brauner <brauner@kernel.org> Cc: Christoph Hellwig <hch@infradead.org> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Dave Airlie <airlied@gmail.com> Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mike Rapoport (IBM) <rppt@kernel.org> Cc: Muchun Song <muchun.song@linux.dev> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Naoya Horiguchi <naoya.horiguchi@nec.com> Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> Cc: Pavel Tatashin <pasha.tatashin@soleen.com> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: SeongJae Park <sj@kernel.org> Cc: Shakeel Butt <shakeelb@google.com> Cc: Uladzislau Rezki (Sony) <urezki@gmail.com> Cc: Vincenzo Frascino <vincenzo.frascino@arm.com> Cc: Yu Zhao <yuzhao@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
651 lines
18 KiB
C
651 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* This file contains KASAN runtime code that manages shadow memory for
|
|
* generic and software tag-based KASAN modes.
|
|
*
|
|
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
|
|
* Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
|
|
*
|
|
* Some code borrowed from https://github.com/xairy/kasan-prototype by
|
|
* Andrey Konovalov <andreyknvl@gmail.com>
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/kasan.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/kfence.h>
|
|
#include <linux/kmemleak.h>
|
|
#include <linux/memory.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/string.h>
|
|
#include <linux/types.h>
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include "kasan.h"
|
|
|
|
bool __kasan_check_read(const volatile void *p, unsigned int size)
|
|
{
|
|
return kasan_check_range((void *)p, size, false, _RET_IP_);
|
|
}
|
|
EXPORT_SYMBOL(__kasan_check_read);
|
|
|
|
bool __kasan_check_write(const volatile void *p, unsigned int size)
|
|
{
|
|
return kasan_check_range((void *)p, size, true, _RET_IP_);
|
|
}
|
|
EXPORT_SYMBOL(__kasan_check_write);
|
|
|
|
#if !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX) && !defined(CONFIG_GENERIC_ENTRY)
|
|
/*
|
|
* CONFIG_GENERIC_ENTRY relies on compiler emitted mem*() calls to not be
|
|
* instrumented. KASAN enabled toolchains should emit __asan_mem*() functions
|
|
* for the sites they want to instrument.
|
|
*
|
|
* If we have a compiler that can instrument meminstrinsics, never override
|
|
* these, so that non-instrumented files can safely consider them as builtins.
|
|
*/
|
|
#undef memset
|
|
void *memset(void *addr, int c, size_t len)
|
|
{
|
|
if (!kasan_check_range(addr, len, true, _RET_IP_))
|
|
return NULL;
|
|
|
|
return __memset(addr, c, len);
|
|
}
|
|
|
|
#ifdef __HAVE_ARCH_MEMMOVE
|
|
#undef memmove
|
|
void *memmove(void *dest, const void *src, size_t len)
|
|
{
|
|
if (!kasan_check_range(src, len, false, _RET_IP_) ||
|
|
!kasan_check_range(dest, len, true, _RET_IP_))
|
|
return NULL;
|
|
|
|
return __memmove(dest, src, len);
|
|
}
|
|
#endif
|
|
|
|
#undef memcpy
|
|
void *memcpy(void *dest, const void *src, size_t len)
|
|
{
|
|
if (!kasan_check_range(src, len, false, _RET_IP_) ||
|
|
!kasan_check_range(dest, len, true, _RET_IP_))
|
|
return NULL;
|
|
|
|
return __memcpy(dest, src, len);
|
|
}
|
|
#endif
|
|
|
|
void *__asan_memset(void *addr, int c, ssize_t len)
|
|
{
|
|
if (!kasan_check_range(addr, len, true, _RET_IP_))
|
|
return NULL;
|
|
|
|
return __memset(addr, c, len);
|
|
}
|
|
EXPORT_SYMBOL(__asan_memset);
|
|
|
|
#ifdef __HAVE_ARCH_MEMMOVE
|
|
void *__asan_memmove(void *dest, const void *src, ssize_t len)
|
|
{
|
|
if (!kasan_check_range(src, len, false, _RET_IP_) ||
|
|
!kasan_check_range(dest, len, true, _RET_IP_))
|
|
return NULL;
|
|
|
|
return __memmove(dest, src, len);
|
|
}
|
|
EXPORT_SYMBOL(__asan_memmove);
|
|
#endif
|
|
|
|
void *__asan_memcpy(void *dest, const void *src, ssize_t len)
|
|
{
|
|
if (!kasan_check_range(src, len, false, _RET_IP_) ||
|
|
!kasan_check_range(dest, len, true, _RET_IP_))
|
|
return NULL;
|
|
|
|
return __memcpy(dest, src, len);
|
|
}
|
|
EXPORT_SYMBOL(__asan_memcpy);
|
|
|
|
#ifdef CONFIG_KASAN_SW_TAGS
|
|
void *__hwasan_memset(void *addr, int c, ssize_t len) __alias(__asan_memset);
|
|
EXPORT_SYMBOL(__hwasan_memset);
|
|
#ifdef __HAVE_ARCH_MEMMOVE
|
|
void *__hwasan_memmove(void *dest, const void *src, ssize_t len) __alias(__asan_memmove);
|
|
EXPORT_SYMBOL(__hwasan_memmove);
|
|
#endif
|
|
void *__hwasan_memcpy(void *dest, const void *src, ssize_t len) __alias(__asan_memcpy);
|
|
EXPORT_SYMBOL(__hwasan_memcpy);
|
|
#endif
|
|
|
|
void kasan_poison(const void *addr, size_t size, u8 value, bool init)
|
|
{
|
|
void *shadow_start, *shadow_end;
|
|
|
|
if (!kasan_arch_is_ready())
|
|
return;
|
|
|
|
/*
|
|
* Perform shadow offset calculation based on untagged address, as
|
|
* some of the callers (e.g. kasan_poison_object_data) pass tagged
|
|
* addresses to this function.
|
|
*/
|
|
addr = kasan_reset_tag(addr);
|
|
|
|
/* Skip KFENCE memory if called explicitly outside of sl*b. */
|
|
if (is_kfence_address(addr))
|
|
return;
|
|
|
|
if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
|
|
return;
|
|
if (WARN_ON(size & KASAN_GRANULE_MASK))
|
|
return;
|
|
|
|
shadow_start = kasan_mem_to_shadow(addr);
|
|
shadow_end = kasan_mem_to_shadow(addr + size);
|
|
|
|
__memset(shadow_start, value, shadow_end - shadow_start);
|
|
}
|
|
EXPORT_SYMBOL(kasan_poison);
|
|
|
|
#ifdef CONFIG_KASAN_GENERIC
|
|
void kasan_poison_last_granule(const void *addr, size_t size)
|
|
{
|
|
if (!kasan_arch_is_ready())
|
|
return;
|
|
|
|
if (size & KASAN_GRANULE_MASK) {
|
|
u8 *shadow = (u8 *)kasan_mem_to_shadow(addr + size);
|
|
*shadow = size & KASAN_GRANULE_MASK;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
void kasan_unpoison(const void *addr, size_t size, bool init)
|
|
{
|
|
u8 tag = get_tag(addr);
|
|
|
|
/*
|
|
* Perform shadow offset calculation based on untagged address, as
|
|
* some of the callers (e.g. kasan_unpoison_object_data) pass tagged
|
|
* addresses to this function.
|
|
*/
|
|
addr = kasan_reset_tag(addr);
|
|
|
|
/*
|
|
* Skip KFENCE memory if called explicitly outside of sl*b. Also note
|
|
* that calls to ksize(), where size is not a multiple of machine-word
|
|
* size, would otherwise poison the invalid portion of the word.
|
|
*/
|
|
if (is_kfence_address(addr))
|
|
return;
|
|
|
|
if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
|
|
return;
|
|
|
|
/* Unpoison all granules that cover the object. */
|
|
kasan_poison(addr, round_up(size, KASAN_GRANULE_SIZE), tag, false);
|
|
|
|
/* Partially poison the last granule for the generic mode. */
|
|
if (IS_ENABLED(CONFIG_KASAN_GENERIC))
|
|
kasan_poison_last_granule(addr, size);
|
|
}
|
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
static bool shadow_mapped(unsigned long addr)
|
|
{
|
|
pgd_t *pgd = pgd_offset_k(addr);
|
|
p4d_t *p4d;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
|
|
if (pgd_none(*pgd))
|
|
return false;
|
|
p4d = p4d_offset(pgd, addr);
|
|
if (p4d_none(*p4d))
|
|
return false;
|
|
pud = pud_offset(p4d, addr);
|
|
if (pud_none(*pud))
|
|
return false;
|
|
|
|
/*
|
|
* We can't use pud_large() or pud_huge(), the first one is
|
|
* arch-specific, the last one depends on HUGETLB_PAGE. So let's abuse
|
|
* pud_bad(), if pud is bad then it's bad because it's huge.
|
|
*/
|
|
if (pud_bad(*pud))
|
|
return true;
|
|
pmd = pmd_offset(pud, addr);
|
|
if (pmd_none(*pmd))
|
|
return false;
|
|
|
|
if (pmd_bad(*pmd))
|
|
return true;
|
|
pte = pte_offset_kernel(pmd, addr);
|
|
return !pte_none(ptep_get(pte));
|
|
}
|
|
|
|
static int __meminit kasan_mem_notifier(struct notifier_block *nb,
|
|
unsigned long action, void *data)
|
|
{
|
|
struct memory_notify *mem_data = data;
|
|
unsigned long nr_shadow_pages, start_kaddr, shadow_start;
|
|
unsigned long shadow_end, shadow_size;
|
|
|
|
nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
|
|
start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
|
|
shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
|
|
shadow_size = nr_shadow_pages << PAGE_SHIFT;
|
|
shadow_end = shadow_start + shadow_size;
|
|
|
|
if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) ||
|
|
WARN_ON(start_kaddr % KASAN_MEMORY_PER_SHADOW_PAGE))
|
|
return NOTIFY_BAD;
|
|
|
|
switch (action) {
|
|
case MEM_GOING_ONLINE: {
|
|
void *ret;
|
|
|
|
/*
|
|
* If shadow is mapped already than it must have been mapped
|
|
* during the boot. This could happen if we onlining previously
|
|
* offlined memory.
|
|
*/
|
|
if (shadow_mapped(shadow_start))
|
|
return NOTIFY_OK;
|
|
|
|
ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
|
|
shadow_end, GFP_KERNEL,
|
|
PAGE_KERNEL, VM_NO_GUARD,
|
|
pfn_to_nid(mem_data->start_pfn),
|
|
__builtin_return_address(0));
|
|
if (!ret)
|
|
return NOTIFY_BAD;
|
|
|
|
kmemleak_ignore(ret);
|
|
return NOTIFY_OK;
|
|
}
|
|
case MEM_CANCEL_ONLINE:
|
|
case MEM_OFFLINE: {
|
|
struct vm_struct *vm;
|
|
|
|
/*
|
|
* shadow_start was either mapped during boot by kasan_init()
|
|
* or during memory online by __vmalloc_node_range().
|
|
* In the latter case we can use vfree() to free shadow.
|
|
* Non-NULL result of the find_vm_area() will tell us if
|
|
* that was the second case.
|
|
*
|
|
* Currently it's not possible to free shadow mapped
|
|
* during boot by kasan_init(). It's because the code
|
|
* to do that hasn't been written yet. So we'll just
|
|
* leak the memory.
|
|
*/
|
|
vm = find_vm_area((void *)shadow_start);
|
|
if (vm)
|
|
vfree((void *)shadow_start);
|
|
}
|
|
}
|
|
|
|
return NOTIFY_OK;
|
|
}
|
|
|
|
static int __init kasan_memhotplug_init(void)
|
|
{
|
|
hotplug_memory_notifier(kasan_mem_notifier, DEFAULT_CALLBACK_PRI);
|
|
|
|
return 0;
|
|
}
|
|
|
|
core_initcall(kasan_memhotplug_init);
|
|
#endif
|
|
|
|
#ifdef CONFIG_KASAN_VMALLOC
|
|
|
|
void __init __weak kasan_populate_early_vm_area_shadow(void *start,
|
|
unsigned long size)
|
|
{
|
|
}
|
|
|
|
static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
|
|
void *unused)
|
|
{
|
|
unsigned long page;
|
|
pte_t pte;
|
|
|
|
if (likely(!pte_none(ptep_get(ptep))))
|
|
return 0;
|
|
|
|
page = __get_free_page(GFP_KERNEL);
|
|
if (!page)
|
|
return -ENOMEM;
|
|
|
|
memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
|
|
pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
|
|
|
|
spin_lock(&init_mm.page_table_lock);
|
|
if (likely(pte_none(ptep_get(ptep)))) {
|
|
set_pte_at(&init_mm, addr, ptep, pte);
|
|
page = 0;
|
|
}
|
|
spin_unlock(&init_mm.page_table_lock);
|
|
if (page)
|
|
free_page(page);
|
|
return 0;
|
|
}
|
|
|
|
int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
|
|
{
|
|
unsigned long shadow_start, shadow_end;
|
|
int ret;
|
|
|
|
if (!kasan_arch_is_ready())
|
|
return 0;
|
|
|
|
if (!is_vmalloc_or_module_addr((void *)addr))
|
|
return 0;
|
|
|
|
shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
|
|
shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
|
|
|
|
/*
|
|
* User Mode Linux maps enough shadow memory for all of virtual memory
|
|
* at boot, so doesn't need to allocate more on vmalloc, just clear it.
|
|
*
|
|
* The remaining CONFIG_UML checks in this file exist for the same
|
|
* reason.
|
|
*/
|
|
if (IS_ENABLED(CONFIG_UML)) {
|
|
__memset((void *)shadow_start, KASAN_VMALLOC_INVALID, shadow_end - shadow_start);
|
|
return 0;
|
|
}
|
|
|
|
shadow_start = PAGE_ALIGN_DOWN(shadow_start);
|
|
shadow_end = PAGE_ALIGN(shadow_end);
|
|
|
|
ret = apply_to_page_range(&init_mm, shadow_start,
|
|
shadow_end - shadow_start,
|
|
kasan_populate_vmalloc_pte, NULL);
|
|
if (ret)
|
|
return ret;
|
|
|
|
flush_cache_vmap(shadow_start, shadow_end);
|
|
|
|
/*
|
|
* We need to be careful about inter-cpu effects here. Consider:
|
|
*
|
|
* CPU#0 CPU#1
|
|
* WRITE_ONCE(p, vmalloc(100)); while (x = READ_ONCE(p)) ;
|
|
* p[99] = 1;
|
|
*
|
|
* With compiler instrumentation, that ends up looking like this:
|
|
*
|
|
* CPU#0 CPU#1
|
|
* // vmalloc() allocates memory
|
|
* // let a = area->addr
|
|
* // we reach kasan_populate_vmalloc
|
|
* // and call kasan_unpoison:
|
|
* STORE shadow(a), unpoison_val
|
|
* ...
|
|
* STORE shadow(a+99), unpoison_val x = LOAD p
|
|
* // rest of vmalloc process <data dependency>
|
|
* STORE p, a LOAD shadow(x+99)
|
|
*
|
|
* If there is no barrier between the end of unpoisoning the shadow
|
|
* and the store of the result to p, the stores could be committed
|
|
* in a different order by CPU#0, and CPU#1 could erroneously observe
|
|
* poison in the shadow.
|
|
*
|
|
* We need some sort of barrier between the stores.
|
|
*
|
|
* In the vmalloc() case, this is provided by a smp_wmb() in
|
|
* clear_vm_uninitialized_flag(). In the per-cpu allocator and in
|
|
* get_vm_area() and friends, the caller gets shadow allocated but
|
|
* doesn't have any pages mapped into the virtual address space that
|
|
* has been reserved. Mapping those pages in will involve taking and
|
|
* releasing a page-table lock, which will provide the barrier.
|
|
*/
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
|
|
void *unused)
|
|
{
|
|
unsigned long page;
|
|
|
|
page = (unsigned long)__va(pte_pfn(ptep_get(ptep)) << PAGE_SHIFT);
|
|
|
|
spin_lock(&init_mm.page_table_lock);
|
|
|
|
if (likely(!pte_none(ptep_get(ptep)))) {
|
|
pte_clear(&init_mm, addr, ptep);
|
|
free_page(page);
|
|
}
|
|
spin_unlock(&init_mm.page_table_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Release the backing for the vmalloc region [start, end), which
|
|
* lies within the free region [free_region_start, free_region_end).
|
|
*
|
|
* This can be run lazily, long after the region was freed. It runs
|
|
* under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
|
|
* infrastructure.
|
|
*
|
|
* How does this work?
|
|
* -------------------
|
|
*
|
|
* We have a region that is page aligned, labeled as A.
|
|
* That might not map onto the shadow in a way that is page-aligned:
|
|
*
|
|
* start end
|
|
* v v
|
|
* |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
|
|
* -------- -------- -------- -------- --------
|
|
* | | | | |
|
|
* | | | /-------/ |
|
|
* \-------\|/------/ |/---------------/
|
|
* ||| ||
|
|
* |??AAAAAA|AAAAAAAA|AA??????| < shadow
|
|
* (1) (2) (3)
|
|
*
|
|
* First we align the start upwards and the end downwards, so that the
|
|
* shadow of the region aligns with shadow page boundaries. In the
|
|
* example, this gives us the shadow page (2). This is the shadow entirely
|
|
* covered by this allocation.
|
|
*
|
|
* Then we have the tricky bits. We want to know if we can free the
|
|
* partially covered shadow pages - (1) and (3) in the example. For this,
|
|
* we are given the start and end of the free region that contains this
|
|
* allocation. Extending our previous example, we could have:
|
|
*
|
|
* free_region_start free_region_end
|
|
* | start end |
|
|
* v v v v
|
|
* |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
|
|
* -------- -------- -------- -------- --------
|
|
* | | | | |
|
|
* | | | /-------/ |
|
|
* \-------\|/------/ |/---------------/
|
|
* ||| ||
|
|
* |FFAAAAAA|AAAAAAAA|AAF?????| < shadow
|
|
* (1) (2) (3)
|
|
*
|
|
* Once again, we align the start of the free region up, and the end of
|
|
* the free region down so that the shadow is page aligned. So we can free
|
|
* page (1) - we know no allocation currently uses anything in that page,
|
|
* because all of it is in the vmalloc free region. But we cannot free
|
|
* page (3), because we can't be sure that the rest of it is unused.
|
|
*
|
|
* We only consider pages that contain part of the original region for
|
|
* freeing: we don't try to free other pages from the free region or we'd
|
|
* end up trying to free huge chunks of virtual address space.
|
|
*
|
|
* Concurrency
|
|
* -----------
|
|
*
|
|
* How do we know that we're not freeing a page that is simultaneously
|
|
* being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
|
|
*
|
|
* We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
|
|
* at the same time. While we run under free_vmap_area_lock, the population
|
|
* code does not.
|
|
*
|
|
* free_vmap_area_lock instead operates to ensure that the larger range
|
|
* [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
|
|
* the per-cpu region-finding algorithm both run under free_vmap_area_lock,
|
|
* no space identified as free will become used while we are running. This
|
|
* means that so long as we are careful with alignment and only free shadow
|
|
* pages entirely covered by the free region, we will not run in to any
|
|
* trouble - any simultaneous allocations will be for disjoint regions.
|
|
*/
|
|
void kasan_release_vmalloc(unsigned long start, unsigned long end,
|
|
unsigned long free_region_start,
|
|
unsigned long free_region_end)
|
|
{
|
|
void *shadow_start, *shadow_end;
|
|
unsigned long region_start, region_end;
|
|
unsigned long size;
|
|
|
|
if (!kasan_arch_is_ready())
|
|
return;
|
|
|
|
region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE);
|
|
region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE);
|
|
|
|
free_region_start = ALIGN(free_region_start, KASAN_MEMORY_PER_SHADOW_PAGE);
|
|
|
|
if (start != region_start &&
|
|
free_region_start < region_start)
|
|
region_start -= KASAN_MEMORY_PER_SHADOW_PAGE;
|
|
|
|
free_region_end = ALIGN_DOWN(free_region_end, KASAN_MEMORY_PER_SHADOW_PAGE);
|
|
|
|
if (end != region_end &&
|
|
free_region_end > region_end)
|
|
region_end += KASAN_MEMORY_PER_SHADOW_PAGE;
|
|
|
|
shadow_start = kasan_mem_to_shadow((void *)region_start);
|
|
shadow_end = kasan_mem_to_shadow((void *)region_end);
|
|
|
|
if (shadow_end > shadow_start) {
|
|
size = shadow_end - shadow_start;
|
|
if (IS_ENABLED(CONFIG_UML)) {
|
|
__memset(shadow_start, KASAN_SHADOW_INIT, shadow_end - shadow_start);
|
|
return;
|
|
}
|
|
apply_to_existing_page_range(&init_mm,
|
|
(unsigned long)shadow_start,
|
|
size, kasan_depopulate_vmalloc_pte,
|
|
NULL);
|
|
flush_tlb_kernel_range((unsigned long)shadow_start,
|
|
(unsigned long)shadow_end);
|
|
}
|
|
}
|
|
|
|
void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
|
|
kasan_vmalloc_flags_t flags)
|
|
{
|
|
/*
|
|
* Software KASAN modes unpoison both VM_ALLOC and non-VM_ALLOC
|
|
* mappings, so the KASAN_VMALLOC_VM_ALLOC flag is ignored.
|
|
* Software KASAN modes can't optimize zeroing memory by combining it
|
|
* with setting memory tags, so the KASAN_VMALLOC_INIT flag is ignored.
|
|
*/
|
|
|
|
if (!kasan_arch_is_ready())
|
|
return (void *)start;
|
|
|
|
if (!is_vmalloc_or_module_addr(start))
|
|
return (void *)start;
|
|
|
|
/*
|
|
* Don't tag executable memory with the tag-based mode.
|
|
* The kernel doesn't tolerate having the PC register tagged.
|
|
*/
|
|
if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) &&
|
|
!(flags & KASAN_VMALLOC_PROT_NORMAL))
|
|
return (void *)start;
|
|
|
|
start = set_tag(start, kasan_random_tag());
|
|
kasan_unpoison(start, size, false);
|
|
return (void *)start;
|
|
}
|
|
|
|
/*
|
|
* Poison the shadow for a vmalloc region. Called as part of the
|
|
* freeing process at the time the region is freed.
|
|
*/
|
|
void __kasan_poison_vmalloc(const void *start, unsigned long size)
|
|
{
|
|
if (!kasan_arch_is_ready())
|
|
return;
|
|
|
|
if (!is_vmalloc_or_module_addr(start))
|
|
return;
|
|
|
|
size = round_up(size, KASAN_GRANULE_SIZE);
|
|
kasan_poison(start, size, KASAN_VMALLOC_INVALID, false);
|
|
}
|
|
|
|
#else /* CONFIG_KASAN_VMALLOC */
|
|
|
|
int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask)
|
|
{
|
|
void *ret;
|
|
size_t scaled_size;
|
|
size_t shadow_size;
|
|
unsigned long shadow_start;
|
|
|
|
shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
|
|
scaled_size = (size + KASAN_GRANULE_SIZE - 1) >>
|
|
KASAN_SHADOW_SCALE_SHIFT;
|
|
shadow_size = round_up(scaled_size, PAGE_SIZE);
|
|
|
|
if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
|
|
return -EINVAL;
|
|
|
|
if (IS_ENABLED(CONFIG_UML)) {
|
|
__memset((void *)shadow_start, KASAN_SHADOW_INIT, shadow_size);
|
|
return 0;
|
|
}
|
|
|
|
ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
|
|
shadow_start + shadow_size,
|
|
GFP_KERNEL,
|
|
PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
|
|
__builtin_return_address(0));
|
|
|
|
if (ret) {
|
|
struct vm_struct *vm = find_vm_area(addr);
|
|
__memset(ret, KASAN_SHADOW_INIT, shadow_size);
|
|
vm->flags |= VM_KASAN;
|
|
kmemleak_ignore(ret);
|
|
|
|
if (vm->flags & VM_DEFER_KMEMLEAK)
|
|
kmemleak_vmalloc(vm, size, gfp_mask);
|
|
|
|
return 0;
|
|
}
|
|
|
|
return -ENOMEM;
|
|
}
|
|
|
|
void kasan_free_module_shadow(const struct vm_struct *vm)
|
|
{
|
|
if (IS_ENABLED(CONFIG_UML))
|
|
return;
|
|
|
|
if (vm->flags & VM_KASAN)
|
|
vfree(kasan_mem_to_shadow(vm->addr));
|
|
}
|
|
|
|
#endif
|