// SPDX-License-Identifier: GPL-2.0

/*
 * MMU-generic set_memory implementation for powerpc
 *
 * Copyright 2019-2021, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>

#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/pgtable.h>

#include <mm/mmu_decl.h>
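
/*
 * Apply a delta to the protection bits of a kernel PTE: clear the bits
 * present in @old but not in @new, and set the bits present in @new but
 * not in @old, in a single pte_update() call so the PTE never passes
 * through an invalid state.
 */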
static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr,
				    unsigned long old, unsigned long new)
{
	return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0);
}

/*
 * Updates the attributes of a page atomically.
 *
 * This sequence is safe against concurrent updates, and also allows updating the
 * attributes of a page currently being executed or accessed.
 */
static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
{
	long action = (long)data;
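
	/*
	 * Align the address so flush_tlb_kernel_range() below sees a
	 * single-page range and can use a targeted tlbie rather than a
	 * full tlbia.
	 */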
	addr &= PAGE_MASK;
	/* modify the PTE bits as desired */
	switch (action) {
	case SET_MEMORY_RO:
		/* Don't clear DIRTY bit */
		pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO);
		break;
	case SET_MEMORY_ROX:
		/* Don't clear DIRTY bit */
		pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_ROX);
		break;
	case SET_MEMORY_RW:
		pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW);
		break;
	case SET_MEMORY_NX:
		pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO);
		break;
	case SET_MEMORY_X:
		pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX);
		break;
	case SET_MEMORY_NP:
		pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0);
		break;
	case SET_MEMORY_P:
		pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	/* See ptesync comment in radix__set_pte_at() */
	if (radix_enabled())
		asm volatile("ptesync": : :"memory");
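
	/*
	 * addr was aligned to PAGE_MASK above, so this flushes exactly
	 * the one page whose PTE was updated.
	 */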
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);

	return 0;
}
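
/*
 * Change the protection attributes of @numpages pages starting at @addr.
 * The powerpc set_memory_*() helpers funnel into this; an illustrative
 * direct call (not from this file) making one page read-only would be:
 *
 *	change_memory_attr((unsigned long)addr, 1, SET_MEMORY_RO);
 */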
int change_memory_attr(unsigned long addr, int numpages, long action)
{
	unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
	unsigned long size = numpages * PAGE_SIZE;

	if (!numpages)
		return 0;
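
	/*
	 * Huge vmalloc mappings are rejected: apply_to_existing_page_range()
	 * below operates on individual PTEs and cannot change the attributes
	 * of a hugepage mapping.
	 */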
	if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) &&
			 is_vm_area_hugepages((void *)addr)))
		return -EINVAL;

#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * On hash, the linear mapping is not in the Linux page table so
	 * apply_to_existing_page_range() will have no effect. If in the future
	 * the set_memory_* functions are used on the linear map this will need
	 * to be updated.
	 */
	if (!radix_enabled()) {
		int region = get_region_id(addr);

		if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID))
			return -EINVAL;
	}
#endif

	return apply_to_existing_page_range(&init_mm, start, size,
					    change_page_attr, (void *)action);
}

#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
#ifdef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC
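/*
 * Map (@enable != 0) or unmap kernel pages for DEBUG_PAGEALLOC/KFENCE by
 * toggling _PAGE_PRESENT via set_memory_p()/set_memory_np(). Hash MMUs
 * manage the linear map outside the Linux page table, so they go through
 * hash__kernel_map_pages() instead.
 */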
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	int err;
	unsigned long addr = (unsigned long)page_address(page);

	if (PageHighMem(page))
		return;

	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
		err = hash__kernel_map_pages(page, numpages, enable);
	else if (enable)
		err = set_memory_p(addr, numpages);
	else
		err = set_memory_np(addr, numpages);

	if (err)
		panic("%s: changing memory protections failed\n", __func__);
}
#endif
#endif