s390/mm: simplify kernel mapping setup

The kernel mapping is setup in two stages: in the decompressor map all
pages with RWX permissions, and within the kernel change all mappings to
their final permissions, where most of the mappings are changed from RWX to
RWNX.

Change this and map all pages RWNX from the beginning, however without
enabling noexec via control register modification. This means that
effectively all pages are used with RWX permissions like before. When the
final permissions have been applied to the kernel mapping enable noexec via
control register modification.

This allows to remove quite a bit of non-obvious code.

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
This commit is contained in:
Heiko Carstens 2023-08-25 14:29:48 +02:00
parent b6f10e2f66
commit c0f1d47812
4 changed files with 15 additions and 114 deletions

View File

@ -53,10 +53,8 @@ static void detect_facilities(void)
}
if (test_facility(78))
machine.has_edat2 = 1;
if (test_facility(130)) {
if (test_facility(130))
machine.has_nx = 1;
__ctl_set_bit(0, 20);
}
}
static void setup_lpp(void)

View File

@ -287,7 +287,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
if (kasan_pte_populate_zero_shadow(pte, mode))
continue;
entry = __pte(_pa(addr, PAGE_SIZE, mode));
entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
entry = set_pte_bit(entry, PAGE_KERNEL);
if (!machine.has_nx)
entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
set_pte(pte, entry);
pages++;
}
@ -311,7 +313,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
continue;
if (can_large_pmd(pmd, addr, next)) {
entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
entry = set_pmd_bit(entry, SEGMENT_KERNEL);
if (!machine.has_nx)
entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmd, entry);
pages++;
continue;
@ -342,7 +346,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
continue;
if (can_large_pud(pud, addr, next)) {
entry = __pud(_pa(addr, _REGION3_SIZE, mode));
entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
entry = set_pud_bit(entry, REGION3_KERNEL);
if (!machine.has_nx)
entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
set_pud(pud, entry);
pages++;
continue;

View File

@ -232,10 +232,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
__ctl_set_bit(0, 17);
}
if (test_facility(130)) {
if (test_facility(130))
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
__ctl_set_bit(0, 20);
}
if (test_facility(133))
S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
if (test_facility(139) && (tod_clock_base.tod >> 63)) {

View File

@ -5,7 +5,6 @@
#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
#include <linux/kasan.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
@ -650,108 +649,8 @@ void vmem_unmap_4k_page(unsigned long addr)
mutex_unlock(&vmem_mutex);
}
static int __init memblock_region_cmp(const void *a, const void *b)
{
const struct memblock_region *r1 = a;
const struct memblock_region *r2 = b;
if (r1->base < r2->base)
return -1;
if (r1->base > r2->base)
return 1;
return 0;
}
static void __init memblock_region_swap(void *a, void *b, int size)
{
swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
}
#ifdef CONFIG_KASAN
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
static inline int set_memory_kasan(unsigned long start, unsigned long end)
{
start = PAGE_ALIGN_DOWN(__sha(start));
end = PAGE_ALIGN(__sha(end));
return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
}
#endif
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
* additional memory segments.
*/
void __init vmem_map_init(void)
{
struct memblock_region memory_rwx_regions[] = {
{
.base = 0,
.size = sizeof(struct lowcore),
.flags = MEMBLOCK_NONE,
#ifdef CONFIG_NUMA
.nid = NUMA_NO_NODE,
#endif
},
{
.base = __pa(_stext),
.size = _etext - _stext,
.flags = MEMBLOCK_NONE,
#ifdef CONFIG_NUMA
.nid = NUMA_NO_NODE,
#endif
},
{
.base = __pa(_sinittext),
.size = _einittext - _sinittext,
.flags = MEMBLOCK_NONE,
#ifdef CONFIG_NUMA
.nid = NUMA_NO_NODE,
#endif
},
{
.base = __stext_amode31,
.size = __etext_amode31 - __stext_amode31,
.flags = MEMBLOCK_NONE,
#ifdef CONFIG_NUMA
.nid = NUMA_NO_NODE,
#endif
},
};
struct memblock_type memory_rwx = {
.regions = memory_rwx_regions,
.cnt = ARRAY_SIZE(memory_rwx_regions),
.max = ARRAY_SIZE(memory_rwx_regions),
};
phys_addr_t base, end;
u64 i;
/*
* Set RW+NX attribute on all memory, except regions enumerated with
* memory_rwx exclude type. These regions need different attributes,
* which are enforced afterwards.
*
* __for_each_mem_range() iterate and exclude types should be sorted.
* The relative location of _stext and _sinittext is hardcoded in the
* linker script. However a location of __stext_amode31 and the kernel
* image itself are chosen dynamically. Thus, sort the exclude type.
*/
sort(&memory_rwx_regions,
ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
memblock_region_cmp, memblock_region_swap);
__for_each_mem_range(i, &memblock.memory, &memory_rwx,
NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
set_memory_rwnx((unsigned long)__va(base),
(end - base) >> PAGE_SHIFT);
}
#ifdef CONFIG_KASAN
for_each_mem_range(i, &base, &end) {
set_memory_kasan((unsigned long)__va(base),
(unsigned long)__va(end));
}
#endif
set_memory_rox((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT);
set_memory_ro((unsigned long)_etext,
@ -762,14 +661,14 @@ void __init vmem_map_init(void)
(__etext_amode31 - __stext_amode31) >> PAGE_SHIFT);
/* lowcore must be executable for LPSWE */
if (static_key_enabled(&cpu_has_bear))
set_memory_nx(0, 1);
set_memory_nx(PAGE_SIZE, 1);
if (!static_key_enabled(&cpu_has_bear))
set_memory_x(0, 1);
if (debug_pagealloc_enabled()) {
set_memory_4k((unsigned long)__va(0),
ident_map_size >> PAGE_SHIFT);
}
if (MACHINE_HAS_NX)
ctl_set_bit(0, 20);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}