linux/arch/s390/mm/init.c
Alexander Gordeev 4df29d2b90 s390/smp: rework absolute lowcore access
Temporary unsetting of the prefix page in memcpy_absolute() routine
poses a risk of executing code path with unexpectedly disabled prefix
page. This rework avoids the prefix page uninstalling and disabling
of normal and machine check interrupts when accessing the absolute
zero memory.

Although memcpy_absolute() routine can access the whole memory, it is
only used to update the absolute zero lowcore. This rework therefore
introduces a new mechanism for the absolute zero lowcore access and
scraps memcpy_absolute() routine for good.

Instead, an area is reserved in the virtual memory that is used for
the absolute lowcore access only. That area holds an array of 8KB
virtual mappings - one per CPU. Whenever a CPU is brought online, the
corresponding item is mapped to the real address of the previously
installed prefix page.

The absolute zero lowcore access works like this: a CPU calls the
new primitive get_abs_lowcore() to obtain its 8KB mapping as a
pointer to the struct lowcore. Virtual address references to that
pointer get translated to the real addresses of the prefix page,
which in turn gets swapped with the absolute zero memory addresses
due to prefixing. Once the pointer is not needed it must be released
with put_abs_lowcore() primitive:

	struct lowcore *abs_lc;
	unsigned long flags;

	abs_lc = get_abs_lowcore(&flags);
	abs_lc->... = ...;
	put_abs_lowcore(abs_lc, flags);

To ensure the described mechanism works large segment- and region-
table entries must be avoided for the 8KB mappings. Failure to do
so results in usage of Region-Frame Absolute Address (RFAA) or
Segment-Frame Absolute Address (SFAA) large page fields. In that
case absolute addresses would be used to address the prefix page
instead of the real ones and the prefixing would get bypassed.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2022-09-14 16:46:00 +02:00

311 lines
7.7 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* S390 version
* Copyright IBM Corp. 1999
* Author(s): Hartmut Penner (hp@de.ibm.com)
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1995 Linus Torvalds
*/
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/initrd.h>
#include <linux/export.h>
#include <linux/cma.h>
#include <linux/gfp.h>
#include <linux/dma-direct.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/kfence.h>
#include <asm/ptdump.h>
#include <asm/dma.h>
#include <asm/abs_lowcore.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/ctl_reg.h>
#include <asm/sclp.h>
#include <asm/set_memory.h>
#include <asm/kasan.h>
#include <asm/dma-mapping.h>
#include <asm/uv.h>
#include <linux/virtio_anchor.h>
#include <linux/virtio_config.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
unsigned long s390_invalid_asce;
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
unsigned int order;
struct page *page;
int i;
/* Latest machines require a mapping granularity of 512KB */
order = 7;
/* Limit number of empty zero pages for small memory sizes */
while (order > 2 && (totalram_pages() >> 10) < (1UL << order))
order--;
empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!empty_zero_page)
panic("Out of memory in setup_zero_pages");
page = virt_to_page((void *) empty_zero_page);
split_page(page, order);
for (i = 1 << order; i > 0; i--) {
mark_page_reserved(page);
page++;
}
zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
}
/*
* paging_init() sets up the page tables
*/
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
unsigned long pgd_type, asce_bits;
psw_t psw;
s390_invalid_asce = (unsigned long)invalid_pg_dir;
s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
init_mm.pgd = swapper_pg_dir;
if (VMALLOC_END > _REGION2_SIZE) {
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION2_ENTRY_EMPTY;
} else {
asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION3_ENTRY_EMPTY;
}
init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
S390_lowcore.kernel_asce = init_mm.context.asce;
S390_lowcore.user_asce = s390_invalid_asce;
crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
vmem_map_init();
kasan_copy_shadow_mapping();
/* enable virtual mapping in kernel mode */
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.user_asce, 7, 7);
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
psw.mask = __extract_psw();
psw_bits(psw).dat = 1;
psw_bits(psw).as = PSW_BITS_AS_HOME;
__load_psw_mask(psw.mask);
kasan_free_early_identity();
sparse_init();
zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
free_area_init(max_zone_pfns);
}
void mark_rodata_ro(void)
{
unsigned long size = __end_ro_after_init - __start_ro_after_init;
set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
debug_checkwx();
}
int set_memory_encrypted(unsigned long addr, int numpages)
{
int i;
/* make specified pages unshared, (swiotlb, dma_free) */
for (i = 0; i < numpages; ++i) {
uv_remove_shared(addr);
addr += PAGE_SIZE;
}
return 0;
}
int set_memory_decrypted(unsigned long addr, int numpages)
{
int i;
/* make specified pages shared (swiotlb, dma_alloca) */
for (i = 0; i < numpages; ++i) {
uv_set_shared(addr);
addr += PAGE_SIZE;
}
return 0;
}
/* are we a protected virtualization guest? */
bool force_dma_unencrypted(struct device *dev)
{
return is_prot_virt_guest();
}
/* protected virtualization */
static void pv_init(void)
{
if (!is_prot_virt_guest())
return;
virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
/* make sure bounce buffers are shared */
swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE);
swiotlb_update_mem_attributes();
}
void __init mem_init(void)
{
cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(0, mm_cpumask(&init_mm));
set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
pv_init();
kfence_split_mapping();
/* Setup guest page hinting */
cmma_init();
/* this will put all low memory onto the freelists */
memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
cmma_init_nodat();
}
void free_initmem(void)
{
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RW | SET_MEMORY_NX);
free_reserved_area(sclp_early_sccb,
sclp_early_sccb + EXT_SCCB_READ_SCP,
POISON_FREE_INITMEM, "unused early sccb");
free_initmem_default(POISON_FREE_INITMEM);
}
unsigned long memory_block_size_bytes(void)
{
/*
* Make sure the memory block size is always greater
* or equal than the memory increment size.
*/
return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm);
}
#ifdef CONFIG_MEMORY_HOTPLUG
#ifdef CONFIG_CMA
/* Prevent memory blocks which contain cma regions from going offline */
struct s390_cma_mem_data {
unsigned long start;
unsigned long end;
};
static int s390_cma_check_range(struct cma *cma, void *data)
{
struct s390_cma_mem_data *mem_data;
unsigned long start, end;
mem_data = data;
start = cma_get_base(cma);
end = start + cma_get_size(cma);
if (end < mem_data->start)
return 0;
if (start >= mem_data->end)
return 0;
return -EBUSY;
}
static int s390_cma_mem_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct s390_cma_mem_data mem_data;
struct memory_notify *arg;
int rc = 0;
arg = data;
mem_data.start = arg->start_pfn << PAGE_SHIFT;
mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT);
if (action == MEM_GOING_OFFLINE)
rc = cma_for_each_area(s390_cma_check_range, &mem_data);
return notifier_from_errno(rc);
}
static struct notifier_block s390_cma_mem_nb = {
.notifier_call = s390_cma_mem_notifier,
};
static int __init s390_cma_mem_init(void)
{
return register_memory_notifier(&s390_cma_mem_nb);
}
device_initcall(s390_cma_mem_init);
#endif /* CONFIG_CMA */
int arch_add_memory(int nid, u64 start, u64 size,
struct mhp_params *params)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
int rc;
if (WARN_ON_ONCE(params->altmap))
return -EINVAL;
if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
return -EINVAL;
VM_BUG_ON(!mhp_range_allowed(start, size, true));
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
rc = __add_pages(nid, start_pfn, size_pages, params);
if (rc)
vmem_remove_mapping(start, size);
return rc;
}
void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
__remove_pages(start_pfn, nr_pages, altmap);
vmem_remove_mapping(start, size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */