KVM: arm64: Prepare the creation of s1 mappings at EL2

When memory protection is enabled, the EL2 code needs the ability to
create and manage its own page-table. To do so, introduce a new set of
hypercalls to bootstrap a memory management system at EL2.

This leads to the following boot flow in nVHE Protected mode:

 1. the host allocates memory for the hypervisor very early on, using
    the memblock API;
 2. the host creates a set of stage 1 page-tables for EL2, installs the
    EL2 vectors, and issues the __pkvm_init hypercall;
 3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
    and stores it in the memory pool provided by the host;
 4. the hypervisor then extends its stage 1 mappings to include a
    vmemmap in the EL2 VA space, hence allowing it to use the buddy
    allocator introduced in a previous patch;
 5. the hypervisor jumps back into the idmap page, switches from the
    host-provided page-table to the new one, and wraps up its
    initialization by enabling the new allocator, before returning to
    the host;
 6. the host can free the now unused page-table created for EL2, and
    will now need to issue hypercalls to make changes to the EL2 stage 1
    mappings instead of modifying them directly.

Note that for the sake of simplifying the review, this patch focuses on
the hypervisor side of things. In other words, this only implements the
new hypercalls, but does not make use of them from the host yet. The
host-side changes will follow in a subsequent patch.

Credits to Will for __pkvm_init_switch_pgd.
Acked-by: Will Deacon <will@kernel.org>
Co-authored-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210319100146.1149909-18-qperret@google.com
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>
#include <asm/kvm_pkvm.h>
#include "hyp_constants.h"
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;
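
/*
 * The hyp memblock array mirrors the host's memory map for EL2. Keep it
 * sorted by base address so the hypervisor can locate the region covering
 * a given physical address with a simple search.
 */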
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}
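
/*
 * Copy the host's memblock regions into the EL2-visible hyp_memory array
 * and sort them, so the hypervisor knows which physical ranges are memory.
 */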
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}
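
/*
 * Carve out the memory needed by the EL2 hypervisor early at boot, using
 * the memblock API: hyp stage-1 page-tables, host stage-2 page-tables,
 * the hyp VM table and the hyp vmemmap all come out of this reservation.
 */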
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}
/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize it. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* Indexing of the vcpus to be sequential starting at 0. */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	return 0;

destroy_vm:
	pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}
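
/*
 * Create the EL2 view of the VM on first use, serialized under
 * host_kvm->lock: the __pkvm_init_vm hypercall is only issued once, while
 * no pKVM handle has been assigned to this VM yet.
 */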
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->lock);

	return ret;
}
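
/*
 * Tear down the EL2 view of the VM, if one was created, and free any pages
 * the hypervisor returned through the teardown memcache.
 */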
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
}
int pkvm_init_host_vm(struct kvm *host_kvm)
{
	mutex_init(&host_kvm->lock);
	return 0;
}
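
/*
 * Runs on each CPU when dropping host privileges: the __pkvm_prot_finalize
 * hypercall puts the host under the hypervisor's stage-2 on the calling CPU.
 */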
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}
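
/*
 * Deprivilege the host on all CPUs. After this, the host runs with a
 * stage-2 page-table controlled by the hypervisor, which is why the
 * protected-mode static key must be flipped before the hypercalls are made.
 */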
static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}
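
/*
 * Registered as a device_initcall_sync(): hide the hypervisor's private
 * memory from kmemleak and then drop the host's privileges.
 */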
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);