Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm.git
commit f38b0c9b20

@@ -147,10 +147,29 @@ described as 'basic' will be available.
The new VM has no virtual cpus and no memory.
You probably want to use 0 as machine type.

X86:
^^^^

Supported X86 VM types can be queried via KVM_CAP_VM_TYPES.

S390:
^^^^^

In order to create user controlled virtual machines on S390, check
KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
privileged user (CAP_SYS_ADMIN).

MIPS:
^^^^^

To use hardware assisted virtualization on MIPS (VZ ASE) rather than
the default trap & emulate implementation (which changes the virtual
memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
flag KVM_VM_MIPS_VZ.

ARM64:
^^^^^^

On arm64, the physical address size for a VM (IPA Size limit) is limited
to 40bits by default. The limit can be configured if the host supports the
extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use

@@ -608,18 +627,6 @@ interrupt number dequeues the interrupt.
This is an asynchronous vcpu ioctl and can be invoked from any thread.


4.17 KVM_DEBUG_GUEST
--------------------

:Capability: basic
:Architectures: none
:Type: vcpu ioctl
:Parameters: none
:Returns: -1 on error

Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.


4.18 KVM_GET_MSRS
-----------------

@@ -6192,6 +6199,130 @@ to know what fields can be changed for the system register described by
``op0, op1, crn, crm, op2``. KVM rejects ID register values that describe a
superset of the features supported by the system.

4.140 KVM_SET_USER_MEMORY_REGION2
---------------------------------

:Capability: KVM_CAP_USER_MEMORY2
:Architectures: all
:Type: vm ioctl
:Parameters: struct kvm_userspace_memory_region2 (in)
:Returns: 0 on success, -1 on error

KVM_SET_USER_MEMORY_REGION2 is an extension to KVM_SET_USER_MEMORY_REGION that
allows mapping guest_memfd memory into a guest. All fields shared with
KVM_SET_USER_MEMORY_REGION behave identically. Userspace can set
KVM_MEM_GUEST_MEMFD in flags to have KVM bind the memory region to a given
guest_memfd range of [guest_memfd_offset, guest_memfd_offset + memory_size].
The target guest_memfd must point at a file created via KVM_CREATE_GUEST_MEMFD
on the current VM, and the target range must not be bound to any other memory
region. All standard bounds checks apply (use common sense).

::

  struct kvm_userspace_memory_region2 {
	__u32 slot;
	__u32 flags;
	__u64 guest_phys_addr;
	__u64 memory_size; /* bytes */
	__u64 userspace_addr; /* start of the userspace allocated memory */
	__u64 guest_memfd_offset;
	__u32 guest_memfd;
	__u32 pad1;
	__u64 pad2[14];
  };

A KVM_MEM_GUEST_MEMFD region _must_ have a valid guest_memfd (private memory) and
userspace_addr (shared memory). However, "valid" for userspace_addr simply
means that the address itself must be a legal userspace address. The backing
mapping for userspace_addr is not required to be valid/populated at the time of
KVM_SET_USER_MEMORY_REGION2, e.g. shared memory can be lazily mapped/allocated
on-demand.

When mapping a gfn into the guest, KVM selects shared vs. private, i.e. consumes
userspace_addr vs. guest_memfd, based on the gfn's KVM_MEMORY_ATTRIBUTE_PRIVATE
state. At VM creation time, all memory is shared, i.e. the PRIVATE attribute
is '0' for all gfns. Userspace can control whether memory is shared/private by
toggling KVM_MEMORY_ATTRIBUTE_PRIVATE via KVM_SET_MEMORY_ATTRIBUTES as needed.
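
As a rough userspace sketch (not part of the original patch; ``vm_fd`` is assumed
to come from KVM_CREATE_VM, ``gmem_fd`` from KVM_CREATE_GUEST_MEMFD, and ``size``
is a page-aligned region size), binding a guest_memfd-backed region could look
like::

  /* Shared backing; it does not need to be populated at bind time. */
  void *shared = mmap(NULL, size, PROT_READ | PROT_WRITE,
                      MAP_SHARED | MAP_ANONYMOUS, -1, 0);

  struct kvm_userspace_memory_region2 region = {
          .slot = 0,
          .flags = KVM_MEM_GUEST_MEMFD,
          .guest_phys_addr = 0,
          .memory_size = size,
          .userspace_addr = (__u64)shared,
          .guest_memfd = gmem_fd,
          .guest_memfd_offset = 0,
  };

  if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region))
          err(1, "KVM_SET_USER_MEMORY_REGION2");

Flipping KVM_MEMORY_ATTRIBUTE_PRIVATE for gfns in this slot then makes KVM
consume guest_memfd instead of userspace_addr, as described above.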

4.141 KVM_SET_MEMORY_ATTRIBUTES
-------------------------------

:Capability: KVM_CAP_MEMORY_ATTRIBUTES
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_memory_attributes (in)
:Returns: 0 on success, <0 on error

KVM_SET_MEMORY_ATTRIBUTES allows userspace to set memory attributes for a range
of guest physical memory.

::

  struct kvm_memory_attributes {
	__u64 address;
	__u64 size;
	__u64 attributes;
	__u64 flags;
  };

  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)

The address and size must be page aligned. The supported attributes can be
retrieved via ioctl(KVM_CHECK_EXTENSION) on KVM_CAP_MEMORY_ATTRIBUTES. If
executed on a VM, KVM_CAP_MEMORY_ATTRIBUTES precisely returns the attributes
supported by that VM. If executed at system scope, KVM_CAP_MEMORY_ATTRIBUTES
returns all attributes supported by KVM. The only attribute defined at this
time is KVM_MEMORY_ATTRIBUTE_PRIVATE, which marks the associated gfn as being
guest private memory.

Note, there is no "get" API. Userspace is responsible for explicitly tracking
the state of a gfn/page as needed.

The "flags" field is reserved for future extensions and must be '0'.
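
For illustration only (``vm_fd`` and the gpa/size values here are assumptions,
not part of this document), converting a 2 MiB range to private and then back
to shared might look like::

  struct kvm_memory_attributes attrs = {
          .address = 0x100000,                    /* page-aligned gpa */
          .size = 0x200000,                       /* page-aligned size */
          .attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
          .flags = 0,
  };

  if (ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs))
          err(1, "KVM_SET_MEMORY_ATTRIBUTES");

  /* Clearing the attribute converts the same range back to shared. */
  attrs.attributes = 0;
  if (ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs))
          err(1, "KVM_SET_MEMORY_ATTRIBUTES");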

4.142 KVM_CREATE_GUEST_MEMFD
----------------------------

:Capability: KVM_CAP_GUEST_MEMFD
:Architectures: none
:Type: vm ioctl
:Parameters: struct kvm_create_guest_memfd (in)
:Returns: A file descriptor on success, <0 on error

KVM_CREATE_GUEST_MEMFD creates an anonymous file and returns a file descriptor
that refers to it. guest_memfd files are roughly analogous to files created
via memfd_create(), e.g. guest_memfd files live in RAM, have volatile storage,
and are automatically released when the last reference is dropped. Unlike
"regular" memfd_create() files, guest_memfd files are bound to their owning
virtual machine (see below), cannot be mapped, read, or written by userspace,
and cannot be resized (guest_memfd files do, however, support PUNCH_HOLE).

::

  struct kvm_create_guest_memfd {
	__u64 size;
	__u64 flags;
	__u64 reserved[6];
  };

Conceptually, the inode backing a guest_memfd file represents physical memory,
i.e. is coupled to the virtual machine as a thing, not to a "struct kvm". The
file itself, which is bound to a "struct kvm", is that instance's view of the
underlying memory, e.g. effectively provides the translation of guest addresses
to host memory. This allows for use cases where multiple KVM structures are
used to manage a single virtual machine, e.g. when performing intrahost
migration of a virtual machine.

KVM currently only supports mapping guest_memfd via KVM_SET_USER_MEMORY_REGION2,
and more specifically via the guest_memfd and guest_memfd_offset fields in
"struct kvm_userspace_memory_region2", where guest_memfd_offset is the offset
into the guest_memfd instance. For a given guest_memfd file, there can be at
most one mapping per page, i.e. binding multiple memory regions to a single
guest_memfd range is not allowed (any number of memory regions can be bound to
a single guest_memfd file, but the bound ranges must not overlap).

See KVM_SET_USER_MEMORY_REGION2 for additional details.
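
A minimal usage sketch (``vm_fd`` is assumed to come from KVM_CREATE_VM, and the
sizes are arbitrary examples, not taken from this document)::

  struct kvm_create_guest_memfd gmem = {
          .size = 512ull * 1024 * 1024,   /* 512 MiB of guest-private memory */
          .flags = 0,
  };
  int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);

  if (gmem_fd < 0)
          err(1, "KVM_CREATE_GUEST_MEMFD");

  /* PUNCH_HOLE frees the backing pages of an unused, page-aligned range. */
  if (fallocate(gmem_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                0, 0x200000))
          err(1, "fallocate(PUNCH_HOLE)");

The returned file descriptor is then passed via the guest_memfd field of
struct kvm_userspace_memory_region2 (see 4.140).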

5. The kvm_run structure
========================

@@ -6824,6 +6955,30 @@ array field represents return values. The userspace should update the return
values of SBI call before resuming the VCPU. For more details on RISC-V SBI
spec refer, https://github.com/riscv/riscv-sbi-doc.

::

		/* KVM_EXIT_MEMORY_FAULT */
		struct {
  #define KVM_MEMORY_EXIT_FLAG_PRIVATE	(1ULL << 3)
			__u64 flags;
			__u64 gpa;
			__u64 size;
		} memory_fault;

KVM_EXIT_MEMORY_FAULT indicates the vCPU has encountered a memory fault that
could not be resolved by KVM. The 'gpa' and 'size' (in bytes) describe the
guest physical address range [gpa, gpa + size) of the fault. The 'flags' field
describes properties of the faulting access that are likely pertinent:

 - KVM_MEMORY_EXIT_FLAG_PRIVATE - When set, indicates the memory fault occurred
   on a private memory access. When clear, indicates the fault occurred on a
   shared access.

Note! KVM_EXIT_MEMORY_FAULT is unique among all KVM exit reasons in that it
accompanies a return code of '-1', not '0'! errno will always be set to EFAULT
or EHWPOISON when KVM exits with KVM_EXIT_MEMORY_FAULT; userspace should assume
kvm_run.exit_reason is stale/undefined for all other error numbers.
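
A hedged sketch of the resulting userspace pattern (``run`` is the mmap()ed
struct kvm_run, and ``handle_memory_fault()`` is a hypothetical helper, e.g. one
that toggles KVM_MEMORY_ATTRIBUTE_PRIVATE for the reported range before
retrying)::

  int ret = ioctl(vcpu_fd, KVM_RUN, 0);

  if (ret < 0 && (errno == EFAULT || errno == EHWPOISON) &&
      run->exit_reason == KVM_EXIT_MEMORY_FAULT) {
          bool is_private = run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE;

          /* Resolve the fault (e.g. convert shared<->private), then retry KVM_RUN. */
          handle_memory_fault(run->memory_fault.gpa, run->memory_fault.size,
                              is_private);
  }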

::

		/* KVM_EXIT_NOTIFY */

@@ -7858,6 +8013,27 @@ This capability is aimed to mitigate the threat that malicious VMs can
cause the CPU to become stuck (because event windows don't open up) and make
the CPU unavailable to the host or other VMs.

7.34 KVM_CAP_MEMORY_FAULT_INFO
------------------------------

:Architectures: x86
:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.

The presence of this capability indicates that KVM_RUN will fill
kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if
there is a valid memslot but no backing VMA for the corresponding host virtual
address.

The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns
an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set
to KVM_EXIT_MEMORY_FAULT.

Note: Userspaces which attempt to resolve memory faults so that they can retry
KVM_RUN are encouraged to guard against repeatedly receiving the same
error/annotated fault.

See KVM_EXIT_MEMORY_FAULT for more information.
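
For example (an assumed sketch, with ``kvm_fd`` an open /dev/kvm descriptor),
userspace can probe for the annotation before relying on it::

  bool have_fault_info =
          ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MEMORY_FAULT_INFO) > 0;

  /*
   * Only consult kvm_run.memory_fault after an EFAULT/EHWPOISON return from
   * KVM_RUN when the capability is present.
   */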

8. Other capabilities.
======================

@@ -8596,6 +8772,19 @@ block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
64-bit bitmap (each bit describing a block size). The default value is
0, to disable the eager page splitting.

8.41 KVM_CAP_VM_TYPES
---------------------

:Capability: KVM_CAP_VM_TYPES
:Architectures: x86
:Type: system ioctl

This capability returns a bitmap of supported VM types. The 1-setting of bit @n
means the VM type with value @n is supported. Possible values of @n are::

  #define KVM_X86_DEFAULT_VM		0
  #define KVM_X86_SW_PROTECTED_VM	1
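
As an illustrative sketch (``kvm_fd`` is an assumed open /dev/kvm descriptor,
not named in this document), a VMM could pick the VM type like so::

  int types = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_TYPES);
  unsigned long vm_type = KVM_X86_DEFAULT_VM;
  int vm_fd;

  /* Prefer a software-protected VM when the host advertises support. */
  if (types > 0 && (types & (1 << KVM_X86_SW_PROTECTED_VM)))
          vm_type = KVM_X86_SW_PROTECTED_VM;

  vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, vm_type);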

9. Known KVM API problems
=========================
@ -954,8 +954,6 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
||||
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
void kvm_arm_halt_guest(struct kvm *kvm);
|
||||
void kvm_arm_resume_guest(struct kvm *kvm);
|
||||
|
||||
|
@ -22,15 +22,13 @@ menuconfig KVM
|
||||
bool "Kernel-based Virtual Machine (KVM) support"
|
||||
depends on HAVE_KVM
|
||||
select KVM_GENERIC_HARDWARE_ENABLING
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select KVM_MMIO
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select KVM_XFER_TO_GUEST_WORK
|
||||
select KVM_VFIO
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_DIRTY_RING_ACQ_REL
|
||||
select NEED_KVM_DIRTY_RING_WITH_BITMAP
|
||||
select HAVE_KVM_MSI
|
||||
|
@ -203,7 +203,6 @@ void kvm_flush_tlb_all(void);
|
||||
void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
|
||||
int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
|
@ -22,13 +22,12 @@ config KVM
|
||||
depends on AS_HAS_LVZ_EXTENSION
|
||||
depends on HAVE_KVM
|
||||
select HAVE_KVM_DIRTY_RING_ACQ_REL
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select KVM_GENERIC_HARDWARE_ENABLING
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select KVM_MMIO
|
||||
select KVM_XFER_TO_GUEST_WORK
|
||||
select MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
help
|
||||
Support hosting virtualized guest machines using
|
||||
|
@ -675,7 +675,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
|
||||
*
|
||||
* There are several ways to safely use this helper:
|
||||
*
|
||||
* - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
|
||||
* - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
|
||||
* consuming it. In this case, mmu_lock doesn't need to be held during the
|
||||
* lookup, but it does need to be held while checking the MMU notifier.
|
||||
*
|
||||
@ -855,7 +855,7 @@ retry:
|
||||
|
||||
/* Check if an invalidation has taken place since we got pfn */
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (mmu_invalidate_retry_hva(kvm, mmu_seq, hva)) {
|
||||
if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
|
||||
/*
|
||||
* This can happen when mappings are changed asynchronously, but
|
||||
* also synchronously if a COW is triggered by
|
||||
|
@ -810,8 +810,6 @@ int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn);
|
||||
pgd_t *kvm_pgd_alloc(void);
|
||||
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
/* Emulation */
|
||||
enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
|
||||
int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
|
||||
|
@ -22,10 +22,9 @@ config KVM
|
||||
select EXPORT_UASM
|
||||
select PREEMPT_NOTIFIERS
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select KVM_MMIO
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select INTERVAL_TREE
|
||||
select KVM_GENERIC_HARDWARE_ENABLING
|
||||
help
|
||||
|
@ -63,8 +63,6 @@
|
||||
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
#define HPTEG_CACHE_NUM (1 << 15)
|
||||
#define HPTEG_HASH_BITS_PTE 13
|
||||
#define HPTEG_HASH_BITS_PTE_LONG 12
|
||||
|
@ -20,7 +20,6 @@ if VIRTUALIZATION
|
||||
config KVM
|
||||
bool
|
||||
select PREEMPT_NOTIFIERS
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select KVM_VFIO
|
||||
select IRQ_BYPASS_MANAGER
|
||||
@ -42,7 +41,7 @@ config KVM_BOOK3S_64_HANDLER
|
||||
config KVM_BOOK3S_PR_POSSIBLE
|
||||
bool
|
||||
select KVM_MMIO
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
|
||||
config KVM_BOOK3S_HV_POSSIBLE
|
||||
bool
|
||||
@ -85,7 +84,7 @@ config KVM_BOOK3S_64_HV
|
||||
tristate "KVM for POWER7 and later using hypervisor mode in host"
|
||||
depends on KVM_BOOK3S_64 && PPC_POWERNV
|
||||
select KVM_BOOK3S_HV_POSSIBLE
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select CMA
|
||||
help
|
||||
Support running unmodified book3s_64 guest kernels in
|
||||
@ -194,7 +193,7 @@ config KVM_E500V2
|
||||
depends on !CONTEXT_TRACKING_USER
|
||||
select KVM
|
||||
select KVM_MMIO
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
help
|
||||
Support running unmodified E500 guest kernels in virtual machines on
|
||||
E500v2 host processors.
|
||||
@ -211,7 +210,7 @@ config KVM_E500MC
|
||||
select KVM
|
||||
select KVM_MMIO
|
||||
select KVM_BOOKE_HV
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
help
|
||||
Support running unmodified E500MC/E5500/E6500 guest kernels in
|
||||
virtual machines on E500MC/E5500/E6500 host processors.
|
||||
@ -225,7 +224,6 @@ config KVM_MPIC
|
||||
bool "KVM in-kernel MPIC emulation"
|
||||
depends on KVM && PPC_E500
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
select HAVE_KVM_MSI
|
||||
help
|
||||
@ -238,7 +236,6 @@ config KVM_XICS
|
||||
bool "KVM in-kernel XICS emulation"
|
||||
depends on KVM_BOOK3S_64 && !KVM_MPIC
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
default y
|
||||
help
|
||||
Include support for the XICS (eXternal Interrupt Controller
|
||||
|
@ -6240,7 +6240,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
|
||||
}
|
||||
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct kvm_memslots *slots = __kvm_memslots(kvm, i);
|
||||
int bkt;
|
||||
|
@ -578,7 +578,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
case KVM_CAP_IRQFD_RESAMPLE:
|
||||
r = !xive_enabled();
|
||||
break;
|
||||
@ -632,13 +632,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
break;
|
||||
#endif
|
||||
case KVM_CAP_SYNC_MMU:
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
r = hv_enabled;
|
||||
#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
|
||||
BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER));
|
||||
r = 1;
|
||||
#else
|
||||
r = 0;
|
||||
#endif
|
||||
break;
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
case KVM_CAP_PPC_HTAB_FD:
|
||||
|
@ -267,8 +267,6 @@ struct kvm_vcpu_arch {
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
|
||||
|
||||
void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
|
||||
|
@ -20,9 +20,7 @@ if VIRTUALIZATION
|
||||
config KVM
|
||||
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
|
||||
depends on RISCV_SBI && MMU
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
select HAVE_KVM_MSI
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
@ -30,7 +28,7 @@ config KVM
|
||||
select KVM_GENERIC_HARDWARE_ENABLING
|
||||
select KVM_MMIO
|
||||
select KVM_XFER_TO_GUEST_WORK
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
help
|
||||
Support hosting virtualized guest machines.
|
||||
|
@ -23,11 +23,9 @@ config KVM
|
||||
select PREEMPT_NOTIFIERS
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select HAVE_KVM_EVENTFD
|
||||
select KVM_ASYNC_PF
|
||||
select KVM_ASYNC_PF_SYNC
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
select HAVE_KVM_INVALID_WAKEUPS
|
||||
select HAVE_KVM_NO_POLL
|
||||
|
@ -1255,6 +1255,7 @@ enum kvm_apicv_inhibit {
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned long vm_type;
|
||||
unsigned long n_used_mmu_pages;
|
||||
unsigned long n_requested_mmu_pages;
|
||||
unsigned long n_max_mmu_pages;
|
||||
@ -1848,6 +1849,9 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_init_vm(struct kvm *kvm);
|
||||
void kvm_mmu_uninit_vm(struct kvm *kvm);
|
||||
|
||||
void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot);
|
||||
|
||||
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
@ -2086,6 +2090,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
|
||||
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
|
||||
int tdp_max_root_level, int tdp_huge_page_level);
|
||||
|
||||
#ifdef CONFIG_KVM_PRIVATE_MEM
|
||||
#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM)
|
||||
#else
|
||||
#define kvm_arch_has_private_mem(kvm) false
|
||||
#endif
|
||||
|
||||
static inline u16 kvm_read_ldt(void)
|
||||
{
|
||||
u16 ldt;
|
||||
@ -2133,16 +2143,15 @@ enum {
|
||||
#define HF_SMM_MASK (1 << 1)
|
||||
#define HF_SMM_INSIDE_NMI_MASK (1 << 2)
|
||||
|
||||
# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
|
||||
# define KVM_ADDRESS_SPACE_NUM 2
|
||||
# define KVM_MAX_NR_ADDRESS_SPACES 2
|
||||
/* SMM is currently unsupported for guests with private memory. */
|
||||
# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2)
|
||||
# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
|
||||
# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
|
||||
#else
|
||||
# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
|
||||
#endif
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
|
||||
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
|
||||
int kvm_cpu_has_extint(struct kvm_vcpu *v);
|
||||
|
@ -562,4 +562,7 @@ struct kvm_pmu_event_filter {
|
||||
/* x86-specific KVM_EXIT_HYPERCALL flags. */
|
||||
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
|
||||
|
||||
#define KVM_X86_DEFAULT_VM 0
|
||||
#define KVM_X86_SW_PROTECTED_VM 1
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
@ -24,16 +24,14 @@ config KVM
|
||||
depends on HIGH_RES_TIMERS
|
||||
depends on X86_LOCAL_APIC
|
||||
select PREEMPT_NOTIFIERS
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_PFNCACHE
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_DIRTY_RING_TSO
|
||||
select HAVE_KVM_DIRTY_RING_ACQ_REL
|
||||
select IRQ_BYPASS_MANAGER
|
||||
select HAVE_KVM_IRQ_BYPASS
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
select HAVE_KVM_EVENTFD
|
||||
select KVM_ASYNC_PF
|
||||
select USER_RETURN_NOTIFIER
|
||||
select KVM_MMIO
|
||||
@ -77,6 +75,18 @@ config KVM_WERROR
|
||||
|
||||
If in doubt, say "N".
|
||||
|
||||
config KVM_SW_PROTECTED_VM
|
||||
bool "Enable support for KVM software-protected VMs"
|
||||
depends on EXPERT
|
||||
depends on X86_64
|
||||
select KVM_GENERIC_PRIVATE_MEM
|
||||
help
|
||||
Enable support for KVM software-protected VMs. Currently "protected"
|
||||
means the VM can be backed with memory provided by
|
||||
KVM_CREATE_GUEST_MEMFD.
|
||||
|
||||
If unsure, say "N".
|
||||
|
||||
config KVM_INTEL
|
||||
tristate "KVM for Intel (and compatible) processors support"
|
||||
depends on KVM && IA32_FEAT_CTL
|
||||
|
@ -111,7 +111,7 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
int bkt;
|
||||
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
|
@ -795,16 +795,26 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
|
||||
return &slot->arch.lpage_info[level - 2][idx];
|
||||
}
|
||||
|
||||
/*
|
||||
* The most significant bit in disallow_lpage tracks whether or not memory
|
||||
* attributes are mixed, i.e. not identical for all gfns at the current level.
|
||||
* The lower order bits are used to refcount other cases where a hugepage is
|
||||
* disallowed, e.g. if KVM has shadowed a page table at the gfn.
|
||||
*/
|
||||
#define KVM_LPAGE_MIXED_FLAG BIT(31)
|
||||
|
||||
static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, int count)
|
||||
{
|
||||
struct kvm_lpage_info *linfo;
|
||||
int i;
|
||||
int old, i;
|
||||
|
||||
for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
|
||||
linfo = lpage_info_slot(gfn, slot, i);
|
||||
|
||||
old = linfo->disallow_lpage;
|
||||
linfo->disallow_lpage += count;
|
||||
WARN_ON_ONCE(linfo->disallow_lpage < 0);
|
||||
WARN_ON_ONCE((old ^ linfo->disallow_lpage) & KVM_LPAGE_MIXED_FLAG);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3056,7 +3066,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
|
||||
*
|
||||
* There are several ways to safely use this helper:
|
||||
*
|
||||
* - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
|
||||
* - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
|
||||
* consuming it. In this case, mmu_lock doesn't need to be held during the
|
||||
* lookup, but it does need to be held while checking the MMU notifier.
|
||||
*
|
||||
@ -3137,9 +3147,9 @@ out:
|
||||
return level;
|
||||
}
|
||||
|
||||
int kvm_mmu_max_mapping_level(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
int max_level)
|
||||
static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, int max_level, bool is_private)
|
||||
{
|
||||
struct kvm_lpage_info *linfo;
|
||||
int host_level;
|
||||
@ -3151,6 +3161,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_private)
|
||||
return max_level;
|
||||
|
||||
if (max_level == PG_LEVEL_4K)
|
||||
return PG_LEVEL_4K;
|
||||
|
||||
@ -3158,6 +3171,16 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
|
||||
return min(host_level, max_level);
|
||||
}
|
||||
|
||||
int kvm_mmu_max_mapping_level(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
int max_level)
|
||||
{
|
||||
bool is_private = kvm_slot_can_be_private(slot) &&
|
||||
kvm_mem_is_private(kvm, gfn);
|
||||
|
||||
return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level, is_private);
|
||||
}
|
||||
|
||||
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
struct kvm_memory_slot *slot = fault->slot;
|
||||
@ -3178,8 +3201,9 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
* Enforce the iTLB multihit workaround after capturing the requested
|
||||
* level, which will be used to do precise, accurate accounting.
|
||||
*/
|
||||
fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
|
||||
fault->gfn, fault->max_level);
|
||||
fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
|
||||
fault->gfn, fault->max_level,
|
||||
fault->is_private);
|
||||
if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
|
||||
return;
|
||||
|
||||
@ -3739,7 +3763,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
|
||||
kvm_page_track_write_tracking_enabled(kvm))
|
||||
goto out_success;
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
kvm_for_each_memslot(slot, bkt, slots) {
|
||||
/*
|
||||
@ -4259,6 +4283,55 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
|
||||
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
|
||||
}
|
||||
|
||||
static inline u8 kvm_max_level_for_order(int order)
|
||||
{
|
||||
BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
|
||||
|
||||
KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
|
||||
order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
|
||||
order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
|
||||
|
||||
if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
|
||||
return PG_LEVEL_1G;
|
||||
|
||||
if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
|
||||
return PG_LEVEL_2M;
|
||||
|
||||
return PG_LEVEL_4K;
|
||||
}
|
||||
|
||||
static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault)
|
||||
{
|
||||
kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
|
||||
PAGE_SIZE, fault->write, fault->exec,
|
||||
fault->is_private);
|
||||
}
|
||||
|
||||
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault)
|
||||
{
|
||||
int max_order, r;
|
||||
|
||||
if (!kvm_slot_can_be_private(fault->slot)) {
|
||||
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
|
||||
&max_order);
|
||||
if (r) {
|
||||
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
|
||||
return r;
|
||||
}
|
||||
|
||||
fault->max_level = min(kvm_max_level_for_order(max_order),
|
||||
fault->max_level);
|
||||
fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
|
||||
|
||||
return RET_PF_CONTINUE;
|
||||
}
|
||||
|
||||
static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
struct kvm_memory_slot *slot = fault->slot;
|
||||
@ -4291,6 +4364,14 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
return RET_PF_EMULATE;
|
||||
}
|
||||
|
||||
if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
|
||||
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (fault->is_private)
|
||||
return kvm_faultin_pfn_private(vcpu, fault);
|
||||
|
||||
async = false;
|
||||
fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
|
||||
fault->write, &fault->map_writable,
|
||||
@ -4366,7 +4447,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
|
||||
return true;
|
||||
|
||||
return fault->slot &&
|
||||
mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva);
|
||||
mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
|
||||
}
|
||||
|
||||
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
@ -6228,7 +6309,7 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
|
||||
if (!kvm_memslots_have_rmaps(kvm))
|
||||
return flush;
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
|
||||
kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
|
||||
@ -6260,7 +6341,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
kvm_mmu_invalidate_begin(kvm, 0, -1ul);
|
||||
kvm_mmu_invalidate_begin(kvm);
|
||||
|
||||
kvm_mmu_invalidate_range_add(kvm, gfn_start, gfn_end);
|
||||
|
||||
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
|
||||
|
||||
@ -6270,7 +6353,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
|
||||
|
||||
kvm_mmu_invalidate_end(kvm, 0, -1ul);
|
||||
kvm_mmu_invalidate_end(kvm);
|
||||
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
@ -6723,7 +6806,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
|
||||
* modifier prior to checking for a wrap of the MMIO generation so
|
||||
* that a wrap in any address space is detected.
|
||||
*/
|
||||
gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1);
|
||||
gen &= ~((u64)kvm_arch_nr_memslot_as_ids(kvm) - 1);
|
||||
|
||||
/*
|
||||
* The very rare case: if the MMIO generation number has wrapped,
|
||||
@ -7176,3 +7259,163 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
|
||||
if (kvm->arch.nx_huge_page_recovery_thread)
|
||||
kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
|
||||
struct kvm_gfn_range *range)
|
||||
{
|
||||
/*
|
||||
* Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only
|
||||
* supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM
|
||||
* can simply ignore such slots. But if userspace is making memory
|
||||
* PRIVATE, then KVM must prevent the guest from accessing the memory
|
||||
* as shared. And if userspace is making memory SHARED and this point
|
||||
* is reached, then at least one page within the range was previously
|
||||
* PRIVATE, i.e. the slot's possible hugepage ranges are changing.
|
||||
* Zapping SPTEs in this case ensures KVM will reassess whether or not
|
||||
* a hugepage can be used for affected ranges.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
|
||||
return false;
|
||||
|
||||
return kvm_unmap_gfn_range(kvm, range);
|
||||
}
|
||||
|
||||
static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
int level)
|
||||
{
|
||||
return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG;
|
||||
}
|
||||
|
||||
static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
int level)
|
||||
{
|
||||
lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG;
|
||||
}
|
||||
|
||||
static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
int level)
|
||||
{
|
||||
lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG;
|
||||
}
|
||||
|
||||
static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, int level, unsigned long attrs)
|
||||
{
|
||||
const unsigned long start = gfn;
|
||||
const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);
|
||||
|
||||
if (level == PG_LEVEL_2M)
|
||||
return kvm_range_has_memory_attributes(kvm, start, end, attrs);
|
||||
|
||||
for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
|
||||
if (hugepage_test_mixed(slot, gfn, level - 1) ||
|
||||
attrs != kvm_get_memory_attributes(kvm, gfn))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
|
||||
struct kvm_gfn_range *range)
|
||||
{
|
||||
unsigned long attrs = range->arg.attributes;
|
||||
struct kvm_memory_slot *slot = range->slot;
|
||||
int level;
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
/*
|
||||
* Calculate which ranges can be mapped with hugepages even if the slot
|
||||
* can't map memory PRIVATE. KVM mustn't create a SHARED hugepage over
|
||||
* a range that has PRIVATE GFNs, and conversely converting a range to
|
||||
* SHARED may now allow hugepages.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* The sequence matters here: upper levels consume the result of lower
|
||||
* level's scanning.
|
||||
*/
|
||||
for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
|
||||
gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
|
||||
gfn_t gfn = gfn_round_for_level(range->start, level);
|
||||
|
||||
/* Process the head page if it straddles the range. */
|
||||
if (gfn != range->start || gfn + nr_pages > range->end) {
|
||||
/*
|
||||
* Skip mixed tracking if the aligned gfn isn't covered
|
||||
* by the memslot, KVM can't use a hugepage due to the
|
||||
* misaligned address regardless of memory attributes.
|
||||
*/
|
||||
if (gfn >= slot->base_gfn) {
|
||||
if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
|
||||
hugepage_clear_mixed(slot, gfn, level);
|
||||
else
|
||||
hugepage_set_mixed(slot, gfn, level);
|
||||
}
|
||||
gfn += nr_pages;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pages entirely covered by the range are guaranteed to have
|
||||
* only the attributes which were just set.
|
||||
*/
|
||||
for ( ; gfn + nr_pages <= range->end; gfn += nr_pages)
|
||||
hugepage_clear_mixed(slot, gfn, level);
|
||||
|
||||
/*
|
||||
* Process the last tail page if it straddles the range and is
|
||||
* contained by the memslot. Like the head page, KVM can't
|
||||
* create a hugepage if the slot size is misaligned.
|
||||
*/
|
||||
if (gfn < range->end &&
|
||||
(gfn + nr_pages) <= (slot->base_gfn + slot->npages)) {
|
||||
if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
|
||||
hugepage_clear_mixed(slot, gfn, level);
|
||||
else
|
||||
hugepage_set_mixed(slot, gfn, level);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
int level;
|
||||
|
||||
if (!kvm_arch_has_private_mem(kvm))
|
||||
return;
|
||||
|
||||
for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
|
||||
/*
|
||||
* Don't bother tracking mixed attributes for pages that can't
|
||||
* be huge due to alignment, i.e. process only pages that are
|
||||
* entirely contained by the memslot.
|
||||
*/
|
||||
gfn_t end = gfn_round_for_level(slot->base_gfn + slot->npages, level);
|
||||
gfn_t start = gfn_round_for_level(slot->base_gfn, level);
|
||||
gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
|
||||
gfn_t gfn;
|
||||
|
||||
if (start < slot->base_gfn)
|
||||
start += nr_pages;
|
||||
|
||||
/*
|
||||
* Unlike setting attributes, every potential hugepage needs to
|
||||
* be manually checked as the attributes may already be mixed.
|
||||
*/
|
||||
for (gfn = start; gfn < end; gfn += nr_pages) {
|
||||
unsigned long attrs = kvm_get_memory_attributes(kvm, gfn);
|
||||
|
||||
if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
|
||||
hugepage_clear_mixed(slot, gfn, level);
|
||||
else
|
||||
hugepage_set_mixed(slot, gfn, level);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -201,6 +201,7 @@ struct kvm_page_fault {
|
||||
|
||||
/* Derived from mmu and global state. */
|
||||
const bool is_tdp;
|
||||
const bool is_private;
|
||||
const bool nx_huge_page_workaround_enabled;
|
||||
|
||||
/*
|
||||
@ -296,6 +297,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
.max_level = KVM_MAX_HUGEPAGE_LEVEL,
|
||||
.req_level = PG_LEVEL_4K,
|
||||
.goal_level = PG_LEVEL_4K,
|
||||
.is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
|
||||
};
|
||||
int r;
|
||||
|
||||
|
@ -6757,10 +6757,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Grab the memslot so that the hva lookup for the mmu_notifier retry
|
||||
* is guaranteed to use the same memslot as the pfn lookup, i.e. rely
|
||||
* on the pfn lookup's validation of the memslot to ensure a valid hva
|
||||
* is used for the retry check.
|
||||
* Explicitly grab the memslot using KVM's internal slot ID to ensure
|
||||
* KVM doesn't unintentionally grab a userspace memslot. It _should_
|
||||
* be impossible for userspace to create a memslot for the APIC when
|
||||
* APICv is enabled, but paranoia won't hurt in this case.
|
||||
*/
|
||||
slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
|
||||
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
|
||||
@ -6785,8 +6785,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
read_lock(&vcpu->kvm->mmu_lock);
|
||||
if (mmu_invalidate_retry_hva(kvm, mmu_seq,
|
||||
gfn_to_hva_memslot(slot, gfn))) {
|
||||
if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
|
||||
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
|
||||
read_unlock(&vcpu->kvm->mmu_lock);
|
||||
goto out;
|
||||
|
@ -4548,6 +4548,13 @@ static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool kvm_is_vm_type_supported(unsigned long type)
|
||||
{
|
||||
return type == KVM_X86_DEFAULT_VM ||
|
||||
(type == KVM_X86_SW_PROTECTED_VM &&
|
||||
IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_enabled);
|
||||
}
|
||||
|
||||
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
{
|
||||
int r = 0;
|
||||
@ -4625,6 +4632,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_ENABLE_CAP:
|
||||
case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
|
||||
case KVM_CAP_IRQFD_RESAMPLE:
|
||||
case KVM_CAP_MEMORY_FAULT_INFO:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_EXIT_HYPERCALL:
|
||||
@ -4738,6 +4746,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_X86_NOTIFY_VMEXIT:
|
||||
r = kvm_caps.has_notify_vmexit;
|
||||
break;
|
||||
case KVM_CAP_VM_TYPES:
|
||||
r = BIT(KVM_X86_DEFAULT_VM);
|
||||
if (kvm_is_vm_type_supported(KVM_X86_SW_PROTECTED_VM))
|
||||
r |= BIT(KVM_X86_SW_PROTECTED_VM);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -11081,6 +11094,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
|
||||
vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
|
||||
vcpu->arch.l1tf_flush_l1d = true;
|
||||
|
||||
for (;;) {
|
||||
@ -12434,9 +12448,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
int ret;
|
||||
unsigned long flags;
|
||||
|
||||
if (type)
|
||||
if (!kvm_is_vm_type_supported(type))
|
||||
return -EINVAL;
|
||||
|
||||
kvm->arch.vm_type = type;
|
||||
|
||||
ret = kvm_page_track_init(kvm);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -12575,8 +12591,8 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
|
||||
hva = slot->userspace_addr;
|
||||
}
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
struct kvm_userspace_memory_region m;
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
struct kvm_userspace_memory_region2 m;
|
||||
|
||||
m.slot = id | (i << 16);
|
||||
m.flags = 0;
|
||||
@ -12726,6 +12742,10 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
kvm_mmu_init_memslot_memory_attributes(kvm, slot);
|
||||
#endif
|
||||
|
||||
if (kvm_page_track_create_memslot(kvm, slot, npages))
|
||||
goto out_free;
|
||||
|
||||
|
@ -79,7 +79,7 @@ static struct file *__anon_inode_getfile(const char *name,
|
||||
const struct file_operations *fops,
|
||||
void *priv, int flags,
|
||||
const struct inode *context_inode,
|
||||
bool secure)
|
||||
bool make_inode)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct file *file;
|
||||
@ -87,7 +87,7 @@ static struct file *__anon_inode_getfile(const char *name,
|
||||
if (fops->owner && !try_module_get(fops->owner))
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
if (secure) {
|
||||
if (make_inode) {
|
||||
inode = anon_inode_make_secure_inode(name, context_inode);
|
||||
if (IS_ERR(inode)) {
|
||||
file = ERR_CAST(inode);
|
||||
@ -149,13 +149,10 @@ struct file *anon_inode_getfile(const char *name,
|
||||
EXPORT_SYMBOL_GPL(anon_inode_getfile);
|
||||
|
||||
/**
|
||||
* anon_inode_getfile_secure - Like anon_inode_getfile(), but creates a new
|
||||
* anon_inode_create_getfile - Like anon_inode_getfile(), but creates a new
|
||||
* !S_PRIVATE anon inode rather than reuse the
|
||||
* singleton anon inode and calls the
|
||||
* inode_init_security_anon() LSM hook. This
|
||||
* allows for both the inode to have its own
|
||||
* security context and for the LSM to enforce
|
||||
* policy on the inode's creation.
|
||||
* inode_init_security_anon() LSM hook.
|
||||
*
|
||||
* @name: [in] name of the "class" of the new file
|
||||
* @fops: [in] file operations for the new file
|
||||
@ -164,11 +161,21 @@ EXPORT_SYMBOL_GPL(anon_inode_getfile);
|
||||
* @context_inode:
|
||||
* [in] the logical relationship with the new inode (optional)
|
||||
*
|
||||
* Create a new anonymous inode and file pair. This can be done for two
|
||||
* reasons:
|
||||
*
|
||||
* - for the inode to have its own security context, so that LSMs can enforce
|
||||
* policy on the inode's creation;
|
||||
*
|
||||
* - if the caller needs a unique inode, for example in order to customize
|
||||
* the size returned by fstat()
|
||||
*
|
||||
* The LSM may use @context_inode in inode_init_security_anon(), but a
|
||||
* reference to it is not held. Returns the newly created file* or an error
|
||||
* pointer. See the anon_inode_getfile() documentation for more information.
|
||||
* reference to it is not held.
|
||||
*
|
||||
* Returns the newly created file* or an error pointer.
|
||||
*/
|
||||
struct file *anon_inode_getfile_secure(const char *name,
|
||||
struct file *anon_inode_create_getfile(const char *name,
|
||||
const struct file_operations *fops,
|
||||
void *priv, int flags,
|
||||
const struct inode *context_inode)
|
||||
@ -176,12 +183,13 @@ struct file *anon_inode_getfile_secure(const char *name,
|
||||
return __anon_inode_getfile(name, fops, priv, flags,
|
||||
context_inode, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(anon_inode_create_getfile);
|
||||
|
||||
static int __anon_inode_getfd(const char *name,
|
||||
const struct file_operations *fops,
|
||||
void *priv, int flags,
|
||||
const struct inode *context_inode,
|
||||
bool secure)
|
||||
bool make_inode)
|
||||
{
|
||||
int error, fd;
|
||||
struct file *file;
|
||||
@ -192,7 +200,7 @@ static int __anon_inode_getfd(const char *name,
|
||||
fd = error;
|
||||
|
||||
file = __anon_inode_getfile(name, fops, priv, flags, context_inode,
|
||||
secure);
|
||||
make_inode);
|
||||
if (IS_ERR(file)) {
|
||||
error = PTR_ERR(file);
|
||||
goto err_put_unused_fd;
|
||||
@ -231,10 +239,9 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
|
||||
EXPORT_SYMBOL_GPL(anon_inode_getfd);
|
||||
|
||||
/**
|
||||
* anon_inode_getfd_secure - Like anon_inode_getfd(), but creates a new
|
||||
* anon_inode_create_getfd - Like anon_inode_getfd(), but creates a new
|
||||
* !S_PRIVATE anon inode rather than reuse the singleton anon inode, and calls
|
||||
* the inode_init_security_anon() LSM hook. This allows the inode to have its
|
||||
* own security context and for a LSM to reject creation of the inode.
|
||||
* the inode_init_security_anon() LSM hook.
|
||||
*
|
||||
* @name: [in] name of the "class" of the new file
|
||||
* @fops: [in] file operations for the new file
|
||||
@ -243,16 +250,26 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd);
|
||||
* @context_inode:
|
||||
* [in] the logical relationship with the new inode (optional)
|
||||
*
|
||||
* Create a new anonymous inode and file pair. This can be done for two
|
||||
* reasons:
|
||||
*
|
||||
* - for the inode to have its own security context, so that LSMs can enforce
|
||||
* policy on the inode's creation;
|
||||
*
|
||||
* - if the caller needs a unique inode, for example in order to customize
|
||||
* the size returned by fstat()
|
||||
*
|
||||
* The LSM may use @context_inode in inode_init_security_anon(), but a
|
||||
* reference to it is not held.
|
||||
*
|
||||
* Returns a newly created file descriptor or an error code.
|
||||
*/
|
||||
int anon_inode_getfd_secure(const char *name, const struct file_operations *fops,
|
||||
int anon_inode_create_getfd(const char *name, const struct file_operations *fops,
|
||||
void *priv, int flags,
|
||||
const struct inode *context_inode)
|
||||
{
|
||||
return __anon_inode_getfd(name, fops, priv, flags, context_inode, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(anon_inode_getfd_secure);
|
||||
|
||||
static int __init anon_inode_init(void)
|
||||
{
|
||||
|
@ -1033,7 +1033,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new,
|
||||
{
|
||||
int fd;
|
||||
|
||||
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
|
||||
fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, new,
|
||||
O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
@ -2261,7 +2261,8 @@ static int new_userfaultfd(int flags)
|
||||
/* prevent the mm struct to be freed */
|
||||
mmgrab(ctx->mm);
|
||||
|
||||
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
|
||||
/* Create a new inode so that the LSM can block the creation. */
|
||||
fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
|
||||
O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
|
||||
if (fd < 0) {
|
||||
mmdrop(ctx->mm);
|
||||
|
@ -15,13 +15,13 @@ struct inode;
|
||||
struct file *anon_inode_getfile(const char *name,
|
||||
const struct file_operations *fops,
|
||||
void *priv, int flags);
|
||||
struct file *anon_inode_getfile_secure(const char *name,
|
||||
struct file *anon_inode_create_getfile(const char *name,
|
||||
const struct file_operations *fops,
|
||||
void *priv, int flags,
|
||||
const struct inode *context_inode);
|
||||
int anon_inode_getfd(const char *name, const struct file_operations *fops,
|
||||
void *priv, int flags);
|
||||
int anon_inode_getfd_secure(const char *name,
|
||||
int anon_inode_create_getfd(const char *name,
|
||||
const struct file_operations *fops,
|
||||
void *priv, int flags,
|
||||
const struct inode *context_inode);
|
||||
|
@ -80,8 +80,8 @@
|
||||
/* Two fragments for cross MMIO pages. */
|
||||
#define KVM_MAX_MMIO_FRAGMENTS 2
|
||||
|
||||
#ifndef KVM_ADDRESS_SPACE_NUM
|
||||
#define KVM_ADDRESS_SPACE_NUM 1
|
||||
#ifndef KVM_MAX_NR_ADDRESS_SPACES
|
||||
#define KVM_MAX_NR_ADDRESS_SPACES 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -253,9 +253,10 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
|
||||
#endif
|
||||
|
||||
#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
|
||||
union kvm_mmu_notifier_arg {
|
||||
pte_t pte;
|
||||
unsigned long attributes;
|
||||
};
|
||||
|
||||
struct kvm_gfn_range {
|
||||
@ -588,8 +589,20 @@ struct kvm_memory_slot {
|
||||
u32 flags;
|
||||
short id;
|
||||
u16 as_id;
|
||||
|
||||
#ifdef CONFIG_KVM_PRIVATE_MEM
|
||||
struct {
|
||||
struct file __rcu *file;
|
||||
pgoff_t pgoff;
|
||||
} gmem;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
|
||||
{
|
||||
return slot && (slot->flags & KVM_MEM_GUEST_MEMFD);
|
||||
}
|
||||
|
||||
static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
|
||||
{
|
||||
return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
|
||||
@ -677,13 +690,29 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm);
|
||||
#define KVM_MEM_SLOTS_NUM SHRT_MAX
|
||||
#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS)
|
||||
|
||||
#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
|
||||
#if KVM_MAX_NR_ADDRESS_SPACES == 1
|
||||
static inline int kvm_arch_nr_memslot_as_ids(struct kvm *kvm)
|
||||
{
|
||||
return KVM_MAX_NR_ADDRESS_SPACES;
|
||||
}
|
||||
|
||||
static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Arch code must define kvm_arch_has_private_mem if support for private memory
|
||||
* is enabled.
|
||||
*/
|
||||
#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
|
||||
static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct kvm_memslots {
|
||||
u64 generation;
|
||||
atomic_long_t last_used_slot;
|
||||
@ -721,9 +750,9 @@ struct kvm {
|
||||
struct mm_struct *mm; /* userspace tied to this vm */
|
||||
unsigned long nr_memslot_pages;
|
||||
/* The two memslot sets - active and inactive (per address space) */
|
||||
struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2];
|
||||
struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2];
|
||||
/* The current active memslot set for each address space */
|
||||
struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
|
||||
struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES];
|
||||
struct xarray vcpu_array;
|
||||
/*
|
||||
* Protected by slots_lock, but can be read outside if an
|
||||
@ -753,7 +782,7 @@ struct kvm {
|
||||
struct list_head vm_list;
|
||||
struct mutex lock;
|
||||
struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
|
||||
#ifdef CONFIG_HAVE_KVM_EVENTFD
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
struct {
|
||||
spinlock_t lock;
|
||||
struct list_head items;
|
||||
@ -761,8 +790,8 @@ struct kvm {
|
||||
struct list_head resampler_list;
|
||||
struct mutex resampler_lock;
|
||||
} irqfds;
|
||||
struct list_head ioeventfds;
|
||||
#endif
|
||||
struct list_head ioeventfds;
|
||||
struct kvm_vm_stat stat;
|
||||
struct kvm_arch arch;
|
||||
refcount_t users_count;
|
||||
@ -778,17 +807,16 @@ struct kvm {
|
||||
* Update side is protected by irq_lock.
|
||||
*/
|
||||
struct kvm_irq_routing_table __rcu *irq_routing;
|
||||
#endif
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
|
||||
struct hlist_head irq_ack_notifier_list;
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
|
||||
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
|
||||
struct mmu_notifier mmu_notifier;
|
||||
unsigned long mmu_invalidate_seq;
|
||||
long mmu_invalidate_in_progress;
|
||||
unsigned long mmu_invalidate_range_start;
|
||||
unsigned long mmu_invalidate_range_end;
|
||||
gfn_t mmu_invalidate_range_start;
|
||||
gfn_t mmu_invalidate_range_end;
|
||||
#endif
|
||||
struct list_head devices;
|
||||
u64 manual_dirty_log_protect;
|
||||
@ -806,6 +834,10 @@ struct kvm {
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
|
||||
struct notifier_block pm_notifier;
|
||||
#endif
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
/* Protected by slots_locks (for writes) and RCU (for reads) */
|
||||
struct xarray mem_attr_array;
|
||||
#endif
|
||||
char stats_id[KVM_STATS_NAME_SIZE];
|
||||
};
|
||||
@ -965,7 +997,7 @@ static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
int kvm_irqfd_init(void);
|
||||
void kvm_irqfd_exit(void);
|
||||
#else
|
||||
@ -989,7 +1021,7 @@ void kvm_put_kvm_no_destroy(struct kvm *kvm);
|
||||
|
||||
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
|
||||
{
|
||||
as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
|
||||
as_id = array_index_nospec(as_id, KVM_MAX_NR_ADDRESS_SPACES);
|
||||
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
|
||||
lockdep_is_held(&kvm->slots_lock) ||
|
||||
!refcount_read(&kvm->users_count));
|
||||
@ -1146,9 +1178,9 @@ enum kvm_mr_change {
|
||||
};
|
||||
|
||||
int kvm_set_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem);
|
||||
const struct kvm_userspace_memory_region2 *mem);
|
||||
int __kvm_set_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem);
|
||||
const struct kvm_userspace_memory_region2 *mem);
|
||||
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
|
||||
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
@ -1392,10 +1424,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
|
||||
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
|
||||
#endif
|
||||
|
||||
void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end);
|
||||
void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end);
|
||||
void kvm_mmu_invalidate_begin(struct kvm *kvm);
|
||||
void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end);
|
||||
void kvm_mmu_invalidate_end(struct kvm *kvm);
|
||||
bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
|
||||
|
||||
long kvm_arch_dev_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg);
|
||||
@ -1947,7 +1979,7 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
extern const struct kvm_stats_header kvm_vcpu_stats_header;
extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];

#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
{
if (unlikely(kvm->mmu_invalidate_in_progress))
@ -1970,9 +2002,9 @@ static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
return 0;
}

static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
static inline int mmu_invalidate_retry_gfn(struct kvm *kvm,
unsigned long mmu_seq,
unsigned long hva)
gfn_t gfn)
{
lockdep_assert_held(&kvm->mmu_lock);
/*
@ -1981,10 +2013,20 @@ static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
* that might be being invalidated. Note that it may include some false
* positives, due to shortcuts when handing concurrent invalidations.
*/
if (unlikely(kvm->mmu_invalidate_in_progress) &&
hva >= kvm->mmu_invalidate_range_start &&
hva < kvm->mmu_invalidate_range_end)
return 1;
if (unlikely(kvm->mmu_invalidate_in_progress)) {
/*
* Dropping mmu_lock after bumping mmu_invalidate_in_progress
* but before updating the range is a KVM bug.
*/
if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA ||
kvm->mmu_invalidate_range_end == INVALID_GPA))
return 1;

if (gfn >= kvm->mmu_invalidate_range_start &&
gfn < kvm->mmu_invalidate_range_end)
return 1;
}

if (kvm->mmu_invalidate_seq != mmu_seq)
return 1;
return 0;
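For orientation, a hedged sketch (not part of this patch) of how an architecture's page-fault path typically consumes the gfn-based retry helper above; example_faultin_pfn() and example_install_mapping() are invented placeholder names, and mmu_lock may be a spinlock rather than an rwlock on some architectures.

static int example_arch_page_fault(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long mmu_seq;
	kvm_pfn_t pfn;
	int r = -EAGAIN;

	mmu_seq = kvm->mmu_invalidate_seq;
	smp_rmb();	/* order the snapshot before the (sleepable) pfn lookup */

	pfn = example_faultin_pfn(vcpu, gfn);	/* hypothetical helper */

	write_lock(&kvm->mmu_lock);
	if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn))
		goto out_unlock;	/* an invalidation raced with us; retry the fault */

	r = example_install_mapping(vcpu, gfn, pfn);	/* hypothetical helper */
out_unlock:
	write_unlock(&kvm->mmu_lock);
	return r;
}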
@ -2013,12 +2055,10 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}

int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);

#ifdef CONFIG_HAVE_KVM_EVENTFD

void kvm_eventfd_init(struct kvm *kvm);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);

#ifdef CONFIG_HAVE_KVM_IRQFD
#ifdef CONFIG_HAVE_KVM_IRQCHIP
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_irqfd_release(struct kvm *kvm);
bool kvm_notify_irqfd_resampler(struct kvm *kvm,
@ -2039,31 +2079,7 @@ static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm,
{
return false;
}
#endif

#else

static inline void kvm_eventfd_init(struct kvm *kvm) {}

static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
return -EINVAL;
}

static inline void kvm_irqfd_release(struct kvm *kvm) {}

#ifdef CONFIG_HAVE_KVM_IRQCHIP
static inline void kvm_irq_routing_update(struct kvm *kvm)
{
}
#endif

static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
return -ENOSYS;
}

#endif /* CONFIG_HAVE_KVM_EVENTFD */
#endif /* CONFIG_HAVE_KVM_IRQCHIP */

void kvm_arch_irq_routing_update(struct kvm *kvm);
@ -2318,4 +2334,57 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr)
/* Max number of entries allowed for each kvm dirty ring */
#define KVM_DIRTY_RING_MAX_ENTRIES 65536

static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
gpa_t gpa, gpa_t size,
bool is_write, bool is_exec,
bool is_private)
{
vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
vcpu->run->memory_fault.gpa = gpa;
vcpu->run->memory_fault.size = size;

/* RWX flags are not (yet) defined or communicated to userspace. */
vcpu->run->memory_fault.flags = 0;
if (is_private)
vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE;
}

#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn)
{
return xa_to_value(xa_load(&kvm->mem_attr_array, gfn));
}

bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
unsigned long attrs);
bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
struct kvm_gfn_range *range);
bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
struct kvm_gfn_range *range);

static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
}
#else
static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
return false;
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */

#ifdef CONFIG_KVM_PRIVATE_MEM
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
#else
static inline int kvm_gmem_get_pfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
kvm_pfn_t *pfn, int *max_order)
{
KVM_BUG_ON(1, kvm);
return -EIO;
}
#endif /* CONFIG_KVM_PRIVATE_MEM */

#endif
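The helpers above are meant to be combined roughly as follows. This is an illustrative sketch only, not code from the patch; the fault_is_private plumbing and the helper name are assumptions about the caller.

static int example_handle_guest_fault(struct kvm_vcpu *vcpu, gpa_t gpa,
				      bool is_write, bool is_exec,
				      bool fault_is_private)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;

	if (fault_is_private != kvm_mem_is_private(vcpu->kvm, gfn)) {
		/* Let userspace convert the page with KVM_SET_MEMORY_ATTRIBUTES. */
		kvm_prepare_memory_fault_exit(vcpu, gpa, PAGE_SIZE, is_write,
					      is_exec, fault_is_private);
		return -EFAULT;
	}

	/* ... otherwise resolve the fault normally ... */
	return 0;
}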
@ -6,6 +6,7 @@
|
||||
struct kvm;
|
||||
struct kvm_async_pf;
|
||||
struct kvm_device_ops;
|
||||
struct kvm_gfn_range;
|
||||
struct kvm_interrupt;
|
||||
struct kvm_irq_routing_table;
|
||||
struct kvm_memory_slot;
|
||||
|
@ -206,6 +206,7 @@ enum mapping_flags {
|
||||
AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */
|
||||
AS_STABLE_WRITES, /* must wait for writeback before modifying
|
||||
folio contents */
|
||||
AS_UNMOVABLE, /* The mapping cannot be moved, ever */
|
||||
};
|
||||
|
||||
/**
|
||||
@ -291,6 +292,22 @@ static inline void mapping_clear_release_always(struct address_space *mapping)
|
||||
clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
|
||||
}
|
||||
|
||||
static inline void mapping_set_unmovable(struct address_space *mapping)
|
||||
{
|
||||
/*
|
||||
* It's expected unmovable mappings are also unevictable. Compaction
|
||||
* migrate scanner (isolate_migratepages_block()) relies on this to
|
||||
* reduce page locking.
|
||||
*/
|
||||
set_bit(AS_UNEVICTABLE, &mapping->flags);
|
||||
set_bit(AS_UNMOVABLE, &mapping->flags);
|
||||
}
|
||||
|
||||
static inline bool mapping_unmovable(struct address_space *mapping)
|
||||
{
|
||||
return test_bit(AS_UNMOVABLE, &mapping->flags);
|
||||
}
|
||||
|
||||
static inline bool mapping_stable_writes(const struct address_space *mapping)
|
||||
{
|
||||
return test_bit(AS_STABLE_WRITES, &mapping->flags);
|
||||
|
@ -62,7 +62,7 @@ TRACE_EVENT(kvm_vcpu_wakeup,
|
||||
__entry->valid ? "valid" : "invalid")
|
||||
);
|
||||
|
||||
#if defined(CONFIG_HAVE_KVM_IRQFD)
|
||||
#if defined(CONFIG_HAVE_KVM_IRQCHIP)
|
||||
TRACE_EVENT(kvm_set_irq,
|
||||
TP_PROTO(unsigned int gsi, int level, int irq_source_id),
|
||||
TP_ARGS(gsi, level, irq_source_id),
|
||||
@ -82,7 +82,7 @@ TRACE_EVENT(kvm_set_irq,
|
||||
TP_printk("gsi %u level %d source %d",
|
||||
__entry->gsi, __entry->level, __entry->irq_source_id)
|
||||
);
|
||||
#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
|
||||
#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
|
||||
|
||||
#if defined(__KVM_HAVE_IOAPIC)
|
||||
#define kvm_deliver_mode \
|
||||
@ -170,7 +170,7 @@ TRACE_EVENT(kvm_msi_set_irq,
|
||||
|
||||
#endif /* defined(__KVM_HAVE_IOAPIC) */
|
||||
|
||||
#if defined(CONFIG_HAVE_KVM_IRQFD)
|
||||
#if defined(CONFIG_HAVE_KVM_IRQCHIP)
|
||||
|
||||
#ifdef kvm_irqchips
|
||||
#define kvm_ack_irq_string "irqchip %s pin %u"
|
||||
@ -197,7 +197,7 @@ TRACE_EVENT(kvm_ack_irq,
|
||||
TP_printk(kvm_ack_irq_string, kvm_ack_irq_parm)
|
||||
);
|
||||
|
||||
#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
|
||||
#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
|
||||
|
||||
|
||||
|
||||
|
@ -16,76 +16,6 @@
|
||||
|
||||
#define KVM_API_VERSION 12
|
||||
|
||||
/* *** Deprecated interfaces *** */
|
||||
|
||||
#define KVM_TRC_SHIFT 16
|
||||
|
||||
#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
|
||||
#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1))
|
||||
|
||||
#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
|
||||
#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
|
||||
#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
|
||||
|
||||
#define KVM_TRC_HEAD_SIZE 12
|
||||
#define KVM_TRC_CYCLE_SIZE 8
|
||||
#define KVM_TRC_EXTRA_MAX 7
|
||||
|
||||
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
|
||||
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
|
||||
#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
|
||||
#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
|
||||
#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
|
||||
#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
|
||||
#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
|
||||
#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
|
||||
#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
|
||||
#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
|
||||
#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
|
||||
#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
|
||||
#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
|
||||
#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
|
||||
#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
|
||||
#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
|
||||
#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
|
||||
#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
|
||||
#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
|
||||
#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
|
||||
#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
|
||||
#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
|
||||
#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
|
||||
#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
|
||||
|
||||
struct kvm_user_trace_setup {
|
||||
__u32 buf_size;
|
||||
__u32 buf_nr;
|
||||
};
|
||||
|
||||
#define __KVM_DEPRECATED_MAIN_W_0x06 \
|
||||
_IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
|
||||
#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
|
||||
#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
|
||||
|
||||
#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
|
||||
|
||||
struct kvm_breakpoint {
|
||||
__u32 enabled;
|
||||
__u32 padding;
|
||||
__u64 address;
|
||||
};
|
||||
|
||||
struct kvm_debug_guest {
|
||||
__u32 enabled;
|
||||
__u32 pad;
|
||||
struct kvm_breakpoint breakpoints[4];
|
||||
__u32 singlestep;
|
||||
};
|
||||
|
||||
#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
|
||||
|
||||
/* *** End of deprecated interfaces *** */
|
||||
|
||||
|
||||
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
@ -95,6 +25,19 @@ struct kvm_userspace_memory_region {
__u64 userspace_addr; /* start of the userspace allocated memory */
};

/* for KVM_SET_USER_MEMORY_REGION2 */
struct kvm_userspace_memory_region2 {
__u32 slot;
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 userspace_addr;
__u64 guest_memfd_offset;
__u32 guest_memfd;
__u32 pad1;
__u64 pad2[14];
};

/*
* The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
* userspace, other bits are reserved for kvm internal use which are defined
@ -102,6 +45,7 @@ struct kvm_userspace_memory_region {
*/
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
#define KVM_MEM_GUEST_MEMFD (1UL << 2)

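As a usage illustration (not part of the patch), a VMM could bind a slot backed by both shared and private memory like this; vm_fd, gmem_fd and backing_hva are assumed to already exist, the slot/GPA/size values are arbitrary examples, and <linux/kvm.h>, <sys/ioctl.h> and <err.h> are assumed to be included.

static void example_bind_guest_memfd_slot(int vm_fd, int gmem_fd, void *backing_hva)
{
	struct kvm_userspace_memory_region2 region = {
		.slot = 0,
		.flags = KVM_MEM_GUEST_MEMFD,
		.guest_phys_addr = 0x100000000ull,
		.memory_size = 0x200000,
		.userspace_addr = (__u64)(unsigned long)backing_hva, /* shared pages */
		.guest_memfd = gmem_fd,                               /* private pages */
		.guest_memfd_offset = 0,
	};

	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region))
		err(1, "KVM_SET_USER_MEMORY_REGION2");
}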
/* for KVM_IRQ_LINE */
|
||||
struct kvm_irq_level {
|
||||
@ -265,6 +209,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_RISCV_CSR 36
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_LOONGARCH_IOCSR 38
#define KVM_EXIT_MEMORY_FAULT 39

/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@ -518,6 +463,13 @@ struct kvm_run {
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
__u32 flags;
} notify;
/* KVM_EXIT_MEMORY_FAULT */
struct {
#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
__u64 flags;
__u64 gpa;
__u64 size;
} memory_fault;
/* Fix the size of the union. */
char padding[256];
};
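A hedged sketch of how a VMM run loop might react to the new exit; the convert-on-fault policy and the err(3) error handling are assumptions, not something this patch mandates.

static void example_handle_memory_fault_exit(int vm_fd, struct kvm_run *run)
{
	struct kvm_memory_attributes attr = {
		.address = run->memory_fault.gpa,
		.size = run->memory_fault.size,
		.attributes = (run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE) ?
			      KVM_MEMORY_ATTRIBUTE_PRIVATE : 0,
	};

	/* Flip the range to the flavor the guest tried to access, then re-enter with KVM_RUN. */
	if (ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attr))
		err(1, "KVM_SET_MEMORY_ATTRIBUTES");
}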
@ -945,9 +897,6 @@ struct kvm_ppc_resize_hpt {
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)

@ -1201,6 +1150,11 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
#define KVM_CAP_USER_MEMORY2 231
#define KVM_CAP_MEMORY_FAULT_INFO 232
#define KVM_CAP_MEMORY_ATTRIBUTES 233
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235

#ifdef KVM_CAP_IRQ_ROUTING

@ -1483,6 +1437,8 @@ struct kvm_vfio_spapr_tce {
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \
struct kvm_userspace_memory_region2)

/* enable ucontrol for s390 */
|
||||
struct kvm_s390_ucas_mapping {
|
||||
@ -1507,20 +1463,8 @@ struct kvm_s390_ucas_mapping {
|
||||
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
|
||||
#define KVM_UNREGISTER_COALESCED_MMIO \
|
||||
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
|
||||
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
|
||||
/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
|
||||
#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70
|
||||
#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
|
||||
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
|
||||
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \
|
||||
struct kvm_assigned_msix_nr)
|
||||
#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \
|
||||
struct kvm_assigned_msix_entry)
|
||||
#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
|
||||
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
|
||||
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
|
||||
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
|
||||
@ -1537,9 +1481,6 @@ struct kvm_s390_ucas_mapping {
|
||||
* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
|
||||
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
|
||||
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
|
||||
/* Available with KVM_CAP_PCI_2_3 */
|
||||
#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
/* Available with KVM_CAP_SIGNAL_MSI */
|
||||
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
|
||||
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
|
||||
@ -1592,8 +1533,6 @@ struct kvm_s390_ucas_mapping {
|
||||
#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
|
||||
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
|
||||
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
|
||||
/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
|
||||
#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87
|
||||
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
|
||||
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
|
||||
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
|
||||
@ -2267,4 +2206,24 @@ struct kvm_s390_zpci_op {
/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)

/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)

struct kvm_memory_attributes {
__u64 address;
__u64 size;
__u64 attributes;
__u64 flags;
};

#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)

#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)

struct kvm_create_guest_memfd {
__u64 size;
__u64 flags;
__u64 reserved[6];
};

#endif /* __LINUX_KVM_H */

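Putting the two new ioctls together, a userspace flow might look like the sketch below; the ordering of the attribute update relative to slot binding is an assumption, and error handling is trimmed to err(3).

static int example_create_private_backing(int vm_fd, __u64 gpa, __u64 size)
{
	struct kvm_create_guest_memfd gmem = { .size = size };
	struct kvm_memory_attributes attr = {
		.address = gpa,
		.size = size,
		.attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
	};
	int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);

	if (gmem_fd < 0)
		err(1, "KVM_CREATE_GUEST_MEMFD");

	/* The fd still has to be bound to a slot via KVM_SET_USER_MEMORY_REGION2. */
	if (ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attr))
		err(1, "KVM_SET_MEMORY_ATTRIBUTES");

	return gmem_fd;
}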
@ -3777,7 +3777,8 @@ static int io_uring_install_fd(struct file *file)
*/
static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
{
return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx,
/* Create a new inode so that the LSM can block the creation. */
return anon_inode_create_getfile("[io_uring]", &io_uring_fops, ctx,
O_RDWR | O_CLOEXEC, NULL);
}

@ -882,6 +882,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
|
||||
/* Time to isolate some pages for migration */
|
||||
for (; low_pfn < end_pfn; low_pfn++) {
|
||||
bool is_dirty, is_unevictable;
|
||||
|
||||
if (skip_on_failure && low_pfn >= next_skip_pfn) {
|
||||
/*
|
||||
@ -1079,8 +1080,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
if (!folio_test_lru(folio))
|
||||
goto isolate_fail_put;
|
||||
|
||||
is_unevictable = folio_test_unevictable(folio);
|
||||
|
||||
/* Compaction might skip unevictable pages but CMA takes them */
|
||||
if (!(mode & ISOLATE_UNEVICTABLE) && folio_test_unevictable(folio))
|
||||
if (!(mode & ISOLATE_UNEVICTABLE) && is_unevictable)
|
||||
goto isolate_fail_put;
|
||||
|
||||
/*
|
||||
@ -1092,26 +1095,42 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
if ((mode & ISOLATE_ASYNC_MIGRATE) && folio_test_writeback(folio))
|
||||
goto isolate_fail_put;
|
||||
|
||||
if ((mode & ISOLATE_ASYNC_MIGRATE) && folio_test_dirty(folio)) {
|
||||
bool migrate_dirty;
|
||||
is_dirty = folio_test_dirty(folio);
|
||||
|
||||
if (((mode & ISOLATE_ASYNC_MIGRATE) && is_dirty) ||
|
||||
(mapping && is_unevictable)) {
|
||||
bool migrate_dirty = true;
|
||||
bool is_unmovable;
|
||||
|
||||
/*
|
||||
* Only folios without mappings or that have
|
||||
* a ->migrate_folio callback are possible to
|
||||
* migrate without blocking. However, we may
|
||||
* be racing with truncation, which can free
|
||||
* the mapping. Truncation holds the folio lock
|
||||
* until after the folio is removed from the page
|
||||
* cache so holding it ourselves is sufficient.
|
||||
* a ->migrate_folio callback are possible to migrate
|
||||
* without blocking.
|
||||
*
|
||||
* Folios from unmovable mappings are not migratable.
|
||||
*
|
||||
* However, we can be racing with truncation, which can
|
||||
* free the mapping that we need to check. Truncation
|
||||
* holds the folio lock until after the folio is removed
|
||||
* from the page so holding it ourselves is sufficient.
|
||||
*
|
||||
* To avoid locking the folio just to check unmovable,
|
||||
* assume every unmovable folio is also unevictable,
|
||||
* which is a cheaper test. If our assumption goes
|
||||
* wrong, it's not a correctness bug, just potentially
|
||||
* wasted cycles.
|
||||
*/
|
||||
if (!folio_trylock(folio))
|
||||
goto isolate_fail_put;
|
||||
|
||||
mapping = folio_mapping(folio);
|
||||
migrate_dirty = !mapping ||
|
||||
mapping->a_ops->migrate_folio;
|
||||
if ((mode & ISOLATE_ASYNC_MIGRATE) && is_dirty) {
|
||||
migrate_dirty = !mapping ||
|
||||
mapping->a_ops->migrate_folio;
|
||||
}
|
||||
is_unmovable = mapping && mapping_unmovable(mapping);
|
||||
folio_unlock(folio);
|
||||
if (!migrate_dirty)
|
||||
if (!migrate_dirty || is_unmovable)
|
||||
goto isolate_fail_put;
|
||||
}
|
||||
|
||||
|
@ -962,6 +962,8 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
|
||||
|
||||
if (!mapping)
|
||||
rc = migrate_folio(mapping, dst, src, mode);
|
||||
else if (mapping_unmovable(mapping))
|
||||
rc = -EOPNOTSUPP;
|
||||
else if (mapping->a_ops->migrate_folio)
|
||||
/*
|
||||
* Most folios have a mapping and most filesystems
|
||||
|
@ -77,11 +77,12 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
@ -124,6 +125,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
TEST_GEN_PROGS_x86_64 += guest_memfd_test
TEST_GEN_PROGS_x86_64 += guest_print_test
TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
@ -705,7 +705,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)

print_test_banner(mode, p);

vm = ____vm_create(mode);
vm = ____vm_create(VM_SHAPE(mode));
setup_memslots(vm, p);
kvm_vm_elf_load(vm, program_invocation_name);
setup_ucall(vm);

@ -699,7 +699,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,

pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));

vm = __vm_create(mode, 1, extra_mem_pages);
vm = __vm_create(VM_SHAPE(mode), 1, extra_mem_pages);

log_mode_create_vm_done(vm);
*vcpu = vm_vcpu_add(vm, 0, guest_code);

198
tools/testing/selftests/kvm/guest_memfd_test.c
Normal file
@ -0,0 +1,198 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright Intel Corporation, 2023
|
||||
*
|
||||
* Author: Chao Peng <chao.p.peng@linux.intel.com>
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/falloc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util_base.h"
|
||||
|
||||
static void test_file_read_write(int fd)
|
||||
{
|
||||
char buf[64];
|
||||
|
||||
TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
|
||||
"read on a guest_mem fd should fail");
|
||||
TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
|
||||
"write on a guest_mem fd should fail");
|
||||
TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
|
||||
"pread on a guest_mem fd should fail");
|
||||
TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
|
||||
"pwrite on a guest_mem fd should fail");
|
||||
}
|
||||
|
||||
static void test_mmap(int fd, size_t page_size)
|
||||
{
|
||||
char *mem;
|
||||
|
||||
mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
TEST_ASSERT_EQ(mem, MAP_FAILED);
|
||||
}
|
||||
|
||||
static void test_file_size(int fd, size_t page_size, size_t total_size)
|
||||
{
|
||||
struct stat sb;
|
||||
int ret;
|
||||
|
||||
ret = fstat(fd, &sb);
|
||||
TEST_ASSERT(!ret, "fstat should succeed");
|
||||
TEST_ASSERT_EQ(sb.st_size, total_size);
|
||||
TEST_ASSERT_EQ(sb.st_blksize, page_size);
|
||||
}
|
||||
|
||||
static void test_fallocate(int fd, size_t page_size, size_t total_size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);
|
||||
TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
||||
page_size - 1, page_size);
|
||||
TEST_ASSERT(ret, "fallocate with unaligned offset should fail");
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);
|
||||
TEST_ASSERT(ret, "fallocate beginning at total_size should fail");
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);
|
||||
TEST_ASSERT(ret, "fallocate beginning after total_size should fail");
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
||||
total_size, page_size);
|
||||
TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
||||
total_size + page_size, page_size);
|
||||
TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
||||
page_size, page_size - 1);
|
||||
TEST_ASSERT(ret, "fallocate with unaligned size should fail");
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
||||
page_size, page_size);
|
||||
TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");
|
||||
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);
|
||||
TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
|
||||
}
|
||||
|
||||
static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
|
||||
{
|
||||
struct {
|
||||
off_t offset;
|
||||
off_t len;
|
||||
} testcases[] = {
|
||||
{0, 1},
|
||||
{0, page_size - 1},
|
||||
{0, page_size + 1},
|
||||
|
||||
{1, 1},
|
||||
{1, page_size - 1},
|
||||
{1, page_size},
|
||||
{1, page_size + 1},
|
||||
|
||||
{page_size, 1},
|
||||
{page_size, page_size - 1},
|
||||
{page_size, page_size + 1},
|
||||
};
|
||||
int ret, i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(testcases); i++) {
|
||||
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
||||
testcases[i].offset, testcases[i].len);
|
||||
TEST_ASSERT(ret == -1 && errno == EINVAL,
|
||||
"PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail",
|
||||
testcases[i].offset, testcases[i].len);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_create_guest_memfd_invalid(struct kvm_vm *vm)
|
||||
{
|
||||
size_t page_size = getpagesize();
|
||||
uint64_t flag;
|
||||
size_t size;
|
||||
int fd;
|
||||
|
||||
for (size = 1; size < page_size; size++) {
|
||||
fd = __vm_create_guest_memfd(vm, size, 0);
|
||||
TEST_ASSERT(fd == -1 && errno == EINVAL,
|
||||
"guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
|
||||
size);
|
||||
}
|
||||
|
||||
for (flag = 0; flag; flag <<= 1) {
|
||||
fd = __vm_create_guest_memfd(vm, page_size, flag);
|
||||
TEST_ASSERT(fd == -1 && errno == EINVAL,
|
||||
"guest_memfd() with flag '0x%lx' should fail with EINVAL",
|
||||
flag);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
|
||||
{
|
||||
int fd1, fd2, ret;
|
||||
struct stat st1, st2;
|
||||
|
||||
fd1 = __vm_create_guest_memfd(vm, 4096, 0);
|
||||
TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
|
||||
|
||||
ret = fstat(fd1, &st1);
|
||||
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
|
||||
TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size");
|
||||
|
||||
fd2 = __vm_create_guest_memfd(vm, 8192, 0);
|
||||
TEST_ASSERT(fd2 != -1, "memfd creation should succeed");
|
||||
|
||||
ret = fstat(fd2, &st2);
|
||||
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
|
||||
TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size");
|
||||
|
||||
ret = fstat(fd1, &st1);
|
||||
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
|
||||
TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size");
|
||||
TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
size_t page_size;
|
||||
size_t total_size;
|
||||
int fd;
|
||||
struct kvm_vm *vm;
|
||||
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
|
||||
|
||||
page_size = getpagesize();
|
||||
total_size = page_size * 4;
|
||||
|
||||
vm = vm_create_barebones();
|
||||
|
||||
test_create_guest_memfd_invalid(vm);
|
||||
test_create_guest_memfd_multiple(vm);
|
||||
|
||||
fd = vm_create_guest_memfd(vm, total_size, 0);
|
||||
|
||||
test_file_read_write(fd);
|
||||
test_mmap(fd, page_size);
|
||||
test_file_size(fd, page_size, total_size);
|
||||
test_fallocate(fd, page_size, total_size);
|
||||
test_invalid_punch_hole(fd, page_size, total_size);
|
||||
|
||||
close(fd);
|
||||
}
|
@ -44,7 +44,7 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */

struct userspace_mem_region {
struct kvm_userspace_memory_region region;
struct kvm_userspace_memory_region2 region;
struct sparsebit *unused_phy_pages;
int fd;
off_t offset;
@ -188,6 +188,23 @@ enum vm_guest_mode {
NUM_VM_MODES,
};

struct vm_shape {
enum vm_guest_mode mode;
unsigned int type;
};

#define VM_TYPE_DEFAULT 0

#define VM_SHAPE(__mode) \
({ \
struct vm_shape shape = { \
.mode = (__mode), \
.type = VM_TYPE_DEFAULT \
}; \
\
shape; \
})

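An illustrative use of the new shape (not taken from this series' tests): vm_create_shape_with_one_vcpu() and KVM_X86_SW_PROTECTED_VM come from later hunks, and guest_code is a placeholder.

static void example_protected_vm(void *guest_code)
{
	const struct vm_shape shape = {
		.mode = VM_MODE_DEFAULT,
		.type = KVM_X86_SW_PROTECTED_VM,
	};
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* Existing callers keep working unchanged via VM_SHAPE(mode)/VM_SHAPE_DEFAULT. */
	vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
	kvm_vm_free(vm);
}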
#if defined(__aarch64__)
|
||||
|
||||
extern enum vm_guest_mode vm_mode_default;
|
||||
@ -220,6 +237,8 @@ extern enum vm_guest_mode vm_mode_default;
|
||||
|
||||
#endif
|
||||
|
||||
#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
|
||||
|
||||
#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
|
||||
#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
|
||||
|
||||
@ -248,6 +267,13 @@ static inline bool kvm_has_cap(long cap)
|
||||
#define __KVM_SYSCALL_ERROR(_name, _ret) \
|
||||
"%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
|
||||
|
||||
/*
|
||||
* Use the "inner", double-underscore macro when reporting errors from within
|
||||
* other macros so that the name of ioctl() and not its literal numeric value
|
||||
* is printed on error. The "outer" macro is strongly preferred when reporting
|
||||
* errors "directly", i.e. without an additional layer of macros, as it reduces
|
||||
* the probability of passing in the wrong string.
|
||||
*/
|
||||
#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret)
|
||||
#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret)
|
||||
|
||||
@ -260,17 +286,13 @@ static inline bool kvm_has_cap(long cap)
|
||||
#define __kvm_ioctl(kvm_fd, cmd, arg) \
|
||||
kvm_do_ioctl(kvm_fd, cmd, arg)
|
||||
|
||||
|
||||
#define _kvm_ioctl(kvm_fd, cmd, name, arg) \
|
||||
#define kvm_ioctl(kvm_fd, cmd, arg) \
|
||||
({ \
|
||||
int ret = __kvm_ioctl(kvm_fd, cmd, arg); \
|
||||
\
|
||||
TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \
|
||||
TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \
|
||||
})
|
||||
|
||||
#define kvm_ioctl(kvm_fd, cmd, arg) \
|
||||
_kvm_ioctl(kvm_fd, cmd, #cmd, arg)
|
||||
|
||||
static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
|
||||
|
||||
#define __vm_ioctl(vm, cmd, arg) \
|
||||
@ -279,17 +301,42 @@ static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
|
||||
kvm_do_ioctl((vm)->fd, cmd, arg); \
|
||||
})
|
||||
|
||||
#define _vm_ioctl(vm, cmd, name, arg) \
|
||||
/*
|
||||
* Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if
|
||||
* the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM,
|
||||
* probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before
|
||||
* selftests existed and (b) should never outright fail, i.e. is supposed to
|
||||
* return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the
|
||||
* VM and its vCPUs, including KVM_CHECK_EXTENSION.
|
||||
*/
|
||||
#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \
|
||||
do { \
|
||||
int __errno = errno; \
|
||||
\
|
||||
static_assert_is_vm(vm); \
|
||||
\
|
||||
if (cond) \
|
||||
break; \
|
||||
\
|
||||
if (errno == EIO && \
|
||||
__vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \
|
||||
TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \
|
||||
TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \
|
||||
} \
|
||||
errno = __errno; \
|
||||
TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \
|
||||
} while (0)
|
||||
|
||||
#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \
|
||||
__TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm)
|
||||
|
||||
#define vm_ioctl(vm, cmd, arg) \
|
||||
({ \
|
||||
int ret = __vm_ioctl(vm, cmd, arg); \
|
||||
\
|
||||
TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \
|
||||
__TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
|
||||
})
|
||||
|
||||
#define vm_ioctl(vm, cmd, arg) \
|
||||
_vm_ioctl(vm, cmd, #cmd, arg)
|
||||
|
||||
|
||||
static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
|
||||
|
||||
#define __vcpu_ioctl(vcpu, cmd, arg) \
|
||||
@ -298,16 +345,13 @@ static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
|
||||
kvm_do_ioctl((vcpu)->fd, cmd, arg); \
|
||||
})
|
||||
|
||||
#define _vcpu_ioctl(vcpu, cmd, name, arg) \
|
||||
#define vcpu_ioctl(vcpu, cmd, arg) \
|
||||
({ \
|
||||
int ret = __vcpu_ioctl(vcpu, cmd, arg); \
|
||||
\
|
||||
TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \
|
||||
__TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
|
||||
})
|
||||
|
||||
#define vcpu_ioctl(vcpu, cmd, arg) \
|
||||
_vcpu_ioctl(vcpu, cmd, #cmd, arg)
|
||||
|
||||
/*
|
||||
* Looks up and returns the value corresponding to the capability
|
||||
* (KVM_CAP_*) given by cap.
|
||||
@ -316,7 +360,7 @@ static inline int vm_check_cap(struct kvm_vm *vm, long cap)
|
||||
{
|
||||
int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
|
||||
|
||||
TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
|
||||
TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -333,6 +377,54 @@ static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
|
||||
vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
|
||||
}

static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
uint64_t size, uint64_t attributes)
{
struct kvm_memory_attributes attr = {
.attributes = attributes,
.address = gpa,
.size = size,
.flags = 0,
};

/*
* KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows
* need significant enhancements to support multiple attributes.
*/
TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE,
"Update me to support multiple attributes!");

vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
}

static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa,
uint64_t size)
{
vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
}

static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa,
uint64_t size)
{
vm_set_memory_attributes(vm, gpa, size, 0);
}

void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
bool punch_hole);

static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa,
uint64_t size)
{
vm_guest_mem_fallocate(vm, gpa, size, true);
}

static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa,
uint64_t size)
{
vm_guest_mem_fallocate(vm, gpa, size, false);
}

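A minimal sketch of the intended conversion flow built from the helpers above; the gpa/size values come from the caller and error handling is left to the helpers themselves.

static void example_convert_range(struct kvm_vm *vm, uint64_t gpa, uint64_t size)
{
	vm_mem_set_private(vm, gpa, size);      /* guest accesses the range privately */
	vm_guest_mem_allocate(vm, gpa, size);   /* optionally preallocate guest_memfd pages */

	/* ... run the guest ... */

	vm_mem_set_shared(vm, gpa, size);       /* convert back to shared */
	vm_guest_mem_punch_hole(vm, gpa, size); /* free the now-stale private backing */
}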
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
|
||||
const char *vm_guest_mode_string(uint32_t i);
|
||||
|
||||
@ -375,7 +467,7 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm)
|
||||
{
|
||||
int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
|
||||
|
||||
TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd));
|
||||
TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm);
|
||||
return fd;
|
||||
}
|
||||
|
||||
@ -431,14 +523,44 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)

void vm_create_irqchip(struct kvm_vm *vm);

static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
uint64_t flags)
{
struct kvm_create_guest_memfd guest_memfd = {
.size = size,
.flags = flags,
};

return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
}

static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
uint64_t flags)
{
int fd = __vm_create_guest_memfd(vm, size, flags);

TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
return fd;
}

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
uint64_t gpa, uint64_t size, void *hva);
int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
uint64_t gpa, uint64_t size, void *hva);
void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
uint64_t gpa, uint64_t size, void *hva,
uint32_t guest_memfd, uint64_t guest_memfd_offset);
int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
uint64_t gpa, uint64_t size, void *hva,
uint32_t guest_memfd, uint64_t guest_memfd_offset);

void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
uint32_t flags);
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);

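Sketch of how a test might pair the helpers declared above; the slot, GPA and page count are arbitrary example values.

static void example_add_guest_memfd_slot(struct kvm_vm *vm)
{
	uint64_t npages = 512, gpa = 0x10000000;
	int gmem_fd = vm_create_guest_memfd(vm, npages * vm->page_size, 0);

	/* Bind slot 1 to the new guest_memfd at offset 0. */
	vm_mem_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, 1, npages,
		   KVM_MEM_GUEST_MEMFD, gmem_fd, 0);
}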
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
|
||||
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
|
||||
@ -587,7 +709,7 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
|
||||
|
||||
TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd));
|
||||
TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
|
||||
return fd;
|
||||
}
|
||||
|
||||
@ -713,21 +835,33 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
|
||||
* __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
|
||||
* calculate the amount of memory needed for per-vCPU data, e.g. stacks.
|
||||
*/
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode);
|
||||
struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
|
||||
struct kvm_vm *____vm_create(struct vm_shape shape);
|
||||
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
|
||||
uint64_t nr_extra_pages);
|
||||
|
||||
static inline struct kvm_vm *vm_create_barebones(void)
|
||||
{
|
||||
return ____vm_create(VM_MODE_DEFAULT);
|
||||
return ____vm_create(VM_SHAPE_DEFAULT);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static inline struct kvm_vm *vm_create_barebones_protected_vm(void)
|
||||
{
|
||||
const struct vm_shape shape = {
|
||||
.mode = VM_MODE_DEFAULT,
|
||||
.type = KVM_X86_SW_PROTECTED_VM,
|
||||
};
|
||||
|
||||
return ____vm_create(shape);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
|
||||
{
|
||||
return __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0);
|
||||
return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0);
|
||||
}
|
||||
|
||||
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
|
||||
struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
|
||||
uint64_t extra_mem_pages,
|
||||
void *guest_code, struct kvm_vcpu *vcpus[]);
|
||||
|
||||
@ -735,17 +869,27 @@ static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
|
||||
void *guest_code,
|
||||
struct kvm_vcpu *vcpus[])
|
||||
{
|
||||
return __vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, 0,
|
||||
return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0,
|
||||
guest_code, vcpus);
|
||||
}
|
||||
|
||||
|
||||
struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
|
||||
struct kvm_vcpu **vcpu,
|
||||
uint64_t extra_mem_pages,
|
||||
void *guest_code);
|
||||
|
||||
/*
|
||||
* Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages
|
||||
* additional pages of guest memory. Returns the VM and vCPU (via out param).
|
||||
*/
|
||||
struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
|
||||
uint64_t extra_mem_pages,
|
||||
void *guest_code);
|
||||
static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
|
||||
uint64_t extra_mem_pages,
|
||||
void *guest_code)
|
||||
{
|
||||
return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu,
|
||||
extra_mem_pages, guest_code);
|
||||
}
|
||||
|
||||
static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
|
||||
void *guest_code)
|
||||
@ -753,6 +897,13 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
|
||||
return __vm_create_with_one_vcpu(vcpu, 0, guest_code);
|
||||
}
|
||||
|
||||
static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape,
|
||||
struct kvm_vcpu **vcpu,
|
||||
void *guest_code)
|
||||
{
|
||||
return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code);
|
||||
}
|
||||
|
||||
struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
|
||||
|
||||
void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
|
||||
@ -776,10 +927,6 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
|
||||
return n;
|
||||
}
|
||||
|
||||
struct kvm_userspace_memory_region *
|
||||
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
|
||||
uint64_t end);
|
||||
|
||||
#define sync_global_to_guest(vm, g) ({ \
|
||||
typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
|
||||
memcpy(_p, &(g), sizeof(g)); \
|
||||
|
@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
|
||||
return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
|
||||
}
|
||||
|
||||
static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
|
||||
{
|
||||
return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM;
|
||||
}
|
||||
|
||||
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
|
||||
static inline uint64_t align_up(uint64_t x, uint64_t size)
|
||||
{
|
||||
@ -186,7 +191,7 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str)
|
||||
}
|
||||
|
||||
int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args);
|
||||
int guest_snprintf(char *buf, int n, const char *fmt, ...);
|
||||
__printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...);
|
||||
|
||||
char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
|
||||
|
||||
|
@ -34,9 +34,10 @@ void ucall_arch_do_ucall(vm_vaddr_t uc);
|
||||
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
|
||||
|
||||
void ucall(uint64_t cmd, int nargs, ...);
|
||||
void ucall_fmt(uint64_t cmd, const char *fmt, ...);
|
||||
void ucall_assert(uint64_t cmd, const char *exp, const char *file,
|
||||
unsigned int line, const char *fmt, ...);
|
||||
__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...);
|
||||
__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp,
|
||||
const char *file, unsigned int line,
|
||||
const char *fmt, ...);
|
||||
uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
|
||||
void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
|
||||
int ucall_nr_pages_required(uint64_t page_size);
|
||||
@ -52,6 +53,17 @@ int ucall_nr_pages_required(uint64_t page_size);
|
||||
#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
|
||||
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
|
||||
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
|
||||
#define GUEST_SYNC1(arg0) ucall(UCALL_SYNC, 1, arg0)
|
||||
#define GUEST_SYNC2(arg0, arg1) ucall(UCALL_SYNC, 2, arg0, arg1)
|
||||
#define GUEST_SYNC3(arg0, arg1, arg2) \
|
||||
ucall(UCALL_SYNC, 3, arg0, arg1, arg2)
|
||||
#define GUEST_SYNC4(arg0, arg1, arg2, arg3) \
|
||||
ucall(UCALL_SYNC, 4, arg0, arg1, arg2, arg3)
|
||||
#define GUEST_SYNC5(arg0, arg1, arg2, arg3, arg4) \
|
||||
ucall(UCALL_SYNC, 5, arg0, arg1, arg2, arg3, arg4)
|
||||
#define GUEST_SYNC6(arg0, arg1, arg2, arg3, arg4, arg5) \
|
||||
ucall(UCALL_SYNC, 6, arg0, arg1, arg2, arg3, arg4, arg5)
|
||||
|
||||
#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args)
|
||||
#define GUEST_DONE() ucall(UCALL_DONE, 0)
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <asm/msr-index.h>
|
||||
#include <asm/prctl.h>
|
||||
|
||||
#include <linux/kvm_para.h>
|
||||
#include <linux/stringify.h>
|
||||
|
||||
#include "../kvm_util.h"
|
||||
@ -1194,6 +1195,20 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
|
||||
uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
|
||||
void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
|
||||
|
||||
static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
|
||||
uint64_t size, uint64_t flags)
|
||||
{
|
||||
return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
|
||||
}
|
||||
|
||||
static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
|
||||
uint64_t flags)
|
||||
{
|
||||
uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
|
||||
|
||||
GUEST_ASSERT(!ret);
|
||||
}
|
||||
|
||||
void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
|
||||
|
||||
#define vm_xsave_require_permission(xfeature) \
|
||||
|
@ -254,7 +254,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
|
||||
|
||||
/* Create a VM with enough guest pages */
|
||||
guest_num_pages = test_mem_size / guest_page_size;
|
||||
vm = __vm_create_with_vcpus(mode, nr_vcpus, guest_num_pages,
|
||||
vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages,
|
||||
guest_code, test_args.vcpus);
|
||||
|
||||
/* Align down GPA of the testing memslot */
|
||||
|
@ -209,7 +209,7 @@ __weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
|
||||
(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
|
||||
}
|
||||
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode)
|
||||
struct kvm_vm *____vm_create(struct vm_shape shape)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
|
||||
@ -221,13 +221,13 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode)
|
||||
vm->regions.hva_tree = RB_ROOT;
|
||||
hash_init(vm->regions.slot_hash);
|
||||
|
||||
vm->mode = mode;
|
||||
vm->type = 0;
|
||||
vm->mode = shape.mode;
|
||||
vm->type = shape.type;
|
||||
|
||||
vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
|
||||
vm->va_bits = vm_guest_mode_params[mode].va_bits;
|
||||
vm->page_size = vm_guest_mode_params[mode].page_size;
|
||||
vm->page_shift = vm_guest_mode_params[mode].page_shift;
|
||||
vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
|
||||
vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
|
||||
vm->page_size = vm_guest_mode_params[vm->mode].page_size;
|
||||
vm->page_shift = vm_guest_mode_params[vm->mode].page_shift;
|
||||
|
||||
/* Setup mode specific traits. */
|
||||
switch (vm->mode) {
|
||||
@ -265,7 +265,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode)
|
||||
/*
|
||||
* Ignore KVM support for 5-level paging (vm->va_bits == 57),
|
||||
* it doesn't take effect unless a CR4.LA57 is set, which it
|
||||
* isn't for this VM_MODE.
|
||||
* isn't for this mode (48-bit virtual address space).
|
||||
*/
|
||||
TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
|
||||
"Linear address width (%d bits) not supported",
|
||||
@ -285,10 +285,11 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode)
|
||||
vm->pgtable_levels = 5;
|
||||
break;
|
||||
default:
|
||||
TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
|
||||
TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
|
||||
}
|
||||
|
||||
#ifdef __aarch64__
|
||||
TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types");
|
||||
if (vm->pa_bits != 40)
|
||||
vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
|
||||
#endif
|
||||
@ -347,19 +348,19 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
|
||||
return vm_adjust_num_guest_pages(mode, nr_pages);
|
||||
}
|
||||
|
||||
struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
|
||||
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
|
||||
uint64_t nr_extra_pages)
|
||||
{
|
||||
uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
|
||||
uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus,
|
||||
nr_extra_pages);
|
||||
struct userspace_mem_region *slot0;
|
||||
struct kvm_vm *vm;
|
||||
int i;
|
||||
|
||||
pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
|
||||
vm_guest_mode_string(mode), nr_pages);
|
||||
pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
|
||||
vm_guest_mode_string(shape.mode), shape.type, nr_pages);
|
||||
|
||||
vm = ____vm_create(mode);
|
||||
vm = ____vm_create(shape);
|
||||
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
|
||||
for (i = 0; i < NR_MEM_REGIONS; i++)
|
||||
@ -400,7 +401,7 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
|
||||
* extra_mem_pages is only used to calculate the maximum page table size,
|
||||
* no real memory allocation for non-slot0 memory in this function.
|
||||
*/
|
||||
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
|
||||
struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
|
||||
uint64_t extra_mem_pages,
|
||||
void *guest_code, struct kvm_vcpu *vcpus[])
|
||||
{
|
||||
@ -409,7 +410,7 @@ struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus
|
||||
|
||||
TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");
|
||||
|
||||
vm = __vm_create(mode, nr_vcpus, extra_mem_pages);
|
||||
vm = __vm_create(shape, nr_vcpus, extra_mem_pages);
|
||||
|
||||
for (i = 0; i < nr_vcpus; ++i)
|
||||
vcpus[i] = vm_vcpu_add(vm, i, guest_code);
|
||||
@ -417,15 +418,15 @@ struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus
|
||||
return vm;
|
||||
}
|
||||
|
||||
struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
|
||||
uint64_t extra_mem_pages,
|
||||
void *guest_code)
|
||||
struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
|
||||
struct kvm_vcpu **vcpu,
|
||||
uint64_t extra_mem_pages,
|
||||
void *guest_code)
|
||||
{
|
||||
struct kvm_vcpu *vcpus[1];
|
||||
struct kvm_vm *vm;
|
||||
|
||||
vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
|
||||
guest_code, vcpus);
|
||||
vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus);
|
||||
|
||||
*vcpu = vcpus[0];
|
||||
return vm;
|
||||
@ -453,8 +454,9 @@ void kvm_vm_restart(struct kvm_vm *vmp)
|
||||
vm_create_irqchip(vmp);
|
||||
|
||||
hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
|
||||
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region);
|
||||
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
|
||||
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, ®ion->region);
|
||||
|
||||
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
|
||||
" rc: %i errno: %i\n"
|
||||
" slot: %u flags: 0x%x\n"
|
||||
" guest_phys_addr: 0x%llx size: 0x%llx",
|
||||
@ -590,35 +592,6 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM Userspace Memory Region Find
|
||||
*
|
||||
* Input Args:
|
||||
* vm - Virtual Machine
|
||||
* start - Starting VM physical address
|
||||
* end - Ending VM physical address, inclusive.
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return:
|
||||
* Pointer to overlapping region, NULL if no such region.
|
||||
*
|
||||
* Public interface to userspace_mem_region_find. Allows tests to look up
|
||||
* the memslot datastructure for a given range of guest physical memory.
|
||||
*/
|
||||
struct kvm_userspace_memory_region *
|
||||
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
|
||||
uint64_t end)
|
||||
{
|
||||
struct userspace_mem_region *region;
|
||||
|
||||
region = userspace_mem_region_find(vm, start, end);
|
||||
if (!region)
|
||||
return NULL;
|
||||
|
||||
return ®ion->region;
|
||||
}
|
||||
|
||||
__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
||||
@ -686,7 +659,7 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
|
||||
}
|
||||
|
||||
region->region.memory_size = 0;
|
||||
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region);
|
||||
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region);
|
||||
|
||||
sparsebit_free(®ion->unused_phy_pages);
|
||||
ret = munmap(region->mmap_start, region->mmap_size);
|
||||
@ -697,6 +670,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
|
||||
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
|
||||
close(region->fd);
|
||||
}
|
||||
if (region->region.guest_memfd >= 0)
|
||||
close(region->region.guest_memfd);
|
||||
|
||||
free(region);
|
||||
}
|
||||
@ -898,36 +873,44 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
|
||||
errno, strerror(errno));
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Userspace Memory Region Add
|
||||
*
|
||||
* Input Args:
|
||||
* vm - Virtual Machine
|
||||
* src_type - Storage source for this region.
|
||||
* NULL to use anonymous memory.
|
||||
* guest_paddr - Starting guest physical address
|
||||
* slot - KVM region slot
|
||||
* npages - Number of physical pages
|
||||
* flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return: None
|
||||
*
|
||||
* Allocates a memory area of the number of pages specified by npages
|
||||
* and maps it to the VM specified by vm, at a starting physical address
|
||||
* given by guest_paddr. The region is created with a KVM region slot
|
||||
* given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
|
||||
* region is created with the flags given by flags.
|
||||
*/
|
||||
void vm_userspace_mem_region_add(struct kvm_vm *vm,
|
||||
enum vm_mem_backing_src_type src_type,
|
||||
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
|
||||
uint32_t flags)
|
||||
int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
|
||||
uint64_t gpa, uint64_t size, void *hva,
|
||||
uint32_t guest_memfd, uint64_t guest_memfd_offset)
|
||||
{
|
||||
struct kvm_userspace_memory_region2 region = {
|
||||
.slot = slot,
|
||||
.flags = flags,
|
||||
.guest_phys_addr = gpa,
|
||||
.memory_size = size,
|
||||
.userspace_addr = (uintptr_t)hva,
|
||||
.guest_memfd = guest_memfd,
|
||||
.guest_memfd_offset = guest_memfd_offset,
|
||||
};
|
||||
|
||||
return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion);
|
||||
}
|
||||
|
||||
void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
|
||||
uint64_t gpa, uint64_t size, void *hva,
|
||||
uint32_t guest_memfd, uint64_t guest_memfd_offset)
|
||||
{
|
||||
int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva,
|
||||
guest_memfd, guest_memfd_offset);
|
||||
|
||||
TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)",
|
||||
errno, strerror(errno));
|
||||
}
|
||||
|
||||
|
||||
/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
		uint64_t guest_paddr, uint32_t slot, uint64_t npages,
		uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t mem_size = npages * vm->page_size;
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
@ -980,7 +963,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;
	region->mmap_size = mem_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
@ -1027,14 +1010,38 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
		ret = madvise(region->host_mem, mem_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    region->host_mem, mem_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->backing_src_type = src_type;

	if (flags & KVM_MEM_GUEST_MEMFD) {
		if (guest_memfd < 0) {
			uint32_t guest_memfd_flags = 0;
			TEST_ASSERT(!guest_memfd_offset,
				    "Offset must be zero when creating new guest_memfd");
			guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
		} else {
			/*
			 * Install a unique fd for each memslot so that the fd
			 * can be closed when the region is deleted without
			 * needing to track if the fd is owned by the framework
			 * or by the caller.
			 */
			guest_memfd = dup(guest_memfd);
			TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
		}

		region->region.guest_memfd = guest_memfd;
		region->region.guest_memfd_offset = guest_memfd_offset;
	} else {
		region->region.guest_memfd = -1;
	}

	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
			  guest_paddr >> vm->page_shift, npages);
@ -1043,13 +1050,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
		    "  rc: %i errno: %i\n"
		    "  slot: %u flags: 0x%x\n"
		    "  guest_phys_addr: 0x%lx size: 0x%lx",
		    "  guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n",
		    ret, errno, slot, flags,
		    guest_paddr, (uint64_t) region->region.memory_size);
		    guest_paddr, (uint64_t) region->region.memory_size,
		    region->region.guest_memfd);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
@ -1070,6 +1078,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
	}
}

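A caller that wants several memslots backed by a single guest_memfd can create the fd once and pass it in with per-slot offsets, roughly as the memory-conversion test later in this series does. This is a sketch; the local names (nr_memslots, base_slot, gpa, i) are illustrative::

	size_t slot_size = SZ_2M;
	int memfd = vm_create_guest_memfd(vm, slot_size * nr_memslots, 0);

	/* Carve the guest_memfd into one contiguous range per memslot. */
	for (i = 0; i < nr_memslots; i++)
		vm_mem_add(vm, VM_MEM_SRC_ANONYMOUS, gpa + slot_size * i,
			   base_slot + i, slot_size / vm->page_size,
			   KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
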
void vm_userspace_mem_region_add(struct kvm_vm *vm,
				 enum vm_mem_backing_src_type src_type,
				 uint64_t guest_paddr, uint32_t slot,
				 uint64_t npages, uint32_t flags)
{
	vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
}

/*
 * Memslot to region
 *
@ -1126,9 +1142,9 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
		    "  rc: %i errno: %i slot: %u flags: 0x%x",
		    ret, errno, slot, flags);
}
@ -1156,9 +1172,9 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}
@ -1181,6 +1197,34 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}

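These wrappers keep their existing call signatures, so existing tests can continue to toggle flags or move and delete slots unchanged; a minimal sketch with a placeholder slot number and GPA::

	vm_mem_region_set_flags(vm, 10, KVM_MEM_LOG_DIRTY_PAGES);
	vm_mem_region_move(vm, 10, 0x20000000);
	vm_mem_region_delete(vm, 10);
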
void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
			    bool punch_hole)
{
	const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0);
	struct userspace_mem_region *region;
	uint64_t end = base + size;
	uint64_t gpa, len;
	off_t fd_offset;
	int ret;

	for (gpa = base; gpa < end; gpa += len) {
		uint64_t offset;

		region = userspace_mem_region_find(vm, gpa, gpa);
		TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
			    "Private memory region not found for GPA 0x%lx", gpa);

		offset = gpa - region->region.guest_phys_addr;
		fd_offset = region->region.guest_memfd_offset + offset;
		len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);

		ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
		TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx\n",
			    punch_hole ? "punch hole" : "allocate", gpa, len,
			    region->region.guest_memfd, mode, fd_offset);
	}
}

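In the conversion tests later in this series, this helper is paired with vm_set_memory_attributes() when the guest asks to flip a range between shared and private. A condensed sketch of that host-side handling (to_private is a local illustration variable)::

	/* Punch a hole when converting to shared, allocate when going private. */
	vm_guest_mem_fallocate(vm, gpa, size, !to_private);
	vm_set_memory_attributes(vm, gpa, size,
				 to_private ? KVM_MEMORY_ATTRIBUTE_PRIVATE : 0);
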
/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
@ -1227,7 +1271,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));
	TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		    "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",

@ -168,7 +168,8 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
	 * The memory is also added to memslot 0, but that's a benign side
	 * effect as KVM allows aliasing HVAs in memslots.
	 */
	vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages,
	vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus,
				    slot0_pages + guest_num_pages,
				    memstress_guest_code, vcpus);

	args->vm = vm;

@ -94,11 +94,6 @@ static void guest_dirty_test_data(void)
|
||||
);
|
||||
}
|
||||
|
||||
static struct kvm_vm *create_vm(void)
|
||||
{
|
||||
return ____vm_create(VM_MODE_DEFAULT);
|
||||
}
|
||||
|
||||
static void create_main_memslot(struct kvm_vm *vm)
|
||||
{
|
||||
int i;
|
||||
@ -157,7 +152,7 @@ static struct kvm_vm *create_vm_two_memslots(void)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
|
||||
vm = create_vm();
|
||||
vm = vm_create_barebones();
|
||||
|
||||
create_memslots(vm);
|
||||
|
||||
@ -276,7 +271,7 @@ static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void test_migration_mode(void)
|
||||
{
|
||||
struct kvm_vm *vm = create_vm();
|
||||
struct kvm_vm *vm = vm_create_barebones();
|
||||
struct kvm_vcpu *vcpu;
|
||||
u64 orig_psw;
|
||||
int rc;
|
||||
@ -670,7 +665,7 @@ struct testdef {
|
||||
*/
|
||||
static int machine_has_cmma(void)
|
||||
{
|
||||
struct kvm_vm *vm = create_vm();
|
||||
struct kvm_vm *vm = vm_create_barebones();
|
||||
int r;
|
||||
|
||||
r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
|
||||
|
@ -157,17 +157,17 @@ static void guest_code_move_memory_region(void)
|
||||
*/
|
||||
val = guest_spin_on_val(0);
|
||||
__GUEST_ASSERT(val == 1 || val == MMIO_VAL,
|
||||
"Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
|
||||
"Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val);
|
||||
|
||||
/* Spin until the misaligning memory region move completes. */
|
||||
val = guest_spin_on_val(MMIO_VAL);
|
||||
__GUEST_ASSERT(val == 1 || val == 0,
|
||||
"Expected '0' or '1' (no MMIO), got '%llx'", val);
|
||||
"Expected '0' or '1' (no MMIO), got '%lx'", val);
|
||||
|
||||
/* Spin until the memory region starts to get re-aligned. */
|
||||
val = guest_spin_on_val(0);
|
||||
__GUEST_ASSERT(val == 1 || val == MMIO_VAL,
|
||||
"Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
|
||||
"Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val);
|
||||
|
||||
/* Spin until the re-aligning memory region move completes. */
|
||||
val = guest_spin_on_val(MMIO_VAL);
|
||||
@ -326,6 +326,55 @@ static void test_zero_memory_regions(void)
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static void test_invalid_memory_region_flags(void)
{
	uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES;
	const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD;
	struct kvm_vm *vm;
	int r, i;

#if defined __aarch64__ || defined __x86_64__
	supported_flags |= KVM_MEM_READONLY;
#endif

#ifdef __x86_64__
	if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
		vm = vm_create_barebones_protected_vm();
	else
#endif
		vm = vm_create_barebones();

	if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) & KVM_MEMORY_ATTRIBUTE_PRIVATE)
		supported_flags |= KVM_MEM_GUEST_MEMFD;

	for (i = 0; i < 32; i++) {
		if ((supported_flags & BIT(i)) && !(v2_only_flags & BIT(i)))
			continue;

		r = __vm_set_user_memory_region(vm, 0, BIT(i),
						0, MEM_REGION_SIZE, NULL);

		TEST_ASSERT(r && errno == EINVAL,
			    "KVM_SET_USER_MEMORY_REGION should have failed on v2 only flag 0x%lx", BIT(i));

		if (supported_flags & BIT(i))
			continue;

		r = __vm_set_user_memory_region2(vm, 0, BIT(i),
						 0, MEM_REGION_SIZE, NULL, 0, 0);
		TEST_ASSERT(r && errno == EINVAL,
			    "KVM_SET_USER_MEMORY_REGION2 should have failed on unsupported flag 0x%lx", BIT(i));
	}

	if (supported_flags & KVM_MEM_GUEST_MEMFD) {
		r = __vm_set_user_memory_region2(vm, 0,
						 KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD,
						 0, MEM_REGION_SIZE, NULL, 0, 0);
		TEST_ASSERT(r && errno == EINVAL,
			    "KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported");
	}
}

/*
 * Test that memory slots can be added up to KVM_CAP_NR_MEMSLOTS, and that
 * any attempt to add further slots fails.
@ -385,13 +434,105 @@ static void test_add_max_memory_regions(void)

	kvm_vm_free(vm);
}


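The limit that bounds this test can be queried up front; a minimal sketch (the local variable name is illustrative)::

	/* Maximum number of user memslots the host KVM supports per VM. */
	uint32_t max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
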
#ifdef __x86_64__
static void test_invalid_guest_memfd(struct kvm_vm *vm, int memfd,
				     size_t offset, const char *msg)
{
	int r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
					     MEM_REGION_GPA, MEM_REGION_SIZE,
					     0, memfd, offset);
	TEST_ASSERT(r == -1 && errno == EINVAL, "%s", msg);
}

static void test_add_private_memory_region(void)
{
	struct kvm_vm *vm, *vm2;
	int memfd, i;

	pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n");

	vm = vm_create_barebones_protected_vm();

	test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail");
	test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail");

	memfd = kvm_memfd_alloc(MEM_REGION_SIZE, false);
	test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail");
	close(memfd);

	vm2 = vm_create_barebones_protected_vm();
	memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0);
	test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail");

	vm_set_user_memory_region2(vm2, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
				   MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0);
	close(memfd);
	kvm_vm_free(vm2);

	memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0);
	for (i = 1; i < PAGE_SIZE; i++)
		test_invalid_guest_memfd(vm, memfd, i, "Unaligned offset should fail");

	vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
				   MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0);
	close(memfd);

	kvm_vm_free(vm);
}

static void test_add_overlapping_private_memory_regions(void)
{
	struct kvm_vm *vm;
	int memfd;
	int r;

	pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n");

	vm = vm_create_barebones_protected_vm();

	memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0);

	vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
				   MEM_REGION_GPA, MEM_REGION_SIZE * 2, 0, memfd, 0);

	vm_set_user_memory_region2(vm, MEM_REGION_SLOT + 1, KVM_MEM_GUEST_MEMFD,
				   MEM_REGION_GPA * 2, MEM_REGION_SIZE * 2,
				   0, memfd, MEM_REGION_SIZE * 2);

	/*
	 * Delete the first memslot, and then attempt to recreate it except
	 * with a "bad" offset that results in overlap in the guest_memfd().
	 */
	vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
				   MEM_REGION_GPA, 0, NULL, -1, 0);

	/* Overlap the front half of the other slot. */
	r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
					 MEM_REGION_GPA * 2 - MEM_REGION_SIZE,
					 MEM_REGION_SIZE * 2,
					 0, memfd, 0);
	TEST_ASSERT(r == -1 && errno == EEXIST, "%s",
		    "Overlapping guest_memfd() bindings should fail with EEXIST");

	/* And now the back half of the other slot. */
	r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
					 MEM_REGION_GPA * 2 + MEM_REGION_SIZE,
					 MEM_REGION_SIZE * 2,
					 0, memfd, 0);
	TEST_ASSERT(r == -1 && errno == EEXIST, "%s",
		    "Overlapping guest_memfd() bindings should fail with EEXIST");

	close(memfd);
	kvm_vm_free(vm);
}
#endif

int main(int argc, char *argv[])
{
#ifdef __x86_64__
	int i, loops;
#endif

#ifdef __x86_64__
	/*
	 * FIXME: the zero-memslot test fails on aarch64 and s390x because
	 * KVM_RUN fails with ENOEXEC or EFAULT.
@ -399,9 +540,19 @@ int main(int argc, char *argv[])
	test_zero_memory_regions();
#endif

	test_invalid_memory_region_flags();

	test_add_max_memory_regions();

#ifdef __x86_64__
	if (kvm_has_cap(KVM_CAP_GUEST_MEMFD) &&
	    (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))) {
		test_add_private_memory_region();
		test_add_overlapping_private_memory_regions();
	} else {
		pr_info("Skipping tests for KVM_MEM_GUEST_MEMFD memory regions\n");
	}

	if (argc > 1)
		loops = atoi_positive("Number of iterations", argv[1]);
	else
@ -55,18 +55,18 @@ static void guest_msr(struct msr_data *msr)
|
||||
if (msr->fault_expected)
|
||||
__GUEST_ASSERT(vector == GP_VECTOR,
|
||||
"Expected #GP on %sMSR(0x%x), got vector '0x%x'",
|
||||
msr->idx, msr->write ? "WR" : "RD", vector);
|
||||
msr->write ? "WR" : "RD", msr->idx, vector);
|
||||
else
|
||||
__GUEST_ASSERT(!vector,
|
||||
"Expected success on %sMSR(0x%x), got vector '0x%x'",
|
||||
msr->idx, msr->write ? "WR" : "RD", vector);
|
||||
msr->write ? "WR" : "RD", msr->idx, vector);
|
||||
|
||||
if (vector || is_write_only_msr(msr->idx))
|
||||
goto done;
|
||||
|
||||
if (msr->write)
|
||||
__GUEST_ASSERT(!vector,
|
||||
"WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'",
|
||||
"WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
|
||||
msr->idx, msr->write_val, msr_val);
|
||||
|
||||
/* Invariant TSC bit appears when TSC invariant control MSR is written to */
|
||||
@ -102,11 +102,11 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
|
||||
vector = __hyperv_hypercall(hcall->control, input, output, &res);
|
||||
if (hcall->ud_expected) {
|
||||
__GUEST_ASSERT(vector == UD_VECTOR,
|
||||
"Expected #UD for control '%u', got vector '0x%x'",
|
||||
"Expected #UD for control '%lu', got vector '0x%x'",
|
||||
hcall->control, vector);
|
||||
} else {
|
||||
__GUEST_ASSERT(!vector,
|
||||
"Expected no exception for control '%u', got vector '0x%x'",
|
||||
"Expected no exception for control '%lu', got vector '0x%x'",
|
||||
hcall->control, vector);
|
||||
GUEST_ASSERT_EQ(res, hcall->expect);
|
||||
}
|
||||
|
@ -1,121 +0,0 @@
|
||||
/*
|
||||
* mmio_warning_test
|
||||
*
|
||||
* Copyright (C) 2019, Google LLC.
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2.
|
||||
*
|
||||
* Test that we don't get a kernel warning when we call KVM_RUN after a
|
||||
* triple fault occurs. To get the triple fault to occur we call KVM_RUN
|
||||
* on a VCPU that hasn't been properly setup.
|
||||
*
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <fcntl.h>
|
||||
#include <kvm_util.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <processor.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <test_util.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define NTHREAD 4
|
||||
#define NPROCESS 5
|
||||
|
||||
struct thread_context {
|
||||
int kvmcpu;
|
||||
struct kvm_run *run;
|
||||
};
|
||||
|
||||
void *thr(void *arg)
|
||||
{
|
||||
struct thread_context *tc = (struct thread_context *)arg;
|
||||
int res;
|
||||
int kvmcpu = tc->kvmcpu;
|
||||
struct kvm_run *run = tc->run;
|
||||
|
||||
res = ioctl(kvmcpu, KVM_RUN, 0);
|
||||
pr_info("ret1=%d exit_reason=%d suberror=%d\n",
|
||||
res, run->exit_reason, run->internal.suberror);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void test(void)
|
||||
{
|
||||
int i, kvm, kvmvm, kvmcpu;
|
||||
pthread_t th[NTHREAD];
|
||||
struct kvm_run *run;
|
||||
struct thread_context tc;
|
||||
|
||||
kvm = open("/dev/kvm", O_RDWR);
|
||||
TEST_ASSERT(kvm != -1, "failed to open /dev/kvm");
|
||||
kvmvm = __kvm_ioctl(kvm, KVM_CREATE_VM, NULL);
|
||||
TEST_ASSERT(kvmvm > 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, kvmvm));
|
||||
kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0);
|
||||
TEST_ASSERT(kvmcpu != -1, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, kvmcpu));
|
||||
run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED,
|
||||
kvmcpu, 0);
|
||||
tc.kvmcpu = kvmcpu;
|
||||
tc.run = run;
|
||||
srand(getpid());
|
||||
for (i = 0; i < NTHREAD; i++) {
|
||||
pthread_create(&th[i], NULL, thr, (void *)(uintptr_t)&tc);
|
||||
usleep(rand() % 10000);
|
||||
}
|
||||
for (i = 0; i < NTHREAD; i++)
|
||||
pthread_join(th[i], NULL);
|
||||
}
|
||||
|
||||
int get_warnings_count(void)
|
||||
{
|
||||
int warnings;
|
||||
FILE *f;
|
||||
|
||||
f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
|
||||
if (fscanf(f, "%d", &warnings) < 1)
|
||||
warnings = 0;
|
||||
pclose(f);
|
||||
|
||||
return warnings;
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int warnings_before, warnings_after;
|
||||
|
||||
TEST_REQUIRE(host_cpu_is_intel);
|
||||
|
||||
TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
|
||||
|
||||
warnings_before = get_warnings_count();
|
||||
|
||||
for (int i = 0; i < NPROCESS; ++i) {
|
||||
int status;
|
||||
int pid = fork();
|
||||
|
||||
if (pid < 0)
|
||||
exit(1);
|
||||
if (pid == 0) {
|
||||
test();
|
||||
exit(0);
|
||||
}
|
||||
while (waitpid(pid, &status, __WALL) != pid)
|
||||
;
|
||||
}
|
||||
|
||||
warnings_after = get_warnings_count();
|
||||
TEST_ASSERT(warnings_before == warnings_after,
|
||||
"Warnings found in kernel. Run 'dmesg' to inspect them.");
|
||||
|
||||
return 0;
|
||||
}
|
@ -27,10 +27,12 @@ do { \
|
||||
\
|
||||
if (fault_wanted) \
|
||||
__GUEST_ASSERT((vector) == UD_VECTOR, \
|
||||
"Expected #UD on " insn " for testcase '0x%x', got '0x%x'", vector); \
|
||||
"Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
|
||||
testcase, vector); \
|
||||
else \
|
||||
__GUEST_ASSERT(!(vector), \
|
||||
"Expected success on " insn " for testcase '0x%x', got '0x%x'", vector); \
|
||||
"Expected success on " insn " for testcase '0x%x', got '0x%x'", \
|
||||
testcase, vector); \
|
||||
} while (0)
|
||||
|
||||
static void guest_monitor_wait(int testcase)
|
||||
|
@ -0,0 +1,482 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2022, Google LLC.
|
||||
*/
|
||||
#define _GNU_SOURCE /* for program_invocation_short_name */
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kvm_para.h>
|
||||
#include <linux/memfd.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include <test_util.h>
|
||||
#include <kvm_util.h>
|
||||
#include <processor.h>
|
||||
|
||||
#define BASE_DATA_SLOT 10
|
||||
#define BASE_DATA_GPA ((uint64_t)(1ull << 32))
|
||||
#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE))
|
||||
|
||||
/* Horrific macro so that the line info is captured accurately :-( */
|
||||
#define memcmp_g(gpa, pattern, size) \
|
||||
do { \
|
||||
uint8_t *mem = (uint8_t *)gpa; \
|
||||
size_t i; \
|
||||
\
|
||||
for (i = 0; i < size; i++) \
|
||||
__GUEST_ASSERT(mem[i] == pattern, \
|
||||
"Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \
|
||||
pattern, i, gpa + i, mem[i]); \
|
||||
} while (0)
|
||||
|
||||
static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
TEST_ASSERT(mem[i] == pattern,
|
||||
"Host expected 0x%x at gpa 0x%lx, got 0x%x",
|
||||
pattern, gpa + i, mem[i]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Run memory conversion tests with explicit conversion:
|
||||
* Execute KVM hypercall to map/unmap gpa range which will cause userspace exit
|
||||
* to back/unback private memory. Subsequent accesses by guest to the gpa range
|
||||
* will not cause exit to userspace.
|
||||
*
|
||||
* Test memory conversion scenarios with following steps:
|
||||
* 1) Access private memory using private access and verify that memory contents
|
||||
* are not visible to userspace.
|
||||
* 2) Convert memory to shared using explicit conversions and ensure that
|
||||
* userspace is able to access the shared regions.
|
||||
* 3) Convert memory back to private using explicit conversions and ensure that
|
||||
* userspace is again not able to access converted private regions.
|
||||
*/
|
||||
|
||||
#define GUEST_STAGE(o, s) { .offset = o, .size = s }
|
||||
|
||||
enum ucall_syncs {
|
||||
SYNC_SHARED,
|
||||
SYNC_PRIVATE,
|
||||
};
|
||||
|
||||
static void guest_sync_shared(uint64_t gpa, uint64_t size,
|
||||
uint8_t current_pattern, uint8_t new_pattern)
|
||||
{
|
||||
GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
|
||||
}
|
||||
|
||||
static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
|
||||
{
|
||||
GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
|
||||
}
|
||||
|
||||
/* Arbitrary values, KVM doesn't care about the attribute flags. */
|
||||
#define MAP_GPA_SET_ATTRIBUTES BIT(0)
|
||||
#define MAP_GPA_SHARED BIT(1)
|
||||
#define MAP_GPA_DO_FALLOCATE BIT(2)
|
||||
|
||||
static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
|
||||
bool do_fallocate)
|
||||
{
|
||||
uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
|
||||
|
||||
if (map_shared)
|
||||
flags |= MAP_GPA_SHARED;
|
||||
if (do_fallocate)
|
||||
flags |= MAP_GPA_DO_FALLOCATE;
|
||||
kvm_hypercall_map_gpa_range(gpa, size, flags);
|
||||
}
|
||||
|
||||
static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
|
||||
{
|
||||
guest_map_mem(gpa, size, true, do_fallocate);
|
||||
}
|
||||
|
||||
static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
|
||||
{
|
||||
guest_map_mem(gpa, size, false, do_fallocate);
|
||||
}
|
||||
|
||||
struct {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
} static const test_ranges[] = {
|
||||
GUEST_STAGE(0, PAGE_SIZE),
|
||||
GUEST_STAGE(0, SZ_2M),
|
||||
GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
|
||||
GUEST_STAGE(PAGE_SIZE, SZ_2M),
|
||||
GUEST_STAGE(SZ_2M, PAGE_SIZE),
|
||||
};
|
||||
|
||||
static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
|
||||
{
|
||||
const uint8_t def_p = 0xaa;
|
||||
const uint8_t init_p = 0xcc;
|
||||
uint64_t j;
|
||||
int i;
|
||||
|
||||
/* Memory should be shared by default. */
|
||||
memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
|
||||
memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
|
||||
guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
|
||||
|
||||
memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
|
||||
uint64_t gpa = base_gpa + test_ranges[i].offset;
|
||||
uint64_t size = test_ranges[i].size;
|
||||
uint8_t p1 = 0x11;
|
||||
uint8_t p2 = 0x22;
|
||||
uint8_t p3 = 0x33;
|
||||
uint8_t p4 = 0x44;
|
||||
|
||||
/*
|
||||
* Set the test region to pattern one to differentiate it from
|
||||
* the data range as a whole (contains the initial pattern).
|
||||
*/
|
||||
memset((void *)gpa, p1, size);
|
||||
|
||||
/*
|
||||
* Convert to private, set and verify the private data, and
|
||||
* then verify that the rest of the data (map shared) still
|
||||
* holds the initial pattern, and that the host always sees the
|
||||
* shared memory (initial pattern). Unlike shared memory,
|
||||
* punching a hole in private memory is destructive, i.e.
|
||||
* previous values aren't guaranteed to be preserved.
|
||||
*/
|
||||
guest_map_private(gpa, size, do_fallocate);
|
||||
|
||||
if (size > PAGE_SIZE) {
|
||||
memset((void *)gpa, p2, PAGE_SIZE);
|
||||
goto skip;
|
||||
}
|
||||
|
||||
memset((void *)gpa, p2, size);
|
||||
guest_sync_private(gpa, size, p1);
|
||||
|
||||
/*
|
||||
* Verify that the private memory was set to pattern two, and
|
||||
* that shared memory still holds the initial pattern.
|
||||
*/
|
||||
memcmp_g(gpa, p2, size);
|
||||
if (gpa > base_gpa)
|
||||
memcmp_g(base_gpa, init_p, gpa - base_gpa);
|
||||
if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
|
||||
memcmp_g(gpa + size, init_p,
|
||||
(base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
|
||||
|
||||
/*
|
||||
* Convert odd-number page frames back to shared to verify KVM
|
||||
* also correctly handles holes in private ranges.
|
||||
*/
|
||||
for (j = 0; j < size; j += PAGE_SIZE) {
|
||||
if ((j >> PAGE_SHIFT) & 1) {
|
||||
guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
|
||||
guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
|
||||
|
||||
memcmp_g(gpa + j, p3, PAGE_SIZE);
|
||||
} else {
|
||||
guest_sync_private(gpa + j, PAGE_SIZE, p1);
|
||||
}
|
||||
}
|
||||
|
||||
skip:
|
||||
/*
|
||||
* Convert the entire region back to shared, explicitly write
|
||||
* pattern three to fill in the even-number frames before
|
||||
* asking the host to verify (and write pattern four).
|
||||
*/
|
||||
guest_map_shared(gpa, size, do_fallocate);
|
||||
memset((void *)gpa, p3, size);
|
||||
guest_sync_shared(gpa, size, p3, p4);
|
||||
memcmp_g(gpa, p4, size);
|
||||
|
||||
/* Reset the shared memory back to the initial pattern. */
|
||||
memset((void *)gpa, init_p, size);
|
||||
|
||||
/*
|
||||
* Free (via PUNCH_HOLE) *all* private memory so that the next
|
||||
* iteration starts from a clean slate, e.g. with respect to
|
||||
* whether or not there are pages/folios in guest_mem.
|
||||
*/
|
||||
guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
|
||||
}
|
||||
}
|
||||
|
||||
static void guest_punch_hole(uint64_t gpa, uint64_t size)
|
||||
{
|
||||
/* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
|
||||
uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
|
||||
|
||||
kvm_hypercall_map_gpa_range(gpa, size, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that PUNCH_HOLE actually frees memory by punching holes without doing a
|
||||
* proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
|
||||
* (subsequent fault) should zero memory.
|
||||
*/
|
||||
static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
|
||||
{
|
||||
const uint8_t init_p = 0xcc;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Convert the entire range to private, this testcase is all about
|
||||
* punching holes in guest_memfd, i.e. shared mappings aren't needed.
|
||||
*/
|
||||
guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
|
||||
uint64_t gpa = base_gpa + test_ranges[i].offset;
|
||||
uint64_t size = test_ranges[i].size;
|
||||
|
||||
/*
|
||||
* Free all memory before each iteration, even for the !precise
|
||||
* case where the memory will be faulted back in. Freeing and
|
||||
* reallocating should obviously work, and freeing all memory
|
||||
* minimizes the probability of cross-testcase influence.
|
||||
*/
|
||||
guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
|
||||
|
||||
/* Fault-in and initialize memory, and verify the pattern. */
|
||||
if (precise) {
|
||||
memset((void *)gpa, init_p, size);
|
||||
memcmp_g(gpa, init_p, size);
|
||||
} else {
|
||||
memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
|
||||
memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Punch a hole at the target range and verify that reads from
|
||||
* the guest succeed and return zeroes.
|
||||
*/
|
||||
guest_punch_hole(gpa, size);
|
||||
memcmp_g(gpa, 0, size);
|
||||
}
|
||||
}
|
||||
|
||||
static void guest_code(uint64_t base_gpa)
|
||||
{
|
||||
/*
|
||||
* Run the conversion test twice, with and without doing fallocate() on
|
||||
* the guest_memfd backing when converting between shared and private.
|
||||
*/
|
||||
guest_test_explicit_conversion(base_gpa, false);
|
||||
guest_test_explicit_conversion(base_gpa, true);
|
||||
|
||||
/*
|
||||
* Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
|
||||
* faulted in, once with only the target range faulted in.
|
||||
*/
|
||||
guest_test_punch_hole(base_gpa, false);
|
||||
guest_test_punch_hole(base_gpa, true);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_run *run = vcpu->run;
|
||||
uint64_t gpa = run->hypercall.args[0];
|
||||
uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
|
||||
bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
|
||||
bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
|
||||
bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
|
||||
struct kvm_vm *vm = vcpu->vm;
|
||||
|
||||
TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
|
||||
"Wanted MAP_GPA_RANGE (%u), got '%llu'",
|
||||
KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
|
||||
|
||||
if (do_fallocate)
|
||||
vm_guest_mem_fallocate(vm, gpa, size, map_shared);
|
||||
|
||||
if (set_attributes)
|
||||
vm_set_memory_attributes(vm, gpa, size,
|
||||
map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
|
||||
run->hypercall.ret = 0;
|
||||
}
|
||||
|
||||
static bool run_vcpus;
|
||||
|
||||
static void *__test_mem_conversions(void *__vcpu)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = __vcpu;
|
||||
struct kvm_run *run = vcpu->run;
|
||||
struct kvm_vm *vm = vcpu->vm;
|
||||
struct ucall uc;
|
||||
|
||||
while (!READ_ONCE(run_vcpus))
|
||||
;
|
||||
|
||||
for ( ;; ) {
|
||||
vcpu_run(vcpu);
|
||||
|
||||
if (run->exit_reason == KVM_EXIT_HYPERCALL) {
|
||||
handle_exit_hypercall(vcpu);
|
||||
continue;
|
||||
}
|
||||
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
|
||||
run->exit_reason, exit_reason_str(run->exit_reason));
|
||||
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT(uc);
|
||||
case UCALL_SYNC: {
|
||||
uint64_t gpa = uc.args[1];
|
||||
size_t size = uc.args[2];
|
||||
size_t i;
|
||||
|
||||
TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
|
||||
uc.args[0] == SYNC_PRIVATE,
|
||||
"Unknown sync command '%ld'", uc.args[0]);
|
||||
|
||||
for (i = 0; i < size; i += vm->page_size) {
|
||||
size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
|
||||
uint8_t *hva = addr_gpa2hva(vm, gpa + i);
|
||||
|
||||
/* In all cases, the host should observe the shared data. */
|
||||
memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
|
||||
|
||||
/* For shared, write the new pattern to guest memory. */
|
||||
if (uc.args[0] == SYNC_SHARED)
|
||||
memset(hva, uc.args[4], nr_bytes);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case UCALL_DONE:
|
||||
return NULL;
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
|
||||
uint32_t nr_memslots)
|
||||
{
|
||||
/*
|
||||
* Allocate enough memory so that each vCPU's chunk of memory can be
|
||||
* naturally aligned with respect to the size of the backing store.
|
||||
*/
|
||||
const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
|
||||
const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
|
||||
const size_t memfd_size = per_cpu_size * nr_vcpus;
|
||||
const size_t slot_size = memfd_size / nr_memslots;
|
||||
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
|
||||
pthread_t threads[KVM_MAX_VCPUS];
|
||||
struct kvm_vm *vm;
|
||||
int memfd, i, r;
|
||||
|
||||
const struct vm_shape shape = {
|
||||
.mode = VM_MODE_DEFAULT,
|
||||
.type = KVM_X86_SW_PROTECTED_VM,
|
||||
};
|
||||
|
||||
TEST_ASSERT(slot_size * nr_memslots == memfd_size,
|
||||
"The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
|
||||
memfd_size, nr_memslots);
|
||||
vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
|
||||
|
||||
vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
|
||||
|
||||
memfd = vm_create_guest_memfd(vm, memfd_size, 0);
|
||||
|
||||
for (i = 0; i < nr_memslots; i++)
|
||||
vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
|
||||
BASE_DATA_SLOT + i, slot_size / vm->page_size,
|
||||
KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
|
||||
|
||||
for (i = 0; i < nr_vcpus; i++) {
|
||||
uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
|
||||
|
||||
vcpu_args_set(vcpus[i], 1, gpa);
|
||||
|
||||
/*
|
||||
* Map only what is needed so that an out-of-bounds access
|
||||
* results #PF => SHUTDOWN instead of data corruption.
|
||||
*/
|
||||
virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
|
||||
|
||||
pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
|
||||
}
|
||||
|
||||
WRITE_ONCE(run_vcpus, true);
|
||||
|
||||
for (i = 0; i < nr_vcpus; i++)
|
||||
pthread_join(threads[i], NULL);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
|
||||
/*
|
||||
* Allocate and free memory from the guest_memfd after closing the VM
|
||||
* fd. The guest_memfd is gifted a reference to its owning VM, i.e.
|
||||
* should prevent the VM from being fully destroyed until the last
|
||||
* reference to the guest_memfd is also put.
|
||||
*/
|
||||
r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
|
||||
TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
|
||||
|
||||
r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
|
||||
TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
|
||||
}
|
||||
|
||||
static void usage(const char *cmd)
|
||||
{
|
||||
puts("");
|
||||
printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
|
||||
puts("");
|
||||
backing_src_help("-s");
|
||||
puts("");
|
||||
puts(" -n: specify the number of vcpus (default: 1)");
|
||||
puts("");
|
||||
puts(" -m: specify the number of memslots (default: 1)");
|
||||
puts("");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
|
||||
uint32_t nr_memslots = 1;
|
||||
uint32_t nr_vcpus = 1;
|
||||
int opt;
|
||||
|
||||
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
|
||||
|
||||
while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
|
||||
switch (opt) {
|
||||
case 's':
|
||||
src_type = parse_backing_src_type(optarg);
|
||||
break;
|
||||
case 'n':
|
||||
nr_vcpus = atoi_positive("nr_vcpus", optarg);
|
||||
break;
|
||||
case 'm':
|
||||
nr_memslots = atoi_positive("nr_memslots", optarg);
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
usage(argv[0]);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
test_mem_conversions(src_type, nr_vcpus, nr_memslots);
|
||||
|
||||
return 0;
|
||||
}
|
120	tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c (new file)
@ -0,0 +1,120 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2023, Google LLC.
|
||||
*/
|
||||
#include <linux/kvm.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "test_util.h"
|
||||
|
||||
/* Arbitrarily selected to avoid overlaps with anything else */
|
||||
#define EXITS_TEST_GVA 0xc0000000
|
||||
#define EXITS_TEST_GPA EXITS_TEST_GVA
|
||||
#define EXITS_TEST_NPAGES 1
|
||||
#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
|
||||
#define EXITS_TEST_SLOT 10
|
||||
|
||||
static uint64_t guest_repeatedly_read(void)
|
||||
{
|
||||
volatile uint64_t value;
|
||||
|
||||
while (true)
|
||||
value = *((uint64_t *) EXITS_TEST_GVA);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = _vcpu_run(vcpu);
|
||||
if (r) {
|
||||
TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
|
||||
TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
|
||||
}
|
||||
return vcpu->run->exit_reason;
|
||||
}
|
||||
|
||||
const struct vm_shape protected_vm_shape = {
|
||||
.mode = VM_MODE_DEFAULT,
|
||||
.type = KVM_X86_SW_PROTECTED_VM,
|
||||
};
|
||||
|
||||
static void test_private_access_memslot_deleted(void)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
pthread_t vm_thread;
|
||||
void *thread_return;
|
||||
uint32_t exit_reason;
|
||||
|
||||
vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
|
||||
guest_repeatedly_read);
|
||||
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
||||
EXITS_TEST_GPA, EXITS_TEST_SLOT,
|
||||
EXITS_TEST_NPAGES,
|
||||
KVM_MEM_GUEST_MEMFD);
|
||||
|
||||
virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
|
||||
|
||||
/* Request to access page privately */
|
||||
vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
|
||||
|
||||
pthread_create(&vm_thread, NULL,
|
||||
(void *(*)(void *))run_vcpu_get_exit_reason,
|
||||
(void *)vcpu);
|
||||
|
||||
vm_mem_region_delete(vm, EXITS_TEST_SLOT);
|
||||
|
||||
pthread_join(vm_thread, &thread_return);
|
||||
exit_reason = (uint32_t)(uint64_t)thread_return;
|
||||
|
||||
TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
|
||||
TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
|
||||
TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
|
||||
TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
static void test_private_access_memslot_not_private(void)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
uint32_t exit_reason;
|
||||
|
||||
vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
|
||||
guest_repeatedly_read);
|
||||
|
||||
/* Add a non-private memslot (flags = 0) */
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
||||
EXITS_TEST_GPA, EXITS_TEST_SLOT,
|
||||
EXITS_TEST_NPAGES, 0);
|
||||
|
||||
virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
|
||||
|
||||
/* Request to access page privately */
|
||||
vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
|
||||
|
||||
exit_reason = run_vcpu_get_exit_reason(vcpu);
|
||||
|
||||
TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
|
||||
TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
|
||||
TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
|
||||
TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
|
||||
|
||||
test_private_access_memslot_deleted();
|
||||
test_private_access_memslot_not_private();
|
||||
}
|
@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
|
||||
|
||||
run_guest(vmcb, svm->vmcb_gpa);
|
||||
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
|
||||
"Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'",
|
||||
"Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
|
||||
vmcb->control.exit_code,
|
||||
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
|
||||
|
||||
@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
|
||||
|
||||
run_guest(vmcb, svm->vmcb_gpa);
|
||||
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
|
||||
"Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'",
|
||||
"Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
|
||||
vmcb->control.exit_code,
|
||||
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
|
||||
|
||||
|
@ -271,7 +271,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
kvm_check_cap(KVM_CAP_MCE);
|
||||
|
||||
vm = __vm_create(VM_MODE_DEFAULT, 3, 0);
|
||||
vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
|
||||
|
||||
kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
|
||||
&supported_mcg_caps);
|
||||
|
@ -56,7 +56,7 @@ static void guest_test_perf_capabilities_gp(uint64_t val)
|
||||
uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
|
||||
|
||||
__GUEST_ASSERT(vector == GP_VECTOR,
|
||||
"Expected #GP for value '0x%llx', got vector '0x%x'",
|
||||
"Expected #GP for value '0x%lx', got vector '0x%x'",
|
||||
val, vector);
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,7 @@ do { \
|
||||
\
|
||||
__GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \
|
||||
__supported == ((xfeatures) | (dependencies)), \
|
||||
"supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \
|
||||
"supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \
|
||||
__supported, (xfeatures), (dependencies)); \
|
||||
} while (0)
|
||||
|
||||
@ -42,7 +42,7 @@ do { \
|
||||
uint64_t __supported = (supported_xcr0) & (xfeatures); \
|
||||
\
|
||||
__GUEST_ASSERT(!__supported || __supported == (xfeatures), \
|
||||
"supported = 0x%llx, xfeatures = 0x%llx", \
|
||||
"supported = 0x%lx, xfeatures = 0x%llx", \
|
||||
__supported, (xfeatures)); \
|
||||
} while (0)
|
||||
|
||||
@ -81,7 +81,7 @@ static void guest_code(void)
|
||||
|
||||
vector = xsetbv_safe(0, supported_xcr0);
|
||||
__GUEST_ASSERT(!vector,
|
||||
"Expected success on XSETBV(0x%llx), got vector '0x%x'",
|
||||
"Expected success on XSETBV(0x%lx), got vector '0x%x'",
|
||||
supported_xcr0, vector);
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
@ -90,7 +90,7 @@ static void guest_code(void)
|
||||
|
||||
vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
|
||||
__GUEST_ASSERT(vector == GP_VECTOR,
|
||||
"Expected #GP on XSETBV(0x%llx), supported XCR0 = %llx, got vector '0x%x'",
|
||||
"Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
|
||||
BIT_ULL(i), supported_xcr0, vector);
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@

config HAVE_KVM
	bool
	select EVENTFD

config HAVE_KVM_PFNCACHE
	bool
@ -10,9 +11,6 @@ config HAVE_KVM_PFNCACHE
config HAVE_KVM_IRQCHIP
	bool

config HAVE_KVM_IRQFD
	bool

config HAVE_KVM_IRQ_ROUTING
	bool

@ -39,10 +37,6 @@ config NEED_KVM_DIRTY_RING_WITH_BITMAP
	bool
	depends on HAVE_KVM_DIRTY_RING

config HAVE_KVM_EVENTFD
	bool
	select EVENTFD

config KVM_MMIO
	bool

@ -92,3 +86,20 @@ config HAVE_KVM_PM_NOTIFIER

config KVM_GENERIC_HARDWARE_ENABLING
	bool

config KVM_GENERIC_MMU_NOTIFIER
	select MMU_NOTIFIER
	bool

config KVM_GENERIC_MEMORY_ATTRIBUTES
	select KVM_GENERIC_MMU_NOTIFIER
	bool

config KVM_PRIVATE_MEM
	select XARRAY_MULTI
	bool

config KVM_GENERIC_PRIVATE_MEM
	select KVM_GENERIC_MEMORY_ATTRIBUTES
	select KVM_PRIVATE_MEM
	bool

@ -12,3 +12,4 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o
kvm-$(CONFIG_HAVE_KVM_PFNCACHE) += $(KVM)/pfncache.o
kvm-$(CONFIG_KVM_PRIVATE_MEM) += $(KVM)/guest_memfd.o

@ -58,7 +58,7 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
	as_id = slot >> 16;
	id = (u16)slot;

	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
	if (as_id >= kvm_arch_nr_memslot_as_ids(kvm) || id >= KVM_USER_MEM_SLOTS)
		return;

	memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);

@ -28,7 +28,7 @@
|
||||
|
||||
#include <kvm/iodev.h>
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
|
||||
static struct workqueue_struct *irqfd_cleanup_wq;
|
||||
|
||||
@ -526,21 +526,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
|
||||
synchronize_srcu(&kvm->irq_srcu);
|
||||
kvm_arch_post_irq_ack_notifier_list_update(kvm);
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
kvm_eventfd_init(struct kvm *kvm)
|
||||
{
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
spin_lock_init(&kvm->irqfds.lock);
|
||||
INIT_LIST_HEAD(&kvm->irqfds.items);
|
||||
INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
|
||||
mutex_init(&kvm->irqfds.resampler_lock);
|
||||
#endif
|
||||
INIT_LIST_HEAD(&kvm->ioeventfds);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
/*
|
||||
* shutdown any irqfd's that match fd+gsi
|
||||
*/
|
||||
@ -1012,3 +998,15 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
||||
|
||||
return kvm_assign_ioeventfd(kvm, args);
|
||||
}
|
||||
|
||||
void
|
||||
kvm_eventfd_init(struct kvm *kvm)
|
||||
{
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
spin_lock_init(&kvm->irqfds.lock);
|
||||
INIT_LIST_HEAD(&kvm->irqfds.items);
|
||||
INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
|
||||
mutex_init(&kvm->irqfds.resampler_lock);
|
||||
#endif
|
||||
INIT_LIST_HEAD(&kvm->ioeventfds);
|
||||
}
|
||||
|
533	virt/kvm/guest_memfd.c (new file)
@ -0,0 +1,533 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/falloc.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
|
||||
#include "kvm_mm.h"
|
||||
|
||||
struct kvm_gmem {
|
||||
struct kvm *kvm;
|
||||
struct xarray bindings;
|
||||
struct list_head entry;
|
||||
};
|
||||
|
||||
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
|
||||
{
|
||||
struct folio *folio;
|
||||
|
||||
/* TODO: Support huge pages. */
|
||||
folio = filemap_grab_folio(inode->i_mapping, index);
|
||||
if (IS_ERR_OR_NULL(folio))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Use the up-to-date flag to track whether or not the memory has been
|
||||
* zeroed before being handed off to the guest. There is no backing
|
||||
* storage for the memory, so the folio will remain up-to-date until
|
||||
* it's removed.
|
||||
*
|
||||
* TODO: Skip clearing pages when trusted firmware will do it when
|
||||
* assigning memory to the guest.
|
||||
*/
|
||||
if (!folio_test_uptodate(folio)) {
|
||||
unsigned long nr_pages = folio_nr_pages(folio);
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
clear_highpage(folio_page(folio, i));
|
||||
|
||||
folio_mark_uptodate(folio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore accessed, referenced, and dirty flags. The memory is
|
||||
* unevictable and there is no storage to write back to.
|
||||
*/
|
||||
return folio;
|
||||
}
|
||||
|
||||
static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
|
||||
pgoff_t end)
|
||||
{
|
||||
bool flush = false, found_memslot = false;
|
||||
struct kvm_memory_slot *slot;
|
||||
struct kvm *kvm = gmem->kvm;
|
||||
unsigned long index;
|
||||
|
||||
xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
|
||||
pgoff_t pgoff = slot->gmem.pgoff;
|
||||
|
||||
struct kvm_gfn_range gfn_range = {
|
||||
.start = slot->base_gfn + max(pgoff, start) - pgoff,
|
||||
.end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff,
|
||||
.slot = slot,
|
||||
.may_block = true,
|
||||
};
|
||||
|
||||
if (!found_memslot) {
|
||||
found_memslot = true;
|
||||
|
||||
KVM_MMU_LOCK(kvm);
|
||||
kvm_mmu_invalidate_begin(kvm);
|
||||
}
|
||||
|
||||
flush |= kvm_mmu_unmap_gfn_range(kvm, &gfn_range);
|
||||
}
|
||||
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
if (found_memslot)
|
||||
KVM_MMU_UNLOCK(kvm);
|
||||
}
|
||||
|
||||
static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
|
||||
pgoff_t end)
|
||||
{
|
||||
struct kvm *kvm = gmem->kvm;
|
||||
|
||||
if (xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT)) {
|
||||
KVM_MMU_LOCK(kvm);
|
||||
kvm_mmu_invalidate_end(kvm);
|
||||
KVM_MMU_UNLOCK(kvm);
|
||||
}
|
||||
}
|
||||
|
||||
static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
|
||||
{
|
||||
struct list_head *gmem_list = &inode->i_mapping->i_private_list;
|
||||
pgoff_t start = offset >> PAGE_SHIFT;
|
||||
pgoff_t end = (offset + len) >> PAGE_SHIFT;
|
||||
struct kvm_gmem *gmem;
|
||||
|
||||
/*
|
||||
* Bindings must be stable across invalidation to ensure the start+end
|
||||
* are balanced.
|
||||
*/
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
list_for_each_entry(gmem, gmem_list, entry)
|
||||
kvm_gmem_invalidate_begin(gmem, start, end);
|
||||
|
||||
truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
|
||||
|
||||
list_for_each_entry(gmem, gmem_list, entry)
|
||||
kvm_gmem_invalidate_end(gmem, start, end);
|
||||
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
|
||||
{
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
pgoff_t start, index, end;
|
||||
int r;
|
||||
|
||||
/* Dedicated guest is immutable by default. */
|
||||
if (offset + len > i_size_read(inode))
|
||||
return -EINVAL;
|
||||
|
||||
filemap_invalidate_lock_shared(mapping);
|
||||
|
||||
start = offset >> PAGE_SHIFT;
|
||||
end = (offset + len) >> PAGE_SHIFT;
|
||||
|
||||
r = 0;
|
||||
for (index = start; index < end; ) {
|
||||
struct folio *folio;
|
||||
|
||||
if (signal_pending(current)) {
|
||||
r = -EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
folio = kvm_gmem_get_folio(inode, index);
|
||||
if (!folio) {
|
||||
r = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
index = folio_next_index(folio);
|
||||
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
|
||||
/* 64-bit only, wrapping the index should be impossible. */
|
||||
if (WARN_ON_ONCE(!index))
|
||||
break;
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
|
||||
loff_t len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!(mode & FALLOC_FL_KEEP_SIZE))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
|
||||
return -EINVAL;
|
||||
|
||||
if (mode & FALLOC_FL_PUNCH_HOLE)
|
||||
ret = kvm_gmem_punch_hole(file_inode(file), offset, len);
|
||||
else
|
||||
ret = kvm_gmem_allocate(file_inode(file), offset, len);
|
||||
|
||||
if (!ret)
|
||||
file_modified(file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_gmem_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct kvm_gmem *gmem = file->private_data;
|
||||
struct kvm_memory_slot *slot;
|
||||
struct kvm *kvm = gmem->kvm;
|
||||
unsigned long index;
|
||||
|
||||
/*
|
||||
* Prevent concurrent attempts to *unbind* a memslot. This is the last
|
||||
* reference to the file and thus no new bindings can be created, but
|
||||
* dereferencing the slot for existing bindings needs to be protected
|
||||
* against memslot updates, specifically so that unbind doesn't race
|
||||
* and free the memslot (kvm_gmem_get_file() will return NULL).
|
||||
*/
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
xa_for_each(&gmem->bindings, index, slot)
|
||||
rcu_assign_pointer(slot->gmem.file, NULL);
|
||||
|
||||
synchronize_rcu();
|
||||
|
||||
/*
|
||||
* All in-flight operations are gone and new bindings can be created.
|
||||
* Zap all SPTEs pointed at by this file. Do not free the backing
|
||||
* memory, as its lifetime is associated with the inode, not the file.
|
||||
*/
|
||||
kvm_gmem_invalidate_begin(gmem, 0, -1ul);
|
||||
kvm_gmem_invalidate_end(gmem, 0, -1ul);
|
||||
|
||||
list_del(&gmem->entry);
|
||||
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
xa_destroy(&gmem->bindings);
|
||||
kfree(gmem);
|
||||
|
||||
kvm_put_kvm(kvm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct file *kvm_gmem_get_file(struct kvm_memory_slot *slot)
|
||||
{
|
||||
/*
|
||||
* Do not return slot->gmem.file if it has already been closed;
|
||||
* there might be some time between the last fput() and when
|
||||
* kvm_gmem_release() clears slot->gmem.file, and you do not
|
||||
* want to spin in the meanwhile.
|
||||
*/
|
||||
return get_file_active(&slot->gmem.file);
|
||||
}
|
||||
|
||||
static struct file_operations kvm_gmem_fops = {
|
||||
.open = generic_file_open,
|
||||
.release = kvm_gmem_release,
|
||||
.fallocate = kvm_gmem_fallocate,
|
||||
};
|
||||
|
||||
void kvm_gmem_init(struct module *module)
|
||||
{
|
||||
kvm_gmem_fops.owner = module;
|
||||
}
|
||||
|
||||
static int kvm_gmem_migrate_folio(struct address_space *mapping,
|
||||
struct folio *dst, struct folio *src,
|
||||
enum migrate_mode mode)
|
||||
{
|
||||
WARN_ON_ONCE(1);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int kvm_gmem_error_folio(struct address_space *mapping,
|
||||
struct folio *folio)
|
||||
{
|
||||
struct list_head *gmem_list = &mapping->i_private_list;
|
||||
struct kvm_gmem *gmem;
|
||||
pgoff_t start, end;
|
||||
|
||||
filemap_invalidate_lock_shared(mapping);
|
||||
|
||||
start = folio->index;
|
||||
end = start + folio_nr_pages(folio);
|
||||
|
||||
list_for_each_entry(gmem, gmem_list, entry)
|
||||
kvm_gmem_invalidate_begin(gmem, start, end);
|
||||
|
||||
/*
|
||||
* Do not truncate the range; what action is taken in response to the
|
||||
* error is userspace's decision (assuming the architecture supports
|
||||
* gracefully handling memory errors). If/when the guest attempts to
|
||||
* access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
|
||||
* at which point KVM can either terminate the VM or propagate the
|
||||
* error to userspace.
|
||||
*/
|
||||
|
||||
list_for_each_entry(gmem, gmem_list, entry)
|
||||
kvm_gmem_invalidate_end(gmem, start, end);
|
||||
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
|
||||
return MF_DELAYED;
|
||||
}
|
||||
|
||||
static const struct address_space_operations kvm_gmem_aops = {
|
||||
.dirty_folio = noop_dirty_folio,
|
||||
.migrate_folio = kvm_gmem_migrate_folio,
|
||||
.error_remove_folio = kvm_gmem_error_folio,
|
||||
};
|
||||
|
||||
static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path,
|
||||
struct kstat *stat, u32 request_mask,
|
||||
unsigned int query_flags)
|
||||
{
|
||||
struct inode *inode = path->dentry->d_inode;
|
||||
|
||||
generic_fillattr(idmap, request_mask, inode, stat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_gmem_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
|
||||
struct iattr *attr)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
static const struct inode_operations kvm_gmem_iops = {
|
||||
.getattr = kvm_gmem_getattr,
|
||||
.setattr = kvm_gmem_setattr,
|
||||
};
|
||||
|
||||
static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
|
||||
{
|
||||
const char *anon_name = "[kvm-gmem]";
|
||||
struct kvm_gmem *gmem;
|
||||
struct inode *inode;
|
||||
struct file *file;
|
||||
int fd, err;
|
||||
|
||||
fd = get_unused_fd_flags(0);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
|
||||
if (!gmem) {
|
||||
err = -ENOMEM;
|
||||
goto err_fd;
|
||||
}
|
||||
|
||||
file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
|
||||
O_RDWR, NULL);
|
||||
if (IS_ERR(file)) {
|
||||
err = PTR_ERR(file);
|
||||
goto err_gmem;
|
||||
}
|
||||
|
||||
file->f_flags |= O_LARGEFILE;
|
||||
|
||||
inode = file->f_inode;
|
||||
WARN_ON(file->f_mapping != inode->i_mapping);
|
||||
|
||||
inode->i_private = (void *)(unsigned long)flags;
|
||||
inode->i_op = &kvm_gmem_iops;
|
||||
inode->i_mapping->a_ops = &kvm_gmem_aops;
|
||||
inode->i_mode |= S_IFREG;
|
||||
inode->i_size = size;
|
||||
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
|
||||
mapping_set_unmovable(inode->i_mapping);
|
||||
/* Unmovable mappings are supposed to be marked unevictable as well. */
|
||||
WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
|
||||
|
||||
kvm_get_kvm(kvm);
|
||||
gmem->kvm = kvm;
|
||||
xa_init(&gmem->bindings);
|
||||
list_add(&gmem->entry, &inode->i_mapping->i_private_list);
|
||||
|
||||
fd_install(fd, file);
|
||||
return fd;
|
||||
|
||||
err_gmem:
|
||||
kfree(gmem);
|
||||
err_fd:
|
||||
put_unused_fd(fd);
|
||||
return err;
|
||||
}
|
||||
|
||||
int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
|
||||
{
|
||||
loff_t size = args->size;
|
||||
u64 flags = args->flags;
|
||||
u64 valid_flags = 0;
|
||||
|
||||
if (flags & ~valid_flags)
|
||||
return -EINVAL;
|
||||
|
||||
if (size <= 0 || !PAGE_ALIGNED(size))
|
||||
return -EINVAL;
|
||||
|
||||
return __kvm_gmem_create(kvm, size, flags);
|
||||
}
|
||||
|
||||
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
unsigned int fd, loff_t offset)
|
||||
{
|
||||
loff_t size = slot->npages << PAGE_SHIFT;
|
||||
unsigned long start, end;
|
||||
struct kvm_gmem *gmem;
|
||||
struct inode *inode;
|
||||
struct file *file;
|
||||
int r = -EINVAL;
|
||||
|
||||
BUILD_BUG_ON(sizeof(gfn_t) != sizeof(slot->gmem.pgoff));
|
||||
|
||||
file = fget(fd);
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
|
||||
if (file->f_op != &kvm_gmem_fops)
|
||||
goto err;
|
||||
|
||||
gmem = file->private_data;
|
||||
if (gmem->kvm != kvm)
|
||||
goto err;
|
||||
|
||||
inode = file_inode(file);
|
||||
|
||||
if (offset < 0 || !PAGE_ALIGNED(offset) ||
|
||||
offset + size > i_size_read(inode))
|
||||
goto err;
|
||||
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
start = offset >> PAGE_SHIFT;
|
||||
end = start + slot->npages;
|
||||
|
||||
if (!xa_empty(&gmem->bindings) &&
|
||||
xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT)) {
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* No synchronize_rcu() needed, any in-flight readers are guaranteed to
|
||||
* see either a NULL file or this new file; no need for them to go
|
||||
* away.
|
||||
*/
|
||||
rcu_assign_pointer(slot->gmem.file, file);
|
||||
slot->gmem.pgoff = start;
|
||||
|
||||
xa_store_range(&gmem->bindings, start, end - 1, slot, GFP_KERNEL);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
|
||||
/*
|
||||
* Drop the reference to the file, even on success. The file pins KVM,
|
||||
* not the other way 'round. Active bindings are invalidated if the
|
||||
* file is closed before memslots are destroyed.
|
||||
*/
|
||||
r = 0;
|
||||
err:
|
||||
fput(file);
|
||||
return r;
|
||||
}
|
||||
|
||||
void kvm_gmem_unbind(struct kvm_memory_slot *slot)
|
||||
{
|
||||
unsigned long start = slot->gmem.pgoff;
|
||||
unsigned long end = start + slot->npages;
|
||||
struct kvm_gmem *gmem;
|
||||
struct file *file;
|
||||
|
||||
/*
|
||||
* Nothing to do if the underlying file was already closed (or is being
|
||||
* closed right now), kvm_gmem_release() invalidates all bindings.
|
||||
*/
|
||||
file = kvm_gmem_get_file(slot);
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
gmem = file->private_data;
|
||||
|
||||
filemap_invalidate_lock(file->f_mapping);
|
||||
xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL);
|
||||
rcu_assign_pointer(slot->gmem.file, NULL);
|
||||
synchronize_rcu();
|
||||
filemap_invalidate_unlock(file->f_mapping);
|
||||
|
||||
fput(file);
|
||||
}
|
||||
|
||||
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
|
||||
{
|
||||
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
|
||||
struct kvm_gmem *gmem;
|
||||
struct folio *folio;
|
||||
struct page *page;
|
||||
struct file *file;
|
||||
int r;
|
||||
|
||||
file = kvm_gmem_get_file(slot);
|
||||
if (!file)
|
||||
return -EFAULT;
|
||||
|
||||
gmem = file->private_data;
|
||||
|
||||
if (WARN_ON_ONCE(xa_load(&gmem->bindings, index) != slot)) {
|
||||
r = -EIO;
|
||||
goto out_fput;
|
||||
}
|
||||
|
||||
folio = kvm_gmem_get_folio(file_inode(file), index);
|
||||
if (!folio) {
|
||||
r = -ENOMEM;
|
||||
goto out_fput;
|
||||
}
|
||||
|
||||
if (folio_test_hwpoison(folio)) {
|
||||
r = -EHWPOISON;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
page = folio_file_page(folio, index);
|
||||
|
||||
*pfn = page_to_pfn(page);
|
||||
if (max_order)
|
||||
*max_order = 0;
|
||||
|
||||
r = 0;
|
||||
|
||||
out_unlock:
|
||||
folio_unlock(folio);
|
||||
out_fput:
|
||||
fput(file);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
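
To tie the guest_memfd pieces above together, here is a hedged userspace sketch of the flow they enable: create a guest_memfd on an existing VM fd, then bind it to a memslot with KVM_SET_USER_MEMORY_REGION2. It assumes a <linux/kvm.h> that ships this uAPI; the slot number, GPA, offsets and the anonymous shared mapping are made-up example values, not taken from this commit.

/*
 * Illustrative userspace sketch (not part of this commit). vm_fd is an
 * existing KVM VM descriptor; slot 0 and the zero guest_memfd_offset are
 * example values.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static int bind_guest_memfd(int vm_fd, __u64 gpa, __u64 size)
{
        struct kvm_create_guest_memfd gmem = { .size = size };
        struct kvm_userspace_memory_region2 region;
        void *shared;
        int gmem_fd;

        gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
        if (gmem_fd < 0)
                return -1;

        /* Shared counterpart; private pages are served from the guest_memfd. */
        shared = mmap(NULL, size, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (shared == MAP_FAILED)
                return -1;

        memset(&region, 0, sizeof(region));
        region.slot = 0;
        region.flags = KVM_MEM_GUEST_MEMFD;
        region.guest_phys_addr = gpa;
        region.memory_size = size;
        region.userspace_addr = (__u64)(unsigned long)shared;
        region.guest_memfd = gmem_fd;
        region.guest_memfd_offset = 0;

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);
}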
|
@ -533,30 +533,43 @@ void kvm_destroy_vcpus(struct kvm *kvm)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_destroy_vcpus);
|
||||
|
||||
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
|
||||
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
|
||||
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
|
||||
{
|
||||
return container_of(mn, struct kvm, mmu_notifier);
|
||||
}
|
||||
|
||||
typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
|
||||
typedef bool (*gfn_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
|
||||
|
||||
typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end);
|
||||
typedef void (*on_lock_fn_t)(struct kvm *kvm);
|
||||
|
||||
typedef void (*on_unlock_fn_t)(struct kvm *kvm);
|
||||
|
||||
struct kvm_hva_range {
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
struct kvm_mmu_notifier_range {
|
||||
/*
|
||||
* 64-bit addresses, as KVM notifiers can operate on host virtual
|
||||
* addresses (unsigned long) and guest physical addresses (64-bit).
|
||||
*/
|
||||
u64 start;
|
||||
u64 end;
|
||||
union kvm_mmu_notifier_arg arg;
|
||||
hva_handler_t handler;
|
||||
gfn_handler_t handler;
|
||||
on_lock_fn_t on_lock;
|
||||
on_unlock_fn_t on_unlock;
|
||||
bool flush_on_ret;
|
||||
bool may_block;
|
||||
};
|
||||
|
||||
/*
|
||||
* The inner-most helper returns a tuple containing the return value from the
|
||||
* arch- and action-specific handler, plus a flag indicating whether or not at
|
||||
* least one memslot was found, i.e. if the handler found guest memory.
|
||||
*
|
||||
* Note, most notifiers are averse to booleans, so even though KVM tracks the
|
||||
* return from arch code as a bool, outer helpers will cast it to an int. :-(
|
||||
*/
|
||||
typedef struct kvm_mmu_notifier_return {
|
||||
bool ret;
|
||||
bool found_memslot;
|
||||
} kvm_mn_ret_t;
|
||||
|
||||
/*
|
||||
* Use a dedicated stub instead of NULL to indicate that there is no callback
|
||||
* function/handler. The compiler technically can't guarantee that a real
|
||||
@ -578,26 +591,29 @@ static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;
|
||||
node; \
|
||||
node = interval_tree_iter_next(node, start, last)) \
|
||||
|
||||
static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
|
||||
const struct kvm_hva_range *range)
|
||||
static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
|
||||
const struct kvm_mmu_notifier_range *range)
|
||||
{
|
||||
bool ret = false, locked = false;
|
||||
struct kvm_mmu_notifier_return r = {
|
||||
.ret = false,
|
||||
.found_memslot = false,
|
||||
};
|
||||
struct kvm_gfn_range gfn_range;
|
||||
struct kvm_memory_slot *slot;
|
||||
struct kvm_memslots *slots;
|
||||
int i, idx;
|
||||
|
||||
if (WARN_ON_ONCE(range->end <= range->start))
|
||||
return 0;
|
||||
return r;
|
||||
|
||||
/* A null handler is allowed if and only if on_lock() is provided. */
|
||||
if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
|
||||
IS_KVM_NULL_FN(range->handler)))
|
||||
return 0;
|
||||
return r;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
struct interval_tree_node *node;
|
||||
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
@ -606,9 +622,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
|
||||
unsigned long hva_start, hva_end;
|
||||
|
||||
slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]);
|
||||
hva_start = max(range->start, slot->userspace_addr);
|
||||
hva_end = min(range->end, slot->userspace_addr +
|
||||
(slot->npages << PAGE_SHIFT));
|
||||
hva_start = max_t(unsigned long, range->start, slot->userspace_addr);
|
||||
hva_end = min_t(unsigned long, range->end,
|
||||
slot->userspace_addr + (slot->npages << PAGE_SHIFT));
|
||||
|
||||
/*
|
||||
* To optimize for the likely case where the address
|
||||
@ -627,71 +643,66 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
|
||||
gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
|
||||
gfn_range.slot = slot;
|
||||
|
||||
if (!locked) {
|
||||
locked = true;
|
||||
if (!r.found_memslot) {
|
||||
r.found_memslot = true;
|
||||
KVM_MMU_LOCK(kvm);
|
||||
if (!IS_KVM_NULL_FN(range->on_lock))
|
||||
range->on_lock(kvm, range->start, range->end);
|
||||
range->on_lock(kvm);
|
||||
|
||||
if (IS_KVM_NULL_FN(range->handler))
|
||||
break;
|
||||
}
|
||||
ret |= range->handler(kvm, &gfn_range);
|
||||
r.ret |= range->handler(kvm, &gfn_range);
|
||||
}
|
||||
}
|
||||
|
||||
if (range->flush_on_ret && ret)
|
||||
if (range->flush_on_ret && r.ret)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
if (locked) {
|
||||
if (r.found_memslot)
|
||||
KVM_MMU_UNLOCK(kvm);
|
||||
if (!IS_KVM_NULL_FN(range->on_unlock))
|
||||
range->on_unlock(kvm);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
/* The notifiers are averse to booleans. :-( */
|
||||
return (int)ret;
|
||||
return r;
|
||||
}
|
||||
|
||||
static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
union kvm_mmu_notifier_arg arg,
|
||||
hva_handler_t handler)
|
||||
gfn_handler_t handler)
|
||||
{
|
||||
struct kvm *kvm = mmu_notifier_to_kvm(mn);
|
||||
const struct kvm_hva_range range = {
|
||||
const struct kvm_mmu_notifier_range range = {
|
||||
.start = start,
|
||||
.end = end,
|
||||
.arg = arg,
|
||||
.handler = handler,
|
||||
.on_lock = (void *)kvm_null_fn,
|
||||
.on_unlock = (void *)kvm_null_fn,
|
||||
.flush_on_ret = true,
|
||||
.may_block = false,
|
||||
};
|
||||
|
||||
return __kvm_handle_hva_range(kvm, &range);
|
||||
return __kvm_handle_hva_range(kvm, &range).ret;
|
||||
}
|
||||
|
||||
static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
hva_handler_t handler)
|
||||
gfn_handler_t handler)
|
||||
{
|
||||
struct kvm *kvm = mmu_notifier_to_kvm(mn);
|
||||
const struct kvm_hva_range range = {
|
||||
const struct kvm_mmu_notifier_range range = {
|
||||
.start = start,
|
||||
.end = end,
|
||||
.handler = handler,
|
||||
.on_lock = (void *)kvm_null_fn,
|
||||
.on_unlock = (void *)kvm_null_fn,
|
||||
.flush_on_ret = false,
|
||||
.may_block = false,
|
||||
};
|
||||
|
||||
return __kvm_handle_hva_range(kvm, &range);
|
||||
return __kvm_handle_hva_range(kvm, &range).ret;
|
||||
}
|
||||
|
||||
static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
@ -736,16 +747,29 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
|
||||
kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
|
||||
}
|
||||
|
||||
void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end)
|
||||
void kvm_mmu_invalidate_begin(struct kvm *kvm)
|
||||
{
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
/*
|
||||
* The count increase must become visible at unlock time as no
|
||||
* spte can be established without taking the mmu_lock and
|
||||
* count is also read inside the mmu_lock critical section.
|
||||
*/
|
||||
kvm->mmu_invalidate_in_progress++;
|
||||
|
||||
if (likely(kvm->mmu_invalidate_in_progress == 1)) {
|
||||
kvm->mmu_invalidate_range_start = INVALID_GPA;
|
||||
kvm->mmu_invalidate_range_end = INVALID_GPA;
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end)
|
||||
{
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
WARN_ON_ONCE(!kvm->mmu_invalidate_in_progress);
|
||||
|
||||
if (likely(kvm->mmu_invalidate_range_start == INVALID_GPA)) {
|
||||
kvm->mmu_invalidate_range_start = start;
|
||||
kvm->mmu_invalidate_range_end = end;
|
||||
} else {
|
||||
@ -765,16 +789,21 @@ void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
|
||||
}
|
||||
}
|
||||
|
||||
bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
kvm_mmu_invalidate_range_add(kvm, range->start, range->end);
|
||||
return kvm_unmap_gfn_range(kvm, range);
|
||||
}
|
||||
|
||||
static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
||||
const struct mmu_notifier_range *range)
|
||||
{
|
||||
struct kvm *kvm = mmu_notifier_to_kvm(mn);
|
||||
const struct kvm_hva_range hva_range = {
|
||||
const struct kvm_mmu_notifier_range hva_range = {
|
||||
.start = range->start,
|
||||
.end = range->end,
|
||||
.handler = kvm_unmap_gfn_range,
|
||||
.handler = kvm_mmu_unmap_gfn_range,
|
||||
.on_lock = kvm_mmu_invalidate_begin,
|
||||
.on_unlock = kvm_arch_guest_memory_reclaimed,
|
||||
.flush_on_ret = true,
|
||||
.may_block = mmu_notifier_range_blockable(range),
|
||||
};
|
||||
@ -806,14 +835,21 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
||||
gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
|
||||
hva_range.may_block);
|
||||
|
||||
__kvm_handle_hva_range(kvm, &hva_range);
|
||||
/*
|
||||
* If one or more memslots were found and thus zapped, notify arch code
|
||||
* that guest memory has been reclaimed. This needs to be done *after*
|
||||
* dropping mmu_lock, as x86's reclaim path is slooooow.
|
||||
*/
|
||||
if (__kvm_handle_hva_range(kvm, &hva_range).found_memslot)
|
||||
kvm_arch_guest_memory_reclaimed(kvm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end)
|
||||
void kvm_mmu_invalidate_end(struct kvm *kvm)
|
||||
{
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
* This sequence increase will notify the kvm page fault that
|
||||
* the page that is going to be mapped in the spte could have
|
||||
@ -827,18 +863,24 @@ void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
|
||||
* in conjunction with the smp_rmb in mmu_invalidate_retry().
|
||||
*/
|
||||
kvm->mmu_invalidate_in_progress--;
|
||||
KVM_BUG_ON(kvm->mmu_invalidate_in_progress < 0, kvm);
|
||||
|
||||
/*
|
||||
* Assert that at least one range was added between start() and end().
|
||||
* Not adding a range isn't fatal, but it is a KVM bug.
|
||||
*/
|
||||
WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA);
|
||||
}
|
||||
|
||||
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
|
||||
const struct mmu_notifier_range *range)
|
||||
{
|
||||
struct kvm *kvm = mmu_notifier_to_kvm(mn);
|
||||
const struct kvm_hva_range hva_range = {
|
||||
const struct kvm_mmu_notifier_range hva_range = {
|
||||
.start = range->start,
|
||||
.end = range->end,
|
||||
.handler = (void *)kvm_null_fn,
|
||||
.on_lock = kvm_mmu_invalidate_end,
|
||||
.on_unlock = (void *)kvm_null_fn,
|
||||
.flush_on_ret = false,
|
||||
.may_block = mmu_notifier_range_blockable(range),
|
||||
};
|
||||
@ -857,8 +899,6 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
|
||||
*/
|
||||
if (wake)
|
||||
rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
|
||||
|
||||
BUG_ON(kvm->mmu_invalidate_in_progress < 0);
|
||||
}
|
||||
|
||||
static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
|
||||
@ -932,14 +972,14 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
|
||||
return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
|
||||
}
|
||||
|
||||
#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
|
||||
#else /* !CONFIG_KVM_GENERIC_MMU_NOTIFIER */
|
||||
|
||||
static int kvm_init_mmu_notifier(struct kvm *kvm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
|
||||
#endif /* CONFIG_KVM_GENERIC_MMU_NOTIFIER */
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
|
||||
static int kvm_pm_notifier_call(struct notifier_block *bl,
|
||||
@ -985,6 +1025,9 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
/* This does not remove the slot from struct kvm_memslots data structures */
|
||||
static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
{
|
||||
if (slot->flags & KVM_MEM_GUEST_MEMFD)
|
||||
kvm_gmem_unbind(slot);
|
||||
|
||||
kvm_destroy_dirty_bitmap(slot);
|
||||
|
||||
kvm_arch_free_memslot(kvm, slot);
|
||||
@ -1166,6 +1209,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
|
||||
spin_lock_init(&kvm->mn_invalidate_lock);
|
||||
rcuwait_init(&kvm->mn_memslots_update_rcuwait);
|
||||
xa_init(&kvm->vcpu_array);
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
xa_init(&kvm->mem_attr_array);
|
||||
#endif
|
||||
|
||||
INIT_LIST_HEAD(&kvm->gpc_list);
|
||||
spin_lock_init(&kvm->gpc_lock);
|
||||
@ -1190,7 +1236,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
|
||||
goto out_err_no_irq_srcu;
|
||||
|
||||
refcount_set(&kvm->users_count, 1);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
slots = &kvm->__memslots[i][j];
|
||||
|
||||
@ -1222,7 +1268,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
|
||||
if (r)
|
||||
goto out_err_no_disable;
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
|
||||
#endif
|
||||
|
||||
@ -1256,7 +1302,7 @@ out_err:
|
||||
out_err_no_debugfs:
|
||||
kvm_coalesced_mmio_free(kvm);
|
||||
out_no_coalesced_mmio:
|
||||
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
|
||||
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
|
||||
if (kvm->mmu_notifier.ops)
|
||||
mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
|
||||
#endif
|
||||
@ -1315,7 +1361,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
||||
kvm->buses[i] = NULL;
|
||||
}
|
||||
kvm_coalesced_mmio_free(kvm);
|
||||
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
|
||||
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
|
||||
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
|
||||
/*
|
||||
* At this point, pending calls to invalidate_range_start()
|
||||
@ -1324,20 +1370,30 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
||||
* No threads can be waiting in kvm_swap_active_memslots() as the
|
||||
* last reference on KVM has been dropped, but freeing
|
||||
* memslots would deadlock without this manual intervention.
|
||||
*
|
||||
* If the count isn't unbalanced, i.e. KVM did NOT unregister its MMU
|
||||
* notifier between a start() and end(), then there shouldn't be any
|
||||
* in-progress invalidations.
|
||||
*/
|
||||
WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
|
||||
kvm->mn_active_invalidate_count = 0;
|
||||
if (kvm->mn_active_invalidate_count)
|
||||
kvm->mn_active_invalidate_count = 0;
|
||||
else
|
||||
WARN_ON(kvm->mmu_invalidate_in_progress);
|
||||
#else
|
||||
kvm_flush_shadow_all(kvm);
|
||||
#endif
|
||||
kvm_arch_destroy_vm(kvm);
|
||||
kvm_destroy_devices(kvm);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
kvm_free_memslots(kvm, &kvm->__memslots[i][0]);
|
||||
kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
|
||||
}
|
||||
cleanup_srcu_struct(&kvm->irq_srcu);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
xa_destroy(&kvm->mem_attr_array);
|
||||
#endif
|
||||
kvm_arch_free_vm(kvm);
|
||||
preempt_notifier_dec();
|
||||
hardware_disable_all();
|
||||
@ -1538,10 +1594,26 @@ static void kvm_replace_memslot(struct kvm *kvm,
|
||||
}
|
||||
}
|
||||
|
||||
static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
|
||||
/*
|
||||
* Flags that do not access any of the extra space of struct
|
||||
* kvm_userspace_memory_region2. KVM_SET_USER_MEMORY_REGION_V1_FLAGS
|
||||
* only allows these.
|
||||
*/
|
||||
#define KVM_SET_USER_MEMORY_REGION_V1_FLAGS \
|
||||
(KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_READONLY)
|
||||
|
||||
static int check_memory_region_flags(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region2 *mem)
|
||||
{
|
||||
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
|
||||
|
||||
if (kvm_arch_has_private_mem(kvm))
|
||||
valid_flags |= KVM_MEM_GUEST_MEMFD;
|
||||
|
||||
/* Dirty logging private memory is not currently supported. */
|
||||
if (mem->flags & KVM_MEM_GUEST_MEMFD)
|
||||
valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
|
||||
|
||||
#ifdef __KVM_HAVE_READONLY_MEM
|
||||
valid_flags |= KVM_MEM_READONLY;
|
||||
#endif
|
||||
@ -1603,7 +1675,7 @@ static void kvm_swap_active_memslots(struct kvm *kvm, int as_id)
|
||||
* space 0 will use generations 0, 2, 4, ... while address space 1 will
|
||||
* use generations 1, 3, 5, ...
|
||||
*/
|
||||
gen += KVM_ADDRESS_SPACE_NUM;
|
||||
gen += kvm_arch_nr_memslot_as_ids(kvm);
|
||||
|
||||
kvm_arch_memslots_updated(kvm, gen);
|
||||
|
||||
@ -1940,7 +2012,7 @@ static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id,
|
||||
* Must be called holding kvm->slots_lock for write.
|
||||
*/
|
||||
int __kvm_set_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem)
|
||||
const struct kvm_userspace_memory_region2 *mem)
|
||||
{
|
||||
struct kvm_memory_slot *old, *new;
|
||||
struct kvm_memslots *slots;
|
||||
@ -1950,7 +2022,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
int as_id, id;
|
||||
int r;
|
||||
|
||||
r = check_memory_region_flags(mem);
|
||||
r = check_memory_region_flags(kvm, mem);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@ -1969,7 +2041,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
!access_ok((void __user *)(unsigned long)mem->userspace_addr,
|
||||
mem->memory_size))
|
||||
return -EINVAL;
|
||||
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
|
||||
if (mem->flags & KVM_MEM_GUEST_MEMFD &&
|
||||
(mem->guest_memfd_offset & (PAGE_SIZE - 1) ||
|
||||
mem->guest_memfd_offset + mem->memory_size < mem->guest_memfd_offset))
|
||||
return -EINVAL;
|
||||
if (as_id >= kvm_arch_nr_memslot_as_ids(kvm) || id >= KVM_MEM_SLOTS_NUM)
|
||||
return -EINVAL;
|
||||
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
|
||||
return -EINVAL;
|
||||
@ -2007,6 +2083,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
|
||||
return -EINVAL;
|
||||
} else { /* Modify an existing slot. */
|
||||
/* Private memslots are immutable, they can only be deleted. */
|
||||
if (mem->flags & KVM_MEM_GUEST_MEMFD)
|
||||
return -EINVAL;
|
||||
if ((mem->userspace_addr != old->userspace_addr) ||
|
||||
(npages != old->npages) ||
|
||||
((mem->flags ^ old->flags) & KVM_MEM_READONLY))
|
||||
@ -2035,16 +2114,29 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
new->npages = npages;
|
||||
new->flags = mem->flags;
|
||||
new->userspace_addr = mem->userspace_addr;
|
||||
if (mem->flags & KVM_MEM_GUEST_MEMFD) {
|
||||
r = kvm_gmem_bind(kvm, new, mem->guest_memfd, mem->guest_memfd_offset);
|
||||
if (r)
|
||||
goto out;
|
||||
}
|
||||
|
||||
r = kvm_set_memslot(kvm, old, new, change);
|
||||
if (r)
|
||||
kfree(new);
|
||||
goto out_unbind;
|
||||
|
||||
return 0;
|
||||
|
||||
out_unbind:
|
||||
if (mem->flags & KVM_MEM_GUEST_MEMFD)
|
||||
kvm_gmem_unbind(new);
|
||||
out:
|
||||
kfree(new);
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
|
||||
|
||||
int kvm_set_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem)
|
||||
const struct kvm_userspace_memory_region2 *mem)
|
||||
{
|
||||
int r;
|
||||
|
||||
@ -2056,7 +2148,7 @@ int kvm_set_memory_region(struct kvm *kvm,
|
||||
EXPORT_SYMBOL_GPL(kvm_set_memory_region);
|
||||
|
||||
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
struct kvm_userspace_memory_region2 *mem)
|
||||
{
|
||||
if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
|
||||
return -EINVAL;
|
||||
@ -2089,7 +2181,7 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
|
||||
|
||||
as_id = log->slot >> 16;
|
||||
id = (u16)log->slot;
|
||||
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
|
||||
if (as_id >= kvm_arch_nr_memslot_as_ids(kvm) || id >= KVM_USER_MEM_SLOTS)
|
||||
return -EINVAL;
|
||||
|
||||
slots = __kvm_memslots(kvm, as_id);
|
||||
@ -2151,7 +2243,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
|
||||
as_id = log->slot >> 16;
|
||||
id = (u16)log->slot;
|
||||
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
|
||||
if (as_id >= kvm_arch_nr_memslot_as_ids(kvm) || id >= KVM_USER_MEM_SLOTS)
|
||||
return -EINVAL;
|
||||
|
||||
slots = __kvm_memslots(kvm, as_id);
|
||||
@ -2263,7 +2355,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
|
||||
|
||||
as_id = log->slot >> 16;
|
||||
id = (u16)log->slot;
|
||||
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
|
||||
if (as_id >= kvm_arch_nr_memslot_as_ids(kvm) || id >= KVM_USER_MEM_SLOTS)
|
||||
return -EINVAL;
|
||||
|
||||
if (log->first_page & 63)
|
||||
@ -2335,6 +2427,200 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
|
||||
}
|
||||
#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
/*
|
||||
* Returns true if _all_ gfns in the range [@start, @end) have attributes
|
||||
* matching @attrs.
|
||||
*/
|
||||
bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
|
||||
unsigned long attrs)
|
||||
{
|
||||
XA_STATE(xas, &kvm->mem_attr_array, start);
|
||||
unsigned long index;
|
||||
bool has_attrs;
|
||||
void *entry;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (!attrs) {
|
||||
has_attrs = !xas_find(&xas, end - 1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
has_attrs = true;
|
||||
for (index = start; index < end; index++) {
|
||||
do {
|
||||
entry = xas_next(&xas);
|
||||
} while (xas_retry(&xas, entry));
|
||||
|
||||
if (xas.xa_index != index || xa_to_value(entry) != attrs) {
|
||||
has_attrs = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return has_attrs;
|
||||
}
|
||||
|
||||
static u64 kvm_supported_mem_attributes(struct kvm *kvm)
|
||||
{
|
||||
if (!kvm || kvm_arch_has_private_mem(kvm))
|
||||
return KVM_MEMORY_ATTRIBUTE_PRIVATE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __always_inline void kvm_handle_gfn_range(struct kvm *kvm,
|
||||
struct kvm_mmu_notifier_range *range)
|
||||
{
|
||||
struct kvm_gfn_range gfn_range;
|
||||
struct kvm_memory_slot *slot;
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memslot_iter iter;
|
||||
bool found_memslot = false;
|
||||
bool ret = false;
|
||||
int i;
|
||||
|
||||
gfn_range.arg = range->arg;
|
||||
gfn_range.may_block = range->may_block;
|
||||
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
|
||||
kvm_for_each_memslot_in_gfn_range(&iter, slots, range->start, range->end) {
|
||||
slot = iter.slot;
|
||||
gfn_range.slot = slot;
|
||||
|
||||
gfn_range.start = max(range->start, slot->base_gfn);
|
||||
gfn_range.end = min(range->end, slot->base_gfn + slot->npages);
|
||||
if (gfn_range.start >= gfn_range.end)
|
||||
continue;
|
||||
|
||||
if (!found_memslot) {
|
||||
found_memslot = true;
|
||||
KVM_MMU_LOCK(kvm);
|
||||
if (!IS_KVM_NULL_FN(range->on_lock))
|
||||
range->on_lock(kvm);
|
||||
}
|
||||
|
||||
ret |= range->handler(kvm, &gfn_range);
|
||||
}
|
||||
}
|
||||
|
||||
if (range->flush_on_ret && ret)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
if (found_memslot)
|
||||
KVM_MMU_UNLOCK(kvm);
|
||||
}
|
||||
|
||||
static bool kvm_pre_set_memory_attributes(struct kvm *kvm,
|
||||
struct kvm_gfn_range *range)
|
||||
{
|
||||
/*
|
||||
* Unconditionally add the range to the invalidation set, regardless of
|
||||
* whether or not the arch callback actually needs to zap SPTEs. E.g.
|
||||
* if KVM supports RWX attributes in the future and the attributes are
|
||||
* going from R=>RW, zapping isn't strictly necessary. Unconditionally
|
||||
* adding the range allows KVM to require that MMU invalidations add at
|
||||
* least one range between begin() and end(), e.g. allows KVM to detect
|
||||
* bugs where the add() is missed. Relaxing the rule *might* be safe,
|
||||
* but it's not obvious that allowing new mappings while the attributes
|
||||
* are in flux is desirable or worth the complexity.
|
||||
*/
|
||||
kvm_mmu_invalidate_range_add(kvm, range->start, range->end);
|
||||
|
||||
return kvm_arch_pre_set_memory_attributes(kvm, range);
|
||||
}
|
||||
|
||||
/* Set @attributes for the gfn range [@start, @end). */
|
||||
static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
|
||||
unsigned long attributes)
|
||||
{
|
||||
struct kvm_mmu_notifier_range pre_set_range = {
|
||||
.start = start,
|
||||
.end = end,
|
||||
.handler = kvm_pre_set_memory_attributes,
|
||||
.on_lock = kvm_mmu_invalidate_begin,
|
||||
.flush_on_ret = true,
|
||||
.may_block = true,
|
||||
};
|
||||
struct kvm_mmu_notifier_range post_set_range = {
|
||||
.start = start,
|
||||
.end = end,
|
||||
.arg.attributes = attributes,
|
||||
.handler = kvm_arch_post_set_memory_attributes,
|
||||
.on_lock = kvm_mmu_invalidate_end,
|
||||
.may_block = true,
|
||||
};
|
||||
unsigned long i;
|
||||
void *entry;
|
||||
int r = 0;
|
||||
|
||||
entry = attributes ? xa_mk_value(attributes) : NULL;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
/* Nothing to do if the entire range has the desired attributes. */
|
||||
if (kvm_range_has_memory_attributes(kvm, start, end, attributes))
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Reserve memory ahead of time to avoid having to deal with failures
|
||||
* partway through setting the new attributes.
|
||||
*/
|
||||
for (i = start; i < end; i++) {
|
||||
r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT);
|
||||
if (r)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
kvm_handle_gfn_range(kvm, &pre_set_range);
|
||||
|
||||
for (i = start; i < end; i++) {
|
||||
r = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
|
||||
GFP_KERNEL_ACCOUNT));
|
||||
KVM_BUG_ON(r, kvm);
|
||||
}
|
||||
|
||||
kvm_handle_gfn_range(kvm, &post_set_range);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
return r;
|
||||
}
|
||||
static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
|
||||
struct kvm_memory_attributes *attrs)
|
||||
{
|
||||
gfn_t start, end;
|
||||
|
||||
/* flags is currently not used. */
|
||||
if (attrs->flags)
|
||||
return -EINVAL;
|
||||
if (attrs->attributes & ~kvm_supported_mem_attributes(kvm))
|
||||
return -EINVAL;
|
||||
if (attrs->size == 0 || attrs->address + attrs->size < attrs->address)
|
||||
return -EINVAL;
|
||||
if (!PAGE_ALIGNED(attrs->address) || !PAGE_ALIGNED(attrs->size))
|
||||
return -EINVAL;
|
||||
|
||||
start = attrs->address >> PAGE_SHIFT;
|
||||
end = (attrs->address + attrs->size) >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* xarray tracks data using "unsigned long", and as a result so does
|
||||
* KVM. For simplicity, generic attributes are supported only on 64-bit
|
||||
* architectures.
|
||||
*/
|
||||
BUILD_BUG_ON(sizeof(attrs->attributes) != sizeof(unsigned long));
|
||||
|
||||
return kvm_vm_set_mem_attributes(kvm, start, end, attrs->attributes);
|
||||
}
|
||||
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
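
A hedged userspace sketch of the new attributes ioctl handled above: convert a page-aligned GPA range to private. It assumes the kernel advertises KVM_CAP_MEMORY_ATTRIBUTES for this VM; the wrapper name is illustrative.

/*
 * Illustrative userspace sketch (not part of this commit): mark a GPA range
 * private. address and size must be page aligned and non-wrapping, matching
 * the checks in kvm_vm_ioctl_set_mem_attributes() above.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_gpa_range_private(int vm_fd, __u64 gpa, __u64 size)
{
        struct kvm_memory_attributes attrs = {
                .address = gpa,
                .size = size,
                .attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
                .flags = 0,
        };

        return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}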
|
||||
|
||||
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_memslot(kvm_memslots(kvm), gfn);
|
||||
@ -4527,13 +4813,14 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
||||
{
|
||||
switch (arg) {
|
||||
case KVM_CAP_USER_MEMORY:
|
||||
case KVM_CAP_USER_MEMORY2:
|
||||
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
|
||||
case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
|
||||
case KVM_CAP_INTERNAL_ERROR_DATA:
|
||||
#ifdef CONFIG_HAVE_KVM_MSI
|
||||
case KVM_CAP_SIGNAL_MSI:
|
||||
#endif
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
case KVM_CAP_IRQFD:
|
||||
#endif
|
||||
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
|
||||
@ -4555,9 +4842,11 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
||||
case KVM_CAP_IRQ_ROUTING:
|
||||
return KVM_MAX_IRQ_ROUTES;
|
||||
#endif
|
||||
#if KVM_ADDRESS_SPACE_NUM > 1
|
||||
#if KVM_MAX_NR_ADDRESS_SPACES > 1
|
||||
case KVM_CAP_MULTI_ADDRESS_SPACE:
|
||||
return KVM_ADDRESS_SPACE_NUM;
|
||||
if (kvm)
|
||||
return kvm_arch_nr_memslot_as_ids(kvm);
|
||||
return KVM_MAX_NR_ADDRESS_SPACES;
|
||||
#endif
|
||||
case KVM_CAP_NR_MEMSLOTS:
|
||||
return KVM_USER_MEM_SLOTS;
|
||||
@ -4579,6 +4868,14 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
||||
case KVM_CAP_BINARY_STATS_FD:
|
||||
case KVM_CAP_SYSTEM_EVENT_DATA:
|
||||
return 1;
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
case KVM_CAP_MEMORY_ATTRIBUTES:
|
||||
return kvm_supported_mem_attributes(kvm);
|
||||
#endif
|
||||
#ifdef CONFIG_KVM_PRIVATE_MEM
|
||||
case KVM_CAP_GUEST_MEMFD:
|
||||
return !kvm || kvm_arch_has_private_mem(kvm);
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -4657,7 +4954,7 @@ bool kvm_are_all_memslots_empty(struct kvm *kvm)
|
||||
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
if (!kvm_memslots_empty(__kvm_memslots(kvm, i)))
|
||||
return false;
|
||||
}
|
||||
@ -4783,6 +5080,14 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
|
||||
return fd;
|
||||
}
|
||||
|
||||
#define SANITY_CHECK_MEM_REGION_FIELD(field) \
|
||||
do { \
|
||||
BUILD_BUG_ON(offsetof(struct kvm_userspace_memory_region, field) != \
|
||||
offsetof(struct kvm_userspace_memory_region2, field)); \
|
||||
BUILD_BUG_ON(sizeof_field(struct kvm_userspace_memory_region, field) != \
|
||||
sizeof_field(struct kvm_userspace_memory_region2, field)); \
|
||||
} while (0)
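
The dispatch below accepts both the legacy KVM_SET_USER_MEMORY_REGION ioctl and KVM_SET_USER_MEMORY_REGION2, while the capability code earlier in this change reports KVM_CAP_USER_MEMORY2 and KVM_CAP_GUEST_MEMFD. A hedged sketch of the probe a VMM might do before switching to the v2 path; the helper name is made up for the example.

/*
 * Illustrative userspace sketch (not part of this commit): decide whether
 * the v2 memslot ioctl and guest_memfd can be used on this VM.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int vm_supports_guest_memfd(int vm_fd)
{
        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY2) <= 0)
                return 0;

        return ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_MEMFD) > 0;
}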
|
||||
|
||||
static long kvm_vm_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
@ -4805,15 +5110,39 @@ static long kvm_vm_ioctl(struct file *filp,
|
||||
r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap);
|
||||
break;
|
||||
}
|
||||
case KVM_SET_USER_MEMORY_REGION2:
|
||||
case KVM_SET_USER_MEMORY_REGION: {
|
||||
struct kvm_userspace_memory_region kvm_userspace_mem;
|
||||
struct kvm_userspace_memory_region2 mem;
|
||||
unsigned long size;
|
||||
|
||||
if (ioctl == KVM_SET_USER_MEMORY_REGION) {
|
||||
/*
|
||||
* Fields beyond struct kvm_userspace_memory_region shouldn't be
|
||||
* accessed, but avoid leaking kernel memory in case of a bug.
|
||||
*/
|
||||
memset(&mem, 0, sizeof(mem));
|
||||
size = sizeof(struct kvm_userspace_memory_region);
|
||||
} else {
|
||||
size = sizeof(struct kvm_userspace_memory_region2);
|
||||
}
|
||||
|
||||
/* Ensure the common parts of the two structs are identical. */
|
||||
SANITY_CHECK_MEM_REGION_FIELD(slot);
|
||||
SANITY_CHECK_MEM_REGION_FIELD(flags);
|
||||
SANITY_CHECK_MEM_REGION_FIELD(guest_phys_addr);
|
||||
SANITY_CHECK_MEM_REGION_FIELD(memory_size);
|
||||
SANITY_CHECK_MEM_REGION_FIELD(userspace_addr);
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&kvm_userspace_mem, argp,
|
||||
sizeof(kvm_userspace_mem)))
|
||||
if (copy_from_user(&mem, argp, size))
|
||||
goto out;
|
||||
|
||||
r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
|
||||
r = -EINVAL;
|
||||
if (ioctl == KVM_SET_USER_MEMORY_REGION &&
|
||||
(mem.flags & ~KVM_SET_USER_MEMORY_REGION_V1_FLAGS))
|
||||
goto out;
|
||||
|
||||
r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
|
||||
break;
|
||||
}
|
||||
case KVM_GET_DIRTY_LOG: {
|
||||
@ -4941,6 +5270,18 @@ static long kvm_vm_ioctl(struct file *filp,
|
||||
break;
|
||||
}
|
||||
#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
case KVM_SET_MEMORY_ATTRIBUTES: {
|
||||
struct kvm_memory_attributes attrs;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&attrs, argp, sizeof(attrs)))
|
||||
goto out;
|
||||
|
||||
r = kvm_vm_ioctl_set_mem_attributes(kvm, &attrs);
|
||||
break;
|
||||
}
|
||||
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
|
||||
case KVM_CREATE_DEVICE: {
|
||||
struct kvm_create_device cd;
|
||||
|
||||
@ -4968,6 +5309,18 @@ static long kvm_vm_ioctl(struct file *filp,
|
||||
case KVM_GET_STATS_FD:
|
||||
r = kvm_vm_ioctl_get_stats_fd(kvm);
|
||||
break;
|
||||
#ifdef CONFIG_KVM_PRIVATE_MEM
|
||||
case KVM_CREATE_GUEST_MEMFD: {
|
||||
struct kvm_create_guest_memfd guest_memfd;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&guest_memfd, argp, sizeof(guest_memfd)))
|
||||
goto out;
|
||||
|
||||
r = kvm_gmem_create(kvm, &guest_memfd);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
|
||||
}
|
||||
@ -5139,11 +5492,6 @@ static long kvm_dev_ioctl(struct file *filp,
|
||||
r += PAGE_SIZE; /* coalesced mmio ring page */
|
||||
#endif
|
||||
break;
|
||||
case KVM_TRACE_ENABLE:
|
||||
case KVM_TRACE_PAUSE:
|
||||
case KVM_TRACE_DISABLE:
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
default:
|
||||
return kvm_arch_dev_ioctl(filp, ioctl, arg);
|
||||
}
|
||||
@ -6104,6 +6452,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
|
||||
if (WARN_ON_ONCE(r))
|
||||
goto err_vfio;
|
||||
|
||||
kvm_gmem_init(module);
|
||||
|
||||
/*
|
||||
* Registration _must_ be the very last thing done, as this exposes
|
||||
* /dev/kvm to userspace, i.e. all infrastructure must be setup!
|
||||
|
@ -37,4 +37,30 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
|
||||
}
|
||||
#endif /* HAVE_KVM_PFNCACHE */
|
||||
|
||||
#ifdef CONFIG_KVM_PRIVATE_MEM
|
||||
void kvm_gmem_init(struct module *module);
|
||||
int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
|
||||
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
unsigned int fd, loff_t offset);
|
||||
void kvm_gmem_unbind(struct kvm_memory_slot *slot);
|
||||
#else
|
||||
static inline void kvm_gmem_init(struct module *module)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static inline int kvm_gmem_bind(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned int fd, loff_t offset)
|
||||
{
|
||||
WARN_ON_ONCE(1);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static inline void kvm_gmem_unbind(struct kvm_memory_slot *slot)
|
||||
{
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
#endif /* CONFIG_KVM_PRIVATE_MEM */
|
||||
|
||||
#endif /* __KVM_MM_H__ */
|
||||
|