mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-18 11:54:37 +08:00
ARM:
* Clean up vCPU targets, always returning generic v8 as the preferred target * Trap forwarding infrastructure for nested virtualization (used for traps that are taken from an L2 guest and are needed by the L1 hypervisor) * FEAT_TLBIRANGE support to only invalidate specific ranges of addresses when collapsing a table PTE to a block PTE. This avoids the guest having to refill the TLBs for addresses that aren't covered by the table PTE. * Fix vPMU issues related to handling of PMUver. * Don't unnecessarily align non-stack allocations in the EL2 VA space * Drop HCR_VIRT_EXCP_MASK, which was never used... * Don't use smp_processor_id() in kvm_arch_vcpu_load(), but the cpu parameter instead * Drop redundant call to kvm_set_pfn_accessed() in user_mem_abort() * Remove prototypes without implementations RISC-V: * Zba, Zbs, Zicntr, Zicsr, Zifencei, and Zihpm support for guest * Added ONE_REG interface for SATP mode * Added ONE_REG interface to enable/disable multiple ISA extensions * Improved error codes returned by ONE_REG interfaces * Added KVM_GET_REG_LIST ioctl() implementation for KVM RISC-V * Added get-reg-list selftest for KVM RISC-V s390: * PV crypto passthrough enablement (Tony, Steffen, Viktor, Janosch) Allows a PV guest to use crypto cards. Card access is governed by the firmware and once a crypto queue is "bound" to a PV VM every other entity (PV or not) loses access until it is not bound anymore. Enablement is done via flags when creating the PV VM. 
* Guest debug fixes (Ilya) x86: * Clean up KVM's handling of Intel architectural events * Intel bugfixes * Add support for SEV-ES DebugSwap, allowing SEV-ES guests to use debug registers and generate/handle #DBs * Clean up LBR virtualization code * Fix a bug where KVM fails to set the target pCPU during an IRTE update * Fix fatal bugs in SEV-ES intrahost migration * Fix a bug where the recent (architecturally correct) change to reinject #BP and skip INT3 broke SEV guests (can't decode INT3 to skip it) * Retry APIC map recalculation if a vCPU is added/enabled * Overhaul emergency reboot code to bring SVM up to par with VMX, tie the "emergency disabling" behavior to KVM actually being loaded, and move all of the logic within KVM * Fix user-triggerable WARNs in SVM where KVM incorrectly assumes the TSC ratio MSR cannot diverge from the default when TSC scaling is disabled, and clean up related code * Add a framework to allow "caching" feature flags so that KVM can check if the guest can use a feature without needing to search guest CPUID * Rip out the ancient MMU_DEBUG crud and replace the useful bits with CONFIG_KVM_PROVE_MMU * Fix KVM's handling of !visible guest roots to avoid premature triple fault injection * Overhaul KVM's page-track APIs, and KVMGT's usage, to reduce the API surface that is needed by external users (currently only KVMGT), and fix a variety of issues in the process This last item had a silly one-character bug in the topic branch that was sent to me. Because it caused pretty bad selftest failures in some configurations, I decided to squash in the fix. So, while the exact commit ids haven't been in linux-next, the code has (from the kvm-x86 tree). Generic: * Wrap kvm_{gfn,hva}_range.pte in a union to allow mmu_notifier events to pass action-specific data without needing to constantly update the main handlers. 
* Drop unused function declarations Selftests: * Add testcases to x86's sync_regs_test for detecting KVM TOCTOU bugs * Add support for printf() in guest code and covert all guest asserts to use printf-based reporting * Clean up the PMU event filter test and add new testcases * Include x86 selftests in the KVM x86 MAINTAINERS entry -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmT1m0kUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroMNgggAiN7nz6UC423FznuI+yO3TLm8tkx1 CpKh5onqQogVtchH+vrngi97cfOzZb1/AtifY90OWQi31KEWhehkeofcx7G6ERhj 5a9NFADY1xGBsX4exca/VHDxhnzsbDWaWYPXw5vWFWI6erft9Mvy3tp1LwTvOzqM v8X4aWz+g5bmo/DWJf4Wu32tEU6mnxzkrjKU14JmyqQTBawVmJ3RYvHVJ/Agpw+n hRtPAy7FU6XTdkmq/uCT+KUHuJEIK0E/l1js47HFAqSzwdW70UDg14GGo1o4ETxu RjZQmVNvL57yVgi6QU38/A0FWIsWQm5IlaX1Ug6x8pjZPnUKNbo9BY4T1g== =W+4p -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm updates from Paolo Bonzini: "ARM: - Clean up vCPU targets, always returning generic v8 as the preferred target - Trap forwarding infrastructure for nested virtualization (used for traps that are taken from an L2 guest and are needed by the L1 hypervisor) - FEAT_TLBIRANGE support to only invalidate specific ranges of addresses when collapsing a table PTE to a block PTE. This avoids that the guest refills the TLBs again for addresses that aren't covered by the table PTE. - Fix vPMU issues related to handling of PMUver. - Don't unnecessary align non-stack allocations in the EL2 VA space - Drop HCR_VIRT_EXCP_MASK, which was never used... 
- Don't use smp_processor_id() in kvm_arch_vcpu_load(), but the cpu parameter instead - Drop redundant call to kvm_set_pfn_accessed() in user_mem_abort() - Remove prototypes without implementations RISC-V: - Zba, Zbs, Zicntr, Zicsr, Zifencei, and Zihpm support for guest - Added ONE_REG interface for SATP mode - Added ONE_REG interface to enable/disable multiple ISA extensions - Improved error codes returned by ONE_REG interfaces - Added KVM_GET_REG_LIST ioctl() implementation for KVM RISC-V - Added get-reg-list selftest for KVM RISC-V s390: - PV crypto passthrough enablement (Tony, Steffen, Viktor, Janosch) Allows a PV guest to use crypto cards. Card access is governed by the firmware and once a crypto queue is "bound" to a PV VM every other entity (PV or not) loses access until it is not bound anymore. Enablement is done via flags when creating the PV VM. - Guest debug fixes (Ilya) x86: - Clean up KVM's handling of Intel architectural events - Intel bugfixes - Add support for SEV-ES DebugSwap, allowing SEV-ES guests to use debug registers and generate/handle #DBs - Clean up LBR virtualization code - Fix a bug where KVM fails to set the target pCPU during an IRTE update - Fix fatal bugs in SEV-ES intrahost migration - Fix a bug where the recent (architecturally correct) change to reinject #BP and skip INT3 broke SEV guests (can't decode INT3 to skip it) - Retry APIC map recalculation if a vCPU is added/enabled - Overhaul emergency reboot code to bring SVM up to par with VMX, tie the "emergency disabling" behavior to KVM actually being loaded, and move all of the logic within KVM - Fix user-triggerable WARNs in SVM where KVM incorrectly assumes the TSC ratio MSR cannot diverge from the default when TSC scaling is disabled, and clean up related code - Add a framework to allow "caching" feature flags so that KVM can check if the guest can use a feature without needing to search guest CPUID - Rip out the ancient MMU_DEBUG crud and replace the useful bits with 
CONFIG_KVM_PROVE_MMU - Fix KVM's handling of !visible guest roots to avoid premature triple fault injection - Overhaul KVM's page-track APIs, and KVMGT's usage, to reduce the API surface that is needed by external users (currently only KVMGT), and fix a variety of issues in the process Generic: - Wrap kvm_{gfn,hva}_range.pte in a union to allow mmu_notifier events to pass action-specific data without needing to constantly update the main handlers. - Drop unused function declarations Selftests: - Add testcases to x86's sync_regs_test for detecting KVM TOCTOU bugs - Add support for printf() in guest code and convert all guest asserts to use printf-based reporting - Clean up the PMU event filter test and add new testcases - Include x86 selftests in the KVM x86 MAINTAINERS entry" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (279 commits) KVM: x86/mmu: Include mmu.h in spte.h KVM: x86/mmu: Use dummy root, backed by zero page, for !visible guest roots KVM: x86/mmu: Disallow guest from using !visible slots for page tables KVM: x86/mmu: Harden TDP MMU iteration against root w/o shadow page KVM: x86/mmu: Harden new PGD against roots without shadow pages KVM: x86/mmu: Add helper to convert root hpa to shadow page drm/i915/gvt: Drop final dependencies on KVM internal details KVM: x86/mmu: Handle KVM bookkeeping in page-track APIs, not callers KVM: x86/mmu: Drop @slot param from exported/external page-track APIs KVM: x86/mmu: Bug the VM if write-tracking is used but not enabled KVM: x86/mmu: Assert that correct locks are held for page write-tracking KVM: x86/mmu: Rename page-track APIs to reflect the new reality KVM: x86/mmu: Drop infrastructure for multiple page-track modes KVM: x86/mmu: Use page-track notifiers iff there are external users KVM: x86/mmu: Move KVM-only page-track declarations to internal header KVM: x86: Remove the unused page-track hook track_flush_slot() drm/i915/gvt: switch from ->track_flush_slot() to ->track_remove_region() KVM: x86: Add 
a new page-track hook to handle memslot deletion drm/i915/gvt: Don't bother removing write-protection on to-be-deleted slot KVM: x86: Reject memslot MOVE operations if KVMGT is attached ...
This commit is contained in:
commit
0c02183427
@ -2259,6 +2259,8 @@ Errors:
|
||||
EINVAL invalid register ID, or no such register or used with VMs in
|
||||
protected virtualization mode on s390
|
||||
EPERM (arm64) register access not allowed before vcpu finalization
|
||||
EBUSY (riscv) changing register value not allowed after the vcpu
|
||||
has run at least once
|
||||
====== ============================================================
|
||||
|
||||
(These error codes are indicative only: do not rely on a specific error
|
||||
@ -3499,7 +3501,7 @@ VCPU matching underlying host.
|
||||
---------------------
|
||||
|
||||
:Capability: basic
|
||||
:Architectures: arm64, mips
|
||||
:Architectures: arm64, mips, riscv
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: struct kvm_reg_list (in/out)
|
||||
:Returns: 0 on success; -1 on error
|
||||
|
@ -11589,6 +11589,8 @@ F: arch/x86/include/uapi/asm/svm.h
|
||||
F: arch/x86/include/uapi/asm/vmx.h
|
||||
F: arch/x86/kvm/
|
||||
F: arch/x86/kvm/*/
|
||||
F: tools/testing/selftests/kvm/*/x86_64/
|
||||
F: tools/testing/selftests/kvm/x86_64/
|
||||
|
||||
KERNFS
|
||||
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
|
@ -227,6 +227,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void kvm_vcpu_pmu_resync_el0(void) {}
|
||||
|
||||
/* PMU Version in DFR Register */
|
||||
#define ARMV8_PMU_DFR_VER_NI 0
|
||||
#define ARMV8_PMU_DFR_VER_V3P4 0x5
|
||||
|
@ -18,10 +18,19 @@
|
||||
#define HCR_DCT (UL(1) << 57)
|
||||
#define HCR_ATA_SHIFT 56
|
||||
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
|
||||
#define HCR_TTLBOS (UL(1) << 55)
|
||||
#define HCR_TTLBIS (UL(1) << 54)
|
||||
#define HCR_ENSCXT (UL(1) << 53)
|
||||
#define HCR_TOCU (UL(1) << 52)
|
||||
#define HCR_AMVOFFEN (UL(1) << 51)
|
||||
#define HCR_TICAB (UL(1) << 50)
|
||||
#define HCR_TID4 (UL(1) << 49)
|
||||
#define HCR_FIEN (UL(1) << 47)
|
||||
#define HCR_FWB (UL(1) << 46)
|
||||
#define HCR_NV2 (UL(1) << 45)
|
||||
#define HCR_AT (UL(1) << 44)
|
||||
#define HCR_NV1 (UL(1) << 43)
|
||||
#define HCR_NV (UL(1) << 42)
|
||||
#define HCR_API (UL(1) << 41)
|
||||
#define HCR_APK (UL(1) << 40)
|
||||
#define HCR_TEA (UL(1) << 37)
|
||||
@ -89,7 +98,6 @@
|
||||
HCR_BSU_IS | HCR_FB | HCR_TACR | \
|
||||
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
|
||||
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
|
||||
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
|
||||
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
|
||||
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
|
||||
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
|
||||
@ -324,6 +332,47 @@
|
||||
BIT(18) | \
|
||||
GENMASK(16, 15))
|
||||
|
||||
/*
|
||||
* FGT register definitions
|
||||
*
|
||||
* RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
|
||||
* We're not using the generated masks as they are usually ahead of
|
||||
* the published ARM ARM, which we use as a reference.
|
||||
*
|
||||
* Once we get to a point where the two describe the same thing, we'll
|
||||
* merge the definitions. One day.
|
||||
*/
|
||||
#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
|
||||
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
|
||||
#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
|
||||
|
||||
#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
|
||||
BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
|
||||
GENMASK(26, 25) | BIT(21) | BIT(18) | \
|
||||
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
|
||||
#define __HFGWTR_EL2_MASK GENMASK(49, 0)
|
||||
#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
|
||||
|
||||
#define __HFGITR_EL2_RES0 GENMASK(63, 57)
|
||||
#define __HFGITR_EL2_MASK GENMASK(54, 0)
|
||||
#define __HFGITR_EL2_nMASK GENMASK(56, 55)
|
||||
|
||||
#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \
|
||||
GENMASK(21, 20) | BIT(8))
|
||||
#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK
|
||||
#define __HDFGRTR_EL2_nMASK GENMASK(62, 59)
|
||||
|
||||
#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
|
||||
BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
|
||||
BIT(22) | BIT(9) | BIT(6))
|
||||
#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK
|
||||
#define __HDFGWTR_EL2_nMASK GENMASK(62, 60)
|
||||
|
||||
/* Similar definitions for HCRX_EL2 */
|
||||
#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12))
|
||||
#define __HCRX_EL2_MASK (0)
|
||||
#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0))
|
||||
|
||||
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
|
||||
#define HPFAR_MASK (~UL(0xf))
|
||||
/*
|
||||
|
@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
|
||||
@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
|
||||
extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t ipa,
|
||||
int level);
|
||||
extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t start, unsigned long pages);
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
|
||||
|
||||
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
|
||||
|
@ -49,6 +49,7 @@
|
||||
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
|
||||
#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
|
||||
#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
|
||||
#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
|
||||
|
||||
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
|
||||
KVM_DIRTY_LOG_INITIALLY_SET)
|
||||
@ -380,6 +381,7 @@ enum vcpu_sysreg {
|
||||
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
|
||||
HSTR_EL2, /* Hypervisor System Trap Register */
|
||||
HACR_EL2, /* Hypervisor Auxiliary Control Register */
|
||||
HCRX_EL2, /* Extended Hypervisor Configuration Register */
|
||||
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
|
||||
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
|
||||
TCR_EL2, /* Translation Control Register (EL2) */
|
||||
@ -400,6 +402,11 @@ enum vcpu_sysreg {
|
||||
TPIDR_EL2, /* EL2 Software Thread ID Register */
|
||||
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
|
||||
SP_EL2, /* EL2 Stack Pointer */
|
||||
HFGRTR_EL2,
|
||||
HFGWTR_EL2,
|
||||
HFGITR_EL2,
|
||||
HDFGRTR_EL2,
|
||||
HDFGWTR_EL2,
|
||||
CNTHP_CTL_EL2,
|
||||
CNTHP_CVAL_EL2,
|
||||
CNTHV_CTL_EL2,
|
||||
@ -567,8 +574,7 @@ struct kvm_vcpu_arch {
|
||||
/* Cache some mmu pages needed inside spinlock regions */
|
||||
struct kvm_mmu_memory_cache mmu_page_cache;
|
||||
|
||||
/* Target CPU and feature flags */
|
||||
int target;
|
||||
/* feature flags */
|
||||
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
|
||||
|
||||
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
|
||||
@ -669,6 +675,8 @@ struct kvm_vcpu_arch {
|
||||
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
|
||||
/* PTRAUTH exposed to guest */
|
||||
#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
|
||||
/* KVM_ARM_VCPU_INIT completed */
|
||||
#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3))
|
||||
|
||||
/* Exception pending */
|
||||
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
|
||||
@ -899,7 +907,6 @@ struct kvm_vcpu_stat {
|
||||
u64 exits;
|
||||
};
|
||||
|
||||
void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
|
||||
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
|
||||
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
|
||||
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
|
||||
@ -967,8 +974,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
|
||||
#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
|
||||
#endif /* __KVM_NVHE_HYPERVISOR__ */
|
||||
|
||||
void force_vm_exit(const cpumask_t *mask);
|
||||
|
||||
int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
|
||||
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
|
||||
|
||||
@ -983,6 +988,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
|
||||
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
|
||||
|
||||
int __init kvm_sys_reg_table_init(void);
|
||||
int __init populate_nv_trap_config(void);
|
||||
|
||||
bool lock_all_vcpus(struct kvm *kvm);
|
||||
void unlock_all_vcpus(struct kvm *kvm);
|
||||
@ -1049,8 +1055,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
|
||||
return cpus_have_const_cap(ARM64_SPECTRE_V3A);
|
||||
}
|
||||
|
||||
void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
|
||||
@ -1113,13 +1117,15 @@ int __init kvm_set_ipa_limit(void);
|
||||
#define __KVM_HAVE_ARCH_VM_ALLOC
|
||||
struct kvm *kvm_arch_alloc_vm(void);
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
|
||||
|
||||
static inline bool kvm_vm_is_protected(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
|
||||
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
|
||||
void __iomem **haddr);
|
||||
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
|
||||
void **haddr);
|
||||
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
|
||||
void __init free_hyp_pgds(void);
|
||||
|
||||
void stage2_unmap_vm(struct kvm *kvm);
|
||||
|
@ -11,6 +11,8 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
|
||||
test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
|
||||
}
|
||||
|
||||
extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
|
||||
|
||||
struct sys_reg_params;
|
||||
struct sys_reg_desc;
|
||||
|
||||
|
@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
|
||||
* kvm_pgtable_prot format.
|
||||
*/
|
||||
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
|
||||
|
||||
/**
|
||||
* kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
|
||||
*
|
||||
* @mmu: Stage-2 KVM MMU struct
|
||||
* @addr: The base Intermediate physical address from which to invalidate
|
||||
* @size: Size of the range from the base to invalidate
|
||||
*/
|
||||
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t addr, size_t size);
|
||||
#endif /* __ARM64_KVM_PGTABLE_H__ */
|
||||
|
@ -124,6 +124,37 @@
|
||||
#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
|
||||
#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
|
||||
|
||||
#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0)
|
||||
#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0)
|
||||
#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1)
|
||||
|
||||
#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1)
|
||||
#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3)
|
||||
#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5)
|
||||
|
||||
#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1)
|
||||
#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3)
|
||||
#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5)
|
||||
|
||||
#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1)
|
||||
|
||||
#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1)
|
||||
#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3)
|
||||
#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5)
|
||||
|
||||
#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1)
|
||||
#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3)
|
||||
#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5)
|
||||
|
||||
#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1)
|
||||
#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
|
||||
#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
|
||||
|
||||
/* Data cache zero operations */
|
||||
#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
|
||||
#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
|
||||
#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
|
||||
|
||||
/*
|
||||
* Automatically generated definitions for system registers, the
|
||||
* manual encodings below are in the process of being converted to
|
||||
@ -163,6 +194,82 @@
|
||||
#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0)
|
||||
#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
|
||||
|
||||
#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
|
||||
#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0)
|
||||
#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
|
||||
#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1)
|
||||
#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
|
||||
#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2)
|
||||
#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2)
|
||||
|
||||
#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0)
|
||||
#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1)
|
||||
#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0)
|
||||
|
||||
#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
|
||||
#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
|
||||
#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
|
||||
#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6)
|
||||
#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0)
|
||||
#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0)
|
||||
#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0)
|
||||
#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2)
|
||||
#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2)
|
||||
#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0)
|
||||
#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6)
|
||||
#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6)
|
||||
#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5)
|
||||
#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5)
|
||||
#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5)
|
||||
#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0)
|
||||
#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6)
|
||||
#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7)
|
||||
#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0)
|
||||
#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0)
|
||||
#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4)
|
||||
#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7)
|
||||
#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6)
|
||||
#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6)
|
||||
#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6)
|
||||
#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6)
|
||||
#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7)
|
||||
#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7)
|
||||
#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7)
|
||||
#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7)
|
||||
#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7)
|
||||
#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7)
|
||||
#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7)
|
||||
#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6)
|
||||
#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6)
|
||||
#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7)
|
||||
#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1)
|
||||
#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4)
|
||||
#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0)
|
||||
#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1)
|
||||
#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
|
||||
#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0)
|
||||
#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4)
|
||||
#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4)
|
||||
#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4)
|
||||
#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2)
|
||||
#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2)
|
||||
#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3)
|
||||
#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0)
|
||||
#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0)
|
||||
#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0)
|
||||
#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1)
|
||||
#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0)
|
||||
#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2)
|
||||
#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2)
|
||||
#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2)
|
||||
#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2)
|
||||
#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2)
|
||||
#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2)
|
||||
#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1)
|
||||
|
||||
/* ETM */
|
||||
#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
|
||||
|
||||
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
|
||||
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
|
||||
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
|
||||
@ -203,8 +310,13 @@
|
||||
#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
|
||||
#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
|
||||
#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
|
||||
#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4)
|
||||
#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5)
|
||||
#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6)
|
||||
#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
|
||||
#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
|
||||
#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2)
|
||||
#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3)
|
||||
#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
|
||||
#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
|
||||
|
||||
@ -275,6 +387,8 @@
|
||||
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
|
||||
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
|
||||
|
||||
#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5)
|
||||
|
||||
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
|
||||
|
||||
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
|
||||
@ -383,8 +497,6 @@
|
||||
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
|
||||
|
||||
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
|
||||
#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
|
||||
#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
|
||||
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
|
||||
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
|
||||
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
|
||||
@ -478,6 +590,158 @@
|
||||
|
||||
#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
|
||||
|
||||
/* AT instructions */
|
||||
#define AT_Op0 1
|
||||
#define AT_CRn 7
|
||||
|
||||
#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
|
||||
#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
|
||||
#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
|
||||
#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
|
||||
#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
|
||||
#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
|
||||
#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
|
||||
#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
|
||||
#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
|
||||
#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
|
||||
#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
|
||||
#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
|
||||
|
||||
/* TLBI instructions */
|
||||
#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
|
||||
#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
|
||||
#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
|
||||
#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3)
|
||||
#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5)
|
||||
#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7)
|
||||
#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1)
|
||||
#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3)
|
||||
#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5)
|
||||
#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7)
|
||||
#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0)
|
||||
#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1)
|
||||
#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2)
|
||||
#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3)
|
||||
#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5)
|
||||
#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7)
|
||||
#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1)
|
||||
#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3)
|
||||
#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5)
|
||||
#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7)
|
||||
#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1)
|
||||
#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3)
|
||||
#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5)
|
||||
#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7)
|
||||
#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0)
|
||||
#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1)
|
||||
#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2)
|
||||
#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3)
|
||||
#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5)
|
||||
#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7)
|
||||
#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0)
|
||||
#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1)
|
||||
#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2)
|
||||
#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3)
|
||||
#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5)
|
||||
#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7)
|
||||
#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1)
|
||||
#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3)
|
||||
#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5)
|
||||
#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7)
|
||||
#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0)
|
||||
#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1)
|
||||
#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2)
|
||||
#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3)
|
||||
#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5)
|
||||
#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7)
|
||||
#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1)
|
||||
#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3)
|
||||
#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5)
|
||||
#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7)
|
||||
#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1)
|
||||
#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3)
|
||||
#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5)
|
||||
#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7)
|
||||
#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0)
|
||||
#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1)
|
||||
#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2)
|
||||
#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3)
|
||||
#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5)
|
||||
#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7)
|
||||
#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1)
|
||||
#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2)
|
||||
#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5)
|
||||
#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6)
|
||||
#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0)
|
||||
#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1)
|
||||
#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4)
|
||||
#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5)
|
||||
#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6)
|
||||
#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1)
|
||||
#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5)
|
||||
#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0)
|
||||
#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1)
|
||||
#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4)
|
||||
#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5)
|
||||
#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6)
|
||||
#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0)
|
||||
#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1)
|
||||
#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2)
|
||||
#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3)
|
||||
#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4)
|
||||
#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5)
|
||||
#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6)
|
||||
#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7)
|
||||
#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1)
|
||||
#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5)
|
||||
#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1)
|
||||
#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5)
|
||||
#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0)
|
||||
#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1)
|
||||
#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4)
|
||||
#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5)
|
||||
#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6)
|
||||
#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1)
|
||||
#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2)
|
||||
#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5)
|
||||
#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6)
|
||||
#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0)
|
||||
#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1)
|
||||
#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4)
|
||||
#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5)
|
||||
#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6)
|
||||
#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1)
|
||||
#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5)
|
||||
#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0)
|
||||
#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1)
|
||||
#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4)
|
||||
#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5)
|
||||
#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6)
|
||||
#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0)
|
||||
#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1)
|
||||
#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2)
|
||||
#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3)
|
||||
#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4)
|
||||
#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5)
|
||||
#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6)
|
||||
#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7)
|
||||
#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1)
|
||||
#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5)
|
||||
#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1)
|
||||
#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5)
|
||||
#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0)
|
||||
#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1)
|
||||
#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4)
|
||||
#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5)
|
||||
#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
|
||||
|
||||
/* Misc instructions */
|
||||
#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
|
||||
#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
|
||||
#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
|
||||
#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
|
||||
#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
|
||||
|
||||
/* Common SCTLR_ELx flags. */
|
||||
#define SCTLR_ELx_ENTP2 (BIT(60))
|
||||
#define SCTLR_ELx_DSSBS (BIT(44))
|
||||
|
@ -335,14 +335,77 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
|
||||
*/
|
||||
#define MAX_TLBI_OPS PTRS_PER_PTE
|
||||
|
||||
/*
|
||||
* __flush_tlb_range_op - Perform TLBI operation upon a range
|
||||
*
|
||||
* @op: TLBI instruction that operates on a range (has 'r' prefix)
|
||||
* @start: The start address of the range
|
||||
* @pages: Range as the number of pages from 'start'
|
||||
* @stride: Flush granularity
|
||||
* @asid: The ASID of the task (0 for IPA instructions)
|
||||
* @tlb_level: Translation Table level hint, if known
|
||||
* @tlbi_user: If 'true', call an additional __tlbi_user()
|
||||
* (typically for user ASIDs). 'false' for IPA instructions
|
||||
*
|
||||
* When the CPU does not support TLB range operations, flush the TLB
|
||||
* entries one by one at the granularity of 'stride'. If the TLB
|
||||
* range ops are supported, then:
|
||||
*
|
||||
* 1. If 'pages' is odd, flush the first page through non-range
|
||||
* operations;
|
||||
*
|
||||
* 2. For remaining pages: the minimum range granularity is decided
|
||||
* by 'scale', so multiple range TLBI operations may be required.
|
||||
* Start from scale = 0, flush the corresponding number of pages
|
||||
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
|
||||
* until no pages left.
|
||||
*
|
||||
* Note that certain ranges can be represented by either num = 31 and
|
||||
* scale or num = 0 and scale + 1. The loop below favours the latter
|
||||
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
|
||||
*/
|
||||
#define __flush_tlb_range_op(op, start, pages, stride, \
|
||||
asid, tlb_level, tlbi_user) \
|
||||
do { \
|
||||
int num = 0; \
|
||||
int scale = 0; \
|
||||
unsigned long addr; \
|
||||
\
|
||||
while (pages > 0) { \
|
||||
if (!system_supports_tlb_range() || \
|
||||
pages % 2 == 1) { \
|
||||
addr = __TLBI_VADDR(start, asid); \
|
||||
__tlbi_level(op, addr, tlb_level); \
|
||||
if (tlbi_user) \
|
||||
__tlbi_user_level(op, addr, tlb_level); \
|
||||
start += stride; \
|
||||
pages -= stride >> PAGE_SHIFT; \
|
||||
continue; \
|
||||
} \
|
||||
\
|
||||
num = __TLBI_RANGE_NUM(pages, scale); \
|
||||
if (num >= 0) { \
|
||||
addr = __TLBI_VADDR_RANGE(start, asid, scale, \
|
||||
num, tlb_level); \
|
||||
__tlbi(r##op, addr); \
|
||||
if (tlbi_user) \
|
||||
__tlbi_user(r##op, addr); \
|
||||
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
|
||||
pages -= __TLBI_RANGE_PAGES(num, scale); \
|
||||
} \
|
||||
scale++; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
|
||||
__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
|
||||
|
||||
static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end,
|
||||
unsigned long stride, bool last_level,
|
||||
int tlb_level)
|
||||
{
|
||||
int num = 0;
|
||||
int scale = 0;
|
||||
unsigned long asid, addr, pages;
|
||||
unsigned long asid, pages;
|
||||
|
||||
start = round_down(start, stride);
|
||||
end = round_up(end, stride);
|
||||
@ -364,56 +427,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
||||
dsb(ishst);
|
||||
asid = ASID(vma->vm_mm);
|
||||
|
||||
/*
|
||||
* When the CPU does not support TLB range operations, flush the TLB
|
||||
* entries one by one at the granularity of 'stride'. If the TLB
|
||||
* range ops are supported, then:
|
||||
*
|
||||
* 1. If 'pages' is odd, flush the first page through non-range
|
||||
* operations;
|
||||
*
|
||||
* 2. For remaining pages: the minimum range granularity is decided
|
||||
* by 'scale', so multiple range TLBI operations may be required.
|
||||
* Start from scale = 0, flush the corresponding number of pages
|
||||
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
|
||||
* until no pages left.
|
||||
*
|
||||
* Note that certain ranges can be represented by either num = 31 and
|
||||
* scale or num = 0 and scale + 1. The loop below favours the latter
|
||||
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
|
||||
*/
|
||||
while (pages > 0) {
|
||||
if (!system_supports_tlb_range() ||
|
||||
pages % 2 == 1) {
|
||||
addr = __TLBI_VADDR(start, asid);
|
||||
if (last_level) {
|
||||
__tlbi_level(vale1is, addr, tlb_level);
|
||||
__tlbi_user_level(vale1is, addr, tlb_level);
|
||||
} else {
|
||||
__tlbi_level(vae1is, addr, tlb_level);
|
||||
__tlbi_user_level(vae1is, addr, tlb_level);
|
||||
}
|
||||
start += stride;
|
||||
pages -= stride >> PAGE_SHIFT;
|
||||
continue;
|
||||
}
|
||||
if (last_level)
|
||||
__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
|
||||
else
|
||||
__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
|
||||
|
||||
num = __TLBI_RANGE_NUM(pages, scale);
|
||||
if (num >= 0) {
|
||||
addr = __TLBI_VADDR_RANGE(start, asid, scale,
|
||||
num, tlb_level);
|
||||
if (last_level) {
|
||||
__tlbi(rvale1is, addr);
|
||||
__tlbi_user(rvale1is, addr);
|
||||
} else {
|
||||
__tlbi(rvae1is, addr);
|
||||
__tlbi_user(rvae1is, addr);
|
||||
}
|
||||
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
|
||||
pages -= __TLBI_RANGE_PAGES(num, scale);
|
||||
}
|
||||
scale++;
|
||||
}
|
||||
dsb(ish);
|
||||
mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
|
||||
}
|
||||
|
@ -2627,6 +2627,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
.matches = has_cpuid_feature,
|
||||
ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
|
||||
},
|
||||
{
|
||||
.desc = "Fine Grained Traps",
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.capability = ARM64_HAS_FGT,
|
||||
.matches = has_cpuid_feature,
|
||||
ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
|
||||
},
|
||||
#ifdef CONFIG_ARM64_SME
|
||||
{
|
||||
.desc = "Scalable Matrix Extension",
|
||||
|
@ -25,7 +25,6 @@ menuconfig KVM
|
||||
select MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
select KVM_MMIO
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select KVM_XFER_TO_GUEST_WORK
|
||||
@ -43,6 +42,7 @@ menuconfig KVM
|
||||
select SCHED_INFO
|
||||
select GUEST_PERF_EVENTS if PERF_EVENTS
|
||||
select INTERVAL_TREE
|
||||
select XARRAY_MULTI
|
||||
help
|
||||
Support hosting virtualized guest machines.
|
||||
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_nested.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/sections.h>
|
||||
@ -365,7 +366,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
|
||||
/* Force users to call KVM_ARM_VCPU_INIT */
|
||||
vcpu->arch.target = -1;
|
||||
vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
|
||||
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
|
||||
|
||||
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
|
||||
@ -462,7 +463,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
vcpu_ptrauth_disable(vcpu);
|
||||
kvm_arch_vcpu_load_debug_state_flags(vcpu);
|
||||
|
||||
if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
|
||||
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
|
||||
vcpu_set_on_unsupported_cpu(vcpu);
|
||||
}
|
||||
|
||||
@ -574,7 +575,7 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.target >= 0;
|
||||
return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -803,6 +804,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
|
||||
kvm_pmu_handle_pmcr(vcpu,
|
||||
__vcpu_sys_reg(vcpu, PMCR_EL0));
|
||||
|
||||
if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
|
||||
kvm_vcpu_pmu_restore_guest(vcpu);
|
||||
|
||||
if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
|
||||
return kvm_vcpu_suspend(vcpu);
|
||||
|
||||
@ -818,6 +822,9 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
|
||||
if (likely(!vcpu_mode_is_32bit(vcpu)))
|
||||
return false;
|
||||
|
||||
if (vcpu_has_nv(vcpu))
|
||||
return true;
|
||||
|
||||
return !kvm_supports_32bit_el0();
|
||||
}
|
||||
|
||||
@ -1058,7 +1065,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
* invalid. The VMM can try and fix it by issuing a
|
||||
* KVM_ARM_VCPU_INIT if it really wants to.
|
||||
*/
|
||||
vcpu->arch.target = -1;
|
||||
vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
|
||||
ret = ARM_EXCEPTION_IL;
|
||||
}
|
||||
|
||||
@ -1219,8 +1226,7 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
unsigned long features = init->features[0];
|
||||
|
||||
return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
|
||||
vcpu->arch.target != init->target;
|
||||
return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
|
||||
}
|
||||
|
||||
static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
|
||||
@ -1236,20 +1242,18 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
|
||||
!bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
|
||||
goto out_unlock;
|
||||
|
||||
vcpu->arch.target = init->target;
|
||||
bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
|
||||
|
||||
/* Now we know what it is, we can reset it. */
|
||||
ret = kvm_reset_vcpu(vcpu);
|
||||
if (ret) {
|
||||
vcpu->arch.target = -1;
|
||||
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
|
||||
set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
|
||||
|
||||
vcpu_set_flag(vcpu, VCPU_INITIALIZED);
|
||||
out_unlock:
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
return ret;
|
||||
@ -1260,14 +1264,15 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (init->target != kvm_target_cpu())
|
||||
if (init->target != KVM_ARM_TARGET_GENERIC_V8 &&
|
||||
init->target != kvm_target_cpu())
|
||||
return -EINVAL;
|
||||
|
||||
ret = kvm_vcpu_init_check_features(vcpu, init);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (vcpu->arch.target == -1)
|
||||
if (!kvm_vcpu_initialized(vcpu))
|
||||
return __kvm_vcpu_set_target(vcpu, init);
|
||||
|
||||
if (kvm_vcpu_init_changed(vcpu, init))
|
||||
@ -1532,12 +1537,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
||||
|
||||
}
|
||||
|
||||
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
|
||||
struct kvm_arm_device_addr *dev_addr)
|
||||
{
|
||||
@ -1595,9 +1594,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
|
||||
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
|
||||
}
|
||||
case KVM_ARM_PREFERRED_TARGET: {
|
||||
struct kvm_vcpu_init init;
|
||||
|
||||
kvm_vcpu_preferred_target(&init);
|
||||
struct kvm_vcpu_init init = {
|
||||
.target = KVM_ARM_TARGET_GENERIC_V8,
|
||||
};
|
||||
|
||||
if (copy_to_user(argp, &init, sizeof(init)))
|
||||
return -EFAULT;
|
||||
@ -2276,30 +2275,8 @@ static int __init init_hyp_mode(void)
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
|
||||
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
|
||||
unsigned long hyp_addr;
|
||||
|
||||
/*
|
||||
* Allocate a contiguous HYP private VA range for the stack
|
||||
* and guard page. The allocation is also aligned based on
|
||||
* the order of its size.
|
||||
*/
|
||||
err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
|
||||
if (err) {
|
||||
kvm_err("Cannot allocate hyp stack guard page\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Since the stack grows downwards, map the stack to the page
|
||||
* at the higher address and leave the lower guard page
|
||||
* unbacked.
|
||||
*
|
||||
* Any valid stack address now has the PAGE_SHIFT bit as 1
|
||||
* and addresses corresponding to the guard page have the
|
||||
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
|
||||
*/
|
||||
err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
|
||||
__pa(stack_page), PAGE_HYP);
|
||||
err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
|
||||
if (err) {
|
||||
kvm_err("Cannot map hyp stack\n");
|
||||
goto out_err;
|
||||
@ -2312,8 +2289,6 @@ static int __init init_hyp_mode(void)
|
||||
* has been mapped in the flexible private VA space.
|
||||
*/
|
||||
params->stack_pa = __pa(stack_page);
|
||||
|
||||
params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
|
||||
}
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -884,21 +884,6 @@ u32 __attribute_const__ kvm_target_cpu(void)
|
||||
return KVM_ARM_TARGET_GENERIC_V8;
|
||||
}
|
||||
|
||||
void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
|
||||
{
|
||||
u32 target = kvm_target_cpu();
|
||||
|
||||
memset(init, 0, sizeof(*init));
|
||||
|
||||
/*
|
||||
* For now, we don't return any features.
|
||||
* In future, we might use features to return target
|
||||
* specific features available for the preferred
|
||||
* target type.
|
||||
*/
|
||||
init->target = (__u32)target;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
{
|
||||
return -EINVAL;
|
||||
|
@ -222,7 +222,33 @@ static int kvm_handle_eret(struct kvm_vcpu *vcpu)
|
||||
if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
|
||||
return kvm_handle_ptrauth(vcpu);
|
||||
|
||||
kvm_emulate_nested_eret(vcpu);
|
||||
/*
|
||||
* If we got here, two possibilities:
|
||||
*
|
||||
* - the guest is in EL2, and we need to fully emulate ERET
|
||||
*
|
||||
* - the guest is in EL1, and we need to reinject the
|
||||
* exception into the L1 hypervisor.
|
||||
*
|
||||
* If KVM ever traps ERET for its own use, we'll have to
|
||||
* revisit this.
|
||||
*/
|
||||
if (is_hyp_ctxt(vcpu))
|
||||
kvm_emulate_nested_eret(vcpu);
|
||||
else
|
||||
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int handle_svc(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* So far, SVC traps only for NV via HFGITR_EL2. An SVC from a
|
||||
* 32bit guest would be caught by vcpu_mode_is_bad_32bit(), so
|
||||
* we should only have to deal with a 64 bit exception.
|
||||
*/
|
||||
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -239,6 +265,7 @@ static exit_handle_fn arm_exit_handlers[] = {
|
||||
[ESR_ELx_EC_SMC32] = handle_smc,
|
||||
[ESR_ELx_EC_HVC64] = handle_hvc,
|
||||
[ESR_ELx_EC_SMC64] = handle_smc,
|
||||
[ESR_ELx_EC_SVC64] = handle_svc,
|
||||
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
|
||||
[ESR_ELx_EC_SVE] = handle_sve,
|
||||
[ESR_ELx_EC_ERET] = kvm_handle_eret,
|
||||
|
@ -70,20 +70,26 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool __hfgxtr_traps_required(void)
|
||||
{
|
||||
if (cpus_have_final_cap(ARM64_SME))
|
||||
return true;
|
||||
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void __activate_traps_hfgxtr(void)
|
||||
#define compute_clr_set(vcpu, reg, clr, set) \
|
||||
do { \
|
||||
u64 hfg; \
|
||||
hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0; \
|
||||
set |= hfg & __ ## reg ## _MASK; \
|
||||
clr |= ~hfg & __ ## reg ## _nMASK; \
|
||||
} while(0)
|
||||
|
||||
|
||||
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
|
||||
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
|
||||
u64 r_val, w_val;
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_HAS_FGT))
|
||||
return;
|
||||
|
||||
ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
|
||||
ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
|
||||
|
||||
if (cpus_have_final_cap(ARM64_SME)) {
|
||||
tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
|
||||
@ -98,26 +104,72 @@ static inline void __activate_traps_hfgxtr(void)
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
|
||||
w_set |= HFGxTR_EL2_TCR_EL1_MASK;
|
||||
|
||||
sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
|
||||
sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
|
||||
}
|
||||
|
||||
static inline void __deactivate_traps_hfgxtr(void)
|
||||
{
|
||||
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
|
||||
|
||||
if (cpus_have_final_cap(ARM64_SME)) {
|
||||
tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
|
||||
|
||||
r_set |= tmp;
|
||||
w_set |= tmp;
|
||||
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
|
||||
compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
|
||||
compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
|
||||
}
|
||||
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
|
||||
w_clr |= HFGxTR_EL2_TCR_EL1_MASK;
|
||||
/* The default is not to trap anything but ACCDATA_EL1 */
|
||||
r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
|
||||
r_val |= r_set;
|
||||
r_val &= ~r_clr;
|
||||
|
||||
sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
|
||||
sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
|
||||
w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
|
||||
w_val |= w_set;
|
||||
w_val &= ~w_clr;
|
||||
|
||||
write_sysreg_s(r_val, SYS_HFGRTR_EL2);
|
||||
write_sysreg_s(w_val, SYS_HFGWTR_EL2);
|
||||
|
||||
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
|
||||
return;
|
||||
|
||||
ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
|
||||
|
||||
r_set = r_clr = 0;
|
||||
compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
|
||||
r_val = __HFGITR_EL2_nMASK;
|
||||
r_val |= r_set;
|
||||
r_val &= ~r_clr;
|
||||
|
||||
write_sysreg_s(r_val, SYS_HFGITR_EL2);
|
||||
|
||||
ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
|
||||
ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
|
||||
|
||||
r_clr = r_set = w_clr = w_set = 0;
|
||||
|
||||
compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
|
||||
compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
|
||||
|
||||
r_val = __HDFGRTR_EL2_nMASK;
|
||||
r_val |= r_set;
|
||||
r_val &= ~r_clr;
|
||||
|
||||
w_val = __HDFGWTR_EL2_nMASK;
|
||||
w_val |= w_set;
|
||||
w_val &= ~w_clr;
|
||||
|
||||
write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
|
||||
write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
|
||||
}
|
||||
|
||||
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_HAS_FGT))
|
||||
return;
|
||||
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
|
||||
|
||||
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
|
||||
return;
|
||||
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
|
||||
}
|
||||
|
||||
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
|
||||
@ -145,8 +197,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
|
||||
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
|
||||
|
||||
if (__hfgxtr_traps_required())
|
||||
__activate_traps_hfgxtr();
|
||||
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
|
||||
u64 hcrx = HCRX_GUEST_FLAGS;
|
||||
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
|
||||
u64 clr = 0, set = 0;
|
||||
|
||||
compute_clr_set(vcpu, HCRX_EL2, clr, set);
|
||||
|
||||
hcrx |= set;
|
||||
hcrx &= ~clr;
|
||||
}
|
||||
|
||||
write_sysreg_s(hcrx, SYS_HCRX_EL2);
|
||||
}
|
||||
|
||||
__activate_traps_hfgxtr(vcpu);
|
||||
}
|
||||
|
||||
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
|
||||
@ -162,8 +227,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
|
||||
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
|
||||
}
|
||||
|
||||
if (__hfgxtr_traps_required())
|
||||
__deactivate_traps_hfgxtr();
|
||||
if (cpus_have_final_cap(ARM64_HAS_HCX))
|
||||
write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
|
||||
|
||||
__deactivate_traps_hfgxtr(vcpu);
|
||||
}
|
||||
|
||||
static inline void ___activate_traps(struct kvm_vcpu *vcpu)
|
||||
@ -177,9 +244,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
|
||||
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
|
||||
|
||||
if (cpus_have_final_cap(ARM64_HAS_HCX))
|
||||
write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
|
||||
}
|
||||
|
||||
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
|
||||
@ -194,9 +258,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.hcr_el2 &= ~HCR_VSE;
|
||||
vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
|
||||
}
|
||||
|
||||
if (cpus_have_final_cap(ARM64_HAS_HCX))
|
||||
write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
|
||||
}
|
||||
|
||||
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
|
||||
|
@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
|
||||
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
|
||||
enum kvm_pgtable_prot prot,
|
||||
unsigned long *haddr);
|
||||
int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
|
||||
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
|
||||
|
||||
#endif /* __KVM_HYP_MM_H */
|
||||
|
@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
|
||||
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
|
||||
}
|
||||
|
||||
static void
|
||||
handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
|
||||
DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
|
||||
DECLARE_REG(unsigned long, pages, host_ctxt, 3);
|
||||
|
||||
__kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
|
||||
}
|
||||
|
||||
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
|
||||
@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
|
||||
HANDLE_FUNC(__kvm_flush_cpu_context),
|
||||
HANDLE_FUNC(__kvm_timer_set_cntvoff),
|
||||
HANDLE_FUNC(__vgic_v3_read_vmcr),
|
||||
|
@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
|
||||
{
|
||||
unsigned long cur;
|
||||
|
||||
hyp_assert_lock_held(&pkvm_pgd_lock);
|
||||
|
||||
if (!start || start < __io_map_base)
|
||||
return -EINVAL;
|
||||
|
||||
/* The allocated size is always a multiple of PAGE_SIZE */
|
||||
cur = start + PAGE_ALIGN(size);
|
||||
|
||||
/* Are we overflowing on the vmemmap ? */
|
||||
if (cur > __hyp_vmemmap)
|
||||
return -ENOMEM;
|
||||
|
||||
__io_map_base = cur;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* pkvm_alloc_private_va_range - Allocates a private VA range.
|
||||
* @size: The size of the VA range to reserve.
|
||||
@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
*/
|
||||
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
|
||||
{
|
||||
unsigned long base, addr;
|
||||
int ret = 0;
|
||||
unsigned long addr;
|
||||
int ret;
|
||||
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
|
||||
/* Align the allocation based on the order of its size */
|
||||
addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
|
||||
|
||||
/* The allocated size is always a multiple of PAGE_SIZE */
|
||||
base = addr + PAGE_ALIGN(size);
|
||||
|
||||
/* Are we overflowing on the vmemmap ? */
|
||||
if (!addr || base > __hyp_vmemmap)
|
||||
ret = -ENOMEM;
|
||||
else {
|
||||
__io_map_base = base;
|
||||
*haddr = addr;
|
||||
}
|
||||
|
||||
addr = __io_map_base;
|
||||
ret = __pkvm_alloc_private_va_range(addr, size);
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
|
||||
*haddr = addr;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
|
||||
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
|
||||
}
|
||||
|
||||
int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
|
||||
{
|
||||
unsigned long addr, prev_base;
|
||||
size_t size;
|
||||
int ret;
|
||||
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
|
||||
prev_base = __io_map_base;
|
||||
/*
|
||||
* Efficient stack verification using the PAGE_SHIFT bit implies
|
||||
* an alignment of our allocation on the order of the size.
|
||||
*/
|
||||
size = PAGE_SIZE * 2;
|
||||
addr = ALIGN(__io_map_base, size);
|
||||
|
||||
ret = __pkvm_alloc_private_va_range(addr, size);
|
||||
if (!ret) {
|
||||
/*
|
||||
* Since the stack grows downwards, map the stack to the page
|
||||
* at the higher address and leave the lower guard page
|
||||
* unbacked.
|
||||
*
|
||||
* Any valid stack address now has the PAGE_SHIFT bit as 1
|
||||
* and addresses corresponding to the guard page have the
|
||||
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
|
||||
*/
|
||||
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
|
||||
PAGE_SIZE, phys, PAGE_HYP);
|
||||
if (ret)
|
||||
__io_map_base = prev_base;
|
||||
}
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
|
||||
*haddr = addr + size;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void *admit_host_page(void *arg)
|
||||
{
|
||||
struct kvm_hyp_memcache *host_mc = arg;
|
||||
|
@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
||||
|
||||
for (i = 0; i < hyp_nr_cpus; i++) {
|
||||
struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
|
||||
unsigned long hyp_addr;
|
||||
|
||||
start = (void *)kern_hyp_va(per_cpu_base[i]);
|
||||
end = start + PAGE_ALIGN(hyp_percpu_size);
|
||||
@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Allocate a contiguous HYP private VA range for the stack
|
||||
* and guard page. The allocation is also aligned based on
|
||||
* the order of its size.
|
||||
*/
|
||||
ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
|
||||
ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Since the stack grows downwards, map the stack to the page
|
||||
* at the higher address and leave the lower guard page
|
||||
* unbacked.
|
||||
*
|
||||
* Any valid stack address now has the PAGE_SHIFT bit as 1
|
||||
* and addresses corresponding to the guard page have the
|
||||
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
|
||||
*/
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
|
||||
PAGE_SIZE, params->stack_pa, PAGE_HYP);
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Update stack_hyp_va to end of the stack's private VA range */
|
||||
params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -236,7 +236,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
* KVM_ARM_VCPU_INIT, however, this is likely not possible for
|
||||
* protected VMs.
|
||||
*/
|
||||
vcpu->arch.target = -1;
|
||||
vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
|
||||
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
|
||||
*exit_code |= ARM_EXCEPTION_IL;
|
||||
}
|
||||
|
@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t start, unsigned long pages)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
unsigned long stride;
|
||||
|
||||
/*
|
||||
* Since the range of addresses may not be mapped at
|
||||
* the same level, assume the worst case as PAGE_SIZE
|
||||
*/
|
||||
stride = PAGE_SIZE;
|
||||
start = round_down(start, stride);
|
||||
|
||||
/* Switch to requested VMID */
|
||||
__tlb_switch_to_guest(mmu, &cxt, false);
|
||||
|
||||
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
|
||||
|
||||
dsb(ish);
|
||||
__tlbi(vmalle1is);
|
||||
dsb(ish);
|
||||
isb();
|
||||
|
||||
/* See the comment in __kvm_tlb_flush_vmid_ipa() */
|
||||
if (icache_is_vpipt())
|
||||
icache_inval_all_pou();
|
||||
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
|
||||
return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
|
||||
}
|
||||
|
||||
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t addr, size_t size)
|
||||
{
|
||||
unsigned long pages, inval_pages;
|
||||
|
||||
if (!system_supports_tlb_range()) {
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
|
||||
return;
|
||||
}
|
||||
|
||||
pages = size >> PAGE_SHIFT;
|
||||
while (pages > 0) {
|
||||
inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
|
||||
|
||||
addr += inval_pages << PAGE_SHIFT;
|
||||
pages -= inval_pages;
|
||||
}
|
||||
}
|
||||
|
||||
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
|
||||
|
||||
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
|
||||
@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
* evicted pte value (if any).
|
||||
*/
|
||||
if (kvm_pte_table(ctx->old, ctx->level))
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
|
||||
kvm_tlb_flush_vmid_range(mmu, ctx->addr,
|
||||
kvm_granule_size(ctx->level));
|
||||
else if (kvm_pte_valid(ctx->old))
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
|
||||
ctx->addr, ctx->level);
|
||||
@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
|
||||
smp_store_release(ctx->ptep, new);
|
||||
}
|
||||
|
||||
static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
|
||||
struct kvm_pgtable_mm_ops *mm_ops)
|
||||
static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
|
||||
{
|
||||
/*
|
||||
* Clear the existing PTE, and perform break-before-make with
|
||||
* TLB maintenance if it was valid.
|
||||
* If FEAT_TLBIRANGE is implemented, defer the individual
|
||||
* TLB invalidations until the entire walk is finished, and
|
||||
* then use the range-based TLBI instructions to do the
|
||||
* invalidations. Condition deferred TLB invalidation on the
|
||||
* system supporting FWB as the optimization is entirely
|
||||
* pointless when the unmap walker needs to perform CMOs.
|
||||
*/
|
||||
return system_supports_tlb_range() && stage2_has_fwb(pgt);
|
||||
}
|
||||
|
||||
static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct kvm_s2_mmu *mmu,
|
||||
struct kvm_pgtable_mm_ops *mm_ops)
|
||||
{
|
||||
struct kvm_pgtable *pgt = ctx->arg;
|
||||
|
||||
/*
|
||||
* Clear the existing PTE, and perform break-before-make if it was
|
||||
* valid. Depending on the system support, defer the TLB maintenance
|
||||
* for the same until the entire unmap walk is completed.
|
||||
*/
|
||||
if (kvm_pte_valid(ctx->old)) {
|
||||
kvm_clear_pte(ctx->ptep);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
|
||||
|
||||
if (!stage2_unmap_defer_tlb_flush(pgt))
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
|
||||
ctx->addr, ctx->level);
|
||||
}
|
||||
|
||||
mm_ops->put_page(ctx->ptep);
|
||||
@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
* block entry and rely on the remaining portions being faulted
|
||||
* back lazily.
|
||||
*/
|
||||
stage2_put_pte(ctx, mmu, mm_ops);
|
||||
stage2_unmap_put_pte(ctx, mmu, mm_ops);
|
||||
|
||||
if (need_flush && mm_ops->dcache_clean_inval_poc)
|
||||
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
|
||||
@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
|
||||
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
{
|
||||
int ret;
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_unmap_walker,
|
||||
.arg = pgt,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
|
||||
};
|
||||
|
||||
return kvm_pgtable_walk(pgt, addr, size, &walker);
|
||||
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
|
||||
if (stage2_unmap_defer_tlb_flush(pgt))
|
||||
/* Perform the deferred TLB invalidations */
|
||||
kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct stage2_attr_data {
|
||||
|
@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t start, unsigned long pages)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
unsigned long stride;
|
||||
|
||||
/*
|
||||
* Since the range of addresses may not be mapped at
|
||||
* the same level, assume the worst case as PAGE_SIZE
|
||||
*/
|
||||
stride = PAGE_SIZE;
|
||||
start = round_down(start, stride);
|
||||
|
||||
dsb(ishst);
|
||||
|
||||
/* Switch to requested VMID */
|
||||
__tlb_switch_to_guest(mmu, &cxt);
|
||||
|
||||
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
|
||||
|
||||
dsb(ish);
|
||||
__tlbi(vmalle1is);
|
||||
dsb(ish);
|
||||
isb();
|
||||
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
|
||||
* kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
|
||||
* @kvm: pointer to kvm structure.
|
||||
*
|
||||
* Interface to HYP function to flush all VM TLB entries
|
||||
*/
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
++kvm->stat.generic.remote_tlb_flush_requests;
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
|
||||
gfn_t gfn, u64 nr_pages)
|
||||
{
|
||||
kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
|
||||
gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool kvm_is_device_pfn(unsigned long pfn)
|
||||
@ -592,6 +600,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __hyp_alloc_private_va_range(unsigned long base)
|
||||
{
|
||||
lockdep_assert_held(&kvm_hyp_pgd_mutex);
|
||||
|
||||
if (!PAGE_ALIGNED(base))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Verify that BIT(VA_BITS - 1) hasn't been flipped by
|
||||
* allocating the new area, as it would indicate we've
|
||||
* overflowed the idmap/IO address range.
|
||||
*/
|
||||
if ((base ^ io_map_base) & BIT(VA_BITS - 1))
|
||||
return -ENOMEM;
|
||||
|
||||
io_map_base = base;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* hyp_alloc_private_va_range - Allocates a private VA range.
|
||||
@ -612,26 +639,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
|
||||
|
||||
/*
|
||||
* This assumes that we have enough space below the idmap
|
||||
* page to allocate our VAs. If not, the check below will
|
||||
* kick. A potential alternative would be to detect that
|
||||
* overflow and switch to an allocation above the idmap.
|
||||
* page to allocate our VAs. If not, the check in
|
||||
* __hyp_alloc_private_va_range() will kick. A potential
|
||||
* alternative would be to detect that overflow and switch
|
||||
* to an allocation above the idmap.
|
||||
*
|
||||
* The allocated size is always a multiple of PAGE_SIZE.
|
||||
*/
|
||||
base = io_map_base - PAGE_ALIGN(size);
|
||||
|
||||
/* Align the allocation based on the order of its size */
|
||||
base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
|
||||
|
||||
/*
|
||||
* Verify that BIT(VA_BITS - 1) hasn't been flipped by
|
||||
* allocating the new area, as it would indicate we've
|
||||
* overflowed the idmap/IO address range.
|
||||
*/
|
||||
if ((base ^ io_map_base) & BIT(VA_BITS - 1))
|
||||
ret = -ENOMEM;
|
||||
else
|
||||
*haddr = io_map_base = base;
|
||||
size = PAGE_ALIGN(size);
|
||||
base = io_map_base - size;
|
||||
ret = __hyp_alloc_private_va_range(base);
|
||||
|
||||
mutex_unlock(&kvm_hyp_pgd_mutex);
|
||||
|
||||
@ -668,6 +685,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
|
||||
{
|
||||
unsigned long base;
|
||||
size_t size;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&kvm_hyp_pgd_mutex);
|
||||
/*
|
||||
* Efficient stack verification using the PAGE_SHIFT bit implies
|
||||
* an alignment of our allocation on the order of the size.
|
||||
*/
|
||||
size = PAGE_SIZE * 2;
|
||||
base = ALIGN_DOWN(io_map_base - size, size);
|
||||
|
||||
ret = __hyp_alloc_private_va_range(base);
|
||||
|
||||
mutex_unlock(&kvm_hyp_pgd_mutex);
|
||||
|
||||
if (ret) {
|
||||
kvm_err("Cannot allocate hyp stack guard page\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Since the stack grows downwards, map the stack to the page
|
||||
* at the higher address and leave the lower guard page
|
||||
* unbacked.
|
||||
*
|
||||
* Any valid stack address now has the PAGE_SHIFT bit as 1
|
||||
* and addresses corresponding to the guard page have the
|
||||
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
|
||||
*/
|
||||
ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
|
||||
PAGE_HYP);
|
||||
if (ret)
|
||||
kvm_err("Cannot map hyp stack\n");
|
||||
|
||||
*haddr = base + size;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* create_hyp_io_mappings - Map IO into both kernel and HYP
|
||||
* @phys_addr: The physical start address which gets mapped
|
||||
@ -1075,7 +1134,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
|
||||
write_lock(&kvm->mmu_lock);
|
||||
stage2_wp_range(&kvm->arch.mmu, start, end);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
kvm_flush_remote_tlbs_memslot(kvm, memslot);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1541,7 +1600,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
|
||||
out_unlock:
|
||||
read_unlock(&kvm->mmu_lock);
|
||||
kvm_set_pfn_accessed(pfn);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
return ret != -EAGAIN ? ret : 0;
|
||||
}
|
||||
@ -1721,7 +1779,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
|
||||
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
kvm_pfn_t pfn = pte_pfn(range->pte);
|
||||
kvm_pfn_t pfn = pte_pfn(range->arg.pte);
|
||||
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
return false;
|
||||
|
@ -71,8 +71,9 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
|
||||
break;
|
||||
|
||||
case SYS_ID_AA64MMFR0_EL1:
|
||||
/* Hide ECV, FGT, ExS, Secure Memory */
|
||||
val &= ~(GENMASK_ULL(63, 43) |
|
||||
/* Hide ECV, ExS, Secure Memory */
|
||||
val &= ~(NV_FTR(MMFR0, ECV) |
|
||||
NV_FTR(MMFR0, EXS) |
|
||||
NV_FTR(MMFR0, TGRAN4_2) |
|
||||
NV_FTR(MMFR0, TGRAN16_2) |
|
||||
NV_FTR(MMFR0, TGRAN64_2) |
|
||||
@ -116,7 +117,8 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
|
||||
break;
|
||||
|
||||
case SYS_ID_AA64MMFR1_EL1:
|
||||
val &= (NV_FTR(MMFR1, PAN) |
|
||||
val &= (NV_FTR(MMFR1, HCX) |
|
||||
NV_FTR(MMFR1, PAN) |
|
||||
NV_FTR(MMFR1, LO) |
|
||||
NV_FTR(MMFR1, HPDS) |
|
||||
NV_FTR(MMFR1, VH) |
|
||||
@ -124,8 +126,7 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
|
||||
break;
|
||||
|
||||
case SYS_ID_AA64MMFR2_EL1:
|
||||
val &= ~(NV_FTR(MMFR2, EVT) |
|
||||
NV_FTR(MMFR2, BBM) |
|
||||
val &= ~(NV_FTR(MMFR2, BBM) |
|
||||
NV_FTR(MMFR2, TTL) |
|
||||
GENMASK_ULL(47, 44) |
|
||||
NV_FTR(MMFR2, ST) |
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <kvm/arm_pmu.h>
|
||||
#include <kvm/arm_vgic.h>
|
||||
#include <asm/arm_pmuv3.h>
|
||||
|
||||
#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
|
||||
|
||||
@ -35,12 +36,8 @@ static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
|
||||
return &vcpu->arch.pmu.pmc[cnt_idx];
|
||||
}
|
||||
|
||||
static u32 kvm_pmu_event_mask(struct kvm *kvm)
|
||||
static u32 __kvm_pmu_event_mask(unsigned int pmuver)
|
||||
{
|
||||
unsigned int pmuver;
|
||||
|
||||
pmuver = kvm->arch.arm_pmu->pmuver;
|
||||
|
||||
switch (pmuver) {
|
||||
case ID_AA64DFR0_EL1_PMUVer_IMP:
|
||||
return GENMASK(9, 0);
|
||||
@ -55,6 +52,14 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
|
||||
}
|
||||
}
|
||||
|
||||
static u32 kvm_pmu_event_mask(struct kvm *kvm)
|
||||
{
|
||||
u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1);
|
||||
u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);
|
||||
|
||||
return __kvm_pmu_event_mask(pmuver);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmc_is_64bit - determine if counter is 64bit
|
||||
* @pmc: counter context
|
||||
@ -672,8 +677,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
|
||||
{
|
||||
struct arm_pmu_entry *entry;
|
||||
|
||||
if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
|
||||
pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
|
||||
/*
|
||||
* Check the sanitised PMU version for the system, as KVM does not
|
||||
* support implementations where PMUv3 exists on a subset of CPUs.
|
||||
*/
|
||||
if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
|
||||
return;
|
||||
|
||||
mutex_lock(&arm_pmus_lock);
|
||||
@ -750,11 +758,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
|
||||
} else {
|
||||
val = read_sysreg(pmceid1_el0);
|
||||
/*
|
||||
* Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
|
||||
* Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
|
||||
* as RAZ
|
||||
*/
|
||||
if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
|
||||
val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
|
||||
val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
|
||||
BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
|
||||
BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
|
||||
base = 32;
|
||||
}
|
||||
|
||||
@ -950,11 +959,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
return 0;
|
||||
}
|
||||
case KVM_ARM_VCPU_PMU_V3_FILTER: {
|
||||
u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
|
||||
struct kvm_pmu_event_filter __user *uaddr;
|
||||
struct kvm_pmu_event_filter filter;
|
||||
int nr_events;
|
||||
|
||||
nr_events = kvm_pmu_event_mask(kvm) + 1;
|
||||
/*
|
||||
* Allow userspace to specify an event filter for the entire
|
||||
* event range supported by PMUVer of the hardware, rather
|
||||
* than the guest's PMUVer for KVM backward compatibility.
|
||||
*/
|
||||
nr_events = __kvm_pmu_event_mask(pmuver) + 1;
|
||||
|
||||
uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
|
||||
|
||||
|
@ -236,3 +236,21 @@ bool kvm_set_pmuserenr(u64 val)
|
||||
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we interrupted the guest to update the host PMU context, make
|
||||
* sure we re-apply the guest EL0 state.
|
||||
*/
|
||||
void kvm_vcpu_pmu_resync_el0(void)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
if (!has_vhe() || !in_interrupt())
|
||||
return;
|
||||
|
||||
vcpu = kvm_get_running_vcpu();
|
||||
if (!vcpu)
|
||||
return;
|
||||
|
||||
kvm_make_request(KVM_REQ_RESYNC_PMU_EL0, vcpu);
|
||||
}
|
||||
|
@ -248,21 +248,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
switch (vcpu->arch.target) {
|
||||
default:
|
||||
if (vcpu_el1_is_32bit(vcpu)) {
|
||||
pstate = VCPU_RESET_PSTATE_SVC;
|
||||
} else if (vcpu_has_nv(vcpu)) {
|
||||
pstate = VCPU_RESET_PSTATE_EL2;
|
||||
} else {
|
||||
pstate = VCPU_RESET_PSTATE_EL1;
|
||||
}
|
||||
if (vcpu_el1_is_32bit(vcpu))
|
||||
pstate = VCPU_RESET_PSTATE_SVC;
|
||||
else if (vcpu_has_nv(vcpu))
|
||||
pstate = VCPU_RESET_PSTATE_EL2;
|
||||
else
|
||||
pstate = VCPU_RESET_PSTATE_EL1;
|
||||
|
||||
if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
break;
|
||||
if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Reset core registers */
|
||||
|
@ -2151,6 +2151,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
|
||||
{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
|
||||
|
||||
{ SYS_DESC(SYS_ACCDATA_EL1), undef_access },
|
||||
|
||||
{ SYS_DESC(SYS_SCXTNUM_EL1), undef_access },
|
||||
|
||||
{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
|
||||
@ -2365,8 +2367,13 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
|
||||
EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HACR_EL2, access_rw, reset_val, 0),
|
||||
|
||||
EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
|
||||
|
||||
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
|
||||
@ -2374,6 +2381,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
|
||||
|
||||
{ SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
|
||||
EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(ELR_EL2, access_rw, reset_val, 0),
|
||||
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
|
||||
@ -3170,6 +3179,9 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
|
||||
|
||||
trace_kvm_handle_sys_reg(esr);
|
||||
|
||||
if (__check_nv_sr_forward(vcpu))
|
||||
return 1;
|
||||
|
||||
params = esr_sys64_to_params(esr);
|
||||
params.regval = vcpu_get_reg(vcpu, Rt);
|
||||
|
||||
@ -3587,5 +3599,8 @@ int __init kvm_sys_reg_table_init(void)
|
||||
if (!first_idreg)
|
||||
return -EINVAL;
|
||||
|
||||
if (kvm_get_mode() == KVM_MODE_NV)
|
||||
return populate_nv_trap_config();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -364,6 +364,32 @@ TRACE_EVENT(kvm_inject_nested_exception,
|
||||
__entry->hcr_el2)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_forward_sysreg_trap,
|
||||
TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read),
|
||||
TP_ARGS(vcpu, sysreg, is_read),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, pc)
|
||||
__field(u32, sysreg)
|
||||
__field(bool, is_read)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->pc = *vcpu_pc(vcpu);
|
||||
__entry->sysreg = sysreg;
|
||||
__entry->is_read = is_read;
|
||||
),
|
||||
|
||||
TP_printk("%llx %c (%d,%d,%d,%d,%d)",
|
||||
__entry->pc,
|
||||
__entry->is_read ? 'R' : 'W',
|
||||
sys_reg_Op0(__entry->sysreg),
|
||||
sys_reg_Op1(__entry->sysreg),
|
||||
sys_reg_CRn(__entry->sysreg),
|
||||
sys_reg_CRm(__entry->sysreg),
|
||||
sys_reg_Op2(__entry->sysreg))
|
||||
);
|
||||
|
||||
#endif /* _TRACE_ARM_ARM64_KVM_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
@ -199,7 +199,6 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
|
||||
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
|
||||
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
|
||||
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
|
||||
void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
|
||||
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
|
||||
int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
int offset, u32 *val);
|
||||
@ -233,7 +232,6 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
|
||||
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
|
||||
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_v3_enable(struct kvm_vcpu *vcpu);
|
||||
|
@ -26,6 +26,7 @@ HAS_ECV
|
||||
HAS_ECV_CNTPOFF
|
||||
HAS_EPAN
|
||||
HAS_EVT
|
||||
HAS_FGT
|
||||
HAS_GENERIC_AUTH
|
||||
HAS_GENERIC_AUTH_ARCH_QARMA3
|
||||
HAS_GENERIC_AUTH_ARCH_QARMA5
|
||||
|
@ -2156,6 +2156,135 @@ Field 1 ICIALLU
|
||||
Field 0 ICIALLUIS
|
||||
EndSysreg
|
||||
|
||||
Sysreg HDFGRTR_EL2 3 4 3 1 4
|
||||
Field 63 PMBIDR_EL1
|
||||
Field 62 nPMSNEVFR_EL1
|
||||
Field 61 nBRBDATA
|
||||
Field 60 nBRBCTL
|
||||
Field 59 nBRBIDR
|
||||
Field 58 PMCEIDn_EL0
|
||||
Field 57 PMUSERENR_EL0
|
||||
Field 56 TRBTRG_EL1
|
||||
Field 55 TRBSR_EL1
|
||||
Field 54 TRBPTR_EL1
|
||||
Field 53 TRBMAR_EL1
|
||||
Field 52 TRBLIMITR_EL1
|
||||
Field 51 TRBIDR_EL1
|
||||
Field 50 TRBBASER_EL1
|
||||
Res0 49
|
||||
Field 48 TRCVICTLR
|
||||
Field 47 TRCSTATR
|
||||
Field 46 TRCSSCSRn
|
||||
Field 45 TRCSEQSTR
|
||||
Field 44 TRCPRGCTLR
|
||||
Field 43 TRCOSLSR
|
||||
Res0 42
|
||||
Field 41 TRCIMSPECn
|
||||
Field 40 TRCID
|
||||
Res0 39:38
|
||||
Field 37 TRCCNTVRn
|
||||
Field 36 TRCCLAIM
|
||||
Field 35 TRCAUXCTLR
|
||||
Field 34 TRCAUTHSTATUS
|
||||
Field 33 TRC
|
||||
Field 32 PMSLATFR_EL1
|
||||
Field 31 PMSIRR_EL1
|
||||
Field 30 PMSIDR_EL1
|
||||
Field 29 PMSICR_EL1
|
||||
Field 28 PMSFCR_EL1
|
||||
Field 27 PMSEVFR_EL1
|
||||
Field 26 PMSCR_EL1
|
||||
Field 25 PMBSR_EL1
|
||||
Field 24 PMBPTR_EL1
|
||||
Field 23 PMBLIMITR_EL1
|
||||
Field 22 PMMIR_EL1
|
||||
Res0 21:20
|
||||
Field 19 PMSELR_EL0
|
||||
Field 18 PMOVS
|
||||
Field 17 PMINTEN
|
||||
Field 16 PMCNTEN
|
||||
Field 15 PMCCNTR_EL0
|
||||
Field 14 PMCCFILTR_EL0
|
||||
Field 13 PMEVTYPERn_EL0
|
||||
Field 12 PMEVCNTRn_EL0
|
||||
Field 11 OSDLR_EL1
|
||||
Field 10 OSECCR_EL1
|
||||
Field 9 OSLSR_EL1
|
||||
Res0 8
|
||||
Field 7 DBGPRCR_EL1
|
||||
Field 6 DBGAUTHSTATUS_EL1
|
||||
Field 5 DBGCLAIM
|
||||
Field 4 MDSCR_EL1
|
||||
Field 3 DBGWVRn_EL1
|
||||
Field 2 DBGWCRn_EL1
|
||||
Field 1 DBGBVRn_EL1
|
||||
Field 0 DBGBCRn_EL1
|
||||
EndSysreg
|
||||
|
||||
Sysreg HDFGWTR_EL2 3 4 3 1 5
|
||||
Res0 63
|
||||
Field 62 nPMSNEVFR_EL1
|
||||
Field 61 nBRBDATA
|
||||
Field 60 nBRBCTL
|
||||
Res0 59:58
|
||||
Field 57 PMUSERENR_EL0
|
||||
Field 56 TRBTRG_EL1
|
||||
Field 55 TRBSR_EL1
|
||||
Field 54 TRBPTR_EL1
|
||||
Field 53 TRBMAR_EL1
|
||||
Field 52 TRBLIMITR_EL1
|
||||
Res0 51
|
||||
Field 50 TRBBASER_EL1
|
||||
Field 49 TRFCR_EL1
|
||||
Field 48 TRCVICTLR
|
||||
Res0 47
|
||||
Field 46 TRCSSCSRn
|
||||
Field 45 TRCSEQSTR
|
||||
Field 44 TRCPRGCTLR
|
||||
Res0 43
|
||||
Field 42 TRCOSLAR
|
||||
Field 41 TRCIMSPECn
|
||||
Res0 40:38
|
||||
Field 37 TRCCNTVRn
|
||||
Field 36 TRCCLAIM
|
||||
Field 35 TRCAUXCTLR
|
||||
Res0 34
|
||||
Field 33 TRC
|
||||
Field 32 PMSLATFR_EL1
|
||||
Field 31 PMSIRR_EL1
|
||||
Res0 30
|
||||
Field 29 PMSICR_EL1
|
||||
Field 28 PMSFCR_EL1
|
||||
Field 27 PMSEVFR_EL1
|
||||
Field 26 PMSCR_EL1
|
||||
Field 25 PMBSR_EL1
|
||||
Field 24 PMBPTR_EL1
|
||||
Field 23 PMBLIMITR_EL1
|
||||
Res0 22
|
||||
Field 21 PMCR_EL0
|
||||
Field 20 PMSWINC_EL0
|
||||
Field 19 PMSELR_EL0
|
||||
Field 18 PMOVS
|
||||
Field 17 PMINTEN
|
||||
Field 16 PMCNTEN
|
||||
Field 15 PMCCNTR_EL0
|
||||
Field 14 PMCCFILTR_EL0
|
||||
Field 13 PMEVTYPERn_EL0
|
||||
Field 12 PMEVCNTRn_EL0
|
||||
Field 11 OSDLR_EL1
|
||||
Field 10 OSECCR_EL1
|
||||
Res0 9
|
||||
Field 8 OSLAR_EL1
|
||||
Field 7 DBGPRCR_EL1
|
||||
Res0 6
|
||||
Field 5 DBGCLAIM
|
||||
Field 4 MDSCR_EL1
|
||||
Field 3 DBGWVRn_EL1
|
||||
Field 2 DBGWCRn_EL1
|
||||
Field 1 DBGBVRn_EL1
|
||||
Field 0 DBGBCRn_EL1
|
||||
EndSysreg
|
||||
|
||||
Sysreg ZCR_EL2 3 4 1 2 0
|
||||
Fields ZCR_ELx
|
||||
EndSysreg
|
||||
|
@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
|
||||
int kvm_arch_flush_remote_tlb(struct kvm *kvm);
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
|
||||
|
||||
#endif /* __MIPS_KVM_HOST_H__ */
|
||||
|
@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
/* Flush slot from GPA */
|
||||
kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
|
||||
slot->base_gfn + slot->npages - 1);
|
||||
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
|
||||
kvm_flush_remote_tlbs_memslot(kvm, slot);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
|
||||
new->base_gfn + new->npages - 1);
|
||||
if (needs_flush)
|
||||
kvm_arch_flush_remote_tlbs_memslot(kvm, new);
|
||||
kvm_flush_remote_tlbs_memslot(kvm, new);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
}
|
||||
@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
||||
|
||||
}
|
||||
|
||||
int kvm_arch_flush_remote_tlb(struct kvm *kvm)
|
||||
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
kvm_mips_callbacks->prepare_flush_shadow(kvm);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
int r;
|
||||
|
@ -447,7 +447,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
gpa_t gpa = range->start << PAGE_SHIFT;
|
||||
pte_t hva_pte = range->pte;
|
||||
pte_t hva_pte = range->arg.pte;
|
||||
pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
|
||||
pte_t old_pte;
|
||||
|
||||
|
@ -54,6 +54,7 @@
|
||||
#ifndef CONFIG_64BIT
|
||||
#define SATP_PPN _AC(0x003FFFFF, UL)
|
||||
#define SATP_MODE_32 _AC(0x80000000, UL)
|
||||
#define SATP_MODE_SHIFT 31
|
||||
#define SATP_ASID_BITS 9
|
||||
#define SATP_ASID_SHIFT 22
|
||||
#define SATP_ASID_MASK _AC(0x1FF, UL)
|
||||
@ -62,6 +63,7 @@
|
||||
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
|
||||
#define SATP_MODE_48 _AC(0x9000000000000000, UL)
|
||||
#define SATP_MODE_57 _AC(0xa000000000000000, UL)
|
||||
#define SATP_MODE_SHIFT 60
|
||||
#define SATP_ASID_BITS 16
|
||||
#define SATP_ASID_SHIFT 44
|
||||
#define SATP_ASID_MASK _AC(0xFFFF, UL)
|
||||
|
@ -337,6 +337,15 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
|
||||
void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
|
||||
|
||||
void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu);
|
||||
unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
|
||||
u64 __user *uindices);
|
||||
int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg);
|
||||
int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg);
|
||||
|
||||
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
|
||||
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
|
||||
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
|
||||
|
@ -74,9 +74,7 @@ static inline void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
|
||||
int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg,
|
||||
unsigned long rtype);
|
||||
const struct kvm_one_reg *reg);
|
||||
int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg,
|
||||
unsigned long rtype);
|
||||
const struct kvm_one_reg *reg);
|
||||
#endif
|
||||
|
@ -55,6 +55,7 @@ struct kvm_riscv_config {
|
||||
unsigned long marchid;
|
||||
unsigned long mimpid;
|
||||
unsigned long zicboz_block_size;
|
||||
unsigned long satp_mode;
|
||||
};
|
||||
|
||||
/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
|
||||
@ -124,6 +125,12 @@ enum KVM_RISCV_ISA_EXT_ID {
|
||||
KVM_RISCV_ISA_EXT_SSAIA,
|
||||
KVM_RISCV_ISA_EXT_V,
|
||||
KVM_RISCV_ISA_EXT_SVNAPOT,
|
||||
KVM_RISCV_ISA_EXT_ZBA,
|
||||
KVM_RISCV_ISA_EXT_ZBS,
|
||||
KVM_RISCV_ISA_EXT_ZICNTR,
|
||||
KVM_RISCV_ISA_EXT_ZICSR,
|
||||
KVM_RISCV_ISA_EXT_ZIFENCEI,
|
||||
KVM_RISCV_ISA_EXT_ZIHPM,
|
||||
KVM_RISCV_ISA_EXT_MAX,
|
||||
};
|
||||
|
||||
@ -193,6 +200,15 @@ enum KVM_RISCV_SBI_EXT_ID {
|
||||
|
||||
/* ISA Extension registers are mapped as type 7 */
|
||||
#define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_ISA_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_ISA_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_ISA_MULTI_DIS (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id) \
|
||||
((__ext_id) / __BITS_PER_LONG)
|
||||
#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id) \
|
||||
(1UL << ((__ext_id) % __BITS_PER_LONG))
|
||||
#define KVM_REG_RISCV_ISA_MULTI_REG_LAST \
|
||||
KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1)
|
||||
|
||||
/* SBI extension registers are mapped as type 8 */
|
||||
#define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
|
@ -19,6 +19,7 @@ kvm-y += vcpu_exit.o
|
||||
kvm-y += vcpu_fp.o
|
||||
kvm-y += vcpu_vector.o
|
||||
kvm-y += vcpu_insn.o
|
||||
kvm-y += vcpu_onereg.o
|
||||
kvm-y += vcpu_switch.o
|
||||
kvm-y += vcpu_sbi.o
|
||||
kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
|
||||
|
@ -176,7 +176,7 @@ int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
|
||||
|
||||
if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
*out_val = 0;
|
||||
if (kvm_riscv_aia_available())
|
||||
@ -192,7 +192,7 @@ int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
|
||||
|
||||
if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
if (kvm_riscv_aia_available()) {
|
||||
((unsigned long *)csr)[reg_num] = val;
|
||||
|
@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
||||
{
|
||||
}
|
||||
|
||||
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
|
||||
{
|
||||
}
|
||||
@ -559,7 +553,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
int ret;
|
||||
kvm_pfn_t pfn = pte_pfn(range->pte);
|
||||
kvm_pfn_t pfn = pte_pfn(range->arg.pte);
|
||||
|
||||
if (!kvm->arch.pgd)
|
||||
return false;
|
||||
|
@ -13,16 +13,12 @@
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/csr.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/sbi.h>
|
||||
#include <asm/vector.h>
|
||||
#include <asm/kvm_vcpu_vector.h>
|
||||
|
||||
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
||||
@ -46,79 +42,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
sizeof(kvm_vcpu_stats_desc),
|
||||
};
|
||||
|
||||
#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0)
|
||||
|
||||
#define KVM_ISA_EXT_ARR(ext) [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
|
||||
|
||||
/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
|
||||
static const unsigned long kvm_isa_ext_arr[] = {
|
||||
[KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
|
||||
[KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
|
||||
[KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
|
||||
[KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
|
||||
[KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
|
||||
[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
|
||||
[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
|
||||
[KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
|
||||
|
||||
KVM_ISA_EXT_ARR(SSAIA),
|
||||
KVM_ISA_EXT_ARR(SSTC),
|
||||
KVM_ISA_EXT_ARR(SVINVAL),
|
||||
KVM_ISA_EXT_ARR(SVNAPOT),
|
||||
KVM_ISA_EXT_ARR(SVPBMT),
|
||||
KVM_ISA_EXT_ARR(ZBB),
|
||||
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
|
||||
KVM_ISA_EXT_ARR(ZICBOM),
|
||||
KVM_ISA_EXT_ARR(ZICBOZ),
|
||||
};
|
||||
|
||||
static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
|
||||
if (kvm_isa_ext_arr[i] == base_ext)
|
||||
return i;
|
||||
}
|
||||
|
||||
return KVM_RISCV_ISA_EXT_MAX;
|
||||
}
|
||||
|
||||
static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
|
||||
{
|
||||
switch (ext) {
|
||||
case KVM_RISCV_ISA_EXT_H:
|
||||
return false;
|
||||
case KVM_RISCV_ISA_EXT_V:
|
||||
return riscv_v_vstate_ctrl_user_allowed();
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
|
||||
{
|
||||
switch (ext) {
|
||||
case KVM_RISCV_ISA_EXT_A:
|
||||
case KVM_RISCV_ISA_EXT_C:
|
||||
case KVM_RISCV_ISA_EXT_I:
|
||||
case KVM_RISCV_ISA_EXT_M:
|
||||
case KVM_RISCV_ISA_EXT_SSAIA:
|
||||
case KVM_RISCV_ISA_EXT_SSTC:
|
||||
case KVM_RISCV_ISA_EXT_SVINVAL:
|
||||
case KVM_RISCV_ISA_EXT_SVNAPOT:
|
||||
case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
|
||||
case KVM_RISCV_ISA_EXT_ZBB:
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
@ -176,7 +99,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
int rc;
|
||||
struct kvm_cpu_context *cntx;
|
||||
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
|
||||
unsigned long host_isa, i;
|
||||
|
||||
/* Mark this VCPU never ran */
|
||||
vcpu->arch.ran_atleast_once = false;
|
||||
@ -184,12 +106,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
|
||||
|
||||
/* Setup ISA features available to VCPU */
|
||||
for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
|
||||
host_isa = kvm_isa_ext_arr[i];
|
||||
if (__riscv_isa_extension_available(NULL, host_isa) &&
|
||||
kvm_riscv_vcpu_isa_enable_allowed(i))
|
||||
set_bit(host_isa, vcpu->arch.isa);
|
||||
}
|
||||
kvm_riscv_vcpu_setup_isa(vcpu);
|
||||
|
||||
/* Setup vendor, arch, and implementation details */
|
||||
vcpu->arch.mvendorid = sbi_get_mvendorid();
|
||||
@ -294,450 +211,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CONFIG);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_CONFIG_REG(isa):
|
||||
reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
|
||||
break;
|
||||
case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
|
||||
if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
|
||||
return -EINVAL;
|
||||
reg_val = riscv_cbom_block_size;
|
||||
break;
|
||||
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
|
||||
if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
|
||||
return -EINVAL;
|
||||
reg_val = riscv_cboz_block_size;
|
||||
break;
|
||||
case KVM_REG_RISCV_CONFIG_REG(mvendorid):
|
||||
reg_val = vcpu->arch.mvendorid;
|
||||
break;
|
||||
case KVM_REG_RISCV_CONFIG_REG(marchid):
|
||||
reg_val = vcpu->arch.marchid;
|
||||
break;
|
||||
case KVM_REG_RISCV_CONFIG_REG(mimpid):
|
||||
reg_val = vcpu->arch.mimpid;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CONFIG);
|
||||
unsigned long i, isa_ext, reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_CONFIG_REG(isa):
|
||||
/*
|
||||
* This ONE REG interface is only defined for
|
||||
* single letter extensions.
|
||||
*/
|
||||
if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
|
||||
return -EINVAL;
|
||||
|
||||
if (!vcpu->arch.ran_atleast_once) {
|
||||
/* Ignore the enable/disable request for certain extensions */
|
||||
for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
|
||||
isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
|
||||
if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
|
||||
reg_val &= ~BIT(i);
|
||||
continue;
|
||||
}
|
||||
if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
|
||||
if (reg_val & BIT(i))
|
||||
reg_val &= ~BIT(i);
|
||||
if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
|
||||
if (!(reg_val & BIT(i)))
|
||||
reg_val |= BIT(i);
|
||||
}
|
||||
reg_val &= riscv_isa_extension_base(NULL);
|
||||
/* Do not modify anything beyond single letter extensions */
|
||||
reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
|
||||
(reg_val & KVM_RISCV_BASE_ISA_MASK);
|
||||
vcpu->arch.isa[0] = reg_val;
|
||||
kvm_riscv_vcpu_fp_reset(vcpu);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
break;
|
||||
case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
|
||||
return -EOPNOTSUPP;
|
||||
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
|
||||
return -EOPNOTSUPP;
|
||||
case KVM_REG_RISCV_CONFIG_REG(mvendorid):
|
||||
if (!vcpu->arch.ran_atleast_once)
|
||||
vcpu->arch.mvendorid = reg_val;
|
||||
else
|
||||
return -EBUSY;
|
||||
break;
|
||||
case KVM_REG_RISCV_CONFIG_REG(marchid):
|
||||
if (!vcpu->arch.ran_atleast_once)
|
||||
vcpu->arch.marchid = reg_val;
|
||||
else
|
||||
return -EBUSY;
|
||||
break;
|
||||
case KVM_REG_RISCV_CONFIG_REG(mimpid):
|
||||
if (!vcpu->arch.ran_atleast_once)
|
||||
vcpu->arch.mimpid = reg_val;
|
||||
else
|
||||
return -EBUSY;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CORE);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
|
||||
reg_val = cntx->sepc;
|
||||
else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
|
||||
reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
|
||||
reg_val = ((unsigned long *)cntx)[reg_num];
|
||||
else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
|
||||
reg_val = (cntx->sstatus & SR_SPP) ?
|
||||
KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CORE);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
|
||||
cntx->sepc = reg_val;
|
||||
else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
|
||||
reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
|
||||
((unsigned long *)cntx)[reg_num] = reg_val;
|
||||
else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
|
||||
if (reg_val == KVM_RISCV_MODE_S)
|
||||
cntx->sstatus |= SR_SPP;
|
||||
else
|
||||
cntx->sstatus &= ~SR_SPP;
|
||||
} else
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
|
||||
unsigned long reg_num,
|
||||
unsigned long *out_val)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
|
||||
if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
|
||||
kvm_riscv_vcpu_flush_interrupts(vcpu);
|
||||
*out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
|
||||
*out_val |= csr->hvip & ~IRQ_LOCAL_MASK;
|
||||
} else
|
||||
*out_val = ((unsigned long *)csr)[reg_num];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
|
||||
unsigned long reg_num,
|
||||
unsigned long reg_val)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
|
||||
if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
|
||||
reg_val &= VSIP_VALID_MASK;
|
||||
reg_val <<= VSIP_TO_HVIP_SHIFT;
|
||||
}
|
||||
|
||||
((unsigned long *)csr)[reg_num] = reg_val;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
|
||||
WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
int rc;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CSR);
|
||||
unsigned long reg_val, reg_subtype;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
|
||||
reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
|
||||
switch (reg_subtype) {
|
||||
case KVM_REG_RISCV_CSR_GENERAL:
|
||||
rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val);
|
||||
break;
|
||||
case KVM_REG_RISCV_CSR_AIA:
|
||||
rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val);
|
||||
break;
|
||||
default:
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
int rc;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CSR);
|
||||
unsigned long reg_val, reg_subtype;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
|
||||
reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
|
||||
switch (reg_subtype) {
|
||||
case KVM_REG_RISCV_CSR_GENERAL:
|
||||
rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val);
|
||||
break;
|
||||
case KVM_REG_RISCV_CSR_AIA:
|
||||
rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val);
|
||||
break;
|
||||
default:
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_ISA_EXT);
|
||||
unsigned long reg_val = 0;
|
||||
unsigned long host_isa_ext;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
|
||||
reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
|
||||
return -EINVAL;
|
||||
|
||||
host_isa_ext = kvm_isa_ext_arr[reg_num];
|
||||
if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
|
||||
reg_val = 1; /* Mark the given extension as available */
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_ISA_EXT);
|
||||
unsigned long reg_val;
|
||||
unsigned long host_isa_ext;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
|
||||
reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
host_isa_ext = kvm_isa_ext_arr[reg_num];
|
||||
if (!__riscv_isa_extension_available(NULL, host_isa_ext))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!vcpu->arch.ran_atleast_once) {
|
||||
/*
|
||||
* All multi-letter extension and a few single letter
|
||||
* extension can be disabled
|
||||
*/
|
||||
if (reg_val == 1 &&
|
||||
kvm_riscv_vcpu_isa_enable_allowed(reg_num))
|
||||
set_bit(host_isa_ext, vcpu->arch.isa);
|
||||
else if (!reg_val &&
|
||||
kvm_riscv_vcpu_isa_disable_allowed(reg_num))
|
||||
clear_bit(host_isa_ext, vcpu->arch.isa);
|
||||
else
|
||||
return -EINVAL;
|
||||
kvm_riscv_vcpu_fp_reset(vcpu);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
|
||||
case KVM_REG_RISCV_CONFIG:
|
||||
return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
|
||||
case KVM_REG_RISCV_CORE:
|
||||
return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
|
||||
case KVM_REG_RISCV_CSR:
|
||||
return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
|
||||
case KVM_REG_RISCV_TIMER:
|
||||
return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
|
||||
case KVM_REG_RISCV_FP_F:
|
||||
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_F);
|
||||
case KVM_REG_RISCV_FP_D:
|
||||
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_D);
|
||||
case KVM_REG_RISCV_ISA_EXT:
|
||||
return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
|
||||
case KVM_REG_RISCV_SBI_EXT:
|
||||
return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
|
||||
case KVM_REG_RISCV_VECTOR:
|
||||
return kvm_riscv_vcpu_set_reg_vector(vcpu, reg,
|
||||
KVM_REG_RISCV_VECTOR);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
|
||||
case KVM_REG_RISCV_CONFIG:
|
||||
return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
|
||||
case KVM_REG_RISCV_CORE:
|
||||
return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
|
||||
case KVM_REG_RISCV_CSR:
|
||||
return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
|
||||
case KVM_REG_RISCV_TIMER:
|
||||
return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
|
||||
case KVM_REG_RISCV_FP_F:
|
||||
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_F);
|
||||
case KVM_REG_RISCV_FP_D:
|
||||
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_D);
|
||||
case KVM_REG_RISCV_ISA_EXT:
|
||||
return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
|
||||
case KVM_REG_RISCV_SBI_EXT:
|
||||
return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
|
||||
case KVM_REG_RISCV_VECTOR:
|
||||
return kvm_riscv_vcpu_get_reg_vector(vcpu, reg,
|
||||
KVM_REG_RISCV_VECTOR);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
@ -781,6 +254,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
|
||||
break;
|
||||
}
|
||||
case KVM_GET_REG_LIST: {
|
||||
struct kvm_reg_list __user *user_list = argp;
|
||||
struct kvm_reg_list reg_list;
|
||||
unsigned int n;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
|
||||
break;
|
||||
n = reg_list.n;
|
||||
reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
|
||||
if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
|
||||
break;
|
||||
r = -E2BIG;
|
||||
if (n < reg_list.n)
|
||||
break;
|
||||
r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
|
||||
reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
|
||||
reg_val = &cntx->fp.f.f[reg_num];
|
||||
else
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
} else if ((rtype == KVM_REG_RISCV_FP_D) &&
|
||||
riscv_isa_extension_available(vcpu->arch.isa, d)) {
|
||||
if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
|
||||
@ -109,9 +109,9 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
reg_val = &cntx->fp.d.f[reg_num];
|
||||
} else
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
} else
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
@ -141,7 +141,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
|
||||
reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
|
||||
reg_val = &cntx->fp.f.f[reg_num];
|
||||
else
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
} else if ((rtype == KVM_REG_RISCV_FP_D) &&
|
||||
riscv_isa_extension_available(vcpu->arch.isa, d)) {
|
||||
if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
|
||||
@ -154,9 +154,9 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
reg_val = &cntx->fp.d.f[reg_num];
|
||||
} else
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
} else
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
1051
arch/riscv/kvm/vcpu_onereg.c
Normal file
1051
arch/riscv/kvm/vcpu_onereg.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -140,8 +140,10 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_riscv_sbi_extension_entry *sext = NULL;
|
||||
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
|
||||
|
||||
if (reg_num >= KVM_RISCV_SBI_EXT_MAX ||
|
||||
(reg_val != 1 && reg_val != 0))
|
||||
if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
|
||||
return -ENOENT;
|
||||
|
||||
if (reg_val != 1 && reg_val != 0)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
|
||||
@ -175,7 +177,7 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
|
||||
|
||||
if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
|
||||
if (sbi_ext[i].ext_idx == reg_num) {
|
||||
@ -206,7 +208,7 @@ static int riscv_vcpu_set_sbi_ext_multi(struct kvm_vcpu *vcpu,
|
||||
unsigned long i, ext_id;
|
||||
|
||||
if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
for_each_set_bit(i, &reg_val, BITS_PER_LONG) {
|
||||
ext_id = i + reg_num * BITS_PER_LONG;
|
||||
@ -226,7 +228,7 @@ static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu,
|
||||
unsigned long i, ext_id, ext_val;
|
||||
|
||||
if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
for (i = 0; i < BITS_PER_LONG; i++) {
|
||||
ext_id = i + reg_num * BITS_PER_LONG;
|
||||
@ -272,7 +274,7 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
|
||||
case KVM_REG_RISCV_SBI_MULTI_DIS:
|
||||
return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false);
|
||||
default:
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -307,7 +309,7 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
|
||||
reg_val = ~reg_val;
|
||||
break;
|
||||
default:
|
||||
rc = -EINVAL;
|
||||
rc = -ENOENT;
|
||||
}
|
||||
if (rc)
|
||||
return rc;
|
||||
|
@ -170,7 +170,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(u64))
|
||||
return -EINVAL;
|
||||
if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_TIMER_REG(frequency):
|
||||
@ -187,7 +187,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
|
||||
KVM_RISCV_TIMER_STATE_OFF;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
@ -211,14 +211,15 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(u64))
|
||||
return -EINVAL;
|
||||
if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
|
||||
return -EINVAL;
|
||||
return -ENOENT;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_TIMER_REG(frequency):
|
||||
ret = -EOPNOTSUPP;
|
||||
if (reg_val != riscv_timebase)
|
||||
return -EINVAL;
|
||||
break;
|
||||
case KVM_REG_RISCV_TIMER_REG(time):
|
||||
gt->time_delta = reg_val - get_cycles64();
|
||||
@ -233,7 +234,7 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
|
||||
ret = kvm_riscv_vcpu_timer_cancel(t);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
ret = -ENOENT;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -91,95 +91,93 @@ void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void *kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
|
||||
unsigned long reg_num,
|
||||
size_t reg_size)
|
||||
static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
|
||||
unsigned long reg_num,
|
||||
size_t reg_size,
|
||||
void **reg_addr)
|
||||
{
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
void *reg_val;
|
||||
size_t vlenb = riscv_v_vsize / 32;
|
||||
|
||||
if (reg_num < KVM_REG_RISCV_VECTOR_REG(0)) {
|
||||
if (reg_size != sizeof(unsigned long))
|
||||
return NULL;
|
||||
return -EINVAL;
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
|
||||
reg_val = &cntx->vector.vstart;
|
||||
*reg_addr = &cntx->vector.vstart;
|
||||
break;
|
||||
case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
|
||||
reg_val = &cntx->vector.vl;
|
||||
*reg_addr = &cntx->vector.vl;
|
||||
break;
|
||||
case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
|
||||
reg_val = &cntx->vector.vtype;
|
||||
*reg_addr = &cntx->vector.vtype;
|
||||
break;
|
||||
case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
|
||||
reg_val = &cntx->vector.vcsr;
|
||||
*reg_addr = &cntx->vector.vcsr;
|
||||
break;
|
||||
case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
|
||||
default:
|
||||
return NULL;
|
||||
return -ENOENT;
|
||||
}
|
||||
} else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
|
||||
if (reg_size != vlenb)
|
||||
return NULL;
|
||||
reg_val = cntx->vector.datap
|
||||
+ (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
|
||||
return -EINVAL;
|
||||
*reg_addr = cntx->vector.datap +
|
||||
(reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
|
||||
} else {
|
||||
return NULL;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
return reg_val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg,
|
||||
unsigned long rtype)
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long *isa = vcpu->arch.isa;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
rtype);
|
||||
void *reg_val = NULL;
|
||||
KVM_REG_RISCV_VECTOR);
|
||||
size_t reg_size = KVM_REG_SIZE(reg->id);
|
||||
void *reg_addr;
|
||||
int rc;
|
||||
|
||||
if (rtype == KVM_REG_RISCV_VECTOR &&
|
||||
riscv_isa_extension_available(isa, v)) {
|
||||
reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
|
||||
}
|
||||
if (!riscv_isa_extension_available(isa, v))
|
||||
return -ENOENT;
|
||||
|
||||
if (!reg_val)
|
||||
return -EINVAL;
|
||||
rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (copy_to_user(uaddr, reg_val, reg_size))
|
||||
if (copy_to_user(uaddr, reg_addr, reg_size))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg,
|
||||
unsigned long rtype)
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long *isa = vcpu->arch.isa;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
rtype);
|
||||
void *reg_val = NULL;
|
||||
KVM_REG_RISCV_VECTOR);
|
||||
size_t reg_size = KVM_REG_SIZE(reg->id);
|
||||
void *reg_addr;
|
||||
int rc;
|
||||
|
||||
if (rtype == KVM_REG_RISCV_VECTOR &&
|
||||
riscv_isa_extension_available(isa, v)) {
|
||||
reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
|
||||
}
|
||||
if (!riscv_isa_extension_available(isa, v))
|
||||
return -ENOENT;
|
||||
|
||||
if (!reg_val)
|
||||
return -EINVAL;
|
||||
rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (copy_from_user(reg_val, uaddr, reg_size))
|
||||
if (copy_from_user(reg_addr, uaddr, reg_size))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
|
@ -817,6 +817,8 @@ struct kvm_s390_cpu_model {
|
||||
__u64 *fac_list;
|
||||
u64 cpuid;
|
||||
unsigned short ibc;
|
||||
/* subset of available UV-features for pv-guests enabled by user space */
|
||||
struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
|
||||
};
|
||||
|
||||
typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
|
||||
|
@ -99,6 +99,8 @@ enum uv_cmds_inst {
|
||||
enum uv_feat_ind {
|
||||
BIT_UV_FEAT_MISC = 0,
|
||||
BIT_UV_FEAT_AIV = 1,
|
||||
BIT_UV_FEAT_AP = 4,
|
||||
BIT_UV_FEAT_AP_INTR = 5,
|
||||
};
|
||||
|
||||
struct uv_cb_header {
|
||||
@ -159,7 +161,15 @@ struct uv_cb_cgc {
|
||||
u64 guest_handle;
|
||||
u64 conf_base_stor_origin;
|
||||
u64 conf_virt_stor_origin;
|
||||
u64 reserved30;
|
||||
u8 reserved30[6];
|
||||
union {
|
||||
struct {
|
||||
u16 : 14;
|
||||
u16 ap_instr_intr : 1;
|
||||
u16 ap_allow_instr : 1;
|
||||
};
|
||||
u16 raw;
|
||||
} flags;
|
||||
u64 guest_stor_origin;
|
||||
u64 guest_stor_len;
|
||||
u64 guest_sca;
|
||||
@ -397,6 +407,13 @@ struct uv_info {
|
||||
|
||||
extern struct uv_info uv_info;
|
||||
|
||||
static inline bool uv_has_feature(u8 feature_bit)
|
||||
{
|
||||
if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
|
||||
return false;
|
||||
return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
|
||||
extern int prot_virt_guest;
|
||||
|
||||
|
@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
|
||||
__u8 reserved[1728];
|
||||
};
|
||||
|
||||
#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6
|
||||
#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7
|
||||
|
||||
#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64
|
||||
struct kvm_s390_vm_cpu_uv_feat {
|
||||
union {
|
||||
struct {
|
||||
__u64 : 4;
|
||||
__u64 ap : 1; /* bit 4 */
|
||||
__u64 ap_intr : 1; /* bit 5 */
|
||||
__u64 : 58;
|
||||
};
|
||||
__u64 feat;
|
||||
};
|
||||
};
|
||||
|
||||
/* kvm attributes for crypto */
|
||||
#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
|
||||
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
|
||||
|
@ -258,7 +258,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
|
||||
* shared page from a different protected VM will automatically also
|
||||
* transfer its ownership.
|
||||
*/
|
||||
if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
|
||||
if (uv_has_feature(BIT_UV_FEAT_MISC))
|
||||
return false;
|
||||
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
|
||||
return false;
|
||||
|
@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
|
||||
|
||||
#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
|
||||
|
||||
static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
|
||||
return false;
|
||||
if (guestdbg_sstep_enabled(vcpu) &&
|
||||
vcpu->arch.sie_block->iprcc != PGM_PER) {
|
||||
/*
|
||||
* __vcpu_run() will exit after delivering the concurrently
|
||||
* indicated condition.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int handle_prog(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
psw_t psw;
|
||||
@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
|
||||
if (kvm_s390_pv_cpu_is_protected(vcpu))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
|
||||
if (should_handle_per_event(vcpu)) {
|
||||
rc = kvm_s390_handle_per_event(vcpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
@ -571,6 +586,19 @@ static int handle_pv_notification(struct kvm_vcpu *vcpu)
|
||||
return handle_instruction(vcpu);
|
||||
}
|
||||
|
||||
static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
|
||||
{
|
||||
/* Process PER, also if the instruction is processed in user space. */
|
||||
if (!(vcpu->arch.sie_block->icptstatus & 0x02))
|
||||
return false;
|
||||
if (rc != 0 && rc != -EOPNOTSUPP)
|
||||
return false;
|
||||
if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
|
||||
/* __vcpu_run() will exit after delivering the interrupt. */
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int rc, per_rc = 0;
|
||||
@ -605,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
|
||||
rc = handle_partial_execution(vcpu);
|
||||
break;
|
||||
case ICPT_KSS:
|
||||
rc = kvm_s390_skey_check_enable(vcpu);
|
||||
break;
|
||||
/* Instruction will be redriven, skip the PER check. */
|
||||
return kvm_s390_skey_check_enable(vcpu);
|
||||
case ICPT_MCHKREQ:
|
||||
case ICPT_INT_ENABLE:
|
||||
/*
|
||||
@ -633,9 +661,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* process PER, also if the instruction is processed in user space */
|
||||
if (vcpu->arch.sie_block->icptstatus & 0x02 &&
|
||||
(!rc || rc == -EOPNOTSUPP))
|
||||
if (should_handle_per_ifetch(vcpu, rc))
|
||||
per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
|
||||
return per_rc ? per_rc : rc;
|
||||
}
|
||||
|
@ -1392,6 +1392,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
int rc = 0;
|
||||
bool delivered = false;
|
||||
unsigned long irq_type;
|
||||
unsigned long irqs;
|
||||
|
||||
@ -1465,6 +1466,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
|
||||
WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
|
||||
clear_bit(irq_type, &li->pending_irqs);
|
||||
}
|
||||
delivered |= !rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* We delivered at least one interrupt and modified the PC. Force a
|
||||
* singlestep event now.
|
||||
*/
|
||||
if (delivered && guestdbg_sstep_enabled(vcpu)) {
|
||||
struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
|
||||
|
||||
debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
|
||||
debug_exit->type = KVM_SINGLESTEP;
|
||||
vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
|
||||
}
|
||||
|
||||
set_intercept_indicators(vcpu);
|
||||
|
@ -1531,6 +1531,39 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Raw 64-bit value of a struct kvm_s390_vm_cpu_uv_feat with the ap and
 * ap_intr bits set. It is ANDed with uv_info.uv_feature_indications to
 * obtain the UV features that may be offered to a guest.
 */
#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \
|
||||
( \
|
||||
((struct kvm_s390_vm_cpu_uv_feat){ \
|
||||
.ap = 1, \
|
||||
.ap_intr = 1, \
|
||||
}) \
|
||||
.feat \
|
||||
)
|
||||
|
||||
static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
|
||||
unsigned long data, filter;
|
||||
|
||||
filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
|
||||
if (get_user(data, &ptr->feat))
|
||||
return -EFAULT;
|
||||
if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
if (kvm->created_vcpus) {
|
||||
mutex_unlock(&kvm->lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
kvm->arch.model.uv_feat_guest.feat = data;
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret = -ENXIO;
|
||||
@ -1545,6 +1578,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
|
||||
ret = kvm_s390_set_processor_subfunc(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
|
||||
ret = kvm_s390_set_uv_feat(kvm, attr);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -1777,6 +1813,33 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
|
||||
unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
|
||||
|
||||
if (put_user(feat, &dst->feat))
|
||||
return -EFAULT;
|
||||
VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
|
||||
unsigned long feat;
|
||||
|
||||
BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
|
||||
|
||||
feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
|
||||
if (put_user(feat, &dst->feat))
|
||||
return -EFAULT;
|
||||
VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret = -ENXIO;
|
||||
@ -1800,6 +1863,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
|
||||
ret = kvm_s390_get_machine_subfunc(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
|
||||
ret = kvm_s390_get_processor_uv_feat(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
|
||||
ret = kvm_s390_get_machine_uv_feat(kvm, attr);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -1952,6 +2021,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
case KVM_S390_VM_CPU_MACHINE_FEAT:
|
||||
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
|
||||
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
|
||||
case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
|
||||
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
@ -2406,7 +2477,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
/* Disable the GISA if the ultravisor does not support AIV. */
|
||||
if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
|
||||
if (!uv_has_feature(BIT_UV_FEAT_AIV))
|
||||
kvm_s390_gisa_disable(kvm);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
@ -3296,6 +3367,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
|
||||
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
|
||||
|
||||
kvm->arch.model.uv_feat_guest.feat = 0;
|
||||
|
||||
kvm_s390_crypto_init(kvm);
|
||||
|
||||
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
|
||||
@ -4611,7 +4684,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
||||
rc = kvm_s390_deliver_pending_interrupts(vcpu);
|
||||
if (rc)
|
||||
if (rc || guestdbg_exit_pending(vcpu))
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -4738,7 +4811,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
do {
|
||||
rc = vcpu_pre_run(vcpu);
|
||||
if (rc)
|
||||
if (rc || guestdbg_exit_pending(vcpu))
|
||||
break;
|
||||
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
@ -5383,6 +5456,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
{
|
||||
struct kvm_vcpu *vcpu = filp->private_data;
|
||||
void __user *argp = (void __user *)arg;
|
||||
int rc;
|
||||
|
||||
switch (ioctl) {
|
||||
case KVM_S390_IRQ: {
|
||||
@ -5390,7 +5464,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
|
||||
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
|
||||
return -EFAULT;
|
||||
return kvm_s390_inject_vcpu(vcpu, &s390irq);
|
||||
rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
|
||||
break;
|
||||
}
|
||||
case KVM_S390_INTERRUPT: {
|
||||
struct kvm_s390_interrupt s390int;
|
||||
@ -5400,10 +5475,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
return -EFAULT;
|
||||
if (s390int_to_s390irq(&s390int, &s390irq))
|
||||
return -EINVAL;
|
||||
return kvm_s390_inject_vcpu(vcpu, &s390irq);
|
||||
rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
rc = -ENOIOCTLCMD;
|
||||
break;
|
||||
}
|
||||
return -ENOIOCTLCMD;
|
||||
|
||||
/*
|
||||
* To simplify single stepping of userspace-emulated instructions,
|
||||
* KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
|
||||
* should_handle_per_ifetch()). However, if userspace emulation injects
|
||||
* an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
|
||||
* after (and not before) the interrupt delivery.
|
||||
*/
|
||||
if (!rc)
|
||||
vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
|
||||
|
@ -285,7 +285,8 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
|
||||
uvcb.header.rc, uvcb.header.rrc);
|
||||
WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
|
||||
WARN_ONCE(cc && uvcb.header.rc != 0x104,
|
||||
"protvirt destroy vm fast failed handle %llx rc %x rrc %x",
|
||||
kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
|
||||
/* Intended memory leak on "impossible" error */
|
||||
if (!cc)
|
||||
@ -575,12 +576,14 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
uvcb.conf_base_stor_origin =
|
||||
virt_to_phys((void *)kvm->arch.pv.stor_base);
|
||||
uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
|
||||
uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
|
||||
uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
|
||||
|
||||
cc = uv_call_sched(0, (u64)&uvcb);
|
||||
*rc = uvcb.header.rc;
|
||||
*rrc = uvcb.header.rrc;
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
|
||||
uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
|
||||
uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
|
||||
|
||||
/* Outputs */
|
||||
kvm->arch.pv.handle = uvcb.guest_handle;
|
||||
|
@ -598,7 +598,7 @@ void do_secure_storage_access(struct pt_regs *regs)
|
||||
* reliable without the misc UV feature so we need to check
|
||||
* for that as well.
|
||||
*/
|
||||
if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
|
||||
if (uv_has_feature(BIT_UV_FEAT_MISC) &&
|
||||
!test_bit_inv(61, &regs->int_parm_long)) {
|
||||
/*
|
||||
* When this happens, userspace did something that it
|
||||
|
@ -439,6 +439,7 @@
|
||||
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
|
||||
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
|
||||
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
|
||||
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
|
||||
|
||||
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
|
||||
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
|
||||
|
@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef void crash_vmclear_fn(void);
|
||||
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
|
||||
extern void kdump_nmi_shootdown_cpus(void);
|
||||
|
||||
#ifdef CONFIG_CRASH_HOTPLUG
|
||||
|
@ -288,13 +288,13 @@ struct kvm_kernel_irq_routing_entry;
|
||||
* kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
|
||||
* also includes TDP pages) to determine whether or not a page can be used in
|
||||
* the given MMU context. This is a subset of the overall kvm_cpu_role to
|
||||
* minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
|
||||
* 2 bytes per gfn instead of 4 bytes per gfn.
|
||||
* minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
|
||||
* allocating 2 bytes per gfn instead of 4 bytes per gfn.
|
||||
*
|
||||
* Upper-level shadow pages having gptes are tracked for write-protection via
|
||||
* gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create
|
||||
* more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
|
||||
* gfn_track will overflow and explosions will ensure.
|
||||
* gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must
|
||||
* not create more than 2^16-1 upper-level shadow pages at a single gfn,
|
||||
* otherwise gfn_write_track will overflow and explosions will ensue.
|
||||
*
|
||||
* A unique shadow page (SP) for a gfn is created if and only if an existing SP
|
||||
* cannot be reused. The ability to reuse a SP is tracked by its role, which
|
||||
@ -746,7 +746,6 @@ struct kvm_vcpu_arch {
|
||||
u64 smi_count;
|
||||
bool at_instruction_boundary;
|
||||
bool tpr_access_reporting;
|
||||
bool xsaves_enabled;
|
||||
bool xfd_no_write_intercept;
|
||||
u64 ia32_xss;
|
||||
u64 microcode_version;
|
||||
@ -831,6 +830,25 @@ struct kvm_vcpu_arch {
|
||||
struct kvm_cpuid_entry2 *cpuid_entries;
|
||||
struct kvm_hypervisor_cpuid kvm_cpuid;
|
||||
|
||||
/*
|
||||
* FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
|
||||
* when "struct kvm_vcpu_arch" is no longer defined in an
|
||||
* arch/x86/include/asm header. The max is mostly arbitrary, i.e.
|
||||
* can be increased as necessary.
|
||||
*/
|
||||
#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
|
||||
|
||||
/*
|
||||
* Track whether or not the guest is allowed to use features that are
|
||||
* governed by KVM, where "governed" means KVM needs to manage state
|
||||
* and/or explicitly enable the feature in hardware. Typically, but
|
||||
* not always, governed features can be used by the guest if and only
|
||||
* if both KVM and userspace want to expose the feature to the guest.
|
||||
*/
|
||||
struct {
|
||||
DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
|
||||
} governed_features;
|
||||
|
||||
u64 reserved_gpa_bits;
|
||||
int maxphyaddr;
|
||||
|
||||
@ -1005,7 +1023,7 @@ struct kvm_lpage_info {
|
||||
struct kvm_arch_memory_slot {
|
||||
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
|
||||
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
|
||||
unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
|
||||
unsigned short *gfn_write_track;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1247,8 +1265,9 @@ struct kvm_arch {
|
||||
* create an NX huge page (without hanging the guest).
|
||||
*/
|
||||
struct list_head possible_nx_huge_pages;
|
||||
struct kvm_page_track_notifier_node mmu_sp_tracker;
|
||||
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
|
||||
struct kvm_page_track_notifier_head track_notifier_head;
|
||||
#endif
|
||||
/*
|
||||
* Protects marking pages unsync during page faults, as TDP MMU page
|
||||
* faults only take mmu_lock for read. For simplicity, the unsync
|
||||
@ -1655,8 +1674,8 @@ struct kvm_x86_ops {
|
||||
|
||||
u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
|
||||
u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
|
||||
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
|
||||
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
|
||||
void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
|
||||
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
|
||||
* Retrieve somewhat arbitrary exit information. Intended to
|
||||
@ -1795,8 +1814,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
|
||||
#define __KVM_HAVE_ARCH_VM_FREE
|
||||
void kvm_arch_free_vm(struct kvm *kvm);
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
|
||||
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
|
||||
static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
if (kvm_x86_ops.flush_remote_tlbs &&
|
||||
!static_call(kvm_x86_flush_remote_tlbs)(kvm))
|
||||
@ -1805,6 +1824,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
|
||||
|
||||
#define kvm_arch_pmi_in_guest(vcpu) \
|
||||
((vcpu) && (vcpu)->arch.handling_intr_from_guest)
|
||||
|
||||
@ -1833,7 +1854,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot);
|
||||
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot);
|
||||
void kvm_mmu_zap_all(struct kvm *kvm);
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
|
||||
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
|
||||
|
||||
|
@ -2,11 +2,9 @@
|
||||
#ifndef _ASM_X86_KVM_PAGE_TRACK_H
|
||||
#define _ASM_X86_KVM_PAGE_TRACK_H
|
||||
|
||||
enum kvm_page_track_mode {
|
||||
KVM_PAGE_TRACK_WRITE,
|
||||
KVM_PAGE_TRACK_MAX,
|
||||
};
|
||||
#include <linux/kvm_types.h>
|
||||
|
||||
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
|
||||
/*
|
||||
* The notifier represented by @kvm_page_track_notifier_node is linked into
|
||||
* the head which will be notified when guest is triggering the track event.
|
||||
@ -26,54 +24,39 @@ struct kvm_page_track_notifier_node {
|
||||
* It is called when guest is writing the write-tracked page
|
||||
* and write emulation is finished at that time.
|
||||
*
|
||||
* @vcpu: the vcpu where the write access happened.
|
||||
* @gpa: the physical address written by guest.
|
||||
* @new: the data was written to the address.
|
||||
* @bytes: the written length.
|
||||
* @node: this node
|
||||
*/
|
||||
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes, struct kvm_page_track_notifier_node *node);
|
||||
/*
|
||||
* It is called when memory slot is being moved or removed
|
||||
* users can drop write-protection for the pages in that memory slot
|
||||
*
|
||||
* @kvm: the kvm where memory slot being moved or removed
|
||||
* @slot: the memory slot being moved or removed
|
||||
* @node: this node
|
||||
*/
|
||||
void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
void (*track_write)(gpa_t gpa, const u8 *new, int bytes,
|
||||
struct kvm_page_track_notifier_node *node);
|
||||
|
||||
/*
|
||||
* Invoked when a memory region is removed from the guest. Or in KVM
|
||||
* terms, when a memslot is deleted.
|
||||
*
|
||||
* @gfn: base gfn of the region being removed
|
||||
* @nr_pages: number of pages in the to-be-removed region
|
||||
* @node: this node
|
||||
*/
|
||||
void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
|
||||
struct kvm_page_track_notifier_node *node);
|
||||
};
|
||||
|
||||
int kvm_page_track_init(struct kvm *kvm);
|
||||
void kvm_page_track_cleanup(struct kvm *kvm);
|
||||
int kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n);
|
||||
void kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n);
|
||||
|
||||
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
|
||||
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
|
||||
int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
|
||||
int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
|
||||
#else
|
||||
/*
|
||||
* Allow defining a node in a structure even if page tracking is disabled, e.g.
|
||||
* to play nice with testing headers via direct inclusion from the command line.
|
||||
*/
|
||||
struct kvm_page_track_notifier_node {};
|
||||
#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
|
||||
int kvm_page_track_create_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned long npages);
|
||||
|
||||
void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode);
|
||||
void kvm_slot_page_track_remove_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode);
|
||||
bool kvm_slot_page_track_is_active(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, enum kvm_page_track_mode mode);
|
||||
|
||||
void
|
||||
kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n);
|
||||
void
|
||||
kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n);
|
||||
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes);
|
||||
void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
|
||||
#endif
|
||||
|
@ -25,7 +25,14 @@ void __noreturn machine_real_restart(unsigned int type);
|
||||
#define MRR_BIOS 0
|
||||
#define MRR_APM 1
|
||||
|
||||
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
|
||||
typedef void (cpu_emergency_virt_cb)(void);
|
||||
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
|
||||
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
|
||||
void cpu_emergency_disable_virtualization(void);
|
||||
#else
|
||||
static inline void cpu_emergency_disable_virtualization(void) {}
|
||||
#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
|
||||
|
||||
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
|
||||
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
|
||||
|
@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
|
||||
|
||||
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
|
||||
|
||||
#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
|
||||
|
||||
struct vmcb_seg {
|
||||
u16 selector;
|
||||
@ -345,7 +346,7 @@ struct vmcb_save_area {
|
||||
u64 last_excp_from;
|
||||
u64 last_excp_to;
|
||||
u8 reserved_0x298[72];
|
||||
u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
|
||||
u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
|
||||
} __packed;
|
||||
|
||||
/* Save area definition for SEV-ES and SEV-SNP guests */
|
||||
@ -512,7 +513,7 @@ struct ghcb {
|
||||
} __packed;
|
||||
|
||||
|
||||
#define EXPECTED_VMCB_SAVE_AREA_SIZE 740
|
||||
#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
|
||||
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
|
||||
#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648
|
||||
#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
|
||||
|
@ -1,154 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/* CPU virtualization extensions handling
|
||||
*
|
||||
* This should carry the code for handling CPU virtualization extensions
|
||||
* that needs to live in the kernel core.
|
||||
*
|
||||
* Author: Eduardo Habkost <ehabkost@redhat.com>
|
||||
*
|
||||
* Copyright (C) 2008, Red Hat Inc.
|
||||
*
|
||||
* Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
|
||||
*/
|
||||
#ifndef _ASM_X86_VIRTEX_H
|
||||
#define _ASM_X86_VIRTEX_H
|
||||
|
||||
#include <asm/processor.h>
|
||||
|
||||
#include <asm/vmx.h>
|
||||
#include <asm/svm.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
/*
|
||||
* VMX functions:
|
||||
*/
|
||||
|
||||
/* Return non-zero if CPUID.1:ECX.VMX[bit 5] is set, i.e. the CPU has VT. */
static inline int cpu_has_vmx(void)
{
	unsigned long cpuid1_ecx = cpuid_ecx(1);

	if (test_bit(5, &cpuid1_ecx))
		return 1;

	return 0;
}
|
||||
|
||||
|
||||
/**
|
||||
* cpu_vmxoff() - Disable VMX on the current CPU
|
||||
*
|
||||
* Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
|
||||
*
|
||||
* Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
|
||||
* atomically track post-VMXON state, e.g. this may be called in NMI context.
|
||||
* Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
|
||||
* faults are guaranteed to be due to the !post-VMXON check unless the CPU is
|
||||
* magically in RM, VM86, compat mode, or at CPL>0.
|
||||
*/
|
||||
static inline int cpu_vmxoff(void)
|
||||
{
|
||||
asm_volatile_goto("1: vmxoff\n\t"
|
||||
_ASM_EXTABLE(1b, %l[fault])
|
||||
::: "cc", "memory" : fault);
|
||||
|
||||
cr4_clear_bits(X86_CR4_VMXE);
|
||||
return 0;
|
||||
|
||||
fault:
|
||||
cr4_clear_bits(X86_CR4_VMXE);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static inline int cpu_vmx_enabled(void)
|
||||
{
|
||||
return __read_cr4() & X86_CR4_VMXE;
|
||||
}
|
||||
|
||||
/*
 * Execute VMXOFF on the current CPU, but only if CR4.VMXE is set.
 *
 * You shouldn't call this if cpu_has_vmx() returns 0.
 */
static inline void __cpu_emergency_vmxoff(void)
{
	if (!cpu_vmx_enabled())
		return;

	cpu_vmxoff();
}
|
||||
|
||||
/* Disable VMX on the current CPU if it is both supported and enabled. */
static inline void cpu_emergency_vmxoff(void)
{
	if (!cpu_has_vmx())
		return;

	__cpu_emergency_vmxoff();
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* SVM functions:
|
||||
*/
|
||||
|
||||
/** Check if the CPU has SVM support
|
||||
*
|
||||
* You can use the 'msg' arg to get a message describing the problem,
|
||||
* if the function returns zero. Simply pass NULL if you are not interested
|
||||
* on the messages; gcc should take care of not generating code for
|
||||
* the messages on this case.
|
||||
*/
|
||||
static inline int cpu_has_svm(const char **msg)
|
||||
{
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
|
||||
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) {
|
||||
if (msg)
|
||||
*msg = "not amd or hygon";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
|
||||
if (msg)
|
||||
*msg = "can't execute cpuid_8000000a";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_SVM)) {
|
||||
if (msg)
|
||||
*msg = "svm not available";
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/** Disable SVM on the current CPU
|
||||
*
|
||||
* You should call this only if cpu_has_svm() returned true.
|
||||
*/
|
||||
static inline void cpu_svm_disable(void)
|
||||
{
|
||||
uint64_t efer;
|
||||
|
||||
wrmsrl(MSR_VM_HSAVE_PA, 0);
|
||||
rdmsrl(MSR_EFER, efer);
|
||||
if (efer & EFER_SVME) {
|
||||
/*
|
||||
* Force GIF=1 prior to disabling SVM to ensure INIT and NMI
|
||||
* aren't blocked, e.g. if a fatal error occurred between CLGI
|
||||
* and STGI. Note, STGI may #UD if SVM is disabled from NMI
|
||||
* context between reading EFER and executing STGI. In that
|
||||
* case, GIF must already be set, otherwise the NMI would have
|
||||
* been blocked, so just eat the fault.
|
||||
*/
|
||||
asm_volatile_goto("1: stgi\n\t"
|
||||
_ASM_EXTABLE(1b, %l[fault])
|
||||
::: "memory" : fault);
|
||||
fault:
|
||||
wrmsrl(MSR_EFER, efer & ~EFER_SVME);
|
||||
}
|
||||
}
|
||||
|
||||
/** Makes sure SVM is disabled, if it is supported on the CPU
|
||||
*/
|
||||
static inline void cpu_emergency_svm_disable(void)
|
||||
{
|
||||
if (cpu_has_svm(NULL))
|
||||
cpu_svm_disable();
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_VIRTEX_H */
|
@ -71,7 +71,7 @@
|
||||
#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
|
||||
#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
|
||||
#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
|
||||
#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES)
|
||||
#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
|
||||
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
|
||||
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
|
||||
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
|
||||
|
@ -48,38 +48,12 @@ struct crash_memmap_data {
|
||||
unsigned int type;
|
||||
};
|
||||
|
||||
/*
|
||||
* This is used to VMCLEAR all VMCSs loaded on the
|
||||
* processor. And when loading kvm_intel module, the
|
||||
* callback function pointer will be assigned.
|
||||
*
|
||||
* protected by rcu.
|
||||
*/
|
||||
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
|
||||
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
|
||||
|
||||
static inline void cpu_crash_vmclear_loaded_vmcss(void)
|
||||
{
|
||||
crash_vmclear_fn *do_vmclear_operation = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
|
||||
if (do_vmclear_operation)
|
||||
do_vmclear_operation();
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
|
||||
|
||||
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
|
||||
{
|
||||
crash_save_cpu(regs, cpu);
|
||||
|
||||
/*
|
||||
* VMCLEAR VMCSs loaded on all cpus if needed.
|
||||
*/
|
||||
cpu_crash_vmclear_loaded_vmcss();
|
||||
|
||||
/*
|
||||
* Disable Intel PT to stop its logging
|
||||
*/
|
||||
@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
|
||||
|
||||
crash_smp_send_stop();
|
||||
|
||||
/*
|
||||
* VMCLEAR VMCSs loaded on this cpu if needed.
|
||||
*/
|
||||
cpu_crash_vmclear_loaded_vmcss();
|
||||
|
||||
cpu_emergency_disable_virtualization();
|
||||
|
||||
/*
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include <asm/reboot_fixups.h>
|
||||
#include <asm/reboot.h>
|
||||
#include <asm/pci_x86.h>
|
||||
#include <asm/virtext.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/smp.h>
|
||||
@ -530,9 +529,54 @@ static inline void kb_wait(void)
|
||||
|
||||
static inline void nmi_shootdown_cpus_on_restart(void);
|
||||
|
||||
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
|
||||
/* RCU-protected callback to disable virtualization prior to reboot. */
|
||||
static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
|
||||
|
||||
/*
 * Install @callback as the emergency "disable virtualization" hook.
 * Only one callback may be registered at a time; a second registration
 * triggers a one-time warning and is ignored.
 */
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
{
	bool already_registered;

	already_registered = WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback));
	if (!already_registered)
		rcu_assign_pointer(cpu_emergency_virt_callback, callback);
}
|
||||
EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback);
|
||||
|
||||
/*
 * Remove the emergency "disable virtualization" hook.
 *
 * @callback must be the hook that is currently installed; otherwise warn once
 * and change nothing.  After the hook pointer is cleared, synchronize_rcu() is
 * called before returning.
 */
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
{
	cpu_emergency_virt_cb *installed;

	installed = rcu_access_pointer(cpu_emergency_virt_callback);
	if (WARN_ON_ONCE(installed != callback))
		return;

	rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
	synchronize_rcu();
}
|
||||
EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
|
||||
|
||||
/*
|
||||
* Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
|
||||
* reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
|
||||
* GIF=0, i.e. if the crash occurred between CLGI and STGI.
|
||||
*/
|
||||
void cpu_emergency_disable_virtualization(void)
|
||||
{
|
||||
cpu_emergency_virt_cb *callback;
|
||||
|
||||
/*
|
||||
* IRQs must be disabled as KVM enables virtualization in hardware via
|
||||
* function call IPIs, i.e. IRQs need to be disabled to guarantee
|
||||
* virtualization stays disabled.
|
||||
*/
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
rcu_read_lock();
|
||||
callback = rcu_dereference(cpu_emergency_virt_callback);
|
||||
if (callback)
|
||||
callback();
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void emergency_reboot_disable_virtualization(void)
|
||||
{
|
||||
/* Just make sure we won't change CPUs while doing this */
|
||||
local_irq_disable();
|
||||
|
||||
/*
|
||||
@ -545,7 +589,7 @@ static void emergency_reboot_disable_virtualization(void)
|
||||
* Do the NMI shootdown even if virtualization is off on _this_ CPU, as
|
||||
* other CPUs may have virtualization enabled.
|
||||
*/
|
||||
if (cpu_has_vmx() || cpu_has_svm(NULL)) {
|
||||
if (rcu_access_pointer(cpu_emergency_virt_callback)) {
|
||||
/* Safely force _this_ CPU out of VMX/SVM operation. */
|
||||
cpu_emergency_disable_virtualization();
|
||||
|
||||
@ -553,7 +597,9 @@ static void emergency_reboot_disable_virtualization(void)
|
||||
nmi_shootdown_cpus_on_restart();
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
static void emergency_reboot_disable_virtualization(void) { }
|
||||
#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
|
||||
|
||||
void __attribute__((weak)) mach_reboot_fixups(void)
|
||||
{
|
||||
@ -787,21 +833,9 @@ void machine_crash_shutdown(struct pt_regs *regs)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* This is the CPU performing the emergency shutdown work. */
|
||||
int crashing_cpu = -1;
|
||||
|
||||
/*
|
||||
* Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
|
||||
* reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
|
||||
* GIF=0, i.e. if the crash occurred between CLGI and STGI.
|
||||
*/
|
||||
void cpu_emergency_disable_virtualization(void)
{
	/* Both helpers are called unconditionally: VMX first, then SVM. */
	cpu_emergency_vmxoff();
	cpu_emergency_svm_disable();
}
|
||||
|
||||
#if defined(CONFIG_SMP)
|
||||
|
||||
static nmi_shootdown_cb shootdown_callback;
|
||||
|
@ -101,7 +101,7 @@ config X86_SGX_KVM
|
||||
|
||||
config KVM_AMD
|
||||
tristate "KVM for AMD processors support"
|
||||
depends on KVM
|
||||
depends on KVM && (CPU_SUP_AMD || CPU_SUP_HYGON)
|
||||
help
|
||||
Provides support for KVM on AMD processors equipped with the AMD-V
|
||||
(SVM) extensions.
|
||||
@ -138,6 +138,19 @@ config KVM_XEN
|
||||
|
||||
If in doubt, say "N".
|
||||
|
||||
config KVM_PROVE_MMU
|
||||
bool "Prove KVM MMU correctness"
|
||||
depends on DEBUG_KERNEL
|
||||
depends on KVM
|
||||
depends on EXPERT
|
||||
help
|
||||
Enables runtime assertions in KVM's MMU that are too costly to enable
|
||||
in anything remotely resembling a production environment, e.g. this
|
||||
gates code that verifies a to-be-freed page table doesn't have any
|
||||
present SPTEs.
|
||||
|
||||
If in doubt, say "N".
|
||||
|
||||
config KVM_EXTERNAL_WRITE_TRACKING
|
||||
bool
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include "linux/lockdep.h"
|
||||
#include <linux/export.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/uaccess.h>
|
||||
@ -84,6 +85,18 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
|
||||
struct kvm_cpuid_entry2 *e;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* KVM has a semi-arbitrary rule that querying the guest's CPUID model
|
||||
* with IRQs disabled is disallowed. The CPUID model can legitimately
|
||||
* have over one hundred entries, i.e. the lookup is slow, and IRQs are
|
||||
* typically disabled in KVM only when KVM is in a performance critical
|
||||
* path, e.g. the core VM-Enter/VM-Exit run loop. Nothing will break
|
||||
* if this rule is violated, this assertion is purely to flag potential
|
||||
* performance issues. If this fires, consider moving the lookup out
|
||||
* of the hotpath, e.g. by caching information during CPUID updates.
|
||||
*/
|
||||
lockdep_assert_irqs_enabled();
|
||||
|
||||
for (i = 0; i < nent; i++) {
|
||||
e = &entries[i];
|
||||
|
||||
@ -312,6 +325,27 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
bool allow_gbpages;
|
||||
|
||||
BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
|
||||
bitmap_zero(vcpu->arch.governed_features.enabled,
|
||||
KVM_MAX_NR_GOVERNED_FEATURES);
|
||||
|
||||
/*
|
||||
* If TDP is enabled, let the guest use GBPAGES if they're supported in
|
||||
* hardware. The hardware page walker doesn't let KVM disable GBPAGES,
|
||||
* i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
|
||||
* walk for performance and complexity reasons. Not to mention KVM
|
||||
* _can't_ solve the problem because GVA->GPA walks aren't visible to
|
||||
* KVM once a TDP translation is installed. Mimic hardware behavior so
|
||||
* that KVM's behavior is at least consistent, i.e. doesn't randomly inject #PF.
|
||||
* If TDP is disabled, honor *only* guest CPUID as KVM has full control
|
||||
* and can install smaller shadow pages if the host lacks 1GiB support.
|
||||
*/
|
||||
allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
|
||||
if (allow_gbpages)
|
||||
kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 1);
|
||||
if (best && apic) {
|
||||
@ -647,7 +681,8 @@ void kvm_set_cpu_caps(void)
|
||||
);
|
||||
|
||||
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
|
||||
F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI)
|
||||
F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
|
||||
F(AMX_COMPLEX)
|
||||
);
|
||||
|
||||
kvm_cpu_cap_mask(CPUID_D_1_EAX,
|
||||
@ -1154,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
|
||||
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
|
||||
break;
|
||||
case 0x80000005:
|
||||
/* Pass host L1 cache and TLB info. */
|
||||
break;
|
||||
case 0x80000006:
|
||||
/* Drop reserved bits, pass host L2 cache and TLB info. */
|
||||
entry->edx &= ~GENMASK(17, 16);
|
||||
|
@ -232,4 +232,50 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
|
||||
return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
|
||||
}
|
||||
|
||||
/*
 * Indices of the governed features.  One enumerator, KVM_GOVERNED_<name>, is
 * generated for every entry in governed_features.h by expanding
 * KVM_GOVERNED_FEATURE() to "KVM_GOVERNED_<name>,".  The trailing
 * KVM_NR_GOVERNED_FEATURES therefore equals the number of entries.
 */
enum kvm_governed_features {
#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
#include "governed_features.h"
	KVM_NR_GOVERNED_FEATURES
};
|
||||
|
||||
/*
 * Translate an X86_FEATURE_* value into its KVM_GOVERNED_* index.
 *
 * The case labels are generated from governed_features.h, one per entry.
 * Returns the matching index, or -1 if @x86_feature is not listed there.
 */
static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
{
	switch (x86_feature) {
#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
#include "governed_features.h"
	default:
		break;
	}

	return -1;
}
|
||||
|
||||
/* Return true if @x86_feature has an index in the governed feature set. */
static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
{
	return !(kvm_governed_feature_index(x86_feature) < 0);
}
|
||||
|
||||
static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
|
||||
unsigned int x86_feature)
|
||||
{
|
||||
BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
|
||||
|
||||
__set_bit(kvm_governed_feature_index(x86_feature),
|
||||
vcpu->arch.governed_features.enabled);
|
||||
}
|
||||
|
||||
/*
 * Enable the governed feature @x86_feature for @vcpu only if both checks pass:
 * kvm_cpu_cap_has() for the feature, then guest_cpuid_has() for this vCPU.
 * The guest CPUID check is skipped when the first check fails.
 */
static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
								unsigned int x86_feature)
{
	if (!kvm_cpu_cap_has(x86_feature))
		return;

	if (!guest_cpuid_has(vcpu, x86_feature))
		return;

	kvm_governed_feature_set(vcpu, x86_feature);
}
|
||||
|
||||
/*
 * Return true if the governed feature @x86_feature is marked enabled in
 * @vcpu's governed feature bitmap.
 *
 * @x86_feature must be a governed feature; passing anything else is rejected
 * at build time.
 */
static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
					  unsigned int x86_feature)
{
	const int bit = kvm_governed_feature_index(x86_feature);

	BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));

	return test_bit(bit, vcpu->arch.governed_features.enabled);
}
|
||||
|
||||
#endif
|
||||
|
@ -1799,13 +1799,11 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
|
||||
op->addr.mem,
|
||||
&op->val,
|
||||
op->bytes);
|
||||
break;
|
||||
case OP_MEM_STR:
|
||||
return segmented_write(ctxt,
|
||||
op->addr.mem,
|
||||
op->data,
|
||||
op->bytes * op->count);
|
||||
break;
|
||||
case OP_XMM:
|
||||
kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
|
||||
break;
|
||||
|
21
arch/x86/kvm/governed_features.h
Normal file
21
arch/x86/kvm/governed_features.h
Normal file
@ -0,0 +1,21 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE)
|
||||
BUILD_BUG()
|
||||
#endif
|
||||
|
||||
#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x)
|
||||
|
||||
KVM_GOVERNED_X86_FEATURE(GBPAGES)
|
||||
KVM_GOVERNED_X86_FEATURE(XSAVES)
|
||||
KVM_GOVERNED_X86_FEATURE(VMX)
|
||||
KVM_GOVERNED_X86_FEATURE(NRIPS)
|
||||
KVM_GOVERNED_X86_FEATURE(TSCRATEMSR)
|
||||
KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD)
|
||||
KVM_GOVERNED_X86_FEATURE(LBRV)
|
||||
KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
|
||||
KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
|
||||
KVM_GOVERNED_X86_FEATURE(VGIF)
|
||||
KVM_GOVERNED_X86_FEATURE(VNMI)
|
||||
|
||||
#undef KVM_GOVERNED_X86_FEATURE
|
||||
#undef KVM_GOVERNED_FEATURE
|
@ -1293,7 +1293,6 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
|
||||
case HV_X64_MSR_VP_ASSIST_PAGE:
|
||||
return hv_vcpu->cpuid_cache.features_eax &
|
||||
HV_MSR_APIC_ACCESS_AVAILABLE;
|
||||
break;
|
||||
case HV_X64_MSR_TSC_FREQUENCY:
|
||||
case HV_X64_MSR_APIC_FREQUENCY:
|
||||
return hv_vcpu->cpuid_cache.features_eax &
|
||||
|
@ -213,7 +213,6 @@ struct x86_emulate_ops {
|
||||
|
||||
bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
|
||||
u32 *ecx, u32 *edx, bool exact_only);
|
||||
bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
|
||||
bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
|
||||
bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
|
||||
bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
|
||||
|
@ -376,7 +376,8 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
|
||||
struct kvm_vcpu *vcpu;
|
||||
unsigned long i;
|
||||
u32 max_id = 255; /* enough space for any xAPIC ID */
|
||||
bool xapic_id_mismatch = false;
|
||||
bool xapic_id_mismatch;
|
||||
int r;
|
||||
|
||||
/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
|
||||
if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
|
||||
@ -386,9 +387,14 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
|
||||
"Dirty APIC map without an in-kernel local APIC");
|
||||
|
||||
mutex_lock(&kvm->arch.apic_map_lock);
|
||||
|
||||
retry:
|
||||
/*
|
||||
* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
|
||||
* (if clean) or the APIC registers (if dirty).
|
||||
* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
|
||||
* or the APIC registers (if dirty). Note, on retry the map may have
|
||||
* not yet been marked dirty by whatever task changed a vCPU's x2APIC
|
||||
* ID, i.e. the map may still show up as in-progress. In that case
|
||||
* this task still needs to retry and complete its calculation.
|
||||
*/
|
||||
if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
|
||||
DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
|
||||
@ -397,6 +403,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the mismatch flag between attempts so that KVM does the right
|
||||
* thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
|
||||
* keep max_id strictly increasing. Disallowing max_id from shrinking
|
||||
* ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
|
||||
* with the highest x2APIC ID is toggling its APIC on and off.
|
||||
*/
|
||||
xapic_id_mismatch = false;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
if (kvm_apic_present(vcpu))
|
||||
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
|
||||
@ -415,9 +430,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
|
||||
if (!kvm_apic_present(vcpu))
|
||||
continue;
|
||||
|
||||
if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
|
||||
r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
|
||||
if (r) {
|
||||
kvfree(new);
|
||||
new = NULL;
|
||||
if (r == -E2BIG) {
|
||||
cond_resched();
|
||||
goto retry;
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -121,6 +121,8 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes);
|
||||
|
||||
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "kvm_cache_regs.h"
|
||||
#include "smm.h"
|
||||
#include "kvm_emulate.h"
|
||||
#include "page_track.h"
|
||||
#include "cpuid.h"
|
||||
#include "spte.h"
|
||||
|
||||
@ -53,7 +54,7 @@
|
||||
#include <asm/io.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/vmx.h>
|
||||
#include <asm/kvm_page_track.h>
|
||||
|
||||
#include "trace.h"
|
||||
|
||||
extern bool itlb_multihit_kvm_mitigation;
|
||||
@ -115,11 +116,6 @@ static int max_huge_page_level __read_mostly;
|
||||
static int tdp_root_level __read_mostly;
|
||||
static int max_tdp_level __read_mostly;
|
||||
|
||||
#ifdef MMU_DEBUG
|
||||
bool dbg = 0;
|
||||
module_param(dbg, bool, 0644);
|
||||
#endif
|
||||
|
||||
#define PTE_PREFETCH_NUM 8
|
||||
|
||||
#include <trace/events/kvm.h>
|
||||
@ -278,16 +274,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void)
|
||||
return kvm_x86_ops.flush_remote_tlbs_range;
|
||||
}
|
||||
|
||||
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
|
||||
gfn_t nr_pages)
|
||||
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
|
||||
{
|
||||
int ret = -EOPNOTSUPP;
|
||||
if (!kvm_x86_ops.flush_remote_tlbs_range)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (kvm_x86_ops.flush_remote_tlbs_range)
|
||||
ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
|
||||
nr_pages);
|
||||
if (ret)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
|
||||
}
|
||||
|
||||
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
|
||||
@ -490,7 +482,7 @@ retry:
|
||||
*/
|
||||
static void mmu_spte_set(u64 *sptep, u64 new_spte)
|
||||
{
|
||||
WARN_ON(is_shadow_present_pte(*sptep));
|
||||
WARN_ON_ONCE(is_shadow_present_pte(*sptep));
|
||||
__set_spte(sptep, new_spte);
|
||||
}
|
||||
|
||||
@ -502,7 +494,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
|
||||
{
|
||||
u64 old_spte = *sptep;
|
||||
|
||||
WARN_ON(!is_shadow_present_pte(new_spte));
|
||||
WARN_ON_ONCE(!is_shadow_present_pte(new_spte));
|
||||
check_spte_writable_invariants(new_spte);
|
||||
|
||||
if (!is_shadow_present_pte(old_spte)) {
|
||||
@ -515,7 +507,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
|
||||
else
|
||||
old_spte = __update_clear_spte_slow(sptep, new_spte);
|
||||
|
||||
WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
|
||||
WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
|
||||
|
||||
return old_spte;
|
||||
}
|
||||
@ -597,7 +589,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
|
||||
* by a refcounted page, the refcount is elevated.
|
||||
*/
|
||||
page = kvm_pfn_to_refcounted_page(pfn);
|
||||
WARN_ON(page && !page_count(page));
|
||||
WARN_ON_ONCE(page && !page_count(page));
|
||||
|
||||
if (is_accessed_spte(old_spte))
|
||||
kvm_set_pfn_accessed(pfn);
|
||||
@ -812,7 +804,7 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
|
||||
for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
|
||||
linfo = lpage_info_slot(gfn, slot, i);
|
||||
linfo->disallow_lpage += count;
|
||||
WARN_ON(linfo->disallow_lpage < 0);
|
||||
WARN_ON_ONCE(linfo->disallow_lpage < 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -839,8 +831,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
|
||||
/* Non-leaf shadow pages are kept read-only. */
|
||||
if (sp->role.level > PG_LEVEL_4K)
|
||||
return kvm_slot_page_track_add_page(kvm, slot, gfn,
|
||||
KVM_PAGE_TRACK_WRITE);
|
||||
return __kvm_write_track_add_gfn(kvm, slot, gfn);
|
||||
|
||||
kvm_mmu_gfn_disallow_lpage(slot, gfn);
|
||||
|
||||
@ -886,8 +877,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
slots = kvm_memslots_for_spte_role(kvm, sp->role);
|
||||
slot = __gfn_to_memslot(slots, gfn);
|
||||
if (sp->role.level > PG_LEVEL_4K)
|
||||
return kvm_slot_page_track_remove_page(kvm, slot, gfn,
|
||||
KVM_PAGE_TRACK_WRITE);
|
||||
return __kvm_write_track_remove_gfn(kvm, slot, gfn);
|
||||
|
||||
kvm_mmu_gfn_allow_lpage(slot, gfn);
|
||||
}
|
||||
@ -941,10 +931,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
|
||||
int count = 0;
|
||||
|
||||
if (!rmap_head->val) {
|
||||
rmap_printk("%p %llx 0->1\n", spte, *spte);
|
||||
rmap_head->val = (unsigned long)spte;
|
||||
} else if (!(rmap_head->val & 1)) {
|
||||
rmap_printk("%p %llx 1->many\n", spte, *spte);
|
||||
desc = kvm_mmu_memory_cache_alloc(cache);
|
||||
desc->sptes[0] = (u64 *)rmap_head->val;
|
||||
desc->sptes[1] = spte;
|
||||
@ -953,7 +941,6 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
|
||||
rmap_head->val = (unsigned long)desc | 1;
|
||||
++count;
|
||||
} else {
|
||||
rmap_printk("%p %llx many->many\n", spte, *spte);
|
||||
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
|
||||
count = desc->tail_count + desc->spte_count;
|
||||
|
||||
@ -973,7 +960,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
|
||||
return count;
|
||||
}
|
||||
|
||||
static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
|
||||
static void pte_list_desc_remove_entry(struct kvm *kvm,
|
||||
struct kvm_rmap_head *rmap_head,
|
||||
struct pte_list_desc *desc, int i)
|
||||
{
|
||||
struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
|
||||
@ -984,7 +972,7 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
|
||||
* when adding an entry and the previous head is full, and heads are
|
||||
* removed (this flow) when they become empty.
|
||||
*/
|
||||
BUG_ON(j < 0);
|
||||
KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm);
|
||||
|
||||
/*
|
||||
* Replace the to-be-freed SPTE with the last valid entry from the head
|
||||
@ -1009,35 +997,34 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
|
||||
mmu_free_pte_list_desc(head_desc);
|
||||
}
|
||||
|
||||
static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
|
||||
static void pte_list_remove(struct kvm *kvm, u64 *spte,
|
||||
struct kvm_rmap_head *rmap_head)
|
||||
{
|
||||
struct pte_list_desc *desc;
|
||||
int i;
|
||||
|
||||
if (!rmap_head->val) {
|
||||
pr_err("%s: %p 0->BUG\n", __func__, spte);
|
||||
BUG();
|
||||
} else if (!(rmap_head->val & 1)) {
|
||||
rmap_printk("%p 1->0\n", spte);
|
||||
if ((u64 *)rmap_head->val != spte) {
|
||||
pr_err("%s: %p 1->BUG\n", __func__, spte);
|
||||
BUG();
|
||||
}
|
||||
if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
|
||||
return;
|
||||
|
||||
if (!(rmap_head->val & 1)) {
|
||||
if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
|
||||
return;
|
||||
|
||||
rmap_head->val = 0;
|
||||
} else {
|
||||
rmap_printk("%p many->many\n", spte);
|
||||
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
|
||||
while (desc) {
|
||||
for (i = 0; i < desc->spte_count; ++i) {
|
||||
if (desc->sptes[i] == spte) {
|
||||
pte_list_desc_remove_entry(rmap_head, desc, i);
|
||||
pte_list_desc_remove_entry(kvm, rmap_head,
|
||||
desc, i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
desc = desc->more;
|
||||
}
|
||||
pr_err("%s: %p many->many\n", __func__, spte);
|
||||
BUG();
|
||||
|
||||
KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1045,7 +1032,7 @@ static void kvm_zap_one_rmap_spte(struct kvm *kvm,
|
||||
struct kvm_rmap_head *rmap_head, u64 *sptep)
|
||||
{
|
||||
mmu_spte_clear_track_bits(kvm, sptep);
|
||||
pte_list_remove(sptep, rmap_head);
|
||||
pte_list_remove(kvm, sptep, rmap_head);
|
||||
}
|
||||
|
||||
/* Return true if at least one SPTE was zapped, false otherwise */
|
||||
@ -1120,7 +1107,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
|
||||
slot = __gfn_to_memslot(slots, gfn);
|
||||
rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
|
||||
|
||||
pte_list_remove(spte, rmap_head);
|
||||
pte_list_remove(kvm, spte, rmap_head);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1212,7 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
|
||||
struct kvm_mmu_page *sp;
|
||||
|
||||
sp = sptep_to_sp(sptep);
|
||||
WARN_ON(sp->role.level == PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K);
|
||||
|
||||
drop_spte(kvm, sptep);
|
||||
|
||||
@ -1241,8 +1228,6 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
|
||||
!(pt_protect && is_mmu_writable_spte(spte)))
|
||||
return false;
|
||||
|
||||
rmap_printk("spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
if (pt_protect)
|
||||
spte &= ~shadow_mmu_writable_mask;
|
||||
spte = spte & ~PT_WRITABLE_MASK;
|
||||
@ -1267,9 +1252,7 @@ static bool spte_clear_dirty(u64 *sptep)
|
||||
{
|
||||
u64 spte = *sptep;
|
||||
|
||||
rmap_printk("spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
MMU_WARN_ON(!spte_ad_enabled(spte));
|
||||
KVM_MMU_WARN_ON(!spte_ad_enabled(spte));
|
||||
spte &= ~shadow_dirty_mask;
|
||||
return mmu_spte_update(sptep, spte);
|
||||
}
|
||||
@ -1475,14 +1458,11 @@ static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
|
||||
u64 new_spte;
|
||||
kvm_pfn_t new_pfn;
|
||||
|
||||
WARN_ON(pte_huge(pte));
|
||||
WARN_ON_ONCE(pte_huge(pte));
|
||||
new_pfn = pte_pfn(pte);
|
||||
|
||||
restart:
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep) {
|
||||
rmap_printk("spte %p %llx gfn %llx (%d)\n",
|
||||
sptep, *sptep, gfn, level);
|
||||
|
||||
need_flush = true;
|
||||
|
||||
if (pte_write(pte)) {
|
||||
@ -1588,7 +1568,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
|
||||
for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
|
||||
range->start, range->end - 1, &iterator)
|
||||
ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
|
||||
iterator.level, range->pte);
|
||||
iterator.level, range->arg.pte);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1710,21 +1690,19 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
return young;
|
||||
}
|
||||
|
||||
#ifdef MMU_DEBUG
|
||||
static int is_empty_shadow_page(u64 *spt)
|
||||
static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
|
||||
{
|
||||
u64 *pos;
|
||||
u64 *end;
|
||||
#ifdef CONFIG_KVM_PROVE_MMU
|
||||
int i;
|
||||
|
||||
for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++)
|
||||
if (is_shadow_present_pte(*pos)) {
|
||||
printk(KERN_ERR "%s: %p %llx\n", __func__,
|
||||
pos, *pos);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
|
||||
if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i])))
|
||||
pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free",
|
||||
sp->spt[i], &sp->spt[i],
|
||||
kvm_mmu_page_get_gfn(sp, i));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* This value is the sum of all of the kvm instances'
|
||||
@ -1752,7 +1730,8 @@ static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
|
||||
static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
|
||||
{
|
||||
MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
|
||||
kvm_mmu_check_sptes_at_free(sp);
|
||||
|
||||
hlist_del(&sp->hash_link);
|
||||
list_del(&sp->link);
|
||||
free_page((unsigned long)sp->spt);
|
||||
@ -1775,16 +1754,16 @@ static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
|
||||
pte_list_add(cache, parent_pte, &sp->parent_ptes);
|
||||
}
|
||||
|
||||
static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
|
||||
static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
u64 *parent_pte)
|
||||
{
|
||||
pte_list_remove(parent_pte, &sp->parent_ptes);
|
||||
pte_list_remove(kvm, parent_pte, &sp->parent_ptes);
|
||||
}
|
||||
|
||||
static void drop_parent_pte(struct kvm_mmu_page *sp,
|
||||
static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
u64 *parent_pte)
|
||||
{
|
||||
mmu_page_remove_parent_pte(sp, parent_pte);
|
||||
mmu_page_remove_parent_pte(kvm, sp, parent_pte);
|
||||
mmu_spte_clear_no_track(parent_pte);
|
||||
}
|
||||
|
||||
@ -1840,7 +1819,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
|
||||
static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
|
||||
{
|
||||
--sp->unsync_children;
|
||||
WARN_ON((int)sp->unsync_children < 0);
|
||||
WARN_ON_ONCE((int)sp->unsync_children < 0);
|
||||
__clear_bit(idx, sp->unsync_child_bitmap);
|
||||
}
|
||||
|
||||
@ -1898,7 +1877,7 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
|
||||
|
||||
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
WARN_ON(!sp->unsync);
|
||||
WARN_ON_ONCE(!sp->unsync);
|
||||
trace_kvm_mmu_sync_page(sp);
|
||||
sp->unsync = 0;
|
||||
--kvm->stat.mmu_unsync;
|
||||
@ -2073,11 +2052,11 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec,
|
||||
if (pvec->nr == 0)
|
||||
return 0;
|
||||
|
||||
WARN_ON(pvec->page[0].idx != INVALID_INDEX);
|
||||
WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX);
|
||||
|
||||
sp = pvec->page[0].sp;
|
||||
level = sp->role.level;
|
||||
WARN_ON(level == PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(level == PG_LEVEL_4K);
|
||||
|
||||
parents->parent[level-2] = sp;
|
||||
|
||||
@ -2099,7 +2078,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
|
||||
if (!sp)
|
||||
return;
|
||||
|
||||
WARN_ON(idx == INVALID_INDEX);
|
||||
WARN_ON_ONCE(idx == INVALID_INDEX);
|
||||
clear_unsync_child_bit(sp, idx);
|
||||
level++;
|
||||
} while (!sp->unsync_children);
|
||||
@ -2220,7 +2199,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
WARN_ON(!list_empty(&invalid_list));
|
||||
WARN_ON_ONCE(!list_empty(&invalid_list));
|
||||
if (ret > 0)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
@ -2499,7 +2478,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
||||
if (child->role.access == direct_access)
|
||||
return;
|
||||
|
||||
drop_parent_pte(child, sptep);
|
||||
drop_parent_pte(vcpu->kvm, child, sptep);
|
||||
kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
|
||||
}
|
||||
}
|
||||
@ -2517,7 +2496,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
drop_spte(kvm, spte);
|
||||
} else {
|
||||
child = spte_to_child_sp(pte);
|
||||
drop_parent_pte(child, spte);
|
||||
drop_parent_pte(kvm, child, spte);
|
||||
|
||||
/*
|
||||
* Recursively zap nested TDP SPs, parentless SPs are
|
||||
@ -2548,13 +2527,13 @@ static int kvm_mmu_page_unlink_children(struct kvm *kvm,
|
||||
return zapped;
|
||||
}
|
||||
|
||||
static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
|
||||
static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
u64 *sptep;
|
||||
struct rmap_iterator iter;
|
||||
|
||||
while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
|
||||
drop_parent_pte(sp, sptep);
|
||||
drop_parent_pte(kvm, sp, sptep);
|
||||
}
|
||||
|
||||
static int mmu_zap_unsync_children(struct kvm *kvm,
|
||||
@ -2593,7 +2572,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
|
||||
++kvm->stat.mmu_shadow_zapped;
|
||||
*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
|
||||
*nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
|
||||
kvm_mmu_unlink_parents(sp);
|
||||
kvm_mmu_unlink_parents(kvm, sp);
|
||||
|
||||
/* Zapping children means active_mmu_pages has become unstable. */
|
||||
list_unstable = *nr_zapped;
|
||||
@ -2675,7 +2654,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
list_for_each_entry_safe(sp, nsp, invalid_list, link) {
|
||||
WARN_ON(!sp->role.invalid || sp->root_count);
|
||||
WARN_ON_ONCE(!sp->role.invalid || sp->root_count);
|
||||
kvm_mmu_free_shadow_page(sp);
|
||||
}
|
||||
}
|
||||
@ -2775,12 +2754,9 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
||||
LIST_HEAD(invalid_list);
|
||||
int r;
|
||||
|
||||
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
|
||||
r = 0;
|
||||
write_lock(&kvm->mmu_lock);
|
||||
for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
|
||||
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
|
||||
sp->role.word);
|
||||
r = 1;
|
||||
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
|
||||
}
|
||||
@ -2831,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
|
||||
* track machinery is used to write-protect upper-level shadow pages,
|
||||
* i.e. this guards the role.level == 4K assertion below!
|
||||
*/
|
||||
if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
|
||||
if (kvm_gfn_is_write_tracked(kvm, slot, gfn))
|
||||
return -EPERM;
|
||||
|
||||
/*
|
||||
@ -2873,7 +2849,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
|
||||
continue;
|
||||
}
|
||||
|
||||
WARN_ON(sp->role.level != PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
|
||||
kvm_unsync_page(kvm, sp);
|
||||
}
|
||||
if (locked)
|
||||
@ -2938,9 +2914,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
||||
bool prefetch = !fault || fault->prefetch;
|
||||
bool write_fault = fault && fault->write;
|
||||
|
||||
pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
|
||||
*sptep, write_fault, gfn);
|
||||
|
||||
if (unlikely(is_noslot_pfn(pfn))) {
|
||||
vcpu->stat.pf_mmio_spte_created++;
|
||||
mark_mmio_spte(vcpu, sptep, gfn, pte_access);
|
||||
@ -2957,11 +2930,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
||||
u64 pte = *sptep;
|
||||
|
||||
child = spte_to_child_sp(pte);
|
||||
drop_parent_pte(child, sptep);
|
||||
drop_parent_pte(vcpu->kvm, child, sptep);
|
||||
flush = true;
|
||||
} else if (pfn != spte_to_pfn(*sptep)) {
|
||||
pgprintk("hfn old %llx new %llx\n",
|
||||
spte_to_pfn(*sptep), pfn);
|
||||
drop_spte(vcpu->kvm, sptep);
|
||||
flush = true;
|
||||
} else
|
||||
@ -2986,8 +2957,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
|
||||
|
||||
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
|
||||
|
||||
if (!was_rmapped) {
|
||||
WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
|
||||
rmap_add(vcpu, slot, sptep, gfn, pte_access);
|
||||
@ -3033,7 +3002,7 @@ static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
|
||||
u64 *spte, *start = NULL;
|
||||
int i;
|
||||
|
||||
WARN_ON(!sp->role.direct);
|
||||
WARN_ON_ONCE(!sp->role.direct);
|
||||
|
||||
i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1);
|
||||
spte = sp->spt + i;
|
||||
@ -3574,12 +3543,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
|
||||
if (!VALID_PAGE(*root_hpa))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The "root" may be a special root, e.g. a PAE entry, treat it as a
|
||||
* SPTE to ensure any non-PA bits are dropped.
|
||||
*/
|
||||
sp = spte_to_child_sp(*root_hpa);
|
||||
if (WARN_ON(!sp))
|
||||
sp = root_to_sp(*root_hpa);
|
||||
if (WARN_ON_ONCE(!sp))
|
||||
return;
|
||||
|
||||
if (is_tdp_mmu_page(sp))
|
||||
@ -3624,7 +3589,9 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
|
||||
&invalid_list);
|
||||
|
||||
if (free_active_root) {
|
||||
if (to_shadow_page(mmu->root.hpa)) {
|
||||
if (kvm_mmu_is_dummy_root(mmu->root.hpa)) {
|
||||
/* Nothing to cleanup for dummy roots. */
|
||||
} else if (root_to_sp(mmu->root.hpa)) {
|
||||
mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list);
|
||||
} else if (mmu->pae_root) {
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -3648,6 +3615,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
|
||||
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
|
||||
{
|
||||
unsigned long roots_to_free = 0;
|
||||
struct kvm_mmu_page *sp;
|
||||
hpa_t root_hpa;
|
||||
int i;
|
||||
|
||||
@ -3662,8 +3630,8 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
|
||||
if (!VALID_PAGE(root_hpa))
|
||||
continue;
|
||||
|
||||
if (!to_shadow_page(root_hpa) ||
|
||||
to_shadow_page(root_hpa)->role.guest_mode)
|
||||
sp = root_to_sp(root_hpa);
|
||||
if (!sp || sp->role.guest_mode)
|
||||
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
|
||||
}
|
||||
|
||||
@ -3671,19 +3639,6 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
|
||||
|
||||
|
||||
/*
 * Validate a guest root gfn before it is used as a shadow root.
 *
 * Returns 0 when kvm_vcpu_is_visible_gfn() accepts @root_gfn.  Otherwise a
 * KVM_REQ_TRIPLE_FAULT request is queued on @vcpu and 1 is returned.
 */
static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
{
	if (kvm_vcpu_is_visible_gfn(vcpu, root_gfn))
		return 0;

	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	return 1;
}
|
||||
|
||||
static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
|
||||
u8 level)
|
||||
{
|
||||
@ -3821,8 +3776,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
|
||||
root_gfn = root_pgd >> PAGE_SHIFT;
|
||||
|
||||
if (mmu_check_root(vcpu, root_gfn))
|
||||
return 1;
|
||||
if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
|
||||
mmu->root.hpa = kvm_mmu_get_dummy_root();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* On SVM, reading PDPTRs might access guest memory, which might fault
|
||||
@ -3834,8 +3791,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
if (!(pdptrs[i] & PT_PRESENT_MASK))
|
||||
continue;
|
||||
|
||||
if (mmu_check_root(vcpu, pdptrs[i] >> PAGE_SHIFT))
|
||||
return 1;
|
||||
if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT))
|
||||
pdptrs[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4002,7 +3959,7 @@ static bool is_unsync_root(hpa_t root)
|
||||
{
|
||||
struct kvm_mmu_page *sp;
|
||||
|
||||
if (!VALID_PAGE(root))
|
||||
if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root))
|
||||
return false;
|
||||
|
||||
/*
|
||||
@ -4018,7 +3975,7 @@ static bool is_unsync_root(hpa_t root)
|
||||
* requirement isn't satisfied.
|
||||
*/
|
||||
smp_rmb();
|
||||
sp = to_shadow_page(root);
|
||||
sp = root_to_sp(root);
|
||||
|
||||
/*
|
||||
* PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the
|
||||
@ -4048,11 +4005,12 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu->root.hpa;
|
||||
sp = to_shadow_page(root);
|
||||
|
||||
if (!is_unsync_root(root))
|
||||
return;
|
||||
|
||||
sp = root_to_sp(root);
|
||||
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
mmu_sync_children(vcpu, sp, true);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
@ -4194,7 +4152,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
reserved = get_mmio_spte(vcpu, addr, &spte);
|
||||
if (WARN_ON(reserved))
|
||||
if (WARN_ON_ONCE(reserved))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_mmio_spte(spte)) {
|
||||
@ -4232,7 +4190,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
|
||||
* guest is writing the page which is write tracked which can
|
||||
* not be fixed by page fault handler.
|
||||
*/
|
||||
if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
|
||||
if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@ -4382,7 +4340,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
|
||||
static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault)
|
||||
{
|
||||
struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
|
||||
struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
|
||||
|
||||
/* Special roots, e.g. pae_root, are not backed by shadow pages. */
|
||||
if (sp && is_obsolete_sp(vcpu->kvm, sp))
|
||||
@ -4407,6 +4365,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
{
|
||||
int r;
|
||||
|
||||
/* Dummy roots are used only for shadowing bad guest roots. */
|
||||
if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa)))
|
||||
return RET_PF_RETRY;
|
||||
|
||||
if (page_fault_handle_page_track(vcpu, fault))
|
||||
return RET_PF_EMULATE;
|
||||
|
||||
@ -4443,8 +4405,6 @@ out_unlock:
|
||||
static int nonpaging_page_fault(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault)
|
||||
{
|
||||
pgprintk("%s: gva %lx error %x\n", __func__, fault->addr, fault->error_code);
|
||||
|
||||
/* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
|
||||
fault->max_level = PG_LEVEL_2M;
|
||||
return direct_page_fault(vcpu, fault);
|
||||
@ -4562,9 +4522,19 @@ static void nonpaging_init_context(struct kvm_mmu *context)
|
||||
/*
 * Check whether a cached root can be reused for the given @pgd and @role.
 *
 * The root must hold a valid HPA, must match @pgd unless @role is direct,
 * and must be backed by a shadow page whose role word equals @role's.  A
 * valid root with no shadow page is not expected here; it triggers a
 * one-time WARN and is reported as unusable rather than being dereferenced.
 */
static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
				  union kvm_mmu_page_role role)
{
	struct kvm_mmu_page *sp;

	if (!VALID_PAGE(root->hpa))
		return false;

	/* Direct roots are not keyed on the guest PGD. */
	if (!role.direct && pgd != root->pgd)
		return false;

	sp = root_to_sp(root->hpa);
	if (WARN_ON_ONCE(!sp))
		return false;

	return role.word == sp->role.word;
}
|
||||
|
||||
/*
|
||||
@ -4634,11 +4604,10 @@ static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu,
|
||||
gpa_t new_pgd, union kvm_mmu_page_role new_role)
|
||||
{
|
||||
/*
|
||||
* For now, limit the caching to 64-bit hosts+VMs in order to avoid
|
||||
* having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
|
||||
* later if necessary.
|
||||
* Limit reuse to 64-bit hosts+VMs without "special" roots in order to
|
||||
* avoid having to deal with PDPTEs and other complexities.
|
||||
*/
|
||||
if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa))
|
||||
if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa))
|
||||
kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
|
||||
|
||||
if (VALID_PAGE(mmu->root.hpa))
|
||||
@ -4684,9 +4653,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
|
||||
* If this is a direct root page, it doesn't have a write flooding
|
||||
* count. Otherwise, clear the write flooding count.
|
||||
*/
|
||||
if (!new_role.direct)
|
||||
__clear_sp_write_flooding_count(
|
||||
to_shadow_page(vcpu->arch.mmu->root.hpa));
|
||||
if (!new_role.direct) {
|
||||
struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
|
||||
|
||||
if (!WARN_ON_ONCE(!sp))
|
||||
__clear_sp_write_flooding_count(sp);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
|
||||
|
||||
@ -4808,28 +4780,13 @@ static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
|
||||
}
|
||||
}
|
||||
|
||||
static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* If TDP is enabled, let the guest use GBPAGES if they're supported in
|
||||
* hardware. The hardware page walker doesn't let KVM disable GBPAGES,
|
||||
* i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
|
||||
* walk for performance and complexity reasons. Not to mention KVM
|
||||
* _can't_ solve the problem because GVA->GPA walks aren't visible to
|
||||
* KVM once a TDP translation is installed. Mimic hardware behavior so
|
||||
* that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
|
||||
*/
|
||||
return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
|
||||
}
|
||||
|
||||
static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *context)
|
||||
{
|
||||
__reset_rsvds_bits_mask(&context->guest_rsvd_check,
|
||||
vcpu->arch.reserved_gpa_bits,
|
||||
context->cpu_role.base.level, is_efer_nx(context),
|
||||
guest_can_use_gbpages(vcpu),
|
||||
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
|
||||
is_cr4_pse(context),
|
||||
guest_cpuid_is_amd_or_hygon(vcpu));
|
||||
}
|
||||
@ -4906,7 +4863,8 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
|
||||
__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
|
||||
context->root_role.level,
|
||||
context->root_role.efer_nx,
|
||||
guest_can_use_gbpages(vcpu), is_pse, is_amd);
|
||||
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
|
||||
is_pse, is_amd);
|
||||
|
||||
if (!shadow_me_mask)
|
||||
return;
|
||||
@ -5467,8 +5425,8 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
* physical address properties) in a single VM would require tracking
|
||||
* all relevant CPUID information in kvm_mmu_page_role. That is very
|
||||
* undesirable as it would increase the memory requirements for
|
||||
* gfn_track (see struct kvm_mmu_page_role comments). For now that
|
||||
* problem is swept under the rug; KVM's CPUID API is horrific and
|
||||
* gfn_write_track (see struct kvm_mmu_page_role comments). For now
|
||||
* that problem is swept under the rug; KVM's CPUID API is horrific and
|
||||
* it's all but impossible to solve it without introducing a new API.
|
||||
*/
|
||||
vcpu->arch.root_mmu.root_role.word = 0;
|
||||
@ -5531,9 +5489,9 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
|
||||
WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
|
||||
WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
|
||||
kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
|
||||
WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
|
||||
WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
|
||||
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
|
||||
}
|
||||
|
||||
@ -5546,16 +5504,21 @@ static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa)
|
||||
|
||||
/*
|
||||
* When freeing obsolete roots, treat roots as obsolete if they don't
|
||||
* have an associated shadow page. This does mean KVM will get false
|
||||
* have an associated shadow page, as it's impossible to determine if
|
||||
* such roots are fresh or stale. This does mean KVM will get false
|
||||
* positives and free roots that don't strictly need to be freed, but
|
||||
* such false positives are relatively rare:
|
||||
*
|
||||
* (a) only PAE paging and nested NPT has roots without shadow pages
|
||||
* (a) only PAE paging and nested NPT have roots without shadow pages
|
||||
* (or any shadow paging flavor with a dummy root, see note below)
|
||||
* (b) remote reloads due to a memslot update obsoletes _all_ roots
|
||||
* (c) KVM doesn't track previous roots for PAE paging, and the guest
|
||||
* is unlikely to zap an in-use PGD.
|
||||
*
|
||||
* Note! Dummy roots are unique in that they are obsoleted by memslot
|
||||
* _creation_! See also FNAME(fetch).
|
||||
*/
|
||||
sp = to_shadow_page(root_hpa);
|
||||
sp = root_to_sp(root_hpa);
|
||||
return !sp || is_obsolete_sp(kvm, sp);
|
||||
}
|
||||
|
||||
@ -5634,9 +5597,6 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
|
||||
{
|
||||
unsigned offset, pte_size, misaligned;
|
||||
|
||||
pgprintk("misaligned: gpa %llx bytes %d role %x\n",
|
||||
gpa, bytes, sp->role.word);
|
||||
|
||||
offset = offset_in_page(gpa);
|
||||
pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
|
||||
|
||||
@ -5684,9 +5644,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
|
||||
return spte;
|
||||
}
|
||||
|
||||
static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
const u8 *new, int bytes,
|
||||
struct kvm_page_track_notifier_node *node)
|
||||
void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes)
|
||||
{
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
struct kvm_mmu_page *sp;
|
||||
@ -5702,8 +5661,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
|
||||
return;
|
||||
|
||||
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
|
||||
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
|
||||
@ -5742,7 +5699,18 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
|
||||
int r, emulation_type = EMULTYPE_PF;
|
||||
bool direct = vcpu->arch.mmu->root_role.direct;
|
||||
|
||||
if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
|
||||
/*
|
||||
* IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
|
||||
* checks when emulating instructions that trigger implicit accesses.
|
||||
* WARN if hardware generates a fault with an error code that collides
|
||||
* with the KVM-defined value. Clear the flag and continue on, i.e.
|
||||
* don't terminate the VM, as KVM can't possibly be relying on a flag
|
||||
* that KVM doesn't know about.
|
||||
*/
|
||||
if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
|
||||
error_code &= ~PFERR_IMPLICIT_ACCESS;
|
||||
|
||||
if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
|
||||
return RET_PF_RETRY;
|
||||
|
||||
r = RET_PF_INVALID;
|
||||
@ -6099,7 +6067,7 @@ restart:
|
||||
* pages. Skip the bogus page, otherwise we'll get stuck in an
|
||||
* infinite loop if the page gets put back on the list (again).
|
||||
*/
|
||||
if (WARN_ON(sp->role.invalid))
|
||||
if (WARN_ON_ONCE(sp->role.invalid))
|
||||
continue;
|
||||
|
||||
/*
|
||||
@ -6199,16 +6167,8 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
|
||||
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
|
||||
}
|
||||
|
||||
/*
 * Page-track callback installed as ->track_flush_slot by kvm_mmu_init_vm().
 * Neither @slot nor @node is consulted; every invocation simply calls
 * kvm_mmu_zap_all_fast() on @kvm.
 */
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
			struct kvm_memory_slot *slot,
			struct kvm_page_track_notifier_node *node)
{
	kvm_mmu_zap_all_fast(kvm);
}
|
||||
|
||||
int kvm_mmu_init_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
|
||||
int r;
|
||||
|
||||
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
|
||||
@ -6222,10 +6182,6 @@ int kvm_mmu_init_vm(struct kvm *kvm)
|
||||
return r;
|
||||
}
|
||||
|
||||
node->track_write = kvm_mmu_pte_write;
|
||||
node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
|
||||
kvm_page_track_register_notifier(kvm, node);
|
||||
|
||||
kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
|
||||
kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
|
||||
|
||||
@ -6246,10 +6202,6 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
|
||||
|
||||
void kvm_mmu_uninit_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
|
||||
|
||||
kvm_page_track_unregister_notifier(kvm, node);
|
||||
|
||||
if (tdp_mmu_enabled)
|
||||
kvm_mmu_uninit_tdp_mmu(kvm);
|
||||
|
||||
@ -6670,7 +6622,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
|
||||
*/
|
||||
if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
|
||||
PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
|
||||
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
|
||||
kvm_flush_remote_tlbs_memslot(kvm, slot);
|
||||
}
|
||||
|
||||
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
||||
@ -6689,20 +6641,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
/*
|
||||
* All current use cases for flushing the TLBs for a specific memslot
|
||||
* related to dirty logging, and many do the TLB flush out of mmu_lock.
|
||||
* The interaction between the various operations on memslot must be
|
||||
* serialized by slots_locks to ensure the TLB flush from one operation
|
||||
* is observed by any other operation on the same memslot.
|
||||
*/
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
|
||||
}
|
||||
|
||||
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
@ -6732,7 +6670,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
*/
|
||||
}
|
||||
|
||||
void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
static void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_mmu_page *sp, *node;
|
||||
LIST_HEAD(invalid_list);
|
||||
@ -6741,7 +6679,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
write_lock(&kvm->mmu_lock);
|
||||
restart:
|
||||
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
|
||||
if (WARN_ON(sp->role.invalid))
|
||||
if (WARN_ON_ONCE(sp->role.invalid))
|
||||
continue;
|
||||
if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
|
||||
goto restart;
|
||||
@ -6757,9 +6695,20 @@ restart:
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
/* Arch hook: flush all shadow state for @kvm by calling kvm_mmu_zap_all(). */
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_mmu_zap_all(kvm);
}
|
||||
|
||||
/*
 * Arch hook invoked for a specific memslot.  @slot is not consulted; the
 * request is handled by calling kvm_mmu_zap_all_fast() on the whole VM.
 */
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	kvm_mmu_zap_all_fast(kvm);
}
|
||||
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
|
||||
{
|
||||
WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
|
||||
WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
|
||||
|
||||
gen &= MMIO_SPTE_GEN_MASK;
|
||||
|
||||
@ -6862,7 +6811,7 @@ static void mmu_destroy_caches(void)
|
||||
static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
|
||||
{
|
||||
if (nx_hugepage_mitigation_hard_disabled)
|
||||
return sprintf(buffer, "never\n");
|
||||
return sysfs_emit(buffer, "never\n");
|
||||
|
||||
return param_get_bool(buffer, kp);
|
||||
}
|
||||
|
@ -6,18 +6,10 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_host.h>
|
||||
|
||||
/*
 * KVM_MMU_WARN_ON() guards MMU sanity checks.  With CONFIG_KVM_PROVE_MMU it
 * expands to WARN_ON_ONCE(); otherwise it expands to BUILD_BUG_ON_INVALID(),
 * so the expression is still handed to the compiler in both configurations.
 */
#ifdef CONFIG_KVM_PROVE_MMU
#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
#else
#define KVM_MMU_WARN_ON(x) BUILD_BUG_ON_INVALID(x)
#endif
|
||||
|
||||
/* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
|
||||
@ -44,6 +36,16 @@ extern bool dbg;
|
||||
#define INVALID_PAE_ROOT 0
|
||||
#define IS_VALID_PAE_ROOT(x) (!!(x))
|
||||
|
||||
static inline hpa_t kvm_mmu_get_dummy_root(void)
|
||||
{
|
||||
return my_zero_pfn(0) << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
/*
 * Return true if @shadow_page, a root HPA, refers to a dummy root, i.e. if
 * the pfn derived from it satisfies is_zero_pfn().
 */
static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
{
	return is_zero_pfn(shadow_page >> PAGE_SHIFT);
}
|
||||
|
||||
typedef u64 __rcu *tdp_ptep_t;
|
||||
|
||||
struct kvm_mmu_page {
|
||||
@ -170,9 +172,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, u64 gfn,
|
||||
int min_level);
|
||||
|
||||
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
|
||||
gfn_t nr_pages);
|
||||
|
||||
/* Flush the given page (huge or not) of guest memory. */
|
||||
static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
|
||||
{
|
||||
|
@ -12,13 +12,13 @@
|
||||
*/
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/rculist.h>
|
||||
|
||||
#include <asm/kvm_page_track.h>
|
||||
|
||||
#include "mmu.h"
|
||||
#include "mmu_internal.h"
|
||||
#include "page_track.h"
|
||||
|
||||
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
|
||||
{
|
||||
@ -28,103 +28,64 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
|
||||
{
|
||||
int i;
|
||||
kvfree(slot->arch.gfn_write_track);
|
||||
slot->arch.gfn_write_track = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
|
||||
kvfree(slot->arch.gfn_track[i]);
|
||||
slot->arch.gfn_track[i] = NULL;
|
||||
}
|
||||
static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
|
||||
unsigned long npages)
|
||||
{
|
||||
const size_t size = sizeof(*slot->arch.gfn_write_track);
|
||||
|
||||
if (!slot->arch.gfn_write_track)
|
||||
slot->arch.gfn_write_track = __vcalloc(npages, size,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
|
||||
return slot->arch.gfn_write_track ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
int kvm_page_track_create_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned long npages)
|
||||
{
|
||||
int i;
|
||||
if (!kvm_page_track_write_tracking_enabled(kvm))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
|
||||
if (i == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm))
|
||||
continue;
|
||||
|
||||
slot->arch.gfn_track[i] =
|
||||
__vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!slot->arch.gfn_track[i])
|
||||
goto track_free;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
track_free:
|
||||
kvm_page_track_free_memslot(slot);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
|
||||
{
|
||||
if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return __kvm_page_track_write_tracking_alloc(slot, npages);
|
||||
}
|
||||
|
||||
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
|
||||
{
|
||||
unsigned short *gfn_track;
|
||||
|
||||
if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
|
||||
return 0;
|
||||
|
||||
gfn_track = __vcalloc(slot->npages, sizeof(*gfn_track),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (gfn_track == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
|
||||
return 0;
|
||||
return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
|
||||
}
|
||||
|
||||
/*
 * Adjust the write-tracking counter of @gfn in @slot by @count.
 *
 * The counter is looked up at 4K granularity.  If the adjustment would take
 * the counter below zero or above USHRT_MAX, WARN once and leave the
 * counter untouched.
 */
static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
				   short count)
{
	int index, val;

	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);

	val = slot->arch.gfn_write_track[index];

	if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
		return;

	slot->arch.gfn_write_track[index] += count;
}
|
||||
|
||||
/*
|
||||
* add guest page to the tracking pool so that corresponding access on that
|
||||
* page will be intercepted.
|
||||
*
|
||||
* It should be called under the protection both of mmu-lock and kvm->srcu
|
||||
* or kvm->slots_lock.
|
||||
*
|
||||
* @kvm: the guest instance we are interested in.
|
||||
* @slot: the @gfn belongs to.
|
||||
* @gfn: the guest page.
|
||||
* @mode: tracking mode, currently only write track is supported.
|
||||
*/
|
||||
void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode)
|
||||
void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn)
|
||||
{
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
|
||||
srcu_read_lock_held(&kvm->srcu));
|
||||
|
||||
if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
|
||||
return;
|
||||
|
||||
if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm)))
|
||||
return;
|
||||
|
||||
update_gfn_track(slot, gfn, mode, 1);
|
||||
update_gfn_write_track(slot, gfn, 1);
|
||||
|
||||
/*
|
||||
* new track stops large page mapping for the
|
||||
@ -132,37 +93,22 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
*/
|
||||
kvm_mmu_gfn_disallow_lpage(slot, gfn);
|
||||
|
||||
if (mode == KVM_PAGE_TRACK_WRITE)
|
||||
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
|
||||
|
||||
/*
|
||||
* remove the guest page from the tracking pool which stops the interception
|
||||
* of corresponding access on that page. It is the opposed operation of
|
||||
* kvm_slot_page_track_add_page().
|
||||
*
|
||||
* It should be called under the protection both of mmu-lock and kvm->srcu
|
||||
* or kvm->slots_lock.
|
||||
*
|
||||
* @kvm: the guest instance we are interested in.
|
||||
* @slot: the @gfn belongs to.
|
||||
* @gfn: the guest page.
|
||||
* @mode: tracking mode, currently only write track is supported.
|
||||
*/
|
||||
void kvm_slot_page_track_remove_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode)
|
||||
void __kvm_write_track_remove_gfn(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
|
||||
srcu_read_lock_held(&kvm->srcu));
|
||||
|
||||
if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
|
||||
return;
|
||||
|
||||
if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm)))
|
||||
return;
|
||||
|
||||
update_gfn_track(slot, gfn, mode, -1);
|
||||
update_gfn_write_track(slot, gfn, -1);
|
||||
|
||||
/*
|
||||
* allow large page mapping for the tracked page
|
||||
@ -170,31 +116,26 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
|
||||
*/
|
||||
kvm_mmu_gfn_allow_lpage(slot, gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
|
||||
|
||||
/*
|
||||
* check if the corresponding access on the specified guest page is tracked.
|
||||
*/
|
||||
bool kvm_slot_page_track_is_active(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, enum kvm_page_track_mode mode)
|
||||
bool kvm_gfn_is_write_tracked(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
int index;
|
||||
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
return false;
|
||||
|
||||
if (!slot)
|
||||
return false;
|
||||
|
||||
if (mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm))
|
||||
if (!kvm_page_track_write_tracking_enabled(kvm))
|
||||
return false;
|
||||
|
||||
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
|
||||
return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
|
||||
return !!READ_ONCE(slot->arch.gfn_write_track[index]);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
|
||||
void kvm_page_track_cleanup(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
@ -216,17 +157,22 @@ int kvm_page_track_init(struct kvm *kvm)
|
||||
* register the notifier so that event interception for the tracked guest
|
||||
* pages can be received.
|
||||
*/
|
||||
void
|
||||
kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n)
|
||||
int kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
|
||||
if (!kvm || kvm->mm != current->mm)
|
||||
return -ESRCH;
|
||||
|
||||
kvm_get_kvm(kvm);
|
||||
|
||||
head = &kvm->arch.track_notifier_head;
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
|
||||
|
||||
@ -234,9 +180,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
|
||||
* stop receiving the event interception. It is the opposite operation of
|
||||
* kvm_page_track_register_notifier().
|
||||
*/
|
||||
void
|
||||
kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n)
|
||||
void kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
struct kvm_page_track_notifier_node *n)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
|
||||
@ -246,6 +191,8 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
hlist_del_rcu(&n->node);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
synchronize_srcu(&head->track_srcu);
|
||||
|
||||
kvm_put_kvm(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
|
||||
|
||||
@ -256,34 +203,7 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
|
||||
* The node should figure out if the written page is the one that node is
|
||||
* interested in by itself.
|
||||
*/
|
||||
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
|
||||
int bytes)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
struct kvm_page_track_notifier_node *n;
|
||||
int idx;
|
||||
|
||||
head = &vcpu->kvm->arch.track_notifier_head;
|
||||
|
||||
if (hlist_empty(&head->track_notifier_list))
|
||||
return;
|
||||
|
||||
idx = srcu_read_lock(&head->track_srcu);
|
||||
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
|
||||
srcu_read_lock_held(&head->track_srcu))
|
||||
if (n->track_write)
|
||||
n->track_write(vcpu, gpa, new, bytes, n);
|
||||
srcu_read_unlock(&head->track_srcu, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify the node that memory slot is being removed or moved so that it can
|
||||
* drop write-protection for the pages in the memory slot.
|
||||
*
|
||||
* The node should figure out it has any write-protected pages in this slot
|
||||
* by itself.
|
||||
*/
|
||||
void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
struct kvm_page_track_notifier_node *n;
|
||||
@ -296,8 +216,92 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
|
||||
idx = srcu_read_lock(&head->track_srcu);
|
||||
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
|
||||
srcu_read_lock_held(&head->track_srcu))
|
||||
if (n->track_flush_slot)
|
||||
n->track_flush_slot(kvm, slot, n);
|
||||
srcu_read_lock_held(&head->track_srcu))
|
||||
if (n->track_write)
|
||||
n->track_write(gpa, new, bytes, n);
|
||||
srcu_read_unlock(&head->track_srcu, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify external page track nodes that a memory region is being removed from
|
||||
* the VM, e.g. so that users can free any associated metadata.
|
||||
*/
|
||||
void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
{
|
||||
struct kvm_page_track_notifier_head *head;
|
||||
struct kvm_page_track_notifier_node *n;
|
||||
int idx;
|
||||
|
||||
head = &kvm->arch.track_notifier_head;
|
||||
|
||||
if (hlist_empty(&head->track_notifier_list))
|
||||
return;
|
||||
|
||||
idx = srcu_read_lock(&head->track_srcu);
|
||||
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
|
||||
srcu_read_lock_held(&head->track_srcu))
|
||||
if (n->track_remove_region)
|
||||
n->track_remove_region(slot->base_gfn, slot->npages, n);
|
||||
srcu_read_unlock(&head->track_srcu, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* add guest page to the tracking pool so that corresponding access on that
|
||||
* page will be intercepted.
|
||||
*
|
||||
* @kvm: the guest instance we are interested in.
|
||||
* @gfn: the guest page.
|
||||
*/
|
||||
int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
int idx;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
if (!slot) {
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
__kvm_write_track_add_gfn(kvm, slot, gfn);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
|
||||
|
||||
/*
|
||||
* remove the guest page from the tracking pool which stops the interception
|
||||
* of corresponding access on that page.
|
||||
*
|
||||
* @kvm: the guest instance we are interested in.
|
||||
* @gfn: the guest page.
|
||||
*/
|
||||
int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
int idx;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
if (!slot) {
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
__kvm_write_track_remove_gfn(kvm, slot, gfn);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
|
||||
#endif
|
||||
|
58
arch/x86/kvm/mmu/page_track.h
Normal file
58
arch/x86/kvm/mmu/page_track.h
Normal file
@ -0,0 +1,58 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __KVM_X86_PAGE_TRACK_H
|
||||
#define __KVM_X86_PAGE_TRACK_H
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/kvm_page_track.h>
|
||||
|
||||
|
||||
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
|
||||
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
|
||||
int kvm_page_track_create_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned long npages);
|
||||
|
||||
void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn);
|
||||
void __kvm_write_track_remove_gfn(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
|
||||
bool kvm_gfn_is_write_tracked(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
|
||||
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
|
||||
int kvm_page_track_init(struct kvm *kvm);
|
||||
void kvm_page_track_cleanup(struct kvm *kvm);
|
||||
|
||||
void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes);
|
||||
void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
|
||||
|
||||
static inline bool kvm_page_track_has_external_user(struct kvm *kvm)
|
||||
{
|
||||
return !hlist_empty(&kvm->arch.track_notifier_head.track_notifier_list);
|
||||
}
|
||||
#else
|
||||
static inline int kvm_page_track_init(struct kvm *kvm) { return 0; }
|
||||
static inline void kvm_page_track_cleanup(struct kvm *kvm) { }
|
||||
|
||||
static inline void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa,
|
||||
const u8 *new, int bytes) { }
|
||||
static inline void kvm_page_track_delete_slot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot) { }
|
||||
|
||||
static inline bool kvm_page_track_has_external_user(struct kvm *kvm) { return false; }
|
||||
|
||||
#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
|
||||
|
||||
static inline void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
const u8 *new, int bytes)
|
||||
{
|
||||
__kvm_page_track_write(vcpu->kvm, gpa, new, bytes);
|
||||
|
||||
kvm_mmu_track_write(vcpu, gpa, new, bytes);
|
||||
}
|
||||
|
||||
#endif /* __KVM_X86_PAGE_TRACK_H */
|
@ -338,7 +338,6 @@ retry_walk:
|
||||
}
|
||||
#endif
|
||||
walker->max_level = walker->level;
|
||||
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
|
||||
|
||||
/*
|
||||
* FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
|
||||
@ -348,9 +347,21 @@ retry_walk:
|
||||
nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
|
||||
|
||||
pte_access = ~0;
|
||||
|
||||
/*
|
||||
* Queue a page fault for injection if this assertion fails, as callers
|
||||
* assume that walker.fault contains sane info on a walk failure. I.e.
|
||||
* avoid making the situation worse by inducing even worse badness
|
||||
* between when the assertion fails and when KVM kicks the vCPU out to
|
||||
* userspace (because the VM is bugged).
|
||||
*/
|
||||
if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm))
|
||||
goto error;
|
||||
|
||||
++walker->level;
|
||||
|
||||
do {
|
||||
struct kvm_memory_slot *slot;
|
||||
unsigned long host_addr;
|
||||
|
||||
pt_access = pte_access;
|
||||
@ -381,7 +392,11 @@ retry_walk:
|
||||
if (unlikely(real_gpa == INVALID_GPA))
|
||||
return 0;
|
||||
|
||||
host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
|
||||
slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa));
|
||||
if (!kvm_is_visible_memslot(slot))
|
||||
goto error;
|
||||
|
||||
host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa),
|
||||
&walker->pte_writable[walker->level - 1]);
|
||||
if (unlikely(kvm_is_error_hva(host_addr)))
|
||||
goto error;
|
||||
@ -456,9 +471,6 @@ retry_walk:
|
||||
goto retry_walk;
|
||||
}
|
||||
|
||||
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
|
||||
__func__, (u64)pte, walker->pte_access,
|
||||
walker->pt_access[walker->level - 1]);
|
||||
return 1;
|
||||
|
||||
error:
|
||||
@ -529,8 +541,6 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
|
||||
return false;
|
||||
|
||||
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
|
||||
|
||||
gfn = gpte_to_gfn(gpte);
|
||||
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
|
||||
FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
|
||||
@ -638,9 +648,20 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
|
||||
if (FNAME(gpte_changed)(vcpu, gw, top_level))
|
||||
goto out_gpte_changed;
|
||||
|
||||
if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
|
||||
if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
|
||||
goto out_gpte_changed;
|
||||
|
||||
/*
|
||||
* Load a new root and retry the faulting instruction in the extremely
|
||||
* unlikely scenario that the guest root gfn became visible between
|
||||
* loading a dummy root and handling the resulting page fault, e.g. if
|
||||
* userspace creates a memslot in the interim.
|
||||
*/
|
||||
if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
|
||||
kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
|
||||
goto out_gpte_changed;
|
||||
}
|
||||
|
||||
for_each_shadow_entry(vcpu, fault->addr, it) {
|
||||
gfn_t table_gfn;
|
||||
|
||||
@ -758,7 +779,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
struct guest_walker walker;
|
||||
int r;
|
||||
|
||||
pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
|
||||
WARN_ON_ONCE(fault->is_tdp);
|
||||
|
||||
/*
|
||||
@ -773,7 +793,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
* The page is not mapped by the guest. Let the guest handle it.
|
||||
*/
|
||||
if (!r) {
|
||||
pgprintk("%s: guest page fault\n", __func__);
|
||||
if (!fault->prefetch)
|
||||
kvm_inject_emulated_page_fault(vcpu, &walker.fault);
|
||||
|
||||
@ -837,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
|
||||
{
|
||||
int offset = 0;
|
||||
|
||||
WARN_ON(sp->role.level != PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
|
||||
|
||||
if (PTTYPE == 32)
|
||||
offset = sp->role.quadrant << SPTE_LEVEL_BITS;
|
||||
|
@ -61,7 +61,7 @@ static u64 generation_mmio_spte_mask(u64 gen)
|
||||
{
|
||||
u64 mask;
|
||||
|
||||
WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
|
||||
WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK);
|
||||
|
||||
mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
|
||||
mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
|
||||
@ -221,8 +221,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
* shadow pages and unsync'ing pages is not allowed.
|
||||
*/
|
||||
if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
|
||||
pgprintk("%s: found shadow page for %llx, marking ro\n",
|
||||
__func__, gfn);
|
||||
wrprot = true;
|
||||
pte_access &= ~ACC_WRITE_MASK;
|
||||
spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
|
||||
@ -242,7 +240,7 @@ out:
|
||||
|
||||
if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
|
||||
/* Enforced by kvm_mmu_hugepage_adjust. */
|
||||
WARN_ON(level > PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(level > PG_LEVEL_4K);
|
||||
mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#ifndef KVM_X86_MMU_SPTE_H
|
||||
#define KVM_X86_MMU_SPTE_H
|
||||
|
||||
#include "mmu.h"
|
||||
#include "mmu_internal.h"
|
||||
|
||||
/*
|
||||
@ -236,6 +237,18 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
|
||||
return to_shadow_page(__pa(sptep));
|
||||
}
|
||||
|
||||
/*
 * Convert a root HPA to its shadow page. Returns NULL when
 * kvm_mmu_is_dummy_root() reports @root as the dummy root.
 */
static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
{
	/*
	 * The "root" may be a special root, e.g. a PAE entry, treat it as a
	 * SPTE to ensure any non-PA bits are dropped.
	 */
	return kvm_mmu_is_dummy_root(root) ? NULL : spte_to_child_sp(root);
}
|
||||
|
||||
static inline bool is_mmio_spte(u64 spte)
|
||||
{
|
||||
return (spte & shadow_mmio_mask) == shadow_mmio_value &&
|
||||
@ -265,13 +278,13 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
|
||||
|
||||
static inline bool spte_ad_enabled(u64 spte)
|
||||
{
|
||||
MMU_WARN_ON(!is_shadow_present_pte(spte));
|
||||
KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
|
||||
return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED;
|
||||
}
|
||||
|
||||
static inline bool spte_ad_need_write_protect(u64 spte)
|
||||
{
|
||||
MMU_WARN_ON(!is_shadow_present_pte(spte));
|
||||
KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
|
||||
/*
|
||||
* This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0',
|
||||
* and non-TDP SPTEs will never set these bits. Optimize for 64-bit
|
||||
@ -282,13 +295,13 @@ static inline bool spte_ad_need_write_protect(u64 spte)
|
||||
|
||||
static inline u64 spte_shadow_accessed_mask(u64 spte)
|
||||
{
|
||||
MMU_WARN_ON(!is_shadow_present_pte(spte));
|
||||
KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
|
||||
return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
|
||||
}
|
||||
|
||||
static inline u64 spte_shadow_dirty_mask(u64 spte)
|
||||
{
|
||||
MMU_WARN_ON(!is_shadow_present_pte(spte));
|
||||
KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
|
||||
return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
|
||||
}
|
||||
|
||||
|
@ -39,13 +39,14 @@ void tdp_iter_restart(struct tdp_iter *iter)
|
||||
void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
|
||||
int min_level, gfn_t next_last_level_gfn)
|
||||
{
|
||||
int root_level = root->role.level;
|
||||
|
||||
WARN_ON(root_level < 1);
|
||||
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
|
||||
if (WARN_ON_ONCE(!root || (root->role.level < 1) ||
|
||||
(root->role.level > PT64_ROOT_MAX_LEVEL))) {
|
||||
iter->valid = false;
|
||||
return;
|
||||
}
|
||||
|
||||
iter->next_last_level_gfn = next_last_level_gfn;
|
||||
iter->root_level = root_level;
|
||||
iter->root_level = root->role.level;
|
||||
iter->min_level = min_level;
|
||||
iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
|
||||
iter->as_id = kvm_mmu_page_as_id(root);
|
||||
|
@ -475,9 +475,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
bool is_leaf = is_present && is_last_spte(new_spte, level);
|
||||
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
|
||||
|
||||
WARN_ON(level > PT64_ROOT_MAX_LEVEL);
|
||||
WARN_ON(level < PG_LEVEL_4K);
|
||||
WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
|
||||
WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
|
||||
WARN_ON_ONCE(level < PG_LEVEL_4K);
|
||||
WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
|
||||
|
||||
/*
|
||||
* If this warning were to trigger it would indicate that there was a
|
||||
@ -522,9 +522,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
* impact the guest since both the former and current SPTEs
|
||||
* are nonpresent.
|
||||
*/
|
||||
if (WARN_ON(!is_mmio_spte(old_spte) &&
|
||||
!is_mmio_spte(new_spte) &&
|
||||
!is_removed_spte(new_spte)))
|
||||
if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
|
||||
!is_mmio_spte(new_spte) &&
|
||||
!is_removed_spte(new_spte)))
|
||||
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
|
||||
"should not be replaced with another,\n"
|
||||
"different nonpresent SPTE, unless one or both\n"
|
||||
@ -661,7 +661,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
|
||||
* should be used. If operating under the MMU lock in write mode, the
|
||||
* use of the removed SPTE should not be necessary.
|
||||
*/
|
||||
WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
|
||||
WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));
|
||||
|
||||
old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
|
||||
|
||||
@ -689,7 +689,7 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
|
||||
else
|
||||
|
||||
#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \
|
||||
for_each_tdp_pte(_iter, to_shadow_page(_mmu->root.hpa), _start, _end)
|
||||
for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
|
||||
|
||||
/*
|
||||
* Yield if the MMU lock is contended or this thread needs to return control
|
||||
@ -709,7 +709,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
|
||||
struct tdp_iter *iter,
|
||||
bool flush, bool shared)
|
||||
{
|
||||
WARN_ON(iter->yielded);
|
||||
WARN_ON_ONCE(iter->yielded);
|
||||
|
||||
/* Ensure forward progress has been made before yielding. */
|
||||
if (iter->next_last_level_gfn == iter->yielded_gfn)
|
||||
@ -728,7 +728,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
WARN_ON(iter->gfn > iter->next_last_level_gfn);
|
||||
WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
|
||||
|
||||
iter->yielded = true;
|
||||
}
|
||||
@ -1241,7 +1241,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
|
||||
u64 new_spte;
|
||||
|
||||
/* Huge pages aren't expected to be modified without first being zapped. */
|
||||
WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
|
||||
WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
|
||||
|
||||
if (iter->level != PG_LEVEL_4K ||
|
||||
!is_shadow_present_pte(iter->old_spte))
|
||||
@ -1255,9 +1255,9 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
|
||||
*/
|
||||
tdp_mmu_iter_set_spte(kvm, iter, 0);
|
||||
|
||||
if (!pte_write(range->pte)) {
|
||||
if (!pte_write(range->arg.pte)) {
|
||||
new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
|
||||
pte_pfn(range->pte));
|
||||
pte_pfn(range->arg.pte));
|
||||
|
||||
tdp_mmu_iter_set_spte(kvm, iter, new_spte);
|
||||
}
|
||||
@ -1548,8 +1548,8 @@ retry:
|
||||
if (!is_shadow_present_pte(iter.old_spte))
|
||||
continue;
|
||||
|
||||
MMU_WARN_ON(kvm_ad_enabled() &&
|
||||
spte_ad_need_write_protect(iter.old_spte));
|
||||
KVM_MMU_WARN_ON(kvm_ad_enabled() &&
|
||||
spte_ad_need_write_protect(iter.old_spte));
|
||||
|
||||
if (!(iter.old_spte & dbit))
|
||||
continue;
|
||||
@ -1600,6 +1600,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
shadow_dirty_mask;
|
||||
struct tdp_iter iter;
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
|
||||
@ -1607,8 +1609,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
if (!mask)
|
||||
break;
|
||||
|
||||
MMU_WARN_ON(kvm_ad_enabled() &&
|
||||
spte_ad_need_write_protect(iter.old_spte));
|
||||
KVM_MMU_WARN_ON(kvm_ad_enabled() &&
|
||||
spte_ad_need_write_protect(iter.old_spte));
|
||||
|
||||
if (iter.level > PG_LEVEL_4K ||
|
||||
!(mask & (1UL << (iter.gfn - gfn))))
|
||||
@ -1646,7 +1648,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
|
||||
{
|
||||
struct kvm_mmu_page *root;
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
for_each_tdp_mmu_root(kvm, root, slot->as_id)
|
||||
clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
|
||||
}
|
||||
|
@ -382,9 +382,6 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
|
||||
struct kvm_x86_pmu_event_filter *filter;
|
||||
struct kvm *kvm = pmc->vcpu->kvm;
|
||||
|
||||
if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
|
||||
return false;
|
||||
|
||||
filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
|
||||
if (!filter)
|
||||
return true;
|
||||
@ -398,6 +395,7 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
|
||||
static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
|
||||
{
|
||||
return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
|
||||
static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
|
||||
check_pmu_event_filter(pmc);
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,7 @@ enum kvm_only_cpuid_leafs {
|
||||
/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
|
||||
#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
|
||||
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
|
||||
#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
|
||||
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
|
||||
|
||||
/* CPUID level 0x80000007 (EDX). */
|
||||
|
@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
|
||||
int ret = 0;
|
||||
unsigned long flags;
|
||||
struct amd_svm_iommu_ir *ir;
|
||||
u64 entry;
|
||||
|
||||
/**
|
||||
* In some cases, the existing irte is updated and re-set,
|
||||
@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
|
||||
ir->data = pi->ir_data;
|
||||
|
||||
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
||||
|
||||
/*
|
||||
* Update the target pCPU for IOMMU doorbells if the vCPU is running.
|
||||
* If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
|
||||
* will update the pCPU info when the vCPU is awakened and/or scheduled in.
|
||||
* See also avic_vcpu_load().
|
||||
*/
|
||||
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
||||
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
|
||||
amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
|
||||
true, pi->ir_data);
|
||||
|
||||
list_add(&ir->node, &svm->ir_list);
|
||||
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
||||
out:
|
||||
@ -986,10 +999,11 @@ static inline int
|
||||
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned long flags;
|
||||
struct amd_svm_iommu_ir *ir;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
lockdep_assert_held(&svm->ir_list_lock);
|
||||
|
||||
if (!kvm_arch_has_assigned_device(vcpu->kvm))
|
||||
return 0;
|
||||
|
||||
@ -997,19 +1011,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
|
||||
* Here, we go through the per-vcpu ir_list to update all existing
|
||||
* interrupt remapping table entry targeting this vcpu.
|
||||
*/
|
||||
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
||||
|
||||
if (list_empty(&svm->ir_list))
|
||||
goto out;
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(ir, &svm->ir_list, node) {
|
||||
ret = amd_iommu_update_ga(cpu, r, ir->data);
|
||||
if (ret)
|
||||
break;
|
||||
return ret;
|
||||
}
|
||||
out:
|
||||
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
@ -1017,6 +1027,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
u64 entry;
|
||||
int h_physical_id = kvm_cpu_get_apicid(cpu);
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
unsigned long flags;
|
||||
|
||||
lockdep_assert_preemption_disabled();
|
||||
|
||||
@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
if (kvm_vcpu_is_blocking(vcpu))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Grab the per-vCPU interrupt remapping lock even if the VM doesn't
|
||||
* _currently_ have assigned devices, as that can change. Holding
|
||||
* ir_list_lock ensures that either svm_ir_list_add() will consume
|
||||
* up-to-date entry information, or that this task will wait until
|
||||
* svm_ir_list_add() completes to set the new target pCPU.
|
||||
*/
|
||||
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
||||
|
||||
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
||||
WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
|
||||
|
||||
@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
|
||||
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
||||
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
|
||||
|
||||
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
||||
}
|
||||
|
||||
void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 entry;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
unsigned long flags;
|
||||
|
||||
lockdep_assert_preemption_disabled();
|
||||
|
||||
/*
|
||||
* Note, reading the Physical ID entry outside of ir_list_lock is safe
|
||||
* as only the pCPU that has loaded (or is loading) the vCPU is allowed
|
||||
* to modify the entry, and preemption is disabled. I.e. the vCPU
|
||||
* can't be scheduled out and thus avic_vcpu_{put,load}() can't run
|
||||
* recursively.
|
||||
*/
|
||||
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
||||
|
||||
/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
|
||||
if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Take and hold the per-vCPU interrupt remapping lock while updating
|
||||
* the Physical ID entry even though the lock doesn't protect against
|
||||
* multiple writers (see above). Holding ir_list_lock ensures that
|
||||
* either svm_ir_list_add() will consume up-to-date entry information,
|
||||
* or that this task will wait until svm_ir_list_add() completes to
|
||||
* mark the vCPU as not running.
|
||||
*/
|
||||
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
||||
|
||||
avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
|
||||
|
||||
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
||||
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
||||
|
||||
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
||||
|
||||
}
|
||||
|
||||
void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
|
||||
|
@ -107,7 +107,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
|
||||
|
||||
static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
|
||||
{
|
||||
if (!svm->v_vmload_vmsave_enabled)
|
||||
if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
|
||||
return true;
|
||||
|
||||
if (!nested_npt_enabled(svm))
|
||||
@ -552,6 +552,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
|
||||
bool new_vmcb12 = false;
|
||||
struct vmcb *vmcb01 = svm->vmcb01.ptr;
|
||||
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
|
||||
nested_vmcb02_compute_g_pat(svm);
|
||||
|
||||
@ -577,18 +578,18 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
|
||||
vmcb_mark_dirty(vmcb02, VMCB_DT);
|
||||
}
|
||||
|
||||
kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
|
||||
kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
|
||||
|
||||
svm_set_efer(&svm->vcpu, svm->nested.save.efer);
|
||||
svm_set_efer(vcpu, svm->nested.save.efer);
|
||||
|
||||
svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
|
||||
svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
|
||||
svm_set_cr0(vcpu, svm->nested.save.cr0);
|
||||
svm_set_cr4(vcpu, svm->nested.save.cr4);
|
||||
|
||||
svm->vcpu.arch.cr2 = vmcb12->save.cr2;
|
||||
|
||||
kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
|
||||
kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
|
||||
kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
|
||||
kvm_rax_write(vcpu, vmcb12->save.rax);
|
||||
kvm_rsp_write(vcpu, vmcb12->save.rsp);
|
||||
kvm_rip_write(vcpu, vmcb12->save.rip);
|
||||
|
||||
/* In case we don't even reach vcpu_run, the fields are not updated */
|
||||
vmcb02->save.rax = vmcb12->save.rax;
|
||||
@ -602,7 +603,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
|
||||
vmcb_mark_dirty(vmcb02, VMCB_DR);
|
||||
}
|
||||
|
||||
if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
|
||||
if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
|
||||
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
|
||||
/*
|
||||
* Reserved bits of DEBUGCTL are ignored. Be consistent with
|
||||
* svm_set_msr's definition of reserved bits.
|
||||
@ -658,7 +660,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
|
||||
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
|
||||
*/
|
||||
|
||||
if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
|
||||
if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
|
||||
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
|
||||
int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
|
||||
else
|
||||
int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
|
||||
@ -695,10 +698,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
|
||||
|
||||
vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
|
||||
|
||||
if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
|
||||
WARN_ON(!svm->tsc_scaling_enabled);
|
||||
if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
|
||||
svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
|
||||
nested_svm_update_tsc_ratio_msr(vcpu);
|
||||
}
|
||||
|
||||
vmcb02->control.int_ctl =
|
||||
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
|
||||
@ -717,7 +719,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
|
||||
* what a nrips=0 CPU would do (L1 is responsible for advancing RIP
|
||||
* prior to injecting the event).
|
||||
*/
|
||||
if (svm->nrips_enabled)
|
||||
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
|
||||
vmcb02->control.next_rip = svm->nested.ctl.next_rip;
|
||||
else if (boot_cpu_has(X86_FEATURE_NRIPS))
|
||||
vmcb02->control.next_rip = vmcb12_rip;
|
||||
@ -727,7 +729,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
|
||||
svm->soft_int_injected = true;
|
||||
svm->soft_int_csbase = vmcb12_csbase;
|
||||
svm->soft_int_old_rip = vmcb12_rip;
|
||||
if (svm->nrips_enabled)
|
||||
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
|
||||
svm->soft_int_next_rip = svm->nested.ctl.next_rip;
|
||||
else
|
||||
svm->soft_int_next_rip = vmcb12_rip;
|
||||
@ -735,15 +737,21 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
|
||||
|
||||
vmcb02->control.virt_ext = vmcb01->control.virt_ext &
|
||||
LBR_CTL_ENABLE_MASK;
|
||||
if (svm->lbrv_enabled)
|
||||
if (guest_can_use(vcpu, X86_FEATURE_LBRV))
|
||||
vmcb02->control.virt_ext |=
|
||||
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
|
||||
|
||||
if (!nested_vmcb_needs_vls_intercept(svm))
|
||||
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
|
||||
pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
|
||||
pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
|
||||
if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
|
||||
pause_count12 = svm->nested.ctl.pause_filter_count;
|
||||
else
|
||||
pause_count12 = 0;
|
||||
if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
|
||||
pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
|
||||
else
|
||||
pause_thresh12 = 0;
|
||||
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
|
||||
/* use guest values since host doesn't intercept PAUSE */
|
||||
vmcb02->control.pause_filter_count = pause_count12;
|
||||
@ -1027,7 +1035,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
if (vmcb12->control.exit_code != SVM_EXIT_ERR)
|
||||
nested_save_pending_event_to_vmcb12(svm, vmcb12);
|
||||
|
||||
if (svm->nrips_enabled)
|
||||
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
|
||||
vmcb12->control.next_rip = vmcb02->control.next_rip;
|
||||
|
||||
vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
|
||||
@ -1066,7 +1074,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
if (!nested_exit_on_intr(svm))
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
|
||||
if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
|
||||
if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
|
||||
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
|
||||
svm_copy_lbrs(vmcb12, vmcb02);
|
||||
svm_update_lbrv(vcpu);
|
||||
} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
|
||||
@ -1101,10 +1110,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
|
||||
}
|
||||
|
||||
if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
|
||||
WARN_ON(!svm->tsc_scaling_enabled);
|
||||
if (kvm_caps.has_tsc_control &&
|
||||
vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
|
||||
vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
|
||||
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
|
||||
svm_write_tsc_multiplier(vcpu);
|
||||
}
|
||||
|
||||
svm->nested.ctl.nested_cr3 = 0;
|
||||
@ -1537,7 +1546,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.tsc_scaling_ratio =
|
||||
kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
|
||||
svm->tsc_ratio_msr);
|
||||
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
|
||||
svm_write_tsc_multiplier(vcpu);
|
||||
}
|
||||
|
||||
/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <asm/pkru.h>
|
||||
#include <asm/trapnr.h>
|
||||
#include <asm/fpu/xcr.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
#include "mmu.h"
|
||||
#include "x86.h"
|
||||
@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
|
||||
/* enable/disable SEV-ES support */
|
||||
static bool sev_es_enabled = true;
|
||||
module_param_named(sev_es, sev_es_enabled, bool, 0444);
|
||||
|
||||
/* enable/disable SEV-ES DebugSwap support */
|
||||
static bool sev_es_debug_swap_enabled = true;
|
||||
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
|
||||
#else
|
||||
#define sev_enabled false
|
||||
#define sev_es_enabled false
|
||||
#define sev_es_debug_swap_enabled false
|
||||
#endif /* CONFIG_KVM_AMD_SEV */
|
||||
|
||||
static u8 sev_enc_bit;
|
||||
@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
|
||||
save->xss = svm->vcpu.arch.ia32_xss;
|
||||
save->dr6 = svm->vcpu.arch.dr6;
|
||||
|
||||
if (sev_es_debug_swap_enabled)
|
||||
save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
|
||||
|
||||
pr_debug("Virtual Machine Save Area (VMSA):\n");
|
||||
print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
|
||||
|
||||
@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
int ret;
|
||||
|
||||
if (vcpu->guest_debug) {
|
||||
pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Perform some pre-encryption checks against the VMSA */
|
||||
ret = sev_es_sync_vmsa(svm);
|
||||
if (ret)
|
||||
@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
|
||||
* Note, the source is not required to have the same number of
|
||||
* vCPUs as the destination when migrating a vanilla SEV VM.
|
||||
*/
|
||||
src_vcpu = kvm_get_vcpu(dst_kvm, i);
|
||||
src_vcpu = kvm_get_vcpu(src_kvm, i);
|
||||
src_svm = to_svm(src_vcpu);
|
||||
|
||||
/*
|
||||
@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void)
|
||||
bool sev_es_supported = false;
|
||||
bool sev_supported = false;
|
||||
|
||||
if (!sev_enabled || !npt_enabled)
|
||||
if (!sev_enabled || !npt_enabled || !nrips)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
@ -2256,6 +2270,9 @@ out:
|
||||
|
||||
sev_enabled = sev_supported;
|
||||
sev_es_enabled = sev_es_supported;
|
||||
if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
|
||||
!cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
|
||||
sev_es_debug_swap_enabled = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
|
||||
svm->sev_es.ghcb_sa);
|
||||
break;
|
||||
case SVM_VMGEXIT_NMI_COMPLETE:
|
||||
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
|
||||
++vcpu->stat.nmi_window_exits;
|
||||
svm->nmi_masked = false;
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
ret = 1;
|
||||
break;
|
||||
case SVM_VMGEXIT_AP_HLT_LOOP:
|
||||
ret = kvm_emulate_ap_reset_hold(vcpu);
|
||||
@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
|
||||
|
||||
static void sev_es_init_vmcb(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *vmcb = svm->vmcb01.ptr;
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
|
||||
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
|
||||
@ -2952,9 +2973,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
|
||||
/*
|
||||
* An SEV-ES guest requires a VMSA area that is separate from the
|
||||
* VMCB page. Do not include the encryption mask on the VMSA physical
|
||||
* address since hardware will access it using the guest key.
|
||||
* address since hardware will access it using the guest key. Note,
|
||||
* the VMSA will be NULL if this vCPU is the destination for intrahost
|
||||
* migration, and will be copied later.
|
||||
*/
|
||||
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
|
||||
if (svm->sev_es.vmsa)
|
||||
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
|
||||
|
||||
/* Can't intercept CR register access, HV can't modify CR registers */
|
||||
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
|
||||
@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
|
||||
svm_set_intercept(svm, TRAP_CR4_WRITE);
|
||||
svm_set_intercept(svm, TRAP_CR8_WRITE);
|
||||
|
||||
/* No support for enable_vmware_backdoor */
|
||||
clr_exception_intercept(svm, GP_VECTOR);
|
||||
vmcb->control.intercepts[INTERCEPT_DR] = 0;
|
||||
if (!sev_es_debug_swap_enabled) {
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
|
||||
recalc_intercepts(svm);
|
||||
} else {
|
||||
/*
|
||||
* Disable #DB intercept iff DebugSwap is enabled. KVM doesn't
|
||||
* allow debugging SEV-ES guests, and enables DebugSwap iff
|
||||
* NO_NESTED_DATA_BP is supported, so there's no reason to
|
||||
* intercept #DB when DebugSwap is enabled. For simplicity
|
||||
* with respect to guest debug, intercept #DB for other VMs
|
||||
* even if NO_NESTED_DATA_BP is supported, i.e. even if the
|
||||
* guest can't DoS the CPU with infinite #DB vectoring.
|
||||
*/
|
||||
clr_exception_intercept(svm, DB_VECTOR);
|
||||
}
|
||||
|
||||
/* Can't intercept XSETBV, HV can't modify XCR0 directly */
|
||||
svm_clr_intercept(svm, INTERCEPT_XSETBV);
|
||||
@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
|
||||
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
|
||||
clr_exception_intercept(svm, UD_VECTOR);
|
||||
|
||||
/*
|
||||
* Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
|
||||
* KVM can't decrypt guest memory to decode the faulting instruction.
|
||||
*/
|
||||
clr_exception_intercept(svm, GP_VECTOR);
|
||||
|
||||
if (sev_es_guest(svm->vcpu.kvm))
|
||||
sev_es_init_vmcb(svm);
|
||||
}
|
||||
@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
|
||||
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
|
||||
{
|
||||
/*
|
||||
* As an SEV-ES guest, hardware will restore the host state on VMEXIT,
|
||||
* of which one step is to perform a VMLOAD. KVM performs the
|
||||
* corresponding VMSAVE in svm_prepare_guest_switch for both
|
||||
* traditional and SEV-ES guests.
|
||||
* All host state for SEV-ES guests is categorized into three swap types
|
||||
* based on how it is handled by hardware during a world switch:
|
||||
*
|
||||
* A: VMRUN: Host state saved in host save area
|
||||
* VMEXIT: Host state loaded from host save area
|
||||
*
|
||||
* B: VMRUN: Host state _NOT_ saved in host save area
|
||||
* VMEXIT: Host state loaded from host save area
|
||||
*
|
||||
* C: VMRUN: Host state _NOT_ saved in host save area
|
||||
* VMEXIT: Host state initialized to default(reset) values
|
||||
*
|
||||
* Manually save type-B state, i.e. state that is loaded by VMEXIT but
|
||||
* isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
|
||||
* by common SVM code).
|
||||
*/
|
||||
|
||||
/* XCR0 is restored on VMEXIT, save the current host value */
|
||||
hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
|
||||
|
||||
/* PKRU is restored on VMEXIT, save the current host value */
|
||||
hostsa->pkru = read_pkru();
|
||||
|
||||
/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
|
||||
hostsa->xss = host_xss;
|
||||
|
||||
/*
|
||||
* If DebugSwap is enabled, debug registers are loaded but NOT saved by
|
||||
* the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
|
||||
* saves and loads debug registers (Type-A).
|
||||
*/
|
||||
if (sev_es_debug_swap_enabled) {
|
||||
hostsa->dr0 = native_get_debugreg(0);
|
||||
hostsa->dr1 = native_get_debugreg(1);
|
||||
hostsa->dr2 = native_get_debugreg(2);
|
||||
hostsa->dr3 = native_get_debugreg(3);
|
||||
hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
|
||||
hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
|
||||
hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
|
||||
hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
|
||||
}
|
||||
}
|
||||
|
||||
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
|
||||
|
@ -39,10 +39,9 @@
|
||||
#include <asm/spec-ctrl.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/reboot.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
#include <asm/virtext.h>
|
||||
|
||||
#include <trace/events/ipi.h>
|
||||
|
||||
#include "trace.h"
|
||||
@ -203,7 +202,7 @@ static int nested = true;
|
||||
module_param(nested, int, S_IRUGO);
|
||||
|
||||
/* enable/disable Next RIP Save */
|
||||
static int nrips = true;
|
||||
int nrips = true;
|
||||
module_param(nrips, int, 0444);
|
||||
|
||||
/* enable/disable Virtual VMLOAD VMSAVE */
|
||||
@ -365,6 +364,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
|
||||
svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
|
||||
|
||||
}
|
||||
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
|
||||
void *insn, int insn_len);
|
||||
|
||||
static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
|
||||
bool commit_side_effects)
|
||||
@ -385,6 +386,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
if (!svm->next_rip) {
|
||||
/*
|
||||
* FIXME: Drop this when kvm_emulate_instruction() does the
|
||||
* right thing and treats "can't emulate" as outright failure
|
||||
* for EMULTYPE_SKIP.
|
||||
*/
|
||||
if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
|
||||
return 0;
|
||||
|
||||
if (unlikely(!commit_side_effects))
|
||||
old_rflags = svm->vmcb->save.rflags;
|
||||
|
||||
@ -517,14 +526,21 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.osvw.status |= 1;
|
||||
}
|
||||
|
||||
static bool kvm_is_svm_supported(void)
|
||||
static bool __kvm_is_svm_supported(void)
|
||||
{
|
||||
int cpu = raw_smp_processor_id();
|
||||
const char *msg;
|
||||
int cpu = smp_processor_id();
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
|
||||
u64 vm_cr;
|
||||
|
||||
if (!cpu_has_svm(&msg)) {
|
||||
pr_err("SVM not supported by CPU %d, %s\n", cpu, msg);
|
||||
if (c->x86_vendor != X86_VENDOR_AMD &&
|
||||
c->x86_vendor != X86_VENDOR_HYGON) {
|
||||
pr_err("CPU %d isn't AMD or Hygon\n", cpu);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_SVM)) {
|
||||
pr_err("SVM not supported by CPU %d\n", cpu);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -542,25 +558,55 @@ static bool kvm_is_svm_supported(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool kvm_is_svm_supported(void)
|
||||
{
|
||||
bool supported;
|
||||
|
||||
migrate_disable();
|
||||
supported = __kvm_is_svm_supported();
|
||||
migrate_enable();
|
||||
|
||||
return supported;
|
||||
}
|
||||
|
||||
static int svm_check_processor_compat(void)
|
||||
{
|
||||
if (!kvm_is_svm_supported())
|
||||
if (!__kvm_is_svm_supported())
|
||||
return -EIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __svm_write_tsc_multiplier(u64 multiplier)
|
||||
static void __svm_write_tsc_multiplier(u64 multiplier)
|
||||
{
|
||||
preempt_disable();
|
||||
|
||||
if (multiplier == __this_cpu_read(current_tsc_ratio))
|
||||
goto out;
|
||||
return;
|
||||
|
||||
wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
|
||||
__this_cpu_write(current_tsc_ratio, multiplier);
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void kvm_cpu_svm_disable(void)
|
||||
{
|
||||
uint64_t efer;
|
||||
|
||||
wrmsrl(MSR_VM_HSAVE_PA, 0);
|
||||
rdmsrl(MSR_EFER, efer);
|
||||
if (efer & EFER_SVME) {
|
||||
/*
|
||||
* Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
|
||||
* NMI aren't blocked.
|
||||
*/
|
||||
stgi();
|
||||
wrmsrl(MSR_EFER, efer & ~EFER_SVME);
|
||||
}
|
||||
}
|
||||
|
||||
static void svm_emergency_disable(void)
|
||||
{
|
||||
kvm_rebooting = true;
|
||||
|
||||
kvm_cpu_svm_disable();
|
||||
}
|
||||
|
||||
static void svm_hardware_disable(void)
|
||||
@ -569,7 +615,7 @@ static void svm_hardware_disable(void)
|
||||
if (tsc_scaling)
|
||||
__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
|
||||
|
||||
cpu_svm_disable();
|
||||
kvm_cpu_svm_disable();
|
||||
|
||||
amd_pmu_disable_virt();
|
||||
}
|
||||
@ -677,6 +723,39 @@ free_save_area:
|
||||
|
||||
}
|
||||
|
||||
static void set_dr_intercepts(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *vmcb = svm->vmcb01.ptr;
|
||||
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
|
||||
|
||||
recalc_intercepts(svm);
|
||||
}
|
||||
|
||||
static void clr_dr_intercepts(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *vmcb = svm->vmcb01.ptr;
|
||||
|
||||
vmcb->control.intercepts[INTERCEPT_DR] = 0;
|
||||
|
||||
recalc_intercepts(svm);
|
||||
}
|
||||
|
||||
static int direct_access_msr_slot(u32 msr)
|
||||
{
|
||||
u32 i;
|
||||
@ -947,50 +1026,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
|
||||
svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
|
||||
}
|
||||
|
||||
static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
|
||||
static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
|
||||
{
|
||||
/*
|
||||
* If the LBR virtualization is disabled, the LBR msrs are always
|
||||
* kept in the vmcb01 to avoid copying them on nested guest entries.
|
||||
*
|
||||
* If nested, and the LBR virtualization is enabled/disabled, the msrs
|
||||
* are moved between the vmcb01 and vmcb02 as needed.
|
||||
* If LBR virtualization is disabled, the LBR MSRs are always kept in
|
||||
* vmcb01. If LBR virtualization is enabled and L1 is running VMs of
|
||||
* its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
|
||||
*/
|
||||
struct vmcb *vmcb =
|
||||
(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
|
||||
svm->vmcb : svm->vmcb01.ptr;
|
||||
|
||||
switch (index) {
|
||||
case MSR_IA32_DEBUGCTLMSR:
|
||||
return vmcb->save.dbgctl;
|
||||
case MSR_IA32_LASTBRANCHFROMIP:
|
||||
return vmcb->save.br_from;
|
||||
case MSR_IA32_LASTBRANCHTOIP:
|
||||
return vmcb->save.br_to;
|
||||
case MSR_IA32_LASTINTFROMIP:
|
||||
return vmcb->save.last_excp_from;
|
||||
case MSR_IA32_LASTINTTOIP:
|
||||
return vmcb->save.last_excp_to;
|
||||
default:
|
||||
KVM_BUG(false, svm->vcpu.kvm,
|
||||
"%s: Unknown MSR 0x%x", __func__, index);
|
||||
return 0;
|
||||
}
|
||||
return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
|
||||
svm->vmcb01.ptr;
|
||||
}
|
||||
|
||||
void svm_update_lbrv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
|
||||
DEBUGCTLMSR_LBR;
|
||||
|
||||
bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
|
||||
LBR_CTL_ENABLE_MASK);
|
||||
|
||||
if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
|
||||
if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
|
||||
enable_lbrv = true;
|
||||
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
|
||||
bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
|
||||
(is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
|
||||
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
|
||||
|
||||
if (enable_lbrv == current_enable_lbrv)
|
||||
return;
|
||||
@ -1101,21 +1154,23 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
|
||||
return svm->tsc_ratio_msr;
|
||||
}
|
||||
|
||||
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
|
||||
svm->vmcb->control.tsc_offset = offset;
|
||||
svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
|
||||
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
|
||||
}
|
||||
|
||||
static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
|
||||
void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__svm_write_tsc_multiplier(multiplier);
|
||||
preempt_disable();
|
||||
if (to_svm(vcpu)->guest_state_loaded)
|
||||
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
|
||||
/* Evaluate instruction intercepts that depend on guest CPUID features. */
|
||||
static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
|
||||
struct vcpu_svm *svm)
|
||||
@ -1156,8 +1211,6 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
|
||||
|
||||
svm->v_vmload_vmsave_enabled = false;
|
||||
} else {
|
||||
/*
|
||||
* If hardware supports Virtual VMLOAD VMSAVE then enable it
|
||||
@ -1201,10 +1254,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
|
||||
* Guest access to VMware backdoor ports could legitimately
|
||||
* trigger #GP because of TSS I/O permission bitmap.
|
||||
* We intercept those #GP and allow access to them anyway
|
||||
* as VMware does. Don't intercept #GP for SEV guests as KVM can't
|
||||
* decrypt guest memory to decode the faulting instruction.
|
||||
* as VMware does.
|
||||
*/
|
||||
if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
|
||||
if (enable_vmware_backdoor)
|
||||
set_exception_intercept(svm, GP_VECTOR);
|
||||
|
||||
svm_set_intercept(svm, INTERCEPT_INTR);
|
||||
@ -1949,7 +2001,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (vcpu->arch.guest_state_protected)
|
||||
if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
|
||||
return;
|
||||
|
||||
get_debugreg(vcpu->arch.db[0], 0);
|
||||
@ -2510,12 +2562,13 @@ static int iret_interception(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
|
||||
|
||||
++vcpu->stat.nmi_window_exits;
|
||||
svm->awaiting_iret_completion = true;
|
||||
|
||||
svm_clr_iret_intercept(svm);
|
||||
if (!sev_es_guest(vcpu->kvm))
|
||||
svm->nmi_iret_rip = kvm_rip_read(vcpu);
|
||||
svm->nmi_iret_rip = kvm_rip_read(vcpu);
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
return 1;
|
||||
@ -2680,6 +2733,13 @@ static int dr_interception(struct kvm_vcpu *vcpu)
|
||||
unsigned long val;
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
|
||||
* for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
|
||||
*/
|
||||
if (sev_es_guest(vcpu->kvm))
|
||||
return 1;
|
||||
|
||||
if (vcpu->guest_debug == 0) {
|
||||
/*
|
||||
* No more DR vmexits; force a reload of the debug registers
|
||||
@ -2764,7 +2824,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
|
||||
switch (msr_info->index) {
|
||||
case MSR_AMD64_TSC_RATIO:
|
||||
if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
|
||||
if (!msr_info->host_initiated &&
|
||||
!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
|
||||
return 1;
|
||||
msr_info->data = svm->tsc_ratio_msr;
|
||||
break;
|
||||
@ -2802,11 +2863,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
msr_info->data = svm->tsc_aux;
|
||||
break;
|
||||
case MSR_IA32_DEBUGCTLMSR:
|
||||
msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
|
||||
break;
|
||||
case MSR_IA32_LASTBRANCHFROMIP:
|
||||
msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
|
||||
break;
|
||||
case MSR_IA32_LASTBRANCHTOIP:
|
||||
msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
|
||||
break;
|
||||
case MSR_IA32_LASTINTFROMIP:
|
||||
msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
|
||||
break;
|
||||
case MSR_IA32_LASTINTTOIP:
|
||||
msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
|
||||
msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
|
||||
break;
|
||||
case MSR_VM_HSAVE_PA:
|
||||
msr_info->data = svm->nested.hsave_msr;
|
||||
@ -2906,7 +2975,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
switch (ecx) {
|
||||
case MSR_AMD64_TSC_RATIO:
|
||||
|
||||
if (!svm->tsc_scaling_enabled) {
|
||||
if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
|
||||
|
||||
if (!msr->host_initiated)
|
||||
return 1;
|
||||
@ -2928,7 +2997,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
|
||||
svm->tsc_ratio_msr = data;
|
||||
|
||||
if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
|
||||
if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
|
||||
is_guest_mode(vcpu))
|
||||
nested_svm_update_tsc_ratio_msr(vcpu);
|
||||
|
||||
break;
|
||||
@ -3037,13 +3107,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
if (data & DEBUGCTL_RESERVED_BITS)
|
||||
return 1;
|
||||
|
||||
if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
|
||||
svm->vmcb->save.dbgctl = data;
|
||||
else
|
||||
svm->vmcb01.ptr->save.dbgctl = data;
|
||||
|
||||
svm_get_lbr_vmcb(svm)->save.dbgctl = data;
|
||||
svm_update_lbrv(vcpu);
|
||||
|
||||
break;
|
||||
case MSR_VM_HSAVE_PA:
|
||||
/*
|
||||
@ -3769,6 +3834,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
|
||||
if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
|
||||
return; /* IRET will cause a vm exit */
|
||||
|
||||
/*
|
||||
* SEV-ES guests are responsible for signaling when a vCPU is ready to
|
||||
* receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
|
||||
* KVM can't intercept and single-step IRET to detect when NMIs are
|
||||
* unblocked (architecturally speaking). See SVM_VMGEXIT_NMI_COMPLETE.
|
||||
*
|
||||
* Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
|
||||
* ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
|
||||
* supported NAEs in the GHCB protocol.
|
||||
*/
|
||||
if (sev_es_guest(vcpu->kvm))
|
||||
return;
|
||||
|
||||
if (!gif_set(svm)) {
|
||||
if (vgif)
|
||||
svm_set_intercept(svm, INTERCEPT_STGI);
|
||||
@ -3918,12 +3996,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
|
||||
svm->soft_int_injected = false;
|
||||
|
||||
/*
|
||||
* If we've made progress since setting HF_IRET_MASK, we've
|
||||
* If we've made progress since setting awaiting_iret_completion, we've
|
||||
* executed an IRET and can allow NMI injection.
|
||||
*/
|
||||
if (svm->awaiting_iret_completion &&
|
||||
(sev_es_guest(vcpu->kvm) ||
|
||||
kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
|
||||
kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
|
||||
svm->awaiting_iret_completion = false;
|
||||
svm->nmi_masked = false;
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
@ -4209,28 +4286,37 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
|
||||
boot_cpu_has(X86_FEATURE_XSAVE) &&
|
||||
boot_cpu_has(X86_FEATURE_XSAVES);
|
||||
/*
|
||||
* SVM doesn't provide a way to disable just XSAVES in the guest, KVM
|
||||
* can only disable all variants of XSAVE by disallowing CR4.OSXSAVE from
|
||||
* being set. As a result, if the host has XSAVE and XSAVES, and the
|
||||
* guest has XSAVE enabled, the guest can execute XSAVES without
|
||||
* faulting. Treat XSAVES as enabled in this case regardless of
|
||||
* whether it's advertised to the guest so that KVM context switches
|
||||
* XSS on VM-Enter/VM-Exit. Failure to do so would effectively give
|
||||
* the guest read/write access to the host's XSS.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVE) &&
|
||||
boot_cpu_has(X86_FEATURE_XSAVES) &&
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
|
||||
kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
|
||||
|
||||
/* Update nrips enabled cache */
|
||||
svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
|
||||
|
||||
svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
|
||||
svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
|
||||
/*
|
||||
* Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
|
||||
* VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
|
||||
* SVM on Intel is bonkers and extremely unlikely to work).
|
||||
*/
|
||||
if (!guest_cpuid_is_intel(vcpu))
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
|
||||
|
||||
svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
|
||||
|
||||
svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
|
||||
|
||||
svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
|
||||
|
||||
svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
|
||||
|
||||
svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
|
||||
|
||||
svm_recalc_instruction_intercepts(vcpu, svm);
|
||||
|
||||
@ -4651,16 +4737,25 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
|
||||
* and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
|
||||
* decode garbage.
|
||||
*
|
||||
* Inject #UD if KVM reached this point without an instruction buffer.
|
||||
* In practice, this path should never be hit by a well-behaved guest,
|
||||
* e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
|
||||
* is still theoretically reachable, e.g. via unaccelerated fault-like
|
||||
* AVIC access, and needs to be handled by KVM to avoid putting the
|
||||
* guest into an infinite loop. Injecting #UD is somewhat arbitrary,
|
||||
* but it's the least awful option given lack of insight into the guest.
|
||||
* If KVM is NOT trying to simply skip an instruction, inject #UD if
|
||||
* KVM reached this point without an instruction buffer. In practice,
|
||||
* this path should never be hit by a well-behaved guest, e.g. KVM
|
||||
* doesn't intercept #UD or #GP for SEV guests, but this path is still
|
||||
* theoretically reachable, e.g. via unaccelerated fault-like AVIC
|
||||
* access, and needs to be handled by KVM to avoid putting the guest
|
||||
* into an infinite loop. Injecting #UD is somewhat arbitrary, but
|
||||
* it's the least awful option given lack of insight into the guest.
|
||||
*
|
||||
* If KVM is trying to skip an instruction, simply resume the guest.
|
||||
* If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
|
||||
* will attempt to re-inject the INT3/INTO and skip the instruction.
|
||||
* In that scenario, retrying the INT3/INTO and hoping the guest will
|
||||
* make forward progress is the only option that has a chance of
|
||||
* success (and in practice it will work the vast majority of the time).
|
||||
*/
|
||||
if (unlikely(!insn)) {
|
||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||
if (!(emul_type & EMULTYPE_SKIP))
|
||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -5112,9 +5207,11 @@ static __init int svm_hardware_setup(void)
|
||||
|
||||
svm_adjust_mmio_mask();
|
||||
|
||||
nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
|
||||
|
||||
/*
|
||||
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
|
||||
* may be modified by svm_adjust_mmio_mask()).
|
||||
* may be modified by svm_adjust_mmio_mask()), as well as nrips.
|
||||
*/
|
||||
sev_hardware_setup();
|
||||
|
||||
@ -5126,11 +5223,6 @@ static __init int svm_hardware_setup(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (nrips) {
|
||||
if (!boot_cpu_has(X86_FEATURE_NRIPS))
|
||||
nrips = false;
|
||||
}
|
||||
|
||||
enable_apicv = avic = avic && avic_hardware_setup();
|
||||
|
||||
if (!enable_apicv) {
|
||||
@ -5213,6 +5305,13 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
|
||||
.pmu_ops = &amd_pmu_ops,
|
||||
};
|
||||
|
||||
static void __svm_exit(void)
|
||||
{
|
||||
kvm_x86_vendor_exit();
|
||||
|
||||
cpu_emergency_unregister_virt_callback(svm_emergency_disable);
|
||||
}
|
||||
|
||||
static int __init svm_init(void)
|
||||
{
|
||||
int r;
|
||||
@ -5226,6 +5325,8 @@ static int __init svm_init(void)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
cpu_emergency_register_virt_callback(svm_emergency_disable);
|
||||
|
||||
/*
|
||||
* Common KVM initialization _must_ come last, after this, /dev/kvm is
|
||||
* exposed to userspace!
|
||||
@ -5238,14 +5339,14 @@ static int __init svm_init(void)
|
||||
return 0;
|
||||
|
||||
err_kvm_init:
|
||||
kvm_x86_vendor_exit();
|
||||
__svm_exit();
|
||||
return r;
|
||||
}
|
||||
|
||||
static void __exit svm_exit(void)
|
||||
{
|
||||
kvm_exit();
|
||||
kvm_x86_vendor_exit();
|
||||
__svm_exit();
|
||||
}
|
||||
|
||||
module_init(svm_init)
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <asm/svm.h>
|
||||
#include <asm/sev-common.h>
|
||||
|
||||
#include "cpuid.h"
|
||||
#include "kvm_cache_regs.h"
|
||||
|
||||
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
|
||||
@ -33,6 +34,7 @@
|
||||
#define MSRPM_OFFSETS 32
|
||||
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
|
||||
extern bool npt_enabled;
|
||||
extern int nrips;
|
||||
extern int vgif;
|
||||
extern bool intercept_smi;
|
||||
extern bool x2avic_enabled;
|
||||
@ -260,16 +262,6 @@ struct vcpu_svm {
|
||||
unsigned long soft_int_next_rip;
|
||||
bool soft_int_injected;
|
||||
|
||||
/* optional nested SVM features that are enabled for this guest */
|
||||
bool nrips_enabled : 1;
|
||||
bool tsc_scaling_enabled : 1;
|
||||
bool v_vmload_vmsave_enabled : 1;
|
||||
bool lbrv_enabled : 1;
|
||||
bool pause_filter_enabled : 1;
|
||||
bool pause_threshold_enabled : 1;
|
||||
bool vgif_enabled : 1;
|
||||
bool vnmi_enabled : 1;
|
||||
|
||||
u32 ldr_reg;
|
||||
u32 dfr_reg;
|
||||
struct page *avic_backing_page;
|
||||
@ -406,48 +398,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
|
||||
return test_bit(bit, (unsigned long *)&control->intercepts);
|
||||
}
|
||||
|
||||
/*
 * Set the debug-register intercept bits in the control area of vmcb01
 * (svm->vmcb01.ptr) and then call recalc_intercepts().
 *
 * DR0-DR6 read and write intercepts are set only when the VM is not an
 * SEV-ES guest (per sev_es_guest()); DR7 read and write intercepts are set
 * unconditionally.
 */
static inline void set_dr_intercepts(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *vmcb = svm->vmcb01.ptr;
|
||||
|
||||
if (!sev_es_guest(svm->vcpu.kvm)) {
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
|
||||
}
|
||||
|
||||
/* DR7 accesses are intercepted for every guest type. */
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
|
||||
|
||||
recalc_intercepts(svm);
|
||||
}
|
||||
|
||||
/*
 * Clear every debug-register intercept in vmcb01 by zeroing
 * control.intercepts[INTERCEPT_DR], re-set the DR7 read/write intercepts
 * for SEV-ES guests, and then call recalc_intercepts().
 */
static inline void clr_dr_intercepts(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *vmcb = svm->vmcb01.ptr;
|
||||
|
||||
/* Drops all DR intercept bits at once, including DR7. */
vmcb->control.intercepts[INTERCEPT_DR] = 0;
|
||||
|
||||
/* DR7 access must remain intercepted for an SEV-ES guest */
|
||||
if (sev_es_guest(svm->vcpu.kvm)) {
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
|
||||
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
|
||||
}
|
||||
|
||||
recalc_intercepts(svm);
|
||||
}
|
||||
|
||||
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
|
||||
{
|
||||
struct vmcb *vmcb = svm->vmcb01.ptr;
|
||||
@ -493,7 +443,8 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
|
||||
|
||||
static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
|
||||
{
|
||||
return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
|
||||
return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
|
||||
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
|
||||
}
|
||||
|
||||
static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
|
||||
@ -544,7 +495,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
|
||||
|
||||
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
|
||||
{
|
||||
return svm->vnmi_enabled &&
|
||||
return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
|
||||
(svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
|
||||
}
|
||||
|
||||
@ -660,7 +611,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
|
||||
bool has_error_code, u32 error_code);
|
||||
int nested_svm_exit_special(struct vcpu_svm *svm);
|
||||
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
|
||||
void __svm_write_tsc_multiplier(u64 multiplier);
|
||||
void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
|
||||
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
|
||||
struct vmcb_control_area *control);
|
||||
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
|
||||
|
@ -252,7 +252,7 @@ static inline bool cpu_has_vmx_pml(void)
|
||||
static inline bool cpu_has_vmx_xsaves(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
SECONDARY_EXEC_XSAVES;
|
||||
SECONDARY_EXEC_ENABLE_XSAVES;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_waitpkg(void)
|
||||
|
@ -78,7 +78,7 @@
|
||||
SECONDARY_EXEC_DESC | \
|
||||
SECONDARY_EXEC_ENABLE_RDTSCP | \
|
||||
SECONDARY_EXEC_ENABLE_INVPCID | \
|
||||
SECONDARY_EXEC_XSAVES | \
|
||||
SECONDARY_EXEC_ENABLE_XSAVES | \
|
||||
SECONDARY_EXEC_RDSEED_EXITING | \
|
||||
SECONDARY_EXEC_RDRAND_EXITING | \
|
||||
SECONDARY_EXEC_TSC_SCALING | \
|
||||
|
@ -2307,7 +2307,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
|
||||
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
||||
SECONDARY_EXEC_ENABLE_INVPCID |
|
||||
SECONDARY_EXEC_ENABLE_RDTSCP |
|
||||
SECONDARY_EXEC_XSAVES |
|
||||
SECONDARY_EXEC_ENABLE_XSAVES |
|
||||
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
@ -6331,7 +6331,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
|
||||
* If it were, XSS would have to be checked against
|
||||
* the XSS exit bitmap in vmcs12.
|
||||
*/
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
|
||||
case EXIT_REASON_UMWAIT:
|
||||
case EXIT_REASON_TPAUSE:
|
||||
return nested_cpu_has2(vmcs12,
|
||||
@ -6426,7 +6426,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
|
||||
vmx = to_vmx(vcpu);
|
||||
vmcs12 = get_vmcs12(vcpu);
|
||||
|
||||
if (nested_vmx_allowed(vcpu) &&
|
||||
if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
|
||||
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
|
||||
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
|
||||
kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
|
||||
@ -6567,7 +6567,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
|
||||
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (!nested_vmx_allowed(vcpu))
|
||||
if (!guest_can_use(vcpu, X86_FEATURE_VMX))
|
||||
return -EINVAL;
|
||||
|
||||
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
|
||||
@ -6601,7 +6601,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
|
||||
if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
|
||||
(!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
|
||||
(!guest_can_use(vcpu, X86_FEATURE_VMX) ||
|
||||
!vmx->nested.enlightened_vmcs_enabled))
|
||||
return -EINVAL;
|
||||
|
||||
vmx_leave_nested(vcpu);
|
||||
@ -6874,7 +6875,7 @@ static void nested_vmx_setup_secondary_ctls(u32 ept_caps,
|
||||
SECONDARY_EXEC_ENABLE_INVPCID |
|
||||
SECONDARY_EXEC_ENABLE_VMFUNC |
|
||||
SECONDARY_EXEC_RDSEED_EXITING |
|
||||
SECONDARY_EXEC_XSAVES |
|
||||
SECONDARY_EXEC_ENABLE_XSAVES |
|
||||
SECONDARY_EXEC_TSC_SCALING |
|
||||
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
|
||||
|
||||
|
@ -168,7 +168,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
|
||||
|
||||
static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user