2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-15 08:44:14 +08:00
x86:
 
 - missing TLB flush
 
 - nested virtualization fixes for SMM (secure boot on nested hypervisor)
   and other nested SVM fixes
 
 - syscall fuzzing fixes
 
 - live migration fix for AMD SEV
 
 - mirror VMs now work for SEV-ES too
 
 - fixes for reset
 
 - possible out-of-bounds access in IOAPIC emulation
 
 - fix enlightened VMCS on Windows 2022
 
 ARM:
 
 - Add missing FORCE target when building the EL2 object
 
 - Fix a PMU probe regression on some platforms
 
 Generic:
 
 - KCSAN fixes
 
 selftests:
 
 - random fixes, mostly for clang compilation
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmFN0EwUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroNqaQf/Vx7ePFTqwWpo+8wKapnc6JN9SLjC
 hM4jipxfc1WyQWcfCt8ZuPhCnhF7o8mG/mrqTm+JB+oGqIsydHW19DiUT8ekv09F
 dQ+XYSiR4B547wUH5XLQc4xG9imwYlXGEOHqrE7eJvGH3LOqVFX2fLRBnFefZbO8
 GKhRJrGXwG3/JSAP6A0c22iVU+pLbfV9gpKwrAj0V7o8nzT2b3Wmh74WBNb47BzE
 a4+AwKpWO4rqJGOwdYwy67pdFHh1YmrlZ59cFZc7fzlXE+o0D0bitaJyioZALpOl
 4mRGdzoYkNB++ZjDzVFnAClCYQV/oNxCNGFaFF2mh/gzXG1TLmN7B8zGDg==
 =7oVh
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "A bit late... I got sidetracked by back-from-vacation routines and
  conferences. But most of these patches are already a few weeks old and
  things look more calm on the mailing list than what this pull request
  would suggest.

  x86:

   - missing TLB flush

   - nested virtualization fixes for SMM (secure boot on nested
     hypervisor) and other nested SVM fixes

   - syscall fuzzing fixes

   - live migration fix for AMD SEV

   - mirror VMs now work for SEV-ES too

   - fixes for reset

   - possible out-of-bounds access in IOAPIC emulation

   - fix enlightened VMCS on Windows 2022

  ARM:

   - Add missing FORCE target when building the EL2 object

   - Fix a PMU probe regression on some platforms

  Generic:

   - KCSAN fixes

  selftests:

   - random fixes, mostly for clang compilation"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (43 commits)
  selftests: KVM: Explicitly use movq to read xmm registers
  selftests: KVM: Call ucall_init when setting up in rseq_test
  KVM: Remove tlbs_dirty
  KVM: X86: Synchronize the shadow pagetable before link it
  KVM: X86: Fix missed remote tlb flush in rmap_write_protect()
  KVM: x86: nSVM: don't copy virt_ext from vmcb12
  KVM: x86: nSVM: test eax for 4K alignment for GP errata workaround
  KVM: x86: selftests: test simultaneous uses of V_IRQ from L1 and L0
  KVM: x86: nSVM: restore int_vector in svm_clear_vintr
  kvm: x86: Add AMD PMU MSRs to msrs_to_save_all[]
  KVM: x86: nVMX: re-evaluate emulation_required on nested VM exit
  KVM: x86: nVMX: don't fail nested VM entry on invalid guest state if !from_vmentry
  KVM: x86: VMX: synthesize invalid VM exit when emulating invalid guest state
  KVM: x86: nSVM: refactor svm_leave_smm and smm_enter_smm
  KVM: x86: SVM: call KVM_REQ_GET_NESTED_STATE_PAGES on exit from SMM mode
  KVM: x86: reset pdptrs_from_userspace when exiting smm
  KVM: x86: nSVM: restore the L1 host state prior to resuming nested guest on SMM exit
  KVM: nVMX: Filter out all unsupported controls when eVMCS was activated
  KVM: KVM: Use cpumask_available() to check for NULL cpumask when kicking vCPUs
  KVM: Clean up benign vcpu->cpu data races when kicking vCPUs
  ...
This commit is contained in:
Linus Torvalds 2021-09-27 13:58:23 -07:00
commit 9cccec2bf3
40 changed files with 556 additions and 269 deletions

View File

@ -54,7 +54,7 @@ $(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
# runtime. Because the hypervisor is part of the kernel binary, relocations # runtime. Because the hypervisor is part of the kernel binary, relocations
# produce a kernel VA. We enumerate relocations targeting hyp at build time # produce a kernel VA. We enumerate relocations targeting hyp at build time
# and convert the kernel VAs at those positions to hyp VAs. # and convert the kernel VAs at those positions to hyp VAs.
$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel $(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel FORCE
$(call if_changed,hyprel) $(call if_changed,hyprel)
# 5) Compile hyp-reloc.S and link it into the existing partially linked object. # 5) Compile hyp-reloc.S and link it into the existing partially linked object.

View File

@ -50,9 +50,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
int kvm_perf_init(void) int kvm_perf_init(void)
{ {
if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
static_branch_enable(&kvm_arm_pmu_available);
return perf_register_guest_info_callbacks(&kvm_guest_cbs); return perf_register_guest_info_callbacks(&kvm_guest_cbs);
} }

View File

@ -740,7 +740,14 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
kvm_pmu_create_perf_event(vcpu, select_idx); kvm_pmu_create_perf_event(vcpu, select_idx);
} }
int kvm_pmu_probe_pmuver(void) void kvm_host_pmu_init(struct arm_pmu *pmu)
{
if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
!kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
static_branch_enable(&kvm_arm_pmu_available);
}
static int kvm_pmu_probe_pmuver(void)
{ {
struct perf_event_attr attr = { }; struct perf_event_attr attr = { };
struct perf_event *event; struct perf_event *event;

View File

@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
static void __set_cpu_idle(struct kvm_vcpu *vcpu) static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{ {
kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
} }
static void __unset_cpu_idle(struct kvm_vcpu *vcpu) static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{ {
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
} }
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)

View File

@ -4066,7 +4066,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
kvm_s390_patch_guest_per_regs(vcpu); kvm_s390_patch_guest_per_regs(vcpu);
} }
clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask); clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
vcpu->arch.sie_block->icptcode = 0; vcpu->arch.sie_block->icptcode = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);

View File

@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
{ {
return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); return test_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
} }
static inline int kvm_is_ucontrol(struct kvm *kvm) static inline int kvm_is_ucontrol(struct kvm *kvm)

View File

@ -46,7 +46,7 @@ struct kvm_page_track_notifier_node {
struct kvm_page_track_notifier_node *node); struct kvm_page_track_notifier_node *node);
}; };
void kvm_page_track_init(struct kvm *kvm); int kvm_page_track_init(struct kvm *kvm);
void kvm_page_track_cleanup(struct kvm *kvm); void kvm_page_track_cleanup(struct kvm *kvm);
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);

View File

@ -4206,7 +4206,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
u64 cr4 = ctxt->ops->get_cr(ctxt, 4); u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
return emulate_ud(ctxt); return emulate_gp(ctxt, 0);
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
} }

View File

@ -939,7 +939,7 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
stimer_init(&hv_vcpu->stimer[i], i); stimer_init(&hv_vcpu->stimer[i], i);
hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu); hv_vcpu->vp_index = vcpu->vcpu_idx;
return 0; return 0;
} }
@ -1444,7 +1444,6 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
switch (msr) { switch (msr) {
case HV_X64_MSR_VP_INDEX: { case HV_X64_MSR_VP_INDEX: {
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm); struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
int vcpu_idx = kvm_vcpu_get_idx(vcpu);
u32 new_vp_index = (u32)data; u32 new_vp_index = (u32)data;
if (!host || new_vp_index >= KVM_MAX_VCPUS) if (!host || new_vp_index >= KVM_MAX_VCPUS)
@ -1459,9 +1458,9 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
* VP index is changing, adjust num_mismatched_vp_indexes if * VP index is changing, adjust num_mismatched_vp_indexes if
* it now matches or no longer matches vcpu_idx. * it now matches or no longer matches vcpu_idx.
*/ */
if (hv_vcpu->vp_index == vcpu_idx) if (hv_vcpu->vp_index == vcpu->vcpu_idx)
atomic_inc(&hv->num_mismatched_vp_indexes); atomic_inc(&hv->num_mismatched_vp_indexes);
else if (new_vp_index == vcpu_idx) else if (new_vp_index == vcpu->vcpu_idx)
atomic_dec(&hv->num_mismatched_vp_indexes); atomic_dec(&hv->num_mismatched_vp_indexes);
hv_vcpu->vp_index = new_vp_index; hv_vcpu->vp_index = new_vp_index;

View File

@ -83,7 +83,7 @@ static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
{ {
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
return hv_vcpu ? hv_vcpu->vp_index : kvm_vcpu_get_idx(vcpu); return hv_vcpu ? hv_vcpu->vp_index : vcpu->vcpu_idx;
} }
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);

View File

@ -319,8 +319,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
unsigned index; unsigned index;
bool mask_before, mask_after; bool mask_before, mask_after;
union kvm_ioapic_redirect_entry *e; union kvm_ioapic_redirect_entry *e;
unsigned long vcpu_bitmap;
int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode; int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
switch (ioapic->ioregsel) { switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION: case IOAPIC_REG_VERSION:
@ -384,9 +384,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
irq.shorthand = APIC_DEST_NOSHORT; irq.shorthand = APIC_DEST_NOSHORT;
irq.dest_id = e->fields.dest_id; irq.dest_id = e->fields.dest_id;
irq.msi_redir_hint = false; irq.msi_redir_hint = false;
bitmap_zero(&vcpu_bitmap, 16); bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
&vcpu_bitmap); vcpu_bitmap);
if (old_dest_mode != e->fields.dest_mode || if (old_dest_mode != e->fields.dest_mode ||
old_dest_id != e->fields.dest_id) { old_dest_id != e->fields.dest_id) {
/* /*
@ -399,10 +399,10 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
kvm_lapic_irq_dest_mode( kvm_lapic_irq_dest_mode(
!!e->fields.dest_mode); !!e->fields.dest_mode);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
&vcpu_bitmap); vcpu_bitmap);
} }
kvm_make_scan_ioapic_request_mask(ioapic->kvm, kvm_make_scan_ioapic_request_mask(ioapic->kvm,
&vcpu_bitmap); vcpu_bitmap);
} else { } else {
kvm_make_scan_ioapic_request(ioapic->kvm); kvm_make_scan_ioapic_request(ioapic->kvm);
} }

View File

@ -2027,8 +2027,8 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
} while (!sp->unsync_children); } while (!sp->unsync_children);
} }
static void mmu_sync_children(struct kvm_vcpu *vcpu, static int mmu_sync_children(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *parent) struct kvm_mmu_page *parent, bool can_yield)
{ {
int i; int i;
struct kvm_mmu_page *sp; struct kvm_mmu_page *sp;
@ -2055,12 +2055,18 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
} }
if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) { if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
if (!can_yield) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
return -EINTR;
}
cond_resched_rwlock_write(&vcpu->kvm->mmu_lock); cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
flush = false; flush = false;
} }
} }
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
return 0;
} }
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
@ -2146,9 +2152,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
} }
if (sp->unsync_children)
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
__clear_sp_write_flooding_count(sp); __clear_sp_write_flooding_count(sp);
trace_get_page: trace_get_page:
@ -3684,7 +3687,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
write_lock(&vcpu->kvm->mmu_lock); write_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
mmu_sync_children(vcpu, sp); mmu_sync_children(vcpu, sp, true);
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
write_unlock(&vcpu->kvm->mmu_lock); write_unlock(&vcpu->kvm->mmu_lock);
@ -3700,7 +3703,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
if (IS_VALID_PAE_ROOT(root)) { if (IS_VALID_PAE_ROOT(root)) {
root &= PT64_BASE_ADDR_MASK; root &= PT64_BASE_ADDR_MASK;
sp = to_shadow_page(root); sp = to_shadow_page(root);
mmu_sync_children(vcpu, sp); mmu_sync_children(vcpu, sp, true);
} }
} }

View File

@ -164,13 +164,13 @@ void kvm_page_track_cleanup(struct kvm *kvm)
cleanup_srcu_struct(&head->track_srcu); cleanup_srcu_struct(&head->track_srcu);
} }
void kvm_page_track_init(struct kvm *kvm) int kvm_page_track_init(struct kvm *kvm)
{ {
struct kvm_page_track_notifier_head *head; struct kvm_page_track_notifier_head *head;
head = &kvm->arch.track_notifier_head; head = &kvm->arch.track_notifier_head;
init_srcu_struct(&head->track_srcu);
INIT_HLIST_HEAD(&head->track_notifier_list); INIT_HLIST_HEAD(&head->track_notifier_list);
return init_srcu_struct(&head->track_srcu);
} }
/* /*

View File

@ -707,8 +707,27 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
if (!is_shadow_present_pte(*it.sptep)) { if (!is_shadow_present_pte(*it.sptep)) {
table_gfn = gw->table_gfn[it.level - 2]; table_gfn = gw->table_gfn[it.level - 2];
access = gw->pt_access[it.level - 2]; access = gw->pt_access[it.level - 2];
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, sp = kvm_mmu_get_page(vcpu, table_gfn, addr,
false, access); it.level-1, false, access);
/*
* We must synchronize the pagetable before linking it
* because the guest doesn't need to flush tlb when
* the gpte is changed from non-present to present.
* Otherwise, the guest may use the wrong mapping.
*
* For PG_LEVEL_4K, kvm_mmu_get_page() has already
* synchronized it transiently via kvm_sync_page().
*
* For higher level pagetable, we synchronize it via
* the slower mmu_sync_children(). If it needs to
* break, some progress has been made; return
* RET_PF_RETRY and retry on the next #PF.
* KVM_REQ_MMU_SYNC is not necessary but it
* expedites the process.
*/
if (sp->unsync_children &&
mmu_sync_children(vcpu, sp, false))
return RET_PF_RETRY;
} }
/* /*
@ -1047,14 +1066,6 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
* Using the cached information from sp->gfns is safe because: * Using the cached information from sp->gfns is safe because:
* - The spte has a reference to the struct page, so the pfn for a given gfn * - The spte has a reference to the struct page, so the pfn for a given gfn
* can't change unless all sptes pointing to it are nuked first. * can't change unless all sptes pointing to it are nuked first.
*
* Note:
* We should flush all tlbs if spte is dropped even though guest is
* responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page
* and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
* used by guest then tlbs are not flushed, so guest is allowed to access the
* freed pages.
* And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
*/ */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{ {
@ -1107,13 +1118,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return 0; return 0;
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
/* set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
* Update spte before increasing tlbs_dirty to make
* sure no tlb flush is lost after spte is zapped; see
* the comments in kvm_flush_remote_tlbs().
*/
smp_wmb();
vcpu->kvm->tlbs_dirty++;
continue; continue;
} }
@ -1128,12 +1133,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (gfn != sp->gfns[i]) { if (gfn != sp->gfns[i]) {
drop_spte(vcpu->kvm, &sp->spt[i]); drop_spte(vcpu->kvm, &sp->spt[i]);
/* set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
* The same as above where we are doing
* prefetch_invalid_gpte().
*/
smp_wmb();
vcpu->kvm->tlbs_dirty++;
continue; continue;
} }

View File

@ -545,7 +545,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
(svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits); (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
svm->vmcb->control.int_state = svm->nested.ctl.int_state; svm->vmcb->control.int_state = svm->nested.ctl.int_state;
svm->vmcb->control.event_inj = svm->nested.ctl.event_inj; svm->vmcb->control.event_inj = svm->nested.ctl.event_inj;
@ -579,7 +578,7 @@ static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to
} }
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
struct vmcb *vmcb12) struct vmcb *vmcb12, bool from_vmrun)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
int ret; int ret;
@ -609,13 +608,16 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
nested_vmcb02_prepare_save(svm, vmcb12); nested_vmcb02_prepare_save(svm, vmcb12);
ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3, ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
nested_npt_enabled(svm), true); nested_npt_enabled(svm), from_vmrun);
if (ret) if (ret)
return ret; return ret;
if (!npt_enabled) if (!npt_enabled)
vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested; vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
if (!from_vmrun)
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
svm_set_gif(svm, true); svm_set_gif(svm, true);
return 0; return 0;
@ -681,7 +683,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
svm->nested.nested_run_pending = 1; svm->nested.nested_run_pending = 1;
if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12)) if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
goto out_exit_err; goto out_exit_err;
if (nested_svm_vmrun_msrpm(svm)) if (nested_svm_vmrun_msrpm(svm))

View File

@ -595,43 +595,50 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
return 0; return 0;
} }
static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
int *error)
{
struct sev_data_launch_update_vmsa vmsa;
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
/* Perform some pre-encryption checks against the VMSA */
ret = sev_es_sync_vmsa(svm);
if (ret)
return ret;
/*
* The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
* the VMSA memory content (i.e it will write the same memory region
* with the guest's key), so invalidate it first.
*/
clflush_cache_range(svm->vmsa, PAGE_SIZE);
vmsa.reserved = 0;
vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
vmsa.address = __sme_pa(svm->vmsa);
vmsa.len = PAGE_SIZE;
return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
}
static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{ {
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_update_vmsa vmsa;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
int i, ret; int i, ret;
if (!sev_es_guest(kvm)) if (!sev_es_guest(kvm))
return -ENOTTY; return -ENOTTY;
vmsa.reserved = 0;
kvm_for_each_vcpu(i, vcpu, kvm) { kvm_for_each_vcpu(i, vcpu, kvm) {
struct vcpu_svm *svm = to_svm(vcpu); ret = mutex_lock_killable(&vcpu->mutex);
/* Perform some pre-encryption checks against the VMSA */
ret = sev_es_sync_vmsa(svm);
if (ret) if (ret)
return ret; return ret;
/* ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
* The LAUNCH_UPDATE_VMSA command will perform in-place
* encryption of the VMSA memory content (i.e it will write
* the same memory region with the guest's key), so invalidate
* it first.
*/
clflush_cache_range(svm->vmsa, PAGE_SIZE);
vmsa.handle = sev->handle; mutex_unlock(&vcpu->mutex);
vmsa.address = __sme_pa(svm->vmsa);
vmsa.len = PAGE_SIZE;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa,
&argp->error);
if (ret) if (ret)
return ret; return ret;
svm->vcpu.arch.guest_state_protected = true;
} }
return 0; return 0;
@ -1397,8 +1404,10 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */ /* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start.handle, error); ret = sev_bind_asid(kvm, start.handle, error);
if (ret) if (ret) {
sev_decommission(start.handle);
goto e_free_session; goto e_free_session;
}
params.handle = start.handle; params.handle = start.handle;
if (copy_to_user((void __user *)(uintptr_t)argp->data, if (copy_to_user((void __user *)(uintptr_t)argp->data,
@ -1464,7 +1473,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */ /* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0); PAGE_SIZE, &n, 1);
if (IS_ERR(guest_page)) { if (IS_ERR(guest_page)) {
ret = PTR_ERR(guest_page); ret = PTR_ERR(guest_page);
goto e_free_trans; goto e_free_trans;
@ -1501,6 +1510,20 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
} }
/*
 * Tell whether a mirror VM is permitted to issue SEV command @cmd_id.
 *
 * Mirror VMs may call KVM_SEV_LAUNCH_UPDATE_VMSA, which is what enables
 * SEV-ES active mirror VMs, as well as the status and debugging commands.
 * Returns true for those four command ids and false for every other id.
 */
static bool cmd_allowed_from_miror(u32 cmd_id)
{
	switch (cmd_id) {
	case KVM_SEV_LAUNCH_UPDATE_VMSA:
	case KVM_SEV_GUEST_STATUS:
	case KVM_SEV_DBG_DECRYPT:
	case KVM_SEV_DBG_ENCRYPT:
		return true;
	default:
		return false;
	}
}
int svm_mem_enc_op(struct kvm *kvm, void __user *argp) int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{ {
struct kvm_sev_cmd sev_cmd; struct kvm_sev_cmd sev_cmd;
@ -1517,8 +1540,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
mutex_lock(&kvm->lock); mutex_lock(&kvm->lock);
/* enc_context_owner handles all memory enc operations */ /* Only the enc_context_owner handles some memory enc operations. */
if (is_mirroring_enc_context(kvm)) { if (is_mirroring_enc_context(kvm) &&
!cmd_allowed_from_miror(sev_cmd.id)) {
r = -EINVAL; r = -EINVAL;
goto out; goto out;
} }
@ -1715,8 +1739,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
{ {
struct file *source_kvm_file; struct file *source_kvm_file;
struct kvm *source_kvm; struct kvm *source_kvm;
struct kvm_sev_info *mirror_sev; struct kvm_sev_info source_sev, *mirror_sev;
unsigned int asid;
int ret; int ret;
source_kvm_file = fget(source_fd); source_kvm_file = fget(source_fd);
@ -1739,7 +1762,8 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
goto e_source_unlock; goto e_source_unlock;
} }
asid = to_kvm_svm(source_kvm)->sev_info.asid; memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
sizeof(source_sev));
/* /*
* The mirror kvm holds an enc_context_owner ref so its asid can't * The mirror kvm holds an enc_context_owner ref so its asid can't
@ -1759,8 +1783,16 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
/* Set enc_context_owner and copy its encryption context over */ /* Set enc_context_owner and copy its encryption context over */
mirror_sev = &to_kvm_svm(kvm)->sev_info; mirror_sev = &to_kvm_svm(kvm)->sev_info;
mirror_sev->enc_context_owner = source_kvm; mirror_sev->enc_context_owner = source_kvm;
mirror_sev->asid = asid;
mirror_sev->active = true; mirror_sev->active = true;
mirror_sev->asid = source_sev.asid;
mirror_sev->fd = source_sev.fd;
mirror_sev->es_active = source_sev.es_active;
mirror_sev->handle = source_sev.handle;
/*
* Do not copy ap_jump_table, since the mirror does not share the same
* KVM contexts as the original and the two may have different
* memory-views.
*/
mutex_unlock(&kvm->lock); mutex_unlock(&kvm->lock);
return 0; return 0;

View File

@ -1566,6 +1566,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
V_IRQ_INJECTION_BITS_MASK; V_IRQ_INJECTION_BITS_MASK;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
} }
vmcb_mark_dirty(svm->vmcb, VMCB_INTR); vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
@ -2222,6 +2224,10 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (error_code) if (error_code)
goto reinject; goto reinject;
/* All SVM instructions expect page aligned RAX */
if (svm->vmcb->save.rax & ~PAGE_MASK)
goto reinject;
/* Decode the instruction for usage later */ /* Decode the instruction for usage later */
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
goto reinject; goto reinject;
@ -4285,43 +4291,44 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
struct kvm_host_map map_save; struct kvm_host_map map_save;
int ret; int ret;
if (is_guest_mode(vcpu)) { if (!is_guest_mode(vcpu))
/* FED8h - SVM Guest */ return 0;
put_smstate(u64, smstate, 0x7ed8, 1);
/* FEE0h - SVM Guest VMCB Physical Address */
put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; /* FED8h - SVM Guest */
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; put_smstate(u64, smstate, 0x7ed8, 1);
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; /* FEE0h - SVM Guest VMCB Physical Address */
put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
ret = nested_svm_vmexit(svm); svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
if (ret) svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
return ret; svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
/* ret = nested_svm_vmexit(svm);
* KVM uses VMCB01 to store L1 host state while L2 runs but if (ret)
* VMCB01 is going to be used during SMM and thus the state will return ret;
* be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
* area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
* format of the area is identical to guest save area offsetted
* by 0x400 (matches the offset of 'struct vmcb_save_area'
* within 'struct vmcb'). Note: HSAVE area may also be used by
* L1 hypervisor to save additional host context (e.g. KVM does
* that, see svm_prepare_guest_switch()) which must be
* preserved.
*/
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
&map_save) == -EINVAL)
return 1;
BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); /*
* KVM uses VMCB01 to store L1 host state while L2 runs but
* VMCB01 is going to be used during SMM and thus the state will
* be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
* area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
* format of the area is identical to guest save area offsetted
* by 0x400 (matches the offset of 'struct vmcb_save_area'
* within 'struct vmcb'). Note: HSAVE area may also be used by
* L1 hypervisor to save additional host context (e.g. KVM does
* that, see svm_prepare_guest_switch()) which must be
* preserved.
*/
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
&map_save) == -EINVAL)
return 1;
svm_copy_vmrun_state(map_save.hva + 0x400, BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
&svm->vmcb01.ptr->save);
kvm_vcpu_unmap(vcpu, &map_save, true); svm_copy_vmrun_state(map_save.hva + 0x400,
} &svm->vmcb01.ptr->save);
kvm_vcpu_unmap(vcpu, &map_save, true);
return 0; return 0;
} }
@ -4329,50 +4336,54 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save; struct kvm_host_map map, map_save;
int ret = 0; u64 saved_efer, vmcb12_gpa;
struct vmcb *vmcb12;
int ret;
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); return 0;
u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
struct vmcb *vmcb12;
if (guest) { /* Non-zero if SMI arrived while vCPU was in guest mode. */
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) if (!GET_SMSTATE(u64, smstate, 0x7ed8))
return 1; return 0;
if (!(saved_efer & EFER_SVME)) if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
return 1; return 1;
if (kvm_vcpu_map(vcpu, saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) if (!(saved_efer & EFER_SVME))
return 1; return 1;
if (svm_allocate_nested(svm)) vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
return 1; if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
return 1;
vmcb12 = map.hva; ret = 1;
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
goto unmap_map;
nested_load_control_from_vmcb12(svm, &vmcb12->control); if (svm_allocate_nested(svm))
goto unmap_save;
ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12); /*
kvm_vcpu_unmap(vcpu, &map, true); * Restore L1 host state from L1 HSAVE area as VMCB01 was
* used during SMM (see svm_enter_smm())
*/
/* svm_copy_vmrun_state(&svm->vmcb01.ptr->save, map_save.hva + 0x400);
* Restore L1 host state from L1 HSAVE area as VMCB01 was
* used during SMM (see svm_enter_smm())
*/
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
&map_save) == -EINVAL)
return 1;
svm_copy_vmrun_state(&svm->vmcb01.ptr->save, /*
map_save.hva + 0x400); * Enter the nested guest now
*/
kvm_vcpu_unmap(vcpu, &map_save, true); vmcb12 = map.hva;
} nested_load_control_from_vmcb12(svm, &vmcb12->control);
} ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
unmap_save:
kvm_vcpu_unmap(vcpu, &map_save, true);
unmap_map:
kvm_vcpu_unmap(vcpu, &map, true);
return ret; return ret;
} }

View File

@ -459,7 +459,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI); return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
} }
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12); int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
void svm_leave_nested(struct vcpu_svm *svm); void svm_leave_nested(struct vcpu_svm *svm);
void svm_free_nested(struct vcpu_svm *svm); void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm);

View File

@ -353,14 +353,20 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
switch (msr_index) { switch (msr_index) {
case MSR_IA32_VMX_EXIT_CTLS: case MSR_IA32_VMX_EXIT_CTLS:
case MSR_IA32_VMX_TRUE_EXIT_CTLS: case MSR_IA32_VMX_TRUE_EXIT_CTLS:
ctl_high &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
break; break;
case MSR_IA32_VMX_ENTRY_CTLS: case MSR_IA32_VMX_ENTRY_CTLS:
case MSR_IA32_VMX_TRUE_ENTRY_CTLS: case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
ctl_high &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
break; break;
case MSR_IA32_VMX_PROCBASED_CTLS2: case MSR_IA32_VMX_PROCBASED_CTLS2:
ctl_high &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
break;
case MSR_IA32_VMX_PINBASED_CTLS:
ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
break;
case MSR_IA32_VMX_VMFUNC:
ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC;
break; break;
} }

View File

@ -2583,8 +2583,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* Guest state is invalid and unrestricted guest is disabled, * Guest state is invalid and unrestricted guest is disabled,
* which means L1 attempted VMEntry to L2 with invalid state. * which means L1 attempted VMEntry to L2 with invalid state.
* Fail the VMEntry. * Fail the VMEntry.
*
* However, when force loading the guest state (SMM exit or
* loading nested state after migration), it is possible to
* have invalid guest state now, which will be fixed later by
* restoring L2 register state.
*/ */
if (CC(!vmx_guest_state_valid(vcpu))) { if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT; *entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL; return -EINVAL;
} }
@ -4351,6 +4356,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
vmcs12->vm_exit_msr_load_count)) vmcs12->vm_exit_msr_load_count))
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
} }
static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx) static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
@ -4899,14 +4906,7 @@ out_vmcs02:
return -ENOMEM; return -ENOMEM;
} }
/* /* Emulate the VMXON instruction. */
* Emulate the VMXON instruction.
* Currently, we just remember that VMX is active, and do not save or even
* inspect the argument to VMXON (the so-called "VMXON pointer") because we
* do not currently need to store anything in that guest-allocated memory
* region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their
* argument is different from the VMXON pointer (which the spec says they do).
*/
static int handle_vmon(struct kvm_vcpu *vcpu) static int handle_vmon(struct kvm_vcpu *vcpu)
{ {
int ret; int ret;
@ -5903,6 +5903,12 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
case EXIT_REASON_VMFUNC: case EXIT_REASON_VMFUNC:
/* VM functions are emulated through L2->L0 vmexits. */ /* VM functions are emulated through L2->L0 vmexits. */
return true; return true;
case EXIT_REASON_BUS_LOCK:
/*
* At present, bus lock VM exit is never exposed to L1.
* Handle L2's bus locks in L0 directly.
*/
return true;
default: default:
break; break;
} }

View File

@ -1323,7 +1323,7 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
vmx_prepare_switch_to_host(to_vmx(vcpu)); vmx_prepare_switch_to_host(to_vmx(vcpu));
} }
static bool emulation_required(struct kvm_vcpu *vcpu) bool vmx_emulation_required(struct kvm_vcpu *vcpu)
{ {
return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu); return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
} }
@ -1367,7 +1367,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
vmcs_writel(GUEST_RFLAGS, rflags); vmcs_writel(GUEST_RFLAGS, rflags);
if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
vmx->emulation_required = emulation_required(vcpu); vmx->emulation_required = vmx_emulation_required(vcpu);
} }
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
@ -1837,10 +1837,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
&msr_info->data)) &msr_info->data))
return 1; return 1;
/* /*
* Enlightened VMCS v1 doesn't have certain fields, but buggy * Enlightened VMCS v1 doesn't have certain VMCS fields but
* Hyper-V versions are still trying to use corresponding * instead of just ignoring the features, different Hyper-V
* features when they are exposed. Filter out the essential * versions are either trying to use them and fail or do some
* minimum. * sanity checking and refuse to boot. Filter all unsupported
* features out.
*/ */
if (!msr_info->host_initiated && if (!msr_info->host_initiated &&
vmx->nested.enlightened_vmcs_enabled) vmx->nested.enlightened_vmcs_enabled)
@ -3077,7 +3078,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
} }
/* depends on vcpu->arch.cr0 to be set to a new value */ /* depends on vcpu->arch.cr0 to be set to a new value */
vmx->emulation_required = emulation_required(vcpu); vmx->emulation_required = vmx_emulation_required(vcpu);
} }
static int vmx_get_max_tdp_level(void) static int vmx_get_max_tdp_level(void)
@ -3330,7 +3331,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int
{ {
__vmx_set_segment(vcpu, var, seg); __vmx_set_segment(vcpu, var, seg);
to_vmx(vcpu)->emulation_required = emulation_required(vcpu); to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
} }
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@ -6621,10 +6622,24 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->soft_vnmi_blocked)) vmx->loaded_vmcs->soft_vnmi_blocked))
vmx->loaded_vmcs->entry_time = ktime_get(); vmx->loaded_vmcs->entry_time = ktime_get();
/* Don't enter VMX if guest state is invalid, let the exit handler /*
start emulation until we arrive back to a valid state */ * Don't enter VMX if guest state is invalid, let the exit handler
if (vmx->emulation_required) * start emulation until we arrive back to a valid state. Synthesize a
* consistency check VM-Exit due to invalid guest state and bail.
*/
if (unlikely(vmx->emulation_required)) {
/* We don't emulate invalid state of a nested guest */
vmx->fail = is_guest_mode(vcpu);
vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
vmx->exit_reason.failed_vmentry = 1;
kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
vmx->exit_qualification = ENTRY_FAIL_DEFAULT;
kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
vmx->exit_intr_info = 0;
return EXIT_FASTPATH_NONE; return EXIT_FASTPATH_NONE;
}
trace_kvm_entry(vcpu); trace_kvm_entry(vcpu);

View File

@ -248,12 +248,8 @@ struct vcpu_vmx {
* only loaded into hardware when necessary, e.g. SYSCALL #UDs outside * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
* of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
* be loaded into hardware if those conditions aren't met. * be loaded into hardware if those conditions aren't met.
* nr_active_uret_msrs tracks the number of MSRs that need to be loaded
* into hardware when running the guest. guest_uret_msrs[] is resorted
* whenever the number of "active" uret MSRs is modified.
*/ */
struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS]; struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
int nr_active_uret_msrs;
bool guest_uret_msrs_loaded; bool guest_uret_msrs_loaded;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base; u64 msr_host_kernel_gs_base;
@ -359,6 +355,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base); unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu); int vmx_get_cpl(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu); u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);

View File

@ -1332,6 +1332,13 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
}; };
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)]; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
@ -2969,7 +2976,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
offsetof(struct compat_vcpu_info, time)); offsetof(struct compat_vcpu_info, time));
if (vcpu->xen.vcpu_time_info_set) if (vcpu->xen.vcpu_time_info_set)
kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0); kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
if (v == kvm_get_vcpu(v->kvm, 0)) if (!v->vcpu_idx)
kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
return 0; return 0;
} }
@ -7658,6 +7665,13 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
/* Process a latched INIT or SMI, if any. */ /* Process a latched INIT or SMI, if any. */
kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu);
/*
* Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
* on SMM exit we still need to reload them from
* guest memory
*/
vcpu->arch.pdptrs_from_userspace = false;
} }
kvm_mmu_reset_context(vcpu); kvm_mmu_reset_context(vcpu);
@ -10652,6 +10666,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
int r; int r;
vcpu->arch.last_vmentry_cpu = -1; vcpu->arch.last_vmentry_cpu = -1;
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@ -10893,6 +10909,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0); kvm_rip_write(vcpu, 0xfff0);
vcpu->arch.cr3 = 0;
kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
/* /*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions * CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
* of Intel's SDM list CD/NW as being set on INIT, but they contradict * of Intel's SDM list CD/NW as being set on INIT, but they contradict
@ -11139,9 +11158,15 @@ void kvm_arch_free_vm(struct kvm *kvm)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{ {
int ret;
if (type) if (type)
return -EINVAL; return -EINVAL;
ret = kvm_page_track_init(kvm);
if (ret)
return ret;
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
@ -11174,7 +11199,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_apicv_init(kvm); kvm_apicv_init(kvm);
kvm_hv_init_vm(kvm); kvm_hv_init_vm(kvm);
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm); kvm_mmu_init_vm(kvm);
kvm_xen_init_vm(kvm); kvm_xen_init_vm(kvm);

View File

@ -952,6 +952,8 @@ int armpmu_register(struct arm_pmu *pmu)
pmu->name, pmu->num_events, pmu->name, pmu->num_events,
has_nmi ? ", using NMIs" : ""); has_nmi ? ", using NMIs" : "");
kvm_host_pmu_init(pmu);
return 0; return 0;
out_destroy: out_destroy:

View File

@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr); struct kvm_device_attr *attr);
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
int kvm_pmu_probe_pmuver(void);
#else #else
struct kvm_pmu { struct kvm_pmu {
}; };
@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
return 0; return 0;
} }
static inline int kvm_pmu_probe_pmuver(void) { return 0xf; }
#endif #endif
#endif #endif

View File

@ -608,7 +608,6 @@ struct kvm {
unsigned long mmu_notifier_range_start; unsigned long mmu_notifier_range_start;
unsigned long mmu_notifier_range_end; unsigned long mmu_notifier_range_end;
#endif #endif
long tlbs_dirty;
struct list_head devices; struct list_head devices;
u64 manual_dirty_log_protect; u64 manual_dirty_log_protect;
struct dentry *debugfs_dentry; struct dentry *debugfs_dentry;
@ -721,11 +720,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
return NULL; return NULL;
} }
static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
{
return vcpu->vcpu_idx;
}
#define kvm_for_each_memslot(memslot, slots) \ #define kvm_for_each_memslot(memslot, slots) \
for (memslot = &slots->memslots[0]; \ for (memslot = &slots->memslots[0]; \
memslot < slots->memslots + slots->used_slots; memslot++) \ memslot < slots->memslots + slots->used_slots; memslot++) \

View File

@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
#endif #endif
#ifdef CONFIG_KVM
void kvm_host_pmu_init(struct arm_pmu *pmu);
#else
#define kvm_host_pmu_init(x) do { } while(0)
#endif
/* Internal functions only for core arm_pmu code */ /* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc(void);
struct arm_pmu *armpmu_alloc_atomic(void); struct arm_pmu *armpmu_alloc_atomic(void);

View File

@ -24,6 +24,7 @@
/x86_64/smm_test /x86_64/smm_test
/x86_64/state_test /x86_64/state_test
/x86_64/svm_vmcall_test /x86_64/svm_vmcall_test
/x86_64/svm_int_ctl_test
/x86_64/sync_regs_test /x86_64/sync_regs_test
/x86_64/tsc_msrs_test /x86_64/tsc_msrs_test
/x86_64/userspace_msr_exit_test /x86_64/userspace_msr_exit_test

View File

@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test

View File

@ -371,9 +371,7 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n"); printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n" printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n"); " them into a separate region of memory for each vCPU.\n");
printf(" -s: specify the type of memory that should be used to\n" backing_src_help("-s");
" back the guest data region.\n\n");
backing_src_help();
puts(""); puts("");
exit(0); exit(0);
} }
@ -381,7 +379,7 @@ static void help(char *name)
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
struct test_params params = { struct test_params params = {
.backing_src = VM_MEM_SRC_ANONYMOUS, .backing_src = DEFAULT_VM_MEM_SRC,
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
.vcpus = 1, .vcpus = 1,
}; };

View File

@ -179,7 +179,7 @@ static void *uffd_handler_thread_fn(void *arg)
return NULL; return NULL;
} }
if (!pollfd[0].revents & POLLIN) if (!(pollfd[0].revents & POLLIN))
continue; continue;
r = read(uffd, &msg, sizeof(msg)); r = read(uffd, &msg, sizeof(msg));
@ -416,7 +416,7 @@ static void help(char *name)
{ {
puts(""); puts("");
printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
" [-b memory] [-t type] [-v vcpus] [-o]\n", name); " [-b memory] [-s type] [-v vcpus] [-o]\n", name);
guest_modes_help(); guest_modes_help();
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" UFFD registration mode: 'MISSING' or 'MINOR'.\n"); " UFFD registration mode: 'MISSING' or 'MINOR'.\n");
@ -426,8 +426,7 @@ static void help(char *name)
printf(" -b: specify the size of the memory region which should be\n" printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n" " demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n"); " Default: 1G\n");
printf(" -t: The type of backing memory to use. Default: anonymous\n"); backing_src_help("-s");
backing_src_help();
printf(" -v: specify the number of vCPUs to run.\n"); printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n" printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n"); " them into a separate region of memory for each vCPU.\n");
@ -439,14 +438,14 @@ int main(int argc, char *argv[])
{ {
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = { struct test_params p = {
.src_type = VM_MEM_SRC_ANONYMOUS, .src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true, .partition_vcpu_memory_access = true,
}; };
int opt; int opt;
guest_modes_append_default(); guest_modes_append_default();
while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) { while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
switch (opt) { switch (opt) {
case 'm': case 'm':
guest_modes_cmdline(optarg); guest_modes_cmdline(optarg);
@ -465,7 +464,7 @@ int main(int argc, char *argv[])
case 'b': case 'b':
guest_percpu_mem_size = parse_size(optarg); guest_percpu_mem_size = parse_size(optarg);
break; break;
case 't': case 's':
p.src_type = parse_backing_src_type(optarg); p.src_type = parse_backing_src_type(optarg);
break; break;
case 'v': case 'v':
@ -485,7 +484,7 @@ int main(int argc, char *argv[])
if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR && if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
!backing_src_is_shared(p.src_type)) { !backing_src_is_shared(p.src_type)) {
TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t"); TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
} }
for_each_guest_mode(run_test, &p); for_each_guest_mode(run_test, &p);

View File

@ -118,42 +118,64 @@ static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
toggle_dirty_logging(vm, slots, false); toggle_dirty_logging(vm, slots, false);
} }
static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap, static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
uint64_t nr_pages)
{ {
uint64_t slot_pages = nr_pages / slots;
int i; int i;
for (i = 0; i < slots; i++) { for (i = 0; i < slots; i++) {
int slot = PERF_TEST_MEM_SLOT_INDEX + i; int slot = PERF_TEST_MEM_SLOT_INDEX + i;
unsigned long *slot_bitmap = bitmap + i * slot_pages;
kvm_vm_get_dirty_log(vm, slot, slot_bitmap); kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
} }
} }
static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap, static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
uint64_t nr_pages) int slots, uint64_t pages_per_slot)
{ {
uint64_t slot_pages = nr_pages / slots;
int i; int i;
for (i = 0; i < slots; i++) { for (i = 0; i < slots; i++) {
int slot = PERF_TEST_MEM_SLOT_INDEX + i; int slot = PERF_TEST_MEM_SLOT_INDEX + i;
unsigned long *slot_bitmap = bitmap + i * slot_pages;
kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages); kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
} }
} }
/*
 * Build a table holding one dirty bitmap per memslot.
 *
 * @slots:          number of entries in the returned table.
 * @pages_per_slot: size argument handed to bitmap_zalloc() for each entry.
 *
 * Both the table and every entry are checked with TEST_ASSERT, so the
 * caller receives a fully populated table.  Release it with free_bitmaps().
 */
static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
{
	unsigned long **table = malloc(slots * sizeof(table[0]));
	int slot = 0;

	TEST_ASSERT(table, "Failed to allocate bitmaps array.");

	while (slot < slots) {
		table[slot] = bitmap_zalloc(pages_per_slot);
		TEST_ASSERT(table[slot], "Failed to allocate slot bitmap.");
		slot++;
	}

	return table;
}
static void free_bitmaps(unsigned long *bitmaps[], int slots)
{
int i;
for (i = 0; i < slots; i++)
free(bitmaps[i]);
free(bitmaps);
}
static void run_test(enum vm_guest_mode mode, void *arg) static void run_test(enum vm_guest_mode mode, void *arg)
{ {
struct test_params *p = arg; struct test_params *p = arg;
pthread_t *vcpu_threads; pthread_t *vcpu_threads;
struct kvm_vm *vm; struct kvm_vm *vm;
unsigned long *bmap; unsigned long **bitmaps;
uint64_t guest_num_pages; uint64_t guest_num_pages;
uint64_t host_num_pages; uint64_t host_num_pages;
uint64_t pages_per_slot;
int vcpu_id; int vcpu_id;
struct timespec start; struct timespec start;
struct timespec ts_diff; struct timespec ts_diff;
@ -171,7 +193,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_num_pages = vm_num_host_pages(mode, guest_num_pages); host_num_pages = vm_num_host_pages(mode, guest_num_pages);
bmap = bitmap_zalloc(host_num_pages); pages_per_slot = host_num_pages / p->slots;
bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
if (dirty_log_manual_caps) { if (dirty_log_manual_caps) {
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
@ -239,7 +263,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration, ts_diff.tv_sec, ts_diff.tv_nsec); iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
clock_gettime(CLOCK_MONOTONIC, &start); clock_gettime(CLOCK_MONOTONIC, &start);
get_dirty_log(vm, p->slots, bmap, host_num_pages); get_dirty_log(vm, bitmaps, p->slots);
ts_diff = timespec_elapsed(start); ts_diff = timespec_elapsed(start);
get_dirty_log_total = timespec_add(get_dirty_log_total, get_dirty_log_total = timespec_add(get_dirty_log_total,
ts_diff); ts_diff);
@ -248,7 +272,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (dirty_log_manual_caps) { if (dirty_log_manual_caps) {
clock_gettime(CLOCK_MONOTONIC, &start); clock_gettime(CLOCK_MONOTONIC, &start);
clear_dirty_log(vm, p->slots, bmap, host_num_pages); clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
ts_diff = timespec_elapsed(start); ts_diff = timespec_elapsed(start);
clear_dirty_log_total = timespec_add(clear_dirty_log_total, clear_dirty_log_total = timespec_add(clear_dirty_log_total,
ts_diff); ts_diff);
@ -281,7 +305,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
} }
free(bmap); free_bitmaps(bitmaps, p->slots);
free(vcpu_threads); free(vcpu_threads);
perf_test_destroy_vm(vm); perf_test_destroy_vm(vm);
} }
@ -308,11 +332,9 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n"); printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n" printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n"); " them into a separate region of memory for each vCPU.\n");
printf(" -s: specify the type of memory that should be used to\n" backing_src_help("-s");
" back the guest data region.\n\n");
printf(" -x: Split the memory region into this number of memslots.\n" printf(" -x: Split the memory region into this number of memslots.\n"
" (default: 1)"); " (default: 1)\n");
backing_src_help();
puts(""); puts("");
exit(0); exit(0);
} }
@ -324,7 +346,7 @@ int main(int argc, char *argv[])
.iterations = TEST_HOST_LOOP_N, .iterations = TEST_HOST_LOOP_N,
.wr_fract = 1, .wr_fract = 1,
.partition_vcpu_memory_access = true, .partition_vcpu_memory_access = true,
.backing_src = VM_MEM_SRC_ANONYMOUS, .backing_src = DEFAULT_VM_MEM_SRC,
.slots = 1, .slots = 1,
}; };
int opt; int opt;

View File

@ -90,6 +90,8 @@ enum vm_mem_backing_src_type {
NUM_SRC_TYPES, NUM_SRC_TYPES,
}; };
#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
struct vm_mem_backing_src_alias { struct vm_mem_backing_src_alias {
const char *name; const char *name;
uint32_t flag; uint32_t flag;
@ -102,7 +104,7 @@ size_t get_trans_hugepagesz(void);
size_t get_def_hugetlb_pagesz(void); size_t get_def_hugetlb_pagesz(void);
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
size_t get_backing_src_pagesz(uint32_t i); size_t get_backing_src_pagesz(uint32_t i);
void backing_src_help(void); void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
long get_run_delay(void); long get_run_delay(void);

View File

@ -312,37 +312,37 @@ static inline void set_xmm(int n, unsigned long val)
} }
} }
typedef unsigned long v1di __attribute__ ((vector_size (8))); #define GET_XMM(__xmm) \
({ \
unsigned long __val; \
asm volatile("movq %%"#__xmm", %0" : "=r"(__val) : : #__xmm); \
__val; \
})
static inline unsigned long get_xmm(int n) static inline unsigned long get_xmm(int n)
{ {
assert(n >= 0 && n <= 7); assert(n >= 0 && n <= 7);
register v1di xmm0 __asm__("%xmm0");
register v1di xmm1 __asm__("%xmm1");
register v1di xmm2 __asm__("%xmm2");
register v1di xmm3 __asm__("%xmm3");
register v1di xmm4 __asm__("%xmm4");
register v1di xmm5 __asm__("%xmm5");
register v1di xmm6 __asm__("%xmm6");
register v1di xmm7 __asm__("%xmm7");
switch (n) { switch (n) {
case 0: case 0:
return (unsigned long)xmm0; return GET_XMM(xmm0);
case 1: case 1:
return (unsigned long)xmm1; return GET_XMM(xmm1);
case 2: case 2:
return (unsigned long)xmm2; return GET_XMM(xmm2);
case 3: case 3:
return (unsigned long)xmm3; return GET_XMM(xmm3);
case 4: case 4:
return (unsigned long)xmm4; return GET_XMM(xmm4);
case 5: case 5:
return (unsigned long)xmm5; return GET_XMM(xmm5);
case 6: case 6:
return (unsigned long)xmm6; return GET_XMM(xmm6);
case 7: case 7:
return (unsigned long)xmm7; return GET_XMM(xmm7);
} }
/* never reached */
return 0; return 0;
} }

View File

@ -456,10 +456,7 @@ static void help(char *name)
" (default: 1G)\n"); " (default: 1G)\n");
printf(" -v: specify the number of vCPUs to run\n" printf(" -v: specify the number of vCPUs to run\n"
" (default: 1)\n"); " (default: 1)\n");
printf(" -s: specify the type of memory that should be used to\n" backing_src_help("-s");
" back the guest data region.\n"
" (default: anonymous)\n\n");
backing_src_help();
puts(""); puts("");
} }
@ -468,7 +465,7 @@ int main(int argc, char *argv[])
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = { struct test_params p = {
.test_mem_size = DEFAULT_TEST_MEM_SIZE, .test_mem_size = DEFAULT_TEST_MEM_SIZE,
.src_type = VM_MEM_SRC_ANONYMOUS, .src_type = DEFAULT_VM_MEM_SRC,
}; };
int opt; int opt;

View File

@ -283,13 +283,22 @@ size_t get_backing_src_pagesz(uint32_t i)
} }
} }
void backing_src_help(void) static void print_available_backing_src_types(const char *prefix)
{ {
int i; int i;
printf("Available backing src types:\n"); printf("%sAvailable backing src types:\n", prefix);
for (i = 0; i < NUM_SRC_TYPES; i++) for (i = 0; i < NUM_SRC_TYPES; i++)
printf("\t%s\n", vm_mem_backing_src_alias(i)->name); printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name);
}
/*
 * Print the help entry for the backing-source command line option.
 *
 * @flag: option text shown at the start of the help entry.
 *
 * The entry names the default type, looked up through
 * vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC), and is followed by the
 * list printed by print_available_backing_src_types() with a one-space
 * prefix.
 */
void backing_src_help(const char *flag)
{
printf(" %s: specify the type of memory that should be used to\n"
" back the guest data region. (default: %s)\n",
flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
print_available_backing_src_types(" ");
}
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name) enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
@ -300,7 +309,7 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name)) if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
return i; return i;
backing_src_help(); print_available_backing_src_types("");
TEST_FAIL("Unknown backing src type: %s", type_name); TEST_FAIL("Unknown backing src type: %s", type_name);
return -1; return -1;
} }

View File

@ -180,6 +180,7 @@ int main(int argc, char *argv[])
* CPU affinity. * CPU affinity.
*/ */
vm = vm_create_default(VCPU_ID, 0, guest_code); vm = vm_create_default(VCPU_ID, 0, guest_code);
ucall_init(vm, NULL);
pthread_create(&migration_thread, NULL, migration_worker, 0); pthread_create(&migration_thread, NULL, migration_worker, 0);

View File

@ -116,12 +116,12 @@ struct st_time {
uint64_t st_time; uint64_t st_time;
}; };
static int64_t smccc(uint32_t func, uint32_t arg) static int64_t smccc(uint32_t func, uint64_t arg)
{ {
unsigned long ret; unsigned long ret;
asm volatile( asm volatile(
"mov x0, %1\n" "mov w0, %w1\n"
"mov x1, %2\n" "mov x1, %2\n"
"hvc #0\n" "hvc #0\n"
"mov %0, x0\n" "mov %0, x0\n"

View File

@ -0,0 +1,128 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* svm_int_ctl_test
*
* Copyright (C) 2021, Red Hat, Inc.
*
* Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
*/
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "svm_util.h"
#include "apic.h"
#define VCPU_ID 0
static struct kvm_vm *vm;
bool vintr_irq_called;
bool intr_irq_called;
#define VINTR_IRQ_NUMBER 0x20
#define INTR_IRQ_NUMBER 0x30
/*
 * Handler that main() installs for vector VINTR_IRQ_NUMBER.  It only
 * records that the interrupt arrived; unlike intr_irq_handler() it writes
 * no APIC EOI.  @regs is unused.
 */
static void vintr_irq_handler(struct ex_regs *regs)
{
vintr_irq_called = true;
}
/*
 * Handler installed for vector INTR_IRQ_NUMBER: writes the x2APIC EOI
 * register and records that the interrupt was delivered. @regs is unused.
 */
static void intr_irq_handler(struct ex_regs *regs)
{
/* Signal end-of-interrupt to the local APIC before flagging delivery. */
x2apic_write_reg(APIC_EOI, 0x00);
intr_irq_called = true;
}
/*
 * Code run as the nested (L2) guest: sends itself INTR_IRQ_NUMBER, enables
 * interrupts, asserts that both interrupt handlers ran, then executes
 * 'vmcall'. @svm is not used here.
 */
static void l2_guest_code(struct svm_test_data *svm)
{
/*
 * This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
 * and since L1 didn't enable virtual interrupt masking,
 * L2 should receive it and not L1.
 *
 * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
 * so it should also receive it after the following 'sti'.
 */
x2apic_write_reg(APIC_ICR,
APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
/*
 * Enable interrupt delivery so both pending interrupts can be taken.
 * NOTE(review): the 'nop' presumably provides an instruction after the
 * 'sti' at which the interrupts can be delivered - confirm.
 */
__asm__ __volatile__(
"sti\n"
"nop\n"
);
/* Both handlers must have run by now. */
GUEST_ASSERT(vintr_irq_called);
GUEST_ASSERT(intr_irq_called);
/*
 * Hand control back; l1_guest_code() expects the resulting exit code
 * to be SVM_EXIT_VMMCALL.
 */
__asm__ __volatile__(
"vmcall\n"
);
}
/*
 * Code run as the L1 guest: enables the x2APIC, prepares the VMCB so that L2
 * runs l2_guest_code() with no virtual interrupt masking, no INTR/VINTR
 * intercepts and a virtual interrupt (VINTR_IRQ_NUMBER) already pending,
 * then runs L2 and checks that it exited with SVM_EXIT_VMMCALL.
 *
 * @svm: nested SVM test data providing the VMCB and its guest-physical
 *       address.
 */
static void l1_guest_code(struct svm_test_data *svm)
{
#define L2_GUEST_STACK_SIZE 64
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	struct vmcb *vmcb = svm->vmcb;

	x2apic_enable();

	/* Prepare for L2 execution; the stack pointer starts one past the end. */
	generic_svm_setup(svm, l2_guest_code,
			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	/* No virtual interrupt masking */
	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

	/*
	 * No intercepts for real and virtual interrupts.
	 *
	 * Both INTERCEPT_* values are bit indices, so each one has to be
	 * shifted individually: '<<' binds tighter than '|', and without the
	 * second shift the raw index of INTERCEPT_VINTR would be used as a
	 * mask, clearing the wrong intercept bits.
	 */
	vmcb->control.intercept &= ~((1ULL << INTERCEPT_INTR) |
				     (1ULL << INTERCEPT_VINTR));

	/* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
	vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
	vmcb->control.int_vector = VINTR_IRQ_NUMBER;

	run_guest(vmcb, svm->vmcb_gpa);

	/* L2 is expected to return to L1 only through its final 'vmcall'. */
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_DONE();
}
/*
 * Host side of the test: builds a VM whose single vCPU starts in
 * l1_guest_code(), installs the two interrupt handlers, runs the vCPU once
 * and checks that the guest reported completion through a ucall.
 */
int main(int argc, char *argv[])
{
	struct kvm_run *run;
	struct ucall uc;
	vm_vaddr_t svm_gva;

	nested_svm_check_supported();

	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);

	/* Exception/interrupt plumbing needed by the guest handlers. */
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);
	vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
	vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);

	/* Pass the address of the SVM test data as the guest's only argument. */
	vcpu_alloc_svm(vm, &svm_gva);
	vcpu_args_set(vm, VCPU_ID, 1, svm_gva);

	run = vcpu_state(vm, VCPU_ID);

	vcpu_run(vm, VCPU_ID);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
		    run->exit_reason,
		    exit_reason_str(run->exit_reason));

	switch (get_ucall(vm, VCPU_ID, &uc)) {
	case UCALL_ABORT:
		/* A guest assertion failed; report its message. */
		TEST_FAIL("%s", (const char *)uc.args[0]);
		/* NOT REACHED */
		break;
	case UCALL_DONE:
		/* Guest finished successfully; fall out to the cleanup. */
		break;
	default:
		TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
	}

	kvm_vm_free(vm);
	return 0;
}

View File

@ -235,9 +235,13 @@ static void ack_flush(void *_completed)
{ {
} }
static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait) static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
{ {
if (unlikely(!cpus)) const struct cpumask *cpus;
if (likely(cpumask_available(tmp)))
cpus = tmp;
else
cpus = cpu_online_mask; cpus = cpu_online_mask;
if (cpumask_empty(cpus)) if (cpumask_empty(cpus))
@ -263,14 +267,34 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
continue; continue;
kvm_make_request(req, vcpu); kvm_make_request(req, vcpu);
cpu = vcpu->cpu;
if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
continue; continue;
if (tmp != NULL && cpu != -1 && cpu != me && /*
kvm_request_needs_ipi(vcpu, req)) * tmp can be "unavailable" if cpumasks are allocated off stack
__cpumask_set_cpu(cpu, tmp); * as allocation of the mask is deliberately not fatal and is
* handled by falling back to kicking all online CPUs.
*/
if (!cpumask_available(tmp))
continue;
/*
* Note, the vCPU could get migrated to a different pCPU at any
* point after kvm_request_needs_ipi(), which could result in
* sending an IPI to the previous pCPU. But, that's ok because
* the purpose of the IPI is to ensure the vCPU returns to
* OUTSIDE_GUEST_MODE, which is satisfied if the vCPU migrates.
* Entering READING_SHADOW_PAGE_TABLES after this point is also
* ok, as the requirement is only that KVM wait for vCPUs that
* were reading SPTEs _before_ any changes were finalized. See
* kvm_vcpu_kick() for more details on handling requests.
*/
if (kvm_request_needs_ipi(vcpu, req)) {
cpu = READ_ONCE(vcpu->cpu);
if (cpu != -1 && cpu != me)
__cpumask_set_cpu(cpu, tmp);
}
} }
called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT)); called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
@ -302,13 +326,8 @@ EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm) void kvm_flush_remote_tlbs(struct kvm *kvm)
{ {
/*
* Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
* kvm_make_all_cpus_request.
*/
long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
++kvm->stat.generic.remote_tlb_flush_requests; ++kvm->stat.generic.remote_tlb_flush_requests;
/* /*
* We want to publish modifications to the page tables before reading * We want to publish modifications to the page tables before reading
* mode. Pairs with a memory barrier in arch-specific code. * mode. Pairs with a memory barrier in arch-specific code.
@ -323,7 +342,6 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
if (!kvm_arch_flush_remote_tlb(kvm) if (!kvm_arch_flush_remote_tlb(kvm)
|| kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.generic.remote_tlb_flush; ++kvm->stat.generic.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
} }
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
#endif #endif
@ -528,7 +546,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
} }
} }
if (range->flush_on_ret && (ret || kvm->tlbs_dirty)) if (range->flush_on_ret && ret)
kvm_flush_remote_tlbs(kvm); kvm_flush_remote_tlbs(kvm);
if (locked) if (locked)
@ -3134,15 +3152,19 @@ out:
static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
{ {
unsigned int old, val, shrink; unsigned int old, val, shrink, grow_start;
old = val = vcpu->halt_poll_ns; old = val = vcpu->halt_poll_ns;
shrink = READ_ONCE(halt_poll_ns_shrink); shrink = READ_ONCE(halt_poll_ns_shrink);
grow_start = READ_ONCE(halt_poll_ns_grow_start);
if (shrink == 0) if (shrink == 0)
val = 0; val = 0;
else else
val /= shrink; val /= shrink;
if (val < grow_start)
val = 0;
vcpu->halt_poll_ns = val; vcpu->halt_poll_ns = val;
trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
} }
@ -3290,16 +3312,24 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
*/ */
void kvm_vcpu_kick(struct kvm_vcpu *vcpu) void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{ {
int me; int me, cpu;
int cpu = vcpu->cpu;
if (kvm_vcpu_wake_up(vcpu)) if (kvm_vcpu_wake_up(vcpu))
return; return;
/*
* Note, the vCPU could get migrated to a different pCPU at any point
* after kvm_arch_vcpu_should_kick(), which could result in sending an
* IPI to the previous pCPU. But, that's ok because the purpose of the
* IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
* vCPU also requires it to leave IN_GUEST_MODE.
*/
me = get_cpu(); me = get_cpu();
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) if (kvm_arch_vcpu_should_kick(vcpu)) {
if (kvm_arch_vcpu_should_kick(vcpu)) cpu = READ_ONCE(vcpu->cpu);
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
smp_send_reschedule(cpu); smp_send_reschedule(cpu);
}
put_cpu(); put_cpu();
} }
EXPORT_SYMBOL_GPL(kvm_vcpu_kick); EXPORT_SYMBOL_GPL(kvm_vcpu_kick);