KVM SVM changes for 6.4:

 - Add support for virtual NMIs

 - Fixes for edge cases related to virtual interrupts
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCgAwFiEEMHr+pfEFOIzK+KY1YJEiAU0MEvkFAmRGuLISHHNlYW5qY0Bn
 b29nbGUuY29tAAoJEGCRIgFNDBL5NOMQAKy1Od54yzQsIKyAZZJVfOEm7N5VLQgz
 +jLilXgHd8dm/g0g/KVCDPFoZ/ut2Tf5Dn4WwyoPWOpgGsOyTwdDIJabf9rustkA
 goZFcfUXz+P1nangTidrj6CFYgGmVS13Uu//H19X4bSzT+YifVevJ4QkRVElj9Mh
 VBUeXppC/gMGBZ9tKEzl+AU3FwJ58cB88q4boovBFYiDdciv/fF86t02Lc+dCIX1
 6hTcOAnjAcp3eJY0wPQJUAEScufDKcMf6tSrsB/yWXv9KB9ANXFNXry8/+lW/Ux/
 oOUmUVdRXrrsRUqtYk9+KuMoIN7CL1SBV0RCm5ApqwqwnTVdHS+odHU3c2s7E/uU
 QXIW4vwSne3W9Y4YApDgFjwDwmzY85dvblWlWBnR2LW2I3Or48xK+S8LpWG+lj6l
 EDf7RzeqAipJ1qUq6qDYJlyg/YsyYlcoErtra423skg38HBWxQXdqkVIz3SYdKjA
 0OcBQIRI28KzJDn1gU6P3Q0Wr/cKsx9EGy6+jWBhf4Yf3eHP7+3WUTrg/Up0q8ny
 0j/+cbe5kBb6k2T9y2X6jm6TVbPV5FyMBOF/UxmqEbRLmxXjBe8tMnFwV+qN871I
 gk5HTSIkX39GU9kNA3h5HoWjdNeRfhazKR9ZVrELVc1zjHnGLthXBPZbIAUsPPMx
 vgM6jf8NwLXZ
 =9xNX
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-svm-6.4' of https://github.com/kvm-x86/linux into HEAD

KVM SVM changes for 6.4:

 - Add support for virtual NMIs

 - Fixes for edge cases related to virtual interrupts
This commit is contained in:
Paolo Bonzini 2023-04-26 15:56:27 -04:00
commit 4a5fd41995
8 changed files with 292 additions and 58 deletions

View File

@ -226,10 +226,9 @@
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
@ -370,6 +369,7 @@
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */

View File

@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow)
KVM_X86_OP(patch_hypercall)
KVM_X86_OP(inject_irq)
KVM_X86_OP(inject_nmi)
KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
KVM_X86_OP(inject_exception)
KVM_X86_OP(cancel_injection)
KVM_X86_OP(interrupt_allowed)

View File

@ -874,7 +874,8 @@ struct kvm_vcpu_arch {
u64 tsc_scaling_ratio; /* current scaling ratio */
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
/* Number of NMIs pending injection, not including hardware vNMIs. */
unsigned int nmi_pending;
bool nmi_injected; /* Trying to inject an NMI this entry */
bool smi_pending; /* SMI queued after currently running handler */
u8 handling_intr_from_guest;
@ -1619,6 +1620,13 @@ struct kvm_x86_ops {
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
/* Whether or not a virtual NMI is pending in hardware. */
bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
/*
* Attempt to pend a virtual NMI in hardware. Returns %true on success
* to allow using static_call_ret0 as the fallback.
*/
bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@ -1999,6 +2007,7 @@ int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu);

View File

@ -183,6 +183,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_GIF_SHIFT 9
#define V_GIF_MASK (1 << V_GIF_SHIFT)
#define V_NMI_PENDING_SHIFT 11
#define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT)
#define V_NMI_BLOCKING_SHIFT 12
#define V_NMI_BLOCKING_MASK (1 << V_NMI_BLOCKING_SHIFT)
#define V_INTR_PRIO_SHIFT 16
#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
@ -197,6 +203,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_GIF_ENABLE_SHIFT 25
#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
#define V_NMI_ENABLE_SHIFT 26
#define V_NMI_ENABLE_MASK (1 << V_NMI_ENABLE_SHIFT)
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
@ -278,7 +287,6 @@ static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
struct vmcb_seg {

View File

@ -139,13 +139,18 @@ void recalc_intercepts(struct vcpu_svm *svm)
if (g->int_ctl & V_INTR_MASKING_MASK) {
/*
* Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
* does not affect any interrupt we may want to inject;
* therefore, writes to CR8 are irrelevant to L0, as are
* interrupt window vmexits.
* If L2 is active and V_INTR_MASKING is enabled in vmcb12,
* disable intercept of CR8 writes as L2's CR8 does not affect
* any interrupt KVM may want to inject.
*
* Similarly, disable intercept of virtual interrupts (used to
* detect interrupt windows) if the saved RFLAGS.IF is '0', as
* the effective RFLAGS.IF for L1 interrupts will never be set
* while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
*/
vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
vmcb_clr_intercept(c, INTERCEPT_VINTR);
if (!(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF))
vmcb_clr_intercept(c, INTERCEPT_VINTR);
}
/*
@ -276,6 +281,11 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
return false;
if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
!vmcb12_is_intercept(control, INTERCEPT_NMI))) {
return false;
}
return true;
}
@ -416,22 +426,24 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
/* Only a few fields of int_ctl are written by the processor. */
mask = V_IRQ_MASK | V_TPR_MASK;
if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
svm_is_intercept(svm, INTERCEPT_VINTR)) {
/*
* In order to request an interrupt window, L0 is usurping
* svm->vmcb->control.int_ctl and possibly setting V_IRQ
* even if it was clear in L1's VMCB. Restoring it would be
* wrong. However, in this case V_IRQ will remain true until
* interrupt_window_interception calls svm_clear_vintr and
* restores int_ctl. We can just leave it aside.
*/
/*
* Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
* virtual interrupts in order to request an interrupt window, as KVM
* has usurped vmcb02's int_ctl. If an interrupt window opens before
* the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
* If no window opens, V_IRQ will be correctly preserved in vmcb12's
* int_ctl (because it was never recognized while L2 was running).
*/
if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
!test_bit(INTERCEPT_VINTR, (unsigned long *)svm->nested.ctl.intercepts))
mask &= ~V_IRQ_MASK;
}
if (nested_vgif_enabled(svm))
mask |= V_GIF_MASK;
if (nested_vnmi_enabled(svm))
mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK;
svm->nested.ctl.int_ctl &= ~mask;
svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
}
@ -651,6 +663,17 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
else
int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
if (vnmi) {
if (vmcb01->control.int_ctl & V_NMI_PENDING_MASK) {
svm->vcpu.arch.nmi_pending++;
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
}
if (nested_vnmi_enabled(svm))
int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
V_NMI_ENABLE_MASK |
V_NMI_BLOCKING_MASK);
}
/* Copied from vmcb01. msrpm_base can be overwritten later. */
vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
@ -1021,6 +1044,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm_switch_vmcb(svm, &svm->vmcb01);
/*
* Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
*
* V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR: If L1 doesn't
* intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
* flags) to detect interrupt windows for L1 IRQs (even if L1 uses
* virtual interrupt masking). Raise KVM_REQ_EVENT to ensure that
* KVM re-requests an interrupt window if necessary, which implicitly
* copies these bits from vmcb02 to vmcb01.
*
* V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
* is stored in vmcb02, but its value doesn't need to be copied from/to
* vmcb01 because it is copied from/to the virtual APIC's TPR register
* on each VM entry/exit.
*
* V_GIF: If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
* V_GIF. However, GIF is architecturally clear on each VM exit, thus
* there is no need to copy V_GIF from vmcb02 to vmcb01.
*/
if (!nested_exit_on_intr(svm))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
svm_copy_lbrs(vmcb12, vmcb02);
svm_update_lbrv(vcpu);
@ -1029,6 +1074,20 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm_update_lbrv(vcpu);
}
if (vnmi) {
if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
vmcb01->control.int_ctl |= V_NMI_BLOCKING_MASK;
else
vmcb01->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
if (vcpu->arch.nmi_pending) {
vcpu->arch.nmi_pending--;
vmcb01->control.int_ctl |= V_NMI_PENDING_MASK;
} else {
vmcb01->control.int_ctl &= ~V_NMI_PENDING_MASK;
}
}
/*
* On vmexit the GIF is set to false and
* no event can be injected in L1.

View File

@ -231,6 +231,8 @@ module_param(dump_invalid_vmcb, bool, 0644);
bool intercept_smi = true;
module_param(intercept_smi, bool, 0444);
bool vnmi = true;
module_param(vnmi, bool, 0444);
static bool svm_gp_erratum_intercept = true;
@ -1312,6 +1314,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (kvm_vcpu_apicv_active(vcpu))
avic_init_vmcb(svm, vmcb);
if (vnmi)
svm->vmcb->control.int_ctl |= V_NMI_ENABLE_MASK;
if (vgif) {
svm_clr_intercept(svm, INTERCEPT_STGI);
svm_clr_intercept(svm, INTERCEPT_CLGI);
@ -1584,6 +1589,16 @@ static void svm_set_vintr(struct vcpu_svm *svm)
svm_set_intercept(svm, INTERCEPT_VINTR);
/*
* Recalculating intercepts may have cleared the VINTR intercept. If
* V_INTR_MASKING is enabled in vmcb12, then the effective RFLAGS.IF
* for L1 physical interrupts is L1's RFLAGS.IF at the time of VMRUN.
* Requesting an interrupt window if save.RFLAGS.IF=0 is pointless as
* interrupts will never be unblocked while L2 is running.
*/
if (!svm_is_intercept(svm, INTERCEPT_VINTR))
return;
/*
* This is just a dummy VINTR to actually cause a vmexit to happen.
* Actual injection of virtual interrupts happens through EVENTINJ.
@ -2481,16 +2496,29 @@ static int task_switch_interception(struct kvm_vcpu *vcpu)
has_error_code, error_code);
}
/*
 * Drop the IRET intercept for @svm's vCPU.  Nothing is done for SEV-ES
 * guests: the intercept is left exactly as it was.
 */
static void svm_clr_iret_intercept(struct vcpu_svm *svm)
{
	if (sev_es_guest(svm->vcpu.kvm))
		return;

	svm_clr_intercept(svm, INTERCEPT_IRET);
}
/*
 * Enable the IRET intercept for @svm's vCPU.  Nothing is done for SEV-ES
 * guests: the intercept is left exactly as it was.
 */
static void svm_set_iret_intercept(struct vcpu_svm *svm)
{
	if (sev_es_guest(svm->vcpu.kvm))
		return;

	svm_set_intercept(svm, INTERCEPT_IRET);
}
static int iret_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
++vcpu->stat.nmi_window_exits;
svm->awaiting_iret_completion = true;
if (!sev_es_guest(vcpu->kvm)) {
svm_clr_intercept(svm, INTERCEPT_IRET);
svm_clr_iret_intercept(svm);
if (!sev_es_guest(vcpu->kvm))
svm->nmi_iret_rip = kvm_rip_read(vcpu);
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
@ -3467,11 +3495,43 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
return;
svm->nmi_masked = true;
if (!sev_es_guest(vcpu->kvm))
svm_set_intercept(svm, INTERCEPT_IRET);
svm_set_iret_intercept(svm);
++vcpu->stat.nmi_injections;
}
static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (!is_vnmi_enabled(svm))
return false;
return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK);
}
/*
 * Try to hand one NMI to hardware by setting V_NMI_PENDING in the active
 * VMCB.  Returns true if the NMI was handed off, false if vNMI is not in
 * use for this vCPU or if a virtual NMI is already pending.
 */
static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* Bail if vNMI is unavailable or the single pending slot is taken. */
	if (!is_vnmi_enabled(svm) ||
	    (svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK))
		return false;

	svm->vmcb->control.int_ctl |= V_NMI_PENDING_MASK;
	vmcb_mark_dirty(svm->vmcb, VMCB_INTR);

	/*
	 * Hardware services the pending NMI on its own, so KVM never observes
	 * the moment of "injection".  Passing the NMI off to hardware is, for
	 * all intents and purposes, the injection, so account for it here.
	 */
	++vcpu->stat.nmi_injections;

	return true;
}
static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -3567,6 +3627,35 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
}
/*
 * Return whether NMIs are currently masked for @vcpu.  With vNMI in use the
 * answer comes from V_NMI_BLOCKING in the active VMCB; otherwise it comes
 * from the software-tracked nmi_masked flag.
 */
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!is_vnmi_enabled(svm))
		return svm->nmi_masked;

	return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK);
}
/*
 * Mask or unmask NMIs for @vcpu.  With vNMI in use only V_NMI_BLOCKING in
 * the active VMCB is updated.  Otherwise the software nmi_masked flag is
 * updated and the IRET intercept is set (masked) or cleared (unmasked) to
 * match.
 */
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (is_vnmi_enabled(svm)) {
		if (masked)
			svm->vmcb->control.int_ctl |= V_NMI_BLOCKING_MASK;
		else
			svm->vmcb->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
		return;
	}

	svm->nmi_masked = masked;

	if (masked)
		svm_set_iret_intercept(svm);
	else
		svm_clr_iret_intercept(svm);
}
bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -3578,8 +3667,10 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
return false;
return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
svm->nmi_masked;
if (svm_get_nmi_mask(vcpu))
return true;
return vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK;
}
static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
@ -3597,26 +3688,6 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return 1;
}
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->nmi_masked;
}
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (masked) {
svm->nmi_masked = true;
if (!sev_es_guest(vcpu->kvm))
svm_set_intercept(svm, INTERCEPT_IRET);
} else {
svm->nmi_masked = false;
if (!sev_es_guest(vcpu->kvm))
svm_clr_intercept(svm, INTERCEPT_IRET);
}
}
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -3697,7 +3768,16 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->nmi_masked && !svm->awaiting_iret_completion)
/*
* KVM should never request an NMI window when vNMI is enabled, as KVM
* allows at most one to-be-injected NMI and one pending NMI, i.e. if
* two NMIs arrive simultaneously, KVM will inject one and set
* V_NMI_PENDING for the other. WARN, but continue with the standard
* single-step approach to try and salvage the pending NMI.
*/
WARN_ON_ONCE(is_vnmi_enabled(svm));
if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
return; /* IRET will cause a vm exit */
if (!gif_set(svm)) {
@ -4135,6 +4215,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
svm_recalc_instruction_intercepts(vcpu, svm);
if (boot_cpu_has(X86_FEATURE_IBPB))
@ -4752,6 +4834,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.patch_hypercall = svm_patch_hypercall,
.inject_irq = svm_inject_irq,
.inject_nmi = svm_inject_nmi,
.is_vnmi_pending = svm_is_vnmi_pending,
.set_vnmi_pending = svm_set_vnmi_pending,
.inject_exception = svm_inject_exception,
.cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
@ -4894,6 +4978,9 @@ static __init void svm_set_cpu_caps(void)
if (vgif)
kvm_cpu_cap_set(X86_FEATURE_VGIF);
if (vnmi)
kvm_cpu_cap_set(X86_FEATURE_VNMI);
/* Nested VM can receive #VMEXIT instead of triggering #GP */
kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
}
@ -5045,6 +5132,16 @@ static __init int svm_hardware_setup(void)
pr_info("Virtual GIF supported\n");
}
vnmi = vgif && vnmi && boot_cpu_has(X86_FEATURE_VNMI);
if (vnmi)
pr_info("Virtual NMI enabled\n");
if (!vnmi) {
svm_x86_ops.is_vnmi_pending = NULL;
svm_x86_ops.set_vnmi_pending = NULL;
}
if (lbrv) {
if (!boot_cpu_has(X86_FEATURE_LBRV))
lbrv = false;

View File

@ -36,6 +36,7 @@ extern bool npt_enabled;
extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
extern bool vnmi;
/*
* Clean bits in VMCB.
@ -265,6 +266,7 @@ struct vcpu_svm {
bool pause_filter_enabled : 1;
bool pause_threshold_enabled : 1;
bool vgif_enabled : 1;
bool vnmi_enabled : 1;
u32 ldr_reg;
u32 dfr_reg;
@ -539,6 +541,12 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
{
return svm->vnmi_enabled &&
(svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
}
static inline bool is_x2apic_msrpm_offset(u32 offset)
{
/* 4 msrs per u8, and 4 u8 in u32 */
@ -548,6 +556,27 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
(msr < (APIC_BASE_MSR + 0x100));
}
/*
 * Return vmcb01 for vNMI purposes, or NULL if the vnmi module parameter is
 * off or the vCPU is currently in guest mode.
 */
static inline struct vmcb *get_vnmi_vmcb_l1(struct vcpu_svm *svm)
{
	if (!vnmi || is_guest_mode(&svm->vcpu))
		return NULL;

	return svm->vmcb01.ptr;
}
/*
 * True if a VMCB is available from get_vnmi_vmcb_l1() and that VMCB has
 * V_NMI_ENABLE set in its int_ctl; false otherwise.
 */
static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
{
	struct vmcb *l1_vmcb = get_vnmi_vmcb_l1(svm);

	if (!l1_vmcb)
		return false;

	return !!(l1_vmcb->control.int_ctl & V_NMI_ENABLE_MASK);
}
/* svm.c */
#define MSR_INVALID 0xffffffffU

View File

@ -5185,7 +5185,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending != 0;
events->nmi.pending = kvm_get_nr_pending_nmis(vcpu);
events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
/* events->sipi_vector is never valid when reporting to user space */
@ -5272,8 +5272,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
events->interrupt.shadow);
vcpu->arch.nmi_injected = events->nmi.injected;
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
vcpu->arch.nmi_pending = events->nmi.pending;
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
vcpu->arch.nmi_pending = 0;
atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
kvm_make_request(KVM_REQ_NMI, vcpu);
}
static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
@ -10210,19 +10213,46 @@ out:
static void process_nmi(struct kvm_vcpu *vcpu)
{
unsigned limit = 2;
unsigned int limit;
/*
* x86 is limited to one NMI running, and one NMI pending after it.
* If an NMI is already in progress, limit further NMIs to just one.
* Otherwise, allow two (and we'll inject the first one immediately).
* x86 is limited to one NMI pending, but because KVM can't react to
* incoming NMIs as quickly as bare metal, e.g. if the vCPU is
* scheduled out, KVM needs to play nice with two queued NMIs showing
* up at the same time. To handle this scenario, allow two NMIs to be
* (temporarily) pending so long as NMIs are not blocked and KVM is not
* waiting for a previous NMI injection to complete (which effectively
* blocks NMIs). KVM will immediately inject one of the two NMIs, and
* will request an NMI window to handle the second NMI.
*/
if (static_call(kvm_x86_get_nmi_mask)(vcpu) || vcpu->arch.nmi_injected)
limit = 1;
else
limit = 2;
/*
* Adjust the limit to account for pending virtual NMIs, which aren't
* tracked in vcpu->arch.nmi_pending.
*/
if (static_call(kvm_x86_is_vnmi_pending)(vcpu))
limit--;
vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
kvm_make_request(KVM_REQ_EVENT, vcpu);
if (vcpu->arch.nmi_pending &&
(static_call(kvm_x86_set_vnmi_pending)(vcpu)))
vcpu->arch.nmi_pending--;
if (vcpu->arch.nmi_pending)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
/*
 * Return the total number of NMIs pending injection to the VM: the NMIs
 * tracked in vcpu->arch.nmi_pending plus one more if the vendor code
 * reports a virtual NMI pending.
 */
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu)
{
	unsigned int nr_pending = vcpu->arch.nmi_pending;

	if (static_call(kvm_x86_is_vnmi_pending)(vcpu))
		nr_pending++;

	return nr_pending;
}
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,