2020-09-24 02:31:11 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef __KVM_X86_VMX_POSTED_INTR_H
|
|
|
|
#define __KVM_X86_VMX_POSTED_INTR_H
|
|
|
|
|
|
|
|
/*
 * Bit indices into pi_desc.control, used by the pi_*() bit helpers below.
 * ON is the Outstanding Notification flag, SN the Suppress Notification flag.
 */
#define POSTED_INTR_ON	0
#define POSTED_INTR_SN	1
|
|
|
|
|
|
|
|
/* Posted-Interrupt Descriptor */
|
|
|
|
struct pi_desc {
|
|
|
|
u32 pir[8]; /* Posted interrupt requested */
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
/* bit 256 - Outstanding Notification */
|
|
|
|
u16 on : 1,
|
|
|
|
/* bit 257 - Suppress Notification */
|
|
|
|
sn : 1,
|
|
|
|
/* bit 271:258 - Reserved */
|
|
|
|
rsvd_1 : 14;
|
|
|
|
/* bit 279:272 - Notification Vector */
|
|
|
|
u8 nv;
|
|
|
|
/* bit 287:280 - Reserved */
|
|
|
|
u8 rsvd_2;
|
|
|
|
/* bit 319:288 - Notification Destination */
|
|
|
|
u32 ndst;
|
|
|
|
};
|
|
|
|
u64 control;
|
|
|
|
};
|
|
|
|
u32 rsvd[6];
|
|
|
|
} __aligned(64);
|
|
|
|
|
|
|
|
static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
|
|
|
|
{
|
|
|
|
return test_and_set_bit(POSTED_INTR_ON,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
|
|
|
|
{
|
|
|
|
return test_and_clear_bit(POSTED_INTR_ON,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
KVM: VMX: Handle PI descriptor updates during vcpu_put/load
Move the posted interrupt pre/post_block logic into vcpu_put/load
respectively, using kvm_vcpu_is_blocking() to determine whether or
not the wakeup handler needs to be set (and unset). This avoids updating
the PI descriptor if halt-polling is successful, reduces the number of
touchpoints for updating the descriptor, and eliminates the confusing
behavior of intentionally leaving a "stale" PI.NDST when a blocking vCPU
is scheduled back in after preemption.
The downside is that KVM will do the PID update twice if the vCPU is
preempted after prepare_to_rcuwait() but before schedule(), but that's a
rare case (and non-existent on !PREEMPT kernels).
The notable wart is the need to send a self-IPI on the wakeup vector if
an outstanding notification is pending after configuring the wakeup
vector. Ideally, KVM would just do a kvm_vcpu_wake_up() in this case,
but the scheduler doesn't support waking a task from its preemption
notifier callback, i.e. while the task is right in the middle of
being scheduled out.
Note, setting the wakeup vector before halt-polling is not necessary:
once the pending IRQ is recorded in the PIR, kvm_vcpu_has_events()
will detect this (via kvm_cpu_get_interrupt(), kvm_apic_get_interrupt(),
apic_has_interrupt_for_ppr() and finally vmx_sync_pir_to_irr()) and
terminate the polling.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20211208015236.1616697-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-12-08 09:52:14 +08:00
|
|
|
static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
|
|
|
|
{
|
|
|
|
return test_and_clear_bit(POSTED_INTR_SN,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
2021-10-09 10:12:16 +08:00
|
|
|
static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
|
2020-09-24 02:31:11 +08:00
|
|
|
{
|
|
|
|
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
|
|
|
|
{
|
|
|
|
return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void pi_set_sn(struct pi_desc *pi_desc)
|
|
|
|
{
|
|
|
|
set_bit(POSTED_INTR_SN,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void pi_set_on(struct pi_desc *pi_desc)
|
|
|
|
{
|
|
|
|
set_bit(POSTED_INTR_ON,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void pi_clear_on(struct pi_desc *pi_desc)
|
|
|
|
{
|
|
|
|
clear_bit(POSTED_INTR_ON,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void pi_clear_sn(struct pi_desc *pi_desc)
|
|
|
|
{
|
|
|
|
clear_bit(POSTED_INTR_SN,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
2021-10-09 10:12:16 +08:00
|
|
|
static inline bool pi_test_on(struct pi_desc *pi_desc)
|
2020-09-24 02:31:11 +08:00
|
|
|
{
|
|
|
|
return test_bit(POSTED_INTR_ON,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
2021-10-09 10:12:16 +08:00
|
|
|
static inline bool pi_test_sn(struct pi_desc *pi_desc)
|
2020-09-24 02:31:11 +08:00
|
|
|
{
|
|
|
|
return test_bit(POSTED_INTR_SN,
|
|
|
|
(unsigned long *)&pi_desc->control);
|
|
|
|
}
|
|
|
|
|
|
|
|
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
|
|
|
|
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
|
|
|
|
void pi_wakeup_handler(void);
|
2020-10-24 16:08:37 +08:00
|
|
|
void __init pi_init_cpu(int cpu);
|
2020-09-24 02:31:11 +08:00
|
|
|
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
|
|
|
|
int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
|
|
|
|
bool set);
|
KVM: VMX: update vcpu posted-interrupt descriptor when assigning device
For VMX, when a vcpu enters HLT emulation, pi_post_block will:
1) Add vcpu to per-cpu list of blocked vcpus.
2) Program the posted-interrupt descriptor "notification vector"
to POSTED_INTR_WAKEUP_VECTOR
With interrupt remapping, an interrupt will set the PIR bit for the
vector programmed for the device on the CPU, test-and-set the
ON bit on the posted interrupt descriptor, and if the ON bit is clear
generate an interrupt for the notification vector.
This way, the target CPU wakes upon a device interrupt and wakes up
the target vcpu.
Problem is that pi_post_block only programs the notification vector
if kvm_arch_has_assigned_device() is true. It's possible for the
following to happen:
1) vcpu V HLTs on pcpu P, kvm_arch_has_assigned_device is false,
notification vector is not programmed
2) device is assigned to VM
3) device interrupts vcpu V, sets ON bit
(notification vector not programmed, so pcpu P remains in idle)
4) vcpu 0 IPIs vcpu V (in guest), but since pi descriptor ON bit is set,
kvm_vcpu_kick is skipped
5) vcpu 0 busy spins on vcpu V's response for several seconds, until
RCU watchdog NMIs all vCPUs.
To fix this, use the start_assignment kvm_x86_ops callback to kick
vcpus out of the halt loop, so the notification vector is
properly reprogrammed to the wakeup vector.
Reported-by: Pei Zhang <pezhang@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Message-Id: <20210526172014.GA29007@fuller.cnet>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-05-27 01:20:14 +08:00
|
|
|
/*
 * Declared here, defined elsewhere.  Per the change that introduced it, this
 * is wired up as the start_assignment kvm_x86_ops callback and kicks vCPUs
 * out of the halt loop so the posted-interrupt notification vector gets
 * reprogrammed to the wakeup vector once a device is assigned to the VM.
 */
void vmx_pi_start_assignment(struct kvm *kvm);
|
2020-09-24 02:31:11 +08:00
|
|
|
|
2020-10-24 16:08:37 +08:00
|
|
|
#endif /* __KVM_X86_VMX_POSTED_INTR_H */
|