Trimmed second batch of KVM changes for Linux 4.15

* GICv4 Support for KVM/ARM
 
 All ARM patches were in next-20171113.  I have postponed most x86 fixes
 to 4.15-rc2 and UMIP to 4.16, but there are fixes that would be good to
 have already in 4.15-rc1:
 
 * re-introduce support for CPUs without virtual NMI (cc stable)
   and allow testing of KVM without virtual NMI on available CPUs
 
 * fix long-standing performance issues with assigned devices on AMD
   (cc stable)
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABCAAGBQJaGECGAAoJEED/6hsPKofoT08H/AuaMi8qprw2BNpVBbQxWRWM
 O4WPk7yz1zB4SkdRNrPzCMBy+qoK7FcV/3BpsFPuQS4NHQ+GvQ87N/7tUbouVyl6
 CuPGJMCnNzMQ8GvLOJgB1/sz+uW5W/ph3y8kv1UP3/hNCZU4fqukoUeLroOH/wr6
 N3bSY8bok7ycdpgybHmbUHY0Yk4IUk3m0RXWY9U5Jl3sjoNEwCw3pWdrq9Swfs/6
 W8QJRdE4Z6KHPqW5sRnPj24IpoUpCxu+IT+gPuGlDUCN/h3sfhYvMS6GgDrCjiiZ
 2z1TwaIAo+wGjlBQzGmyTUjUPjbGew+f3ixBlf2BtmNutX+tX2qsVfl1NKXYTto=
 =GGge
 -----END PGP SIGNATURE-----

Merge tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Radim Krčmář:
 "Trimmed second batch of KVM changes for Linux 4.15:

   - GICv4 Support for KVM/ARM

   - re-introduce support for CPUs without virtual NMI (cc stable) and
     allow testing of KVM without virtual NMI on available CPUs

   - fix long-standing performance issues with assigned devices on AMD
     (cc stable)"

* tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  kvm: vmx: Allow disabling virtual NMI support
  kvm: vmx: Reinstate support for CPUs without virtual NMI
  KVM: SVM: obey guest PAT
  KVM: arm/arm64: Don't queue VLPIs on INV/INVALL
  KVM: arm/arm64: Fix GICv4 ITS initialization issues
  KVM: arm/arm64: GICv4: Theory of operations
  KVM: arm/arm64: GICv4: Enable VLPI support
  KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity
  KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved
  KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
  KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
  KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source
  KVM: arm/arm64: GICv4: Add doorbell interrupt handling
  KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
  KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
  KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
  KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
  KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
  KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS
  KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
  ...
This commit is contained in:
Linus Torvalds 2017-11-24 19:44:25 -10:00
commit 7753ea0964
19 changed files with 819 additions and 158 deletions

View File

@ -1890,6 +1890,10 @@
[KVM,ARM] Trap guest accesses to GICv3 common
system registers
kvm-arm.vgic_v4_enable=
[KVM,ARM] Allow use of GICv4 for direct injection of
LPIs.
kvm-intel.ept= [KVM,Intel] Disable extended page tables
(virtualized MMU) support on capable Intel chips.
Default is 1 (enabled)

View File

@ -64,6 +64,8 @@ Groups:
-EINVAL: Inconsistent restored data
-EFAULT: Invalid guest ram access
-EBUSY: One or more VCPUS are running
-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
state is not available
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
Attributes:

View File

@ -4,6 +4,7 @@
#
source "virt/kvm/Kconfig"
source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@ -23,6 +24,8 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select ARM_GIC
select ARM_GIC_V3
select ARM_GIC_V3_ITS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
@ -36,6 +39,8 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
Support hosting virtualized guest machines.

View File

@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o
obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
obj-y += $(KVM)/arm/vgic/vgic-v2.o
obj-y += $(KVM)/arm/vgic/vgic-v3.o
obj-y += $(KVM)/arm/vgic/vgic-v4.o
obj-y += $(KVM)/arm/vgic/vgic-mmio.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o

View File

@ -4,6 +4,7 @@
#
source "virt/kvm/Kconfig"
source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@ -36,6 +37,8 @@ config KVM
select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple

View File

@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o

View File

@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
case MSR_IA32_CR_PAT:
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
vcpu->arch.pat = data;
svm->vmcb->save.g_pat = data;
mark_dirty(svm->vmcb, VMCB_NPT);
break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;

View File

@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
static bool __read_mostly enable_vnmi = 1;
module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
@ -202,6 +205,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
s64 vnmi_blocked_time;
struct list_head loaded_vmcss_on_cpu_link;
};
@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
/*
 * Tell whether PIN_BASED_VIRTUAL_NMIS is set in the pin-based execution
 * controls recorded in vmcs_config.
 */
static inline bool cpu_has_virtual_nmis(void)
{
	bool vnmi_available =
		vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;

	return vnmi_available;
}
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit);
}
static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control &
@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS;
opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
if (!enable_vnmi)
pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
/* Enable the preemption timer dynamically */
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
if (!enable_vnmi ||
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!enable_vnmi) {
/*
* Tracking the NMI-blocked state in software is built upon
* finding the next open IRQ window. This, in turn, depends on
* well-behaving guests: They have to keep IRQs disabled at
* least as long as the NMI handler runs. Otherwise we may
* cause NMI nesting, maybe breaking the guest. But as this is
* highly unlikely, we can live with the residual risk.
*/
vmx->loaded_vmcs->soft_vnmi_blocked = 1;
vmx->loaded_vmcs->vnmi_blocked_time = 0;
}
++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false;
@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked;
if (!enable_vnmi)
return vmx->loaded_vmcs->soft_vnmi_blocked;
if (vmx->loaded_vmcs->nmi_known_unmasked)
return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
vmx->loaded_vmcs->nmi_known_unmasked = !masked;
if (masked)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
if (!enable_vnmi) {
if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
vmx->loaded_vmcs->soft_vnmi_blocked = masked;
vmx->loaded_vmcs->vnmi_blocked_time = 0;
}
} else {
vmx->loaded_vmcs->nmi_known_unmasked = !masked;
if (masked)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
}
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
if (!enable_vnmi &&
to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
return 0;
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!enable_vnmi);
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_flexpriority())
flexpriority_enabled = 0;
if (!cpu_has_virtual_nmis())
enable_vnmi = 0;
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
}
/* Create a new VMCS */
item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
if (unlikely(!enable_vnmi &&
vmx->loaded_vmcs->soft_vnmi_blocked)) {
if (vmx_interrupt_allowed(vcpu)) {
vmx->loaded_vmcs->soft_vnmi_blocked = 0;
} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
vcpu->arch.nmi_pending) {
/*
* This CPU doesn't support us in finding the end of an
* NMI-blocked window if the guest runs with IRQs
* disabled. So we pull the trigger after 1 s of
* futile waiting, but inform the user about this.
*/
printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
"state on VCPU %d after 1 s timeout\n",
__func__, vcpu->vcpu_id);
vmx->loaded_vmcs->soft_vnmi_blocked = 0;
}
}
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
if (vmx->loaded_vmcs->nmi_known_unmasked)
return;
/*
* Can't use vmx->exit_intr_info since we're not sure what
* the exit reason is.
*/
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
* SDM 3: 27.7.1.2 (September 2008)
* Re-set bit "block by NMI" before VM entry if vmexit caused by
* a guest IRET fault.
* SDM 3: 23.2.2 (September 2008)
* Bit 12 is undefined in any of the following cases:
* If the VM exit sets the valid bit in the IDT-vectoring
* information field.
* If the VM exit is due to a double fault.
*/
if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
vector != DF_VECTOR && !idtv_info_valid)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmx->loaded_vmcs->nmi_known_unmasked =
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
& GUEST_INTR_STATE_NMI);
if (enable_vnmi) {
if (vmx->loaded_vmcs->nmi_known_unmasked)
return;
/*
* Can't use vmx->exit_intr_info since we're not sure what
* the exit reason is.
*/
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
* SDM 3: 27.7.1.2 (September 2008)
* Re-set bit "block by NMI" before VM entry if vmexit caused by
* a guest IRET fault.
* SDM 3: 23.2.2 (September 2008)
* Bit 12 is undefined in any of the following cases:
* If the VM exit sets the valid bit in the IDT-vectoring
* information field.
* If the VM exit is due to a double fault.
*/
if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
vector != DF_VECTOR && !idtv_info_valid)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmx->loaded_vmcs->nmi_known_unmasked =
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
& GUEST_INTR_STATE_NMI);
} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
vmx->loaded_vmcs->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(),
vmx->loaded_vmcs->entry_time));
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
vmx->loaded_vmcs->soft_vnmi_blocked))
vmx->loaded_vmcs->entry_time = ktime_get();
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)

View File

@ -26,6 +26,8 @@
#include <linux/list.h>
#include <linux/jump_label.h>
#include <linux/irqchip/arm-gic-v4.h>
#define VGIC_V3_MAX_CPUS 255
#define VGIC_V2_MAX_CPUS 8
#define VGIC_NR_IRQS_LEGACY 256
@ -73,6 +75,9 @@ struct vgic_global {
/* Only needed for the legacy KVM_CREATE_IRQCHIP */
bool can_emulate_gicv2;
/* Hardware has GICv4? */
bool has_gicv4;
/* GIC system register CPU interface */
struct static_key_false gicv3_cpuif;
@ -116,6 +121,7 @@ struct vgic_irq {
bool hw; /* Tied to HW IRQ */
struct kref refcount; /* Used for LPIs */
u32 hwintid; /* HW INTID number */
unsigned int host_irq; /* linux irq corresponding to hwintid */
union {
u8 targets; /* GICv2 target VCPUs mask */
u32 mpidr; /* GICv3 target VCPU */
@ -232,6 +238,15 @@ struct vgic_dist {
/* used by vgic-debug */
struct vgic_state_iter *iter;
/*
* GICv4 ITS per-VM data, containing the IRQ domain, the VPE
* array, the property table pointer as well as allocation
* data. This essentially ties the Linux IRQ core and ITS
* together, and avoids leaking KVM's data structures anywhere
* else.
*/
struct its_vm its_vm;
};
struct vgic_v2_cpu_if {
@ -250,6 +265,14 @@ struct vgic_v3_cpu_if {
u32 vgic_ap0r[4];
u32 vgic_ap1r[4];
u64 vgic_lr[VGIC_V3_MAX_LRS];
/*
* GICv4 ITS per-VPE data, containing the doorbell IRQ, the
* pending table pointer, the its_vm pointer and a few other
* HW specific things. As for the its_vm structure, this is
* linking the Linux IRQ subsystem and the ITS together.
*/
struct its_vpe its_vpe;
};
struct vgic_cpu {
@ -307,9 +330,10 @@ void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level, void *owner);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
u32 vintid);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
@ -349,4 +373,15 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner);
struct kvm_kernel_irq_routing_entry;
int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
struct kvm_kernel_irq_routing_entry *irq_entry);
int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
struct kvm_kernel_irq_routing_entry *irq_entry);
void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
#endif /* __KVM_ARM_VGIC_H */

View File

@ -817,9 +817,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct irq_desc *desc;
struct irq_data *data;
int phys_irq;
int ret;
if (timer->enabled)
@ -837,26 +834,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return -EINVAL;
}
/*
* Find the physical IRQ number corresponding to the host_vtimer_irq
*/
desc = irq_to_desc(host_vtimer_irq);
if (!desc) {
kvm_err("%s: no interrupt descriptor\n", __func__);
return -EINVAL;
}
data = irq_desc_get_irq_data(desc);
while (data->parent_data)
data = data->parent_data;
phys_irq = data->hwirq;
/*
* Tell the VGIC that the virtual interrupt is tied to a
* physical interrupt. We do that once per VCPU.
*/
ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq);
ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq);
if (ret)
return ret;

View File

@ -27,6 +27,8 @@
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>
#include <kvm/arm_pmu.h>
@ -175,6 +177,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
{
int i;
kvm_vgic_destroy(kvm);
free_percpu(kvm->arch.last_vcpu_ran);
kvm->arch.last_vcpu_ran = NULL;
@ -184,8 +188,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
}
}
kvm_vgic_destroy(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@ -313,11 +315,13 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
/*
 * Hook for a vcpu that is about to block (going by the function name).
 * Calls kvm_timer_schedule() for the vcpu, then enables its GICv4
 * doorbell via kvm_vgic_v4_enable_doorbell().
 * NOTE(review): presumably the doorbell lets a pending VLPI wake the
 * blocked vcpu -- confirm against kvm_vgic_v4_enable_doorbell().
 */
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
kvm_timer_schedule(vcpu);
kvm_vgic_v4_enable_doorbell(vcpu);
}
/*
 * Hook for a vcpu that stops blocking (going by the function name).
 * Calls kvm_timer_unschedule() for the vcpu, then disables its GICv4
 * doorbell via kvm_vgic_v4_disable_doorbell().
 */
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
kvm_timer_unschedule(vcpu);
kvm_vgic_v4_disable_doorbell(vcpu);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@ -1450,6 +1454,46 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
return NULL;
}
/* Unconditionally reports that IRQ bypass is available. */
bool kvm_arch_has_irq_bypass(void)
{
return true;
}
/*
 * IRQ bypass callback: a producer is being attached to @cons.
 *
 * Recovers the kvm_kernel_irqfd embedding @cons and asks the vgic-v4 code
 * to set up forwarding of the producer's IRQ for that irqfd's routing
 * entry. Returns whatever kvm_vgic_v4_set_forwarding() returns.
 */
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *owner;

	owner = container_of(cons, struct kvm_kernel_irqfd, consumer);

	return kvm_vgic_v4_set_forwarding(owner->kvm, prod->irq,
					  &owner->irq_entry);
}
/*
 * IRQ bypass callback: a producer is being detached from @cons.
 *
 * Recovers the kvm_kernel_irqfd embedding @cons and asks the vgic-v4 code
 * to tear down forwarding of the producer's IRQ. The result of
 * kvm_vgic_v4_unset_forwarding() is not propagated (void callback).
 */
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *owner;

	owner = container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_vgic_v4_unset_forwarding(owner->kvm, prod->irq,
				     &owner->irq_entry);
}
/*
 * IRQ bypass "stop" callback: halts the guest that owns the irqfd
 * embedding @cons by calling kvm_arm_halt_guest().
 */
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *owner;

	owner = container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_halt_guest(owner->kvm);
}
/*
 * IRQ bypass "start" callback: resumes the guest that owns the irqfd
 * embedding @cons by calling kvm_arm_resume_guest().
 */
void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *owner;

	owner = container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_resume_guest(owner->kvm);
}
/**
* Initialize Hyp-mode and memory mappings on all CPUs.
*/

View File

@ -258,7 +258,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
}
} else {
if (static_branch_unlikely(&vgic_v3_cpuif_trap))
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
cpu_if->its_vpe.its_vm)
write_gicreg(0, ICH_HCR_EL2);
cpu_if->vgic_elrsr = 0xffff;
@ -337,9 +338,11 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
/*
* If we need to trap system registers, we must write
* ICH_HCR_EL2 anyway, even if no interrupts are being
* injected,
* injected. Same thing if GICv4 is used, as VLPI
* delivery is gated by ICH_HCR_EL2.En.
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap))
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
cpu_if->its_vpe.its_vm)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}

View File

@ -285,6 +285,10 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
ret = vgic_v4_init(kvm);
if (ret)
goto out;
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
@ -320,6 +324,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
kfree(dist->spis);
dist->nr_spis = 0;
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
}
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)

View File

@ -38,7 +38,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its);
static int vgic_its_restore_tables_v0(struct vgic_its *its);
static int vgic_its_commit_v0(struct vgic_its *its);
static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
struct kvm_vcpu *filter_vcpu);
struct kvm_vcpu *filter_vcpu, bool needs_inv);
/*
* Creates a new (reference to a) struct vgic_irq for a given LPI.
@ -106,7 +106,7 @@ out_unlock:
* However we only have those structs for mapped IRQs, so we read in
* the respective config data from memory here upon mapping the LPI.
*/
ret = update_lpi_config(kvm, irq, NULL);
ret = update_lpi_config(kvm, irq, NULL, false);
if (ret)
return ERR_PTR(ret);
@ -273,7 +273,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
* VCPU. Unconditionally applies if filter_vcpu is NULL.
*/
static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
struct kvm_vcpu *filter_vcpu)
struct kvm_vcpu *filter_vcpu, bool needs_inv)
{
u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
u8 prop;
@ -292,11 +292,17 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
irq->priority = LPI_PROP_PRIORITY(prop);
irq->enabled = LPI_PROP_ENABLE_BIT(prop);
vgic_queue_irq_unlock(kvm, irq, flags);
} else {
spin_unlock_irqrestore(&irq->irq_lock, flags);
if (!irq->hw) {
vgic_queue_irq_unlock(kvm, irq, flags);
return 0;
}
}
spin_unlock_irqrestore(&irq->irq_lock, flags);
if (irq->hw)
return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
return 0;
}
@ -336,6 +342,29 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
return i;
}
static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
{
int ret = 0;
spin_lock(&irq->irq_lock);
irq->target_vcpu = vcpu;
spin_unlock(&irq->irq_lock);
if (irq->hw) {
struct its_vlpi_map map;
ret = its_get_vlpi(irq->host_irq, &map);
if (ret)
return ret;
map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
ret = its_map_vlpi(irq->host_irq, &map);
}
return ret;
}
/*
* Promotes the ITS view of affinity of an ITTE (which redistributor this LPI
* is targeting) to the VGIC's view, which deals with target VCPUs.
@ -350,10 +379,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
return;
vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
spin_lock(&ite->irq->irq_lock);
ite->irq->target_vcpu = vcpu;
spin_unlock(&ite->irq->irq_lock);
update_affinity(ite->irq, vcpu);
}
/*
@ -505,19 +531,11 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
return 0;
}
/*
* Find the target VCPU and the LPI number for a given devid/eventid pair
* and make this IRQ pending, possibly injecting it.
* Must be called with the its_lock mutex held.
* Returns 0 on success, a positive error value for any ITS mapping
* related errors and negative error values for generic errors.
*/
static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid)
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq)
{
struct kvm_vcpu *vcpu;
struct its_ite *ite;
unsigned long flags;
if (!its->enabled)
return -EBUSY;
@ -533,26 +551,65 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
if (!vcpu->arch.vgic_cpu.lpis_enabled)
return -EBUSY;
spin_lock_irqsave(&ite->irq->irq_lock, flags);
ite->irq->pending_latch = true;
vgic_queue_irq_unlock(kvm, ite->irq, flags);
*irq = ite->irq;
return 0;
}
static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
{
u64 address;
struct kvm_io_device *kvm_io_dev;
struct vgic_io_device *iodev;
if (dev->ops != &kvm_io_gic_ops)
return NULL;
if (!vgic_has_its(kvm))
return ERR_PTR(-ENODEV);
iodev = container_of(dev, struct vgic_io_device, dev);
if (!(msi->flags & KVM_MSI_VALID_DEVID))
return ERR_PTR(-EINVAL);
address = (u64)msi->address_hi << 32 | msi->address_lo;
kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
if (!kvm_io_dev)
return ERR_PTR(-EINVAL);
if (kvm_io_dev->ops != &kvm_io_gic_ops)
return ERR_PTR(-EINVAL);
iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
if (iodev->iodev_type != IODEV_ITS)
return NULL;
return ERR_PTR(-EINVAL);
return iodev;
return iodev->its;
}
/*
 * Resolve a devid/eventid pair to its LPI and make that interrupt pending.
 *
 * For an LPI flagged as hardware-backed (irq->hw) the pending state is set
 * on the host interrupt through irq_set_irqchip_state(); otherwise the
 * pending latch is set and the interrupt is queued via
 * vgic_queue_irq_unlock().
 *
 * Must be called with the its_lock mutex held.
 * Returns 0 on success, a positive error value for any ITS mapping
 * related errors and negative error values for generic errors.
 */
static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
				u32 devid, u32 eventid)
{
	struct vgic_irq *lpi = NULL;
	unsigned long irqflags;
	int ret;

	ret = vgic_its_resolve_lpi(kvm, its, devid, eventid, &lpi);
	if (ret)
		return ret;

	if (!lpi->hw) {
		spin_lock_irqsave(&lpi->irq_lock, irqflags);
		lpi->pending_latch = true;
		/* Drops lpi->irq_lock, as the helper's name indicates. */
		vgic_queue_irq_unlock(kvm, lpi, irqflags);
		return 0;
	}

	return irq_set_irqchip_state(lpi->host_irq,
				     IRQCHIP_STATE_PENDING, true);
}
/*
@ -563,30 +620,16 @@ static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
*/
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
{
u64 address;
struct kvm_io_device *kvm_io_dev;
struct vgic_io_device *iodev;
struct vgic_its *its;
int ret;
if (!vgic_has_its(kvm))
return -ENODEV;
its = vgic_msi_to_its(kvm, msi);
if (IS_ERR(its))
return PTR_ERR(its);
if (!(msi->flags & KVM_MSI_VALID_DEVID))
return -EINVAL;
address = (u64)msi->address_hi << 32 | msi->address_lo;
kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
if (!kvm_io_dev)
return -EINVAL;
iodev = vgic_get_its_iodev(kvm_io_dev);
if (!iodev)
return -EINVAL;
mutex_lock(&iodev->its->its_lock);
ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
mutex_unlock(&iodev->its->its_lock);
mutex_lock(&its->its_lock);
ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data);
mutex_unlock(&its->its_lock);
if (ret < 0)
return ret;
@ -608,8 +651,12 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
list_del(&ite->ite_list);
/* This put matches the get in vgic_add_lpi. */
if (ite->irq)
if (ite->irq) {
if (ite->irq->hw)
WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
vgic_put_irq(kvm, ite->irq);
}
kfree(ite);
}
@ -683,11 +730,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
ite->collection = collection;
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
spin_lock(&ite->irq->irq_lock);
ite->irq->target_vcpu = vcpu;
spin_unlock(&ite->irq->irq_lock);
return 0;
return update_affinity(ite->irq, vcpu);
}
/*
@ -1054,6 +1097,10 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
ite->irq->pending_latch = false;
if (ite->irq->hw)
return irq_set_irqchip_state(ite->irq->host_irq,
IRQCHIP_STATE_PENDING, false);
return 0;
}
@ -1073,7 +1120,7 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
if (!ite)
return E_ITS_INV_UNMAPPED_INTERRUPT;
return update_lpi_config(kvm, ite->irq, NULL);
return update_lpi_config(kvm, ite->irq, NULL, true);
}
/*
@ -1108,12 +1155,15 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
irq = vgic_get_irq(kvm, NULL, intids[i]);
if (!irq)
continue;
update_lpi_config(kvm, irq, vcpu);
update_lpi_config(kvm, irq, vcpu, false);
vgic_put_irq(kvm, irq);
}
kfree(intids);
if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
return 0;
}
@ -1128,11 +1178,12 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
struct vgic_dist *dist = &kvm->arch.vgic;
u32 target1_addr = its_cmd_get_target_addr(its_cmd);
u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
struct kvm_vcpu *vcpu1, *vcpu2;
struct vgic_irq *irq;
u32 *intids;
int irq_count, i;
if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
target2_addr >= atomic_read(&kvm->online_vcpus))
@ -1144,19 +1195,19 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
vcpu1 = kvm_get_vcpu(kvm, target1_addr);
vcpu2 = kvm_get_vcpu(kvm, target2_addr);
spin_lock(&dist->lpi_list_lock);
irq_count = vgic_copy_lpi_list(vcpu1, &intids);
if (irq_count < 0)
return irq_count;
list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
spin_lock(&irq->irq_lock);
for (i = 0; i < irq_count; i++) {
irq = vgic_get_irq(kvm, NULL, intids[i]);
if (irq->target_vcpu == vcpu1)
irq->target_vcpu = vcpu2;
update_affinity(irq, vcpu2);
spin_unlock(&irq->irq_lock);
vgic_put_irq(kvm, irq);
}
spin_unlock(&dist->lpi_list_lock);
kfree(intids);
return 0;
}
@ -1634,6 +1685,14 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
if (!its)
return -ENOMEM;
if (vgic_initialized(dev->kvm)) {
int ret = vgic_v4_init(dev->kvm);
if (ret < 0) {
kfree(its);
return ret;
}
}
mutex_init(&its->its_lock);
mutex_init(&its->cmd_lock);
@ -1946,6 +2005,15 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
list_for_each_entry(ite, &device->itt_head, ite_list) {
gpa_t gpa = base + ite->event_id * ite_esz;
/*
* If an LPI carries the HW bit, this means that this
* interrupt is controlled by GICv4, and we do not
* have direct access to that state. Let's simply fail
* the save operation...
*/
if (ite->irq->hw)
return -EACCES;
ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);
if (ret)
return ret;

View File

@ -54,6 +54,11 @@ bool vgic_has_its(struct kvm *kvm)
return dist->has_its;
}
bool vgic_supports_direct_msis(struct kvm *kvm)
{
return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
}
static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{

View File

@ -24,6 +24,7 @@
static bool group0_trap;
static bool group1_trap;
static bool common_trap;
static bool gicv4_enable;
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
@ -461,6 +462,12 @@ static int __init early_common_trap_cfg(char *buf)
}
early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);
/*
 * Parse the "kvm-arm.vgic_v4_enable" early parameter into gicv4_enable.
 * The result of strtobool() is handed back unchanged.
 */
static int __init early_gicv4_enable(char *buf)
{
	int err;

	err = strtobool(buf, &gicv4_enable);

	return err;
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
/**
* vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
* @node: pointer to the DT node
@ -480,6 +487,13 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
kvm_vgic_global_state.can_emulate_gicv2 = false;
kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;
/* GICv4 support? */
if (info->has_v4) {
kvm_vgic_global_state.has_gicv4 = gicv4_enable;
kvm_info("GICv4 support %sabled\n",
gicv4_enable ? "en" : "dis");
}
if (!info->vcpu.start) {
kvm_info("GICv3: no GICV resource entry\n");
kvm_vgic_global_state.vcpu_base = 0;

364
virt/kvm/arm/vgic/vgic-v4.c Normal file
View File

@ -0,0 +1,364 @@
/*
* Copyright (C) 2017 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
#include <linux/irqchip/arm-gic-v3.h>
#include "vgic.h"
/*
* How KVM uses GICv4 (insert rude comments here):
*
* The vgic-v4 layer acts as a bridge between several entities:
* - The GICv4 ITS representation offered by the ITS driver
* - VFIO, which is in charge of the PCI endpoint
* - The virtual ITS, which is the only thing the guest sees
*
* The configuration of VLPIs is triggered by a callback from VFIO,
* instructing KVM that a PCI device has been configured to deliver
* MSIs to a vITS.
*
* kvm_vgic_v4_set_forwarding() is thus called with the routing entry,
* and this is used to find the corresponding vITS data structures
* (ITS instance, device, event and irq) using a process that is
* extremely similar to the injection of an MSI.
*
* At this stage, we can link the guest's view of an LPI (uniquely
* identified by the routing entry) and the host irq, using the GICv4
* driver mapping operation. Should the mapping succeed, we've then
* successfully upgraded the guest's LPI to a VLPI. We can then start
* with updating GICv4's view of the property table and generating an
* INValidation in order to kickstart the delivery of this VLPI to the
* guest directly, without software intervention. Well, almost.
*
* When the PCI endpoint is deconfigured, this operation is reversed
* with VFIO calling kvm_vgic_v4_unset_forwarding().
*
* Once the VLPI has been mapped, it needs to follow any change the
* guest performs on its LPI through the vITS. For that, a number of
* command handlers have hooks to communicate these changes to the HW:
* - Any invalidation triggers a call to its_prop_update_vlpi()
* - The INT command results in an irq_set_irqchip_state(), which
* generates an INT on the corresponding VLPI.
* - The CLEAR command results in an irq_set_irqchip_state(), which
* generates a CLEAR on the corresponding VLPI.
* - DISCARD translates into an unmap, similar to a call to
* kvm_vgic_v4_unset_forwarding().
* - MOVI is translated by an update of the existing mapping, changing
* the target vcpu, resulting in a VMOVI being generated.
* - MOVALL is translated by a string of mapping updates (similar to
* the handling of MOVI). MOVALL is horrible.
*
* Note that a DISCARD/MAPTI sequence emitted from the guest without
* reprogramming the PCI endpoint after MAPTI does not result in a
* VLPI being mapped, as there is no callback from VFIO (the guest
* will get the interrupt via the normal SW injection). Fixing this is
* not trivial, and requires some horrible messing with the VFIO
* internals. Not fun. Don't do that.
*
* Then there is the scheduling. Each time a vcpu is about to run on a
* physical CPU, KVM must tell the corresponding redistributor about
* it. And if we've migrated our vcpu from one CPU to another, we must
* tell the ITS (so that the messages reach the right redistributor).
* This is done in two steps: first issue an irq_set_affinity() on the
* irq corresponding to the vcpu, then call its_schedule_vpe(). You
* must be in a non-preemptible context. On exit, another call to
* its_schedule_vpe() tells the redistributor that we're done with the
* vcpu.
*
* Finally, the doorbell handling: Each vcpu is allocated an interrupt
* which will fire each time a VLPI is made pending whilst the vcpu is
* not running. Each time the vcpu gets blocked, the doorbell
* interrupt gets enabled. When the vcpu is unblocked (for whatever
* reason), the doorbell interrupt is disabled.
*/
/*
 * Status flags applied to every vcpu doorbell interrupt before it is
 * requested, and cleared again before it is freed: no auto-enable at
 * request time and no lazy disabling (the doorbell is explicitly enabled
 * and disabled as the vcpu blocks and unblocks).
 * NOTE(review): IRQ_NO_BALANCING presumably keeps the doorbell affinity
 * out of reach of interrupt balancing -- confirm against genirq.
 */
#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING)
static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
struct kvm_vcpu *vcpu = info;
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return IRQ_HANDLED;
}
/**
 * vgic_v4_init - Initialize the GICv4 data structures
 * @kvm:	Pointer to the VM being initialized
 *
 * We may be called each time a vITS is created, or when the
 * vgic is initialized. This relies on kvm->lock to be
 * held. In both cases, the number of vcpus should now be
 * fixed.
 *
 * Return: 0 on success, when direct MSI injection is not supported, or
 * when the vPE array has already been set up; -ENOMEM if the vPE array
 * cannot be allocated; otherwise the error reported by
 * its_alloc_vcpu_irqs() or request_irq(). When request_irq() fails,
 * vgic_v4_teardown() is called before returning.
 */
int vgic_v4_init(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	int i, nr_vcpus, ret;

	if (!vgic_supports_direct_msis(kvm))
		return 0; /* Nothing to see here... move along. */

	/* Already initialized by an earlier call. */
	if (dist->its_vm.vpes)
		return 0;

	nr_vcpus = atomic_read(&kvm->online_vcpus);

	/*
	 * kcalloc() zeroes the array like kzalloc() did, and additionally
	 * fails the allocation if count * size would overflow.
	 */
	dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes),
				    GFP_KERNEL);
	if (!dist->its_vm.vpes)
		return -ENOMEM;

	dist->its_vm.nr_vpes = nr_vcpus;

	/* Each vPE slot points at the its_vpe embedded in its vcpu. */
	kvm_for_each_vcpu(i, vcpu, kvm)
		dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	ret = its_alloc_vcpu_irqs(&dist->its_vm);
	if (ret < 0) {
		kvm_err("VPE IRQ allocation failure\n");
		kfree(dist->its_vm.vpes);
		dist->its_vm.nr_vpes = 0;
		dist->its_vm.vpes = NULL;
		return ret;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		int irq = dist->its_vm.vpes[i]->irq;

		/*
		 * Don't automatically enable the doorbell, as we're
		 * flipping it back and forth when the vcpu gets
		 * blocked. Also disable the lazy disabling, as the
		 * doorbell could kick us out of the guest too
		 * early...
		 */
		irq_set_status_flags(irq, DB_IRQ_FLAGS);
		ret = request_irq(irq, vgic_v4_doorbell_handler,
				  0, "vcpu", vcpu);
		if (ret) {
			kvm_err("failed to allocate vcpu IRQ%d\n", irq);
			/*
			 * Trick: adjust the number of vpes so we know
			 * how many to nuke on teardown...
			 */
			dist->its_vm.nr_vpes = i;
			break;
		}
	}

	if (ret)
		vgic_v4_teardown(kvm);

	return ret;
}
/**
 * vgic_v4_teardown - Free the GICv4 data structures
 * @kvm:	Pointer to the VM being destroyed
 *
 * Relies on kvm->lock to be held. Does nothing when no vPE array has
 * been set up for this VM.
 */
void vgic_v4_teardown(struct kvm *kvm)
{
	struct its_vm *vm = &kvm->arch.vgic.its_vm;
	int idx;

	if (vm->vpes == NULL)
		return;

	/* Release the doorbell interrupt of each of the nr_vpes vPEs. */
	for (idx = 0; idx < vm->nr_vpes; idx++) {
		struct kvm_vcpu *owner = kvm_get_vcpu(kvm, idx);
		int db_irq = vm->vpes[idx]->irq;

		irq_clear_status_flags(db_irq, DB_IRQ_FLAGS);
		free_irq(db_irq, owner);
	}

	its_free_vcpu_irqs(vm);
	kfree(vm->vpes);

	/* Leave the structure in its "never initialized" state. */
	vm->nr_vpes = 0;
	vm->vpes = NULL;
}
/*
 * Called when syncing the vgic state back from the hardware: ask the ITS
 * driver to deschedule this vcpu's vPE. Returns 0 when direct MSI
 * injection is not supported, otherwise the result of its_schedule_vpe().
 */
int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
{
	if (vgic_supports_direct_msis(vcpu->kvm))
		return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe,
					false);

	return 0;
}
/*
 * Called before entering the guest: move the vPE doorbell to the current
 * CPU, schedule the vPE, and drop any doorbell that is still pending.
 * Returns 0 when direct MSI injection is not supported, otherwise the
 * first error encountered (or 0 if every step succeeded).
 */
int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
{
	int db_irq, ret;

	if (!vgic_supports_direct_msis(vcpu->kvm))
		return 0;

	db_irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;

	/*
	 * Before making the VPE resident, make sure the redistributor
	 * corresponding to our current CPU expects us here. See the
	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
	 * turns into a VMOVP command at the ITS level.
	 */
	ret = irq_set_affinity(db_irq, cpumask_of(smp_processor_id()));
	if (ret)
		return ret;

	ret = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
	if (ret)
		return ret;

	/*
	 * Now that the VPE is resident, let's get rid of a potential
	 * doorbell interrupt that would still be pending.
	 */
	return irq_set_irqchip_state(db_irq, IRQCHIP_STATE_PENDING, false);
}
/*
 * Find the virtual ITS targeted by a routing entry: rebuild a struct
 * kvm_msi from the MSI fields of @irq_entry and hand it to
 * vgic_msi_to_its(), whose result is returned as is.
 */
static struct vgic_its *vgic_get_its(struct kvm *kvm,
				     struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct kvm_msi msi = {
		.address_lo	= irq_entry->msi.address_lo,
		.address_hi	= irq_entry->msi.address_hi,
		.data		= irq_entry->msi.data,
		.flags		= irq_entry->msi.flags,
		.devid		= irq_entry->msi.devid,
	};

	return vgic_msi_to_its(kvm, &msi);
}
int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
struct kvm_kernel_irq_routing_entry *irq_entry)
{
struct vgic_its *its;
struct vgic_irq *irq;
struct its_vlpi_map map;
int ret;
if (!vgic_supports_direct_msis(kvm))
return 0;
/*
* Get the ITS, and escape early on error (not a valid
* doorbell for any of our vITSs).
*/
its = vgic_get_its(kvm, irq_entry);
if (IS_ERR(its))
return 0;
mutex_lock(&its->its_lock);
/* Perform then actual DevID/EventID -> LPI translation. */
ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
irq_entry->msi.data, &irq);
if (ret)
goto out;
/*
* Emit the mapping request. If it fails, the ITS probably
* isn't v4 compatible, so let's silently bail out. Holding
* the ITS lock should ensure that nothing can modify the
* target vcpu.
*/
map = (struct its_vlpi_map) {
.vm = &kvm->arch.vgic.its_vm,
.vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe,
.vintid = irq->intid,
.properties = ((irq->priority & 0xfc) |
(irq->enabled ? LPI_PROP_ENABLED : 0) |
LPI_PROP_GROUP1),
.db_enabled = true,
};
ret = its_map_vlpi(virq, &map);
if (ret)
goto out;
irq->hw = true;
irq->host_irq = virq;
out:
mutex_unlock(&its->its_lock);
return ret;
}
/*
 * Undo kvm_vgic_v4_set_forwarding(): turn the VLPI described by
 * @irq_entry back into a plain, software-injected LPI and unmap it from
 * the host interrupt @virq.
 *
 * Returns 0 on success, when direct MSI injection is not supported, when
 * the routing entry does not target one of our vITSs, or when the LPI was
 * not mapped as a VLPI in the first place; otherwise the error from
 * vgic_its_resolve_lpi() or its_unmap_vlpi().
 */
int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
				 struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct vgic_its *its;
	struct vgic_irq *irq;
	int ret;

	if (!vgic_supports_direct_msis(kvm))
		return 0;

	/*
	 * Get the ITS, and escape early on error (not a valid
	 * doorbell for any of our vITSs).
	 */
	its = vgic_get_its(kvm, irq_entry);
	if (IS_ERR(its))
		return 0;

	mutex_lock(&its->its_lock);

	ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
				   irq_entry->msi.data, &irq);
	if (ret)
		goto out;

	/*
	 * Only perform the unmap if this LPI was really upgraded to a
	 * VLPI. Still warn about any mismatch, but don't ask the GICv4
	 * driver to unmap something that was never mapped.
	 */
	WARN_ON(!(irq->hw && irq->host_irq == virq));
	if (irq->hw) {
		irq->hw = false;
		ret = its_unmap_vlpi(virq);
	}

out:
	mutex_unlock(&its->its_lock);
	return ret;
}
/*
 * Enable the doorbell interrupt of @vcpu's vPE. Nothing is done when
 * direct MSI injection is not supported or when the vPE has no doorbell
 * interrupt (irq == 0).
 */
void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
{
	int db_irq;

	if (!vgic_supports_direct_msis(vcpu->kvm))
		return;

	db_irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
	if (!db_irq)
		return;

	enable_irq(db_irq);
}
/*
 * Disable the doorbell interrupt of @vcpu's vPE. Nothing is done when
 * direct MSI injection is not supported or when the vPE has no doorbell
 * interrupt (irq == 0).
 */
void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
{
	int db_irq;

	if (!vgic_supports_direct_msis(vcpu->kvm))
		return;

	db_irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
	if (!db_irq)
		return;

	disable_irq(db_irq);
}

View File

@ -17,6 +17,8 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list_sort.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include "vgic.h"
@ -409,25 +411,56 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
return 0;
}
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
unsigned int host_irq)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
struct irq_desc *desc;
struct irq_data *data;
/*
* Find the physical IRQ number corresponding to @host_irq
*/
desc = irq_to_desc(host_irq);
if (!desc) {
kvm_err("%s: no interrupt descriptor\n", __func__);
return -EINVAL;
}
data = irq_desc_get_irq_data(desc);
while (data->parent_data)
data = data->parent_data;
irq->hw = true;
irq->host_irq = host_irq;
irq->hwintid = data->hwirq;
return 0;
}
/*
 * Drop the link between a virtual interrupt and its physical interrupt:
 * the HW flag is cleared and the hardware INTID is reset to 0.
 *
 * @irq->irq_lock must be held by the caller.
 */
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
	irq->hw = false;
	irq->hwintid = 0;
}
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
u32 vintid)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
unsigned long flags;
int ret;
BUG_ON(!irq);
spin_lock_irqsave(&irq->irq_lock, flags);
irq->hw = true;
irq->hwintid = phys_irq;
ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
return 0;
return ret;
}
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
struct vgic_irq *irq;
unsigned long flags;
@ -435,14 +468,11 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
if (!vgic_initialized(vcpu->kvm))
return -EAGAIN;
irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
BUG_ON(!irq);
spin_lock_irqsave(&irq->irq_lock, flags);
irq->hw = false;
irq->hwintid = 0;
kvm_vgic_unmap_irq(irq);
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
@ -688,6 +718,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
WARN_ON(vgic_v4_sync_hwstate(vcpu));
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
@ -700,6 +732,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
WARN_ON(vgic_v4_flush_hwstate(vcpu));
/*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
@ -751,6 +785,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
if (!vcpu->kvm->arch.vgic.enabled)
return false;
if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
return true;
spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
@ -784,9 +821,9 @@ void vgic_kick_vcpus(struct kvm *kvm)
}
}
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
bool map_is_active;
unsigned long flags;

View File

@ -237,4 +237,14 @@ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
}
}
/*
 * vITS lookup helpers used by the GICv4 code.
 * vgic_its_resolve_lpi() performs the DevID/EventID -> LPI translation on
 * @its and hands the interrupt back through @irq (non-zero on failure).
 * vgic_msi_to_its() finds the vITS targeted by @msi; callers check the
 * result with IS_ERR().
 */
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
/* True when the host has GICv4 enabled and the VM has an ITS. */
bool vgic_supports_direct_msis(struct kvm *kvm);
/* Set up / free the per-VM GICv4 state (vPE array and doorbell IRQs). */
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
/* Deschedule / schedule the vcpu's vPE around guest entry and exit. */
int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);
#endif