mirror of https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-18 16:44:27 +08:00
The three KVM patches that KVMGT needs.

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)

iQEcBAABAgAGBQJYIy78AAoJEL/70l94x66DzbQH/34Mgm52OdLzTe+Yf8Pcae5X
A/z2Aicto+6M+dtYoWPn2DfoevPaSpVmymryRrRzAqk5QDPHiVHZ5iCW0RaIEU7M
iiPudqSGVGa0oFYBkxsJxCBysAQsHX5sEXEszs4egbO1TtQ8LCAxYUVuLBGlx+Fa
qj26Fi/p2ByHVp/RX55A2kF5T8J671KT4LWUvFjzTgGFWo8Kr1bk0q4hmRYB9OBc
/pqczV3Mc6KcmzfIg3Rd6xt8UDlEGJ4YQhpNgY6nxrQ1py3AP7vNqBPAH4RXbHJB
/OqdAjpqa8+rrwSpQ1f58U+7v/ZO1ZTg0IW9bf60qKjG/aV4fGN6y0/iXKGgKyQ=
=cpog
-----END PGP SIGNATURE-----

Merge tag 'for-kvmgt' of git://git.kernel.org/pub/scm/virt/kvm/kvm into drm-intel-next-queued

Paolo Bonzini writes: The three KVM patches that KVMGT needs.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
commit 11840e2fff
@@ -4,7 +4,17 @@ KVM Lock Overview
1. Acquisition Orders
---------------------

(to be written)
The acquisition orders for mutexes are as follows:

- kvm->lock is taken outside vcpu->mutex

- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock

- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
  them together is quite rare.

For spinlocks, kvm_lock is taken outside kvm->mmu_lock. Everything
else is a leaf: no other lock is taken inside the critical sections.

2: Exception
------------

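As a rough illustration of the mutex ordering documented above, a caller that genuinely needed several of these locks at once would have to nest them in the stated order and release them in reverse. The sketch below is illustrative only; the helper name is made up, and real KVM code rarely takes all of these locks together:

	#include <linux/kvm_host.h>

	/* Hypothetical helper: respects the documented acquisition order. */
	static void example_ordered_locking(struct kvm *kvm, struct kvm_vcpu *vcpu)
	{
		mutex_lock(&kvm->lock);		/* kvm->lock before vcpu->mutex */
		mutex_lock(&vcpu->mutex);
		mutex_lock(&kvm->slots_lock);	/* kvm->lock also before slots_lock/irq_lock */
		mutex_lock(&kvm->irq_lock);	/* slots_lock before irq_lock */

		/* ... critical section ... */

		mutex_unlock(&kvm->irq_lock);
		mutex_unlock(&kvm->slots_lock);
		mutex_unlock(&vcpu->mutex);
		mutex_unlock(&kvm->lock);
	}
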
@@ -293,7 +293,10 @@ struct kvm_vcpu_arch {
/* Host KSEG0 address of the EI/DI offset */
void *kseg0_commpage;

u32 io_gpr; /* GPR used as IO source/target */
/* Resume PC after MMIO completion */
unsigned long io_pc;
/* GPR used as IO source/target */
u32 io_gpr;

struct hrtimer comparecount_timer;
/* Count timer control KVM register */
@@ -315,8 +318,6 @@ struct kvm_vcpu_arch {
/* Bitmask of pending exceptions to be cleared */
unsigned long pending_exceptions_clr;

u32 pending_load_cause;

/* Save/Restore the entryhi register when are are preempted/scheduled back in */
unsigned long preempt_entryhi;

@@ -790,15 +790,15 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu)
struct mips_coproc *cop0 = vcpu->arch.cop0;
enum emulation_result er = EMULATE_DONE;

if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
kvm_clear_c0_guest_status(cop0, ST0_ERL);
vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
} else if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc,
kvm_read_c0_guest_epc(cop0));
kvm_clear_c0_guest_status(cop0, ST0_EXL);
vcpu->arch.pc = kvm_read_c0_guest_epc(cop0);

} else if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
kvm_clear_c0_guest_status(cop0, ST0_ERL);
vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
} else {
kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n",
vcpu->arch.pc);
@@ -1528,13 +1528,25 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
struct kvm_vcpu *vcpu)
{
enum emulation_result er = EMULATE_DO_MMIO;
unsigned long curr_pc;
u32 op, rt;
u32 bytes;

rt = inst.i_format.rt;
op = inst.i_format.opcode;

vcpu->arch.pending_load_cause = cause;
/*
* Find the resume PC now while we have safe and easy access to the
* prior branch instruction, and save it for
* kvm_mips_complete_mmio_load() to restore later.
*/
curr_pc = vcpu->arch.pc;
er = update_pc(vcpu, cause);
if (er == EMULATE_FAIL)
return er;
vcpu->arch.io_pc = vcpu->arch.pc;
vcpu->arch.pc = curr_pc;

vcpu->arch.io_gpr = rt;

switch (op) {
@@ -2494,9 +2506,8 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
goto done;
}

er = update_pc(vcpu, vcpu->arch.pending_load_cause);
if (er == EMULATE_FAIL)
return er;
/* Restore saved resume PC */
vcpu->arch.pc = vcpu->arch.io_pc;

switch (run->mmio.len) {
case 4:
@@ -2518,11 +2529,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
break;
}

if (vcpu->arch.pending_load_cause & CAUSEF_BD)
kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n",
vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr,
vcpu->mmio_needed);

done:
return er;
}

@@ -426,7 +426,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
int cpu = smp_processor_id();
int i, cpu = smp_processor_id();
unsigned int gasid;

/*
@@ -442,6 +442,9 @@ static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
vcpu);
vcpu->arch.guest_user_asid[cpu] =
vcpu->arch.guest_user_mm.context.asid[cpu];
for_each_possible_cpu(i)
if (i != cpu)
vcpu->arch.guest_user_asid[i] = 0;
vcpu->arch.last_user_gasid = gasid;
}
}

@@ -260,13 +260,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) &
asid_version_mask(cpu)) {
u32 gasid = kvm_read_c0_guest_entryhi(vcpu->arch.cop0) &
KVM_ENTRYHI_ASID;

kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu);
vcpu->arch.guest_user_asid[cpu] =
vcpu->arch.guest_user_mm.context.asid[cpu];
vcpu->arch.last_user_gasid = gasid;
newasid++;

kvm_debug("[%d]: cpu_context: %#lx\n", cpu,

@@ -315,7 +315,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
if (r < 0)
goto out;

diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
if (!diag224_buf || diag224(diag224_buf))
goto out;

@@ -378,7 +378,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
sctns->par.infpval1 |= PAR_WGHT_VLD;

out:
kfree(diag224_buf);
free_page((unsigned long)diag224_buf);
vfree(diag204_buf);
}

@@ -948,7 +948,6 @@ struct kvm_x86_ops {
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
bool (*invpcid_supported)(void);
void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment);

void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);

@@ -958,8 +957,6 @@ struct kvm_x86_ops {

void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);

u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);

void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);

int (*check_intercept)(struct kvm_vcpu *vcpu,

@@ -29,9 +29,20 @@ struct kvm_page_track_notifier_node {
* @gpa: the physical address written by guest.
* @new: the data was written to the address.
* @bytes: the written length.
* @node: this node
*/
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
int bytes, struct kvm_page_track_notifier_node *node);
/*
* It is called when memory slot is being moved or removed
* users can drop write-protection for the pages in that memory slot
*
* @kvm: the kvm where memory slot being moved or removed
* @slot: the memory slot being moved or removed
* @node: this node
*/
void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
struct kvm_page_track_notifier_node *node);
};

void kvm_page_track_init(struct kvm *kvm);
@@ -58,4 +69,5 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
struct kvm_page_track_notifier_node *n);
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#endif

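The page-track changes above pass the notifier node itself into each callback and add a flush-slot hook, which is what lets an external user such as KVMGT recover its own state from the node and drop write protection when a memory slot goes away. The snippet below is only a sketch of how a consumer might wire up the new interface; the example_* names are invented and are not part of this merge:

	#include <linux/kvm_host.h>
	#include <asm/kvm_page_track.h>

	struct example_tracker {
		struct kvm_page_track_notifier_node node;
		/* consumer-private state would live here */
	};

	static void example_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
					const u8 *new, int bytes,
					struct kvm_page_track_notifier_node *node)
	{
		/*
		 * The extra @node argument lets the callback find its owner,
		 * e.g. container_of(node, struct example_tracker, node),
		 * and react to the intercepted guest write at @gpa.
		 */
	}

	static void example_track_flush_slot(struct kvm *kvm,
					     struct kvm_memory_slot *slot,
					     struct kvm_page_track_notifier_node *node)
	{
		/* Drop any write protection held on pages in @slot. */
	}

	static void example_register(struct kvm *kvm, struct example_tracker *t)
	{
		t->node.track_write = example_track_write;
		t->node.track_flush_slot = example_track_flush_slot;
		kvm_page_track_register_notifier(kvm, &t->node);
	}
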
@@ -5045,7 +5045,7 @@ done_prefixes:
/* Decode and fetch the destination operand: register or memory. */
rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

if (ctxt->rip_relative)
if (ctxt->rip_relative && likely(ctxt->memopp))
ctxt->memopp->addr.mem.ea = address_mask(ctxt,
ctxt->memopp->addr.mem.ea + ctxt->_eip);

@@ -4405,7 +4405,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
}

static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes)
const u8 *new, int bytes,
struct kvm_page_track_notifier_node *node)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
@@ -4617,11 +4618,19 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
init_kvm_mmu(vcpu);
}

static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot,
struct kvm_page_track_notifier_node *node)
{
kvm_mmu_invalidate_zap_all_pages(kvm);
}

void kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;

node->track_write = kvm_mmu_pte_write;
node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
kvm_page_track_register_notifier(kvm, node);
}

@@ -106,6 +106,7 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
kvm_flush_remote_tlbs(kvm);
}
EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);

/*
* remove the guest page from the tracking pool which stops the interception
@@ -135,6 +136,7 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
*/
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);

/*
* check if the corresponding access on the specified guest page is tracked.
@@ -181,6 +183,7 @@ kvm_page_track_register_notifier(struct kvm *kvm,
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
spin_unlock(&kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);

/*
* stop receiving the event interception. It is the opposed operation of
@@ -199,6 +202,7 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
spin_unlock(&kvm->mmu_lock);
synchronize_srcu(&head->track_srcu);
}
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);

/*
* Notify the node that write access is intercepted and write emulation is
@@ -222,6 +226,31 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
idx = srcu_read_lock(&head->track_srcu);
hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
if (n->track_write)
n->track_write(vcpu, gpa, new, bytes);
n->track_write(vcpu, gpa, new, bytes, n);
srcu_read_unlock(&head->track_srcu, idx);
}

/*
* Notify the node that memory slot is being removed or moved so that it can
* drop write-protection for the pages in the memory slot.
*
* The node should figure out it has any write-protected pages in this slot
* by itself.
*/
void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
struct kvm_page_track_notifier_head *head;
struct kvm_page_track_notifier_node *n;
int idx;

head = &kvm->arch.track_notifier_head;

if (hlist_empty(&head->track_notifier_list))
return;

idx = srcu_read_lock(&head->track_srcu);
hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
if (n->track_flush_slot)
n->track_flush_slot(kvm, slot, n);
srcu_read_unlock(&head->track_srcu, idx);
}

@@ -1138,21 +1138,6 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}

static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
{
struct vcpu_svm *svm = to_svm(vcpu);

svm->vmcb->control.tsc_offset += adjustment;
if (is_guest_mode(vcpu))
svm->nested.hsave->control.tsc_offset += adjustment;
else
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
svm->vmcb->control.tsc_offset - adjustment,
svm->vmcb->control.tsc_offset);

mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}

static void avic_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb;
@@ -3449,12 +3434,6 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}

static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
return vmcb->control.tsc_offset + host_tsc;
}

static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5422,8 +5401,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.has_wbinvd_exit = svm_has_wbinvd_exit,

.write_tsc_offset = svm_write_tsc_offset,
.adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
.read_l1_tsc = svm_read_l1_tsc,

.set_tdp_cr3 = set_tdp_cr3,

@@ -187,6 +187,7 @@ struct vmcs {
*/
struct loaded_vmcs {
struct vmcs *vmcs;
struct vmcs *shadow_vmcs;
int cpu;
int launched;
struct list_head loaded_vmcss_on_cpu_link;
@@ -411,7 +412,6 @@ struct nested_vmx {
* memory during VMXOFF, VMCLEAR, VMPTRLD.
*/
struct vmcs12 *cached_vmcs12;
struct vmcs *current_shadow_vmcs;
/*
* Indicates if the shadow vmcs must be updated with the
* data hold by vmcs12
@@ -421,7 +421,6 @@ struct nested_vmx {
/* vmcs02_list cache of VMCSs recently used to run L2 guests */
struct list_head vmcs02_pool;
int vmcs02_num;
u64 vmcs01_tsc_offset;
bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
@@ -1419,6 +1418,8 @@ static void vmcs_clear(struct vmcs *vmcs)
static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
{
vmcs_clear(loaded_vmcs->vmcs);
if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
vmcs_clear(loaded_vmcs->shadow_vmcs);
loaded_vmcs->cpu = -1;
loaded_vmcs->launched = 0;
}
@@ -2604,20 +2605,6 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
}

/*
* Like guest_read_tsc, but always returns L1's notion of the timestamp
* counter, even if a nested guest (L2) is currently running.
*/
static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
u64 tsc_offset;

tsc_offset = is_guest_mode(vcpu) ?
to_vmx(vcpu)->nested.vmcs01_tsc_offset :
vmcs_read64(TSC_OFFSET);
return host_tsc + tsc_offset;
}

/*
* writes 'offset' into guest's timestamp counter offset register
*/
@@ -2631,7 +2618,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
* to the newly set TSC to get L2's TSC.
*/
struct vmcs12 *vmcs12;
to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
/* recalculate vmcs02.TSC_OFFSET: */
vmcs12 = get_vmcs12(vcpu);
vmcs_write64(TSC_OFFSET, offset +
@@ -2644,19 +2630,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
}
}

static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
{
u64 offset = vmcs_read64(TSC_OFFSET);

vmcs_write64(TSC_OFFSET, offset + adjustment);
if (is_guest_mode(vcpu)) {
/* Even when running L2, the adjustment needs to apply to L1 */
to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
} else
trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
offset + adjustment);
}

static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
@@ -3562,6 +3535,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
loaded_vmcs_clear(loaded_vmcs);
free_vmcs(loaded_vmcs->vmcs);
loaded_vmcs->vmcs = NULL;
WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
}

static void free_kvm_area(void)
@@ -6696,6 +6670,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
item->vmcs02.shadow_vmcs = NULL;
if (!item->vmcs02.vmcs) {
kfree(item);
return NULL;
@@ -7072,7 +7047,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
shadow_vmcs->revision_id |= (1u << 31);
/* init shadow vmcs */
vmcs_clear(shadow_vmcs);
vmx->nested.current_shadow_vmcs = shadow_vmcs;
vmx->vmcs01.shadow_vmcs = shadow_vmcs;
}

INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
@@ -7174,8 +7149,11 @@ static void free_nested(struct vcpu_vmx *vmx)
free_page((unsigned long)vmx->nested.msr_bitmap);
vmx->nested.msr_bitmap = NULL;
}
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
if (enable_shadow_vmcs) {
vmcs_clear(vmx->vmcs01.shadow_vmcs);
free_vmcs(vmx->vmcs01.shadow_vmcs);
vmx->vmcs01.shadow_vmcs = NULL;
}
kfree(vmx->nested.cached_vmcs12);
/* Unpin physical memory we referred to in current vmcs02 */
if (vmx->nested.apic_access_page) {
@@ -7352,7 +7330,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
int i;
unsigned long field;
u64 field_value;
struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
const unsigned long *fields = shadow_read_write_fields;
const int num_fields = max_shadow_read_write_fields;

@@ -7401,7 +7379,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
int i, q;
unsigned long field;
u64 field_value = 0;
struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;

vmcs_load(shadow_vmcs);

@@ -7591,7 +7569,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER,
__pa(vmx->nested.current_shadow_vmcs));
__pa(vmx->vmcs01.shadow_vmcs));
vmx->nested.sync_shadow_vmcs = true;
}
}
@@ -7659,7 +7637,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)

types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;

if (!(types & (1UL << type))) {
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
skip_emulated_instruction(vcpu);
@@ -7722,7 +7700,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)

types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;

if (!(types & (1UL << type))) {
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
skip_emulated_instruction(vcpu);
@@ -9156,6 +9134,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)

vmx->loaded_vmcs = &vmx->vmcs01;
vmx->loaded_vmcs->vmcs = alloc_vmcs();
vmx->loaded_vmcs->shadow_vmcs = NULL;
if (!vmx->loaded_vmcs->vmcs)
goto free_msrs;
if (!vmm_exclusive)
@@ -10061,9 +10040,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
vcpu->arch.tsc_offset + vmcs12->tsc_offset);
else
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);

@@ -10293,8 +10272,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)

enter_guest_mode(vcpu);

vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);

if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);

@@ -10818,7 +10795,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);

/* Update any VMCS fields that might have changed while L2 ran */
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
if (vmx->hv_deadline_tsc == -1)
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
@@ -11339,8 +11316,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,

.write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
.read_l1_tsc = vmx_read_l1_tsc,

.set_tdp_cr3 = vmx_set_cr3,

@@ -1409,7 +1409,7 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)

u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc));
return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);

@@ -1547,7 +1547,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
}

static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -1555,7 +1555,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
WARN_ON(adjustment < 0);
adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
adjust_tsc_offset_guest(vcpu, adjustment);
}

#ifdef CONFIG_X86_64
@@ -2262,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has_osvw(vcpu))
@@ -2280,11 +2280,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
return 1;
} else {
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
msr, data);
break;
}
@@ -7410,10 +7410,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;

kvmclock_reset(vcpu);

free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
kvm_x86_ops->vcpu_free(vcpu);
free_cpumask_var(wbinvd_dirty_mask);
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -8153,7 +8155,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
kvm_mmu_invalidate_zap_all_pages(kvm);
kvm_page_track_flush_slot(kvm, slot);
}

static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)

@@ -42,6 +42,7 @@

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
@@ -167,7 +168,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)

list_del_init(&irqfd->list);

schedule_work(&irqfd->shutdown);
queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
@@ -554,7 +555,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
* so that we guarantee there will not be any more interrupts on this
* gsi once this deassign function returns.
*/
flush_work(&irqfd->shutdown);
flush_workqueue(irqfd_cleanup_wq);

return 0;
}
@@ -591,7 +592,7 @@ kvm_irqfd_release(struct kvm *kvm)
* Block until we know all outstanding shutdown jobs have completed
* since we do not take a kvm* reference.
*/
flush_work(&irqfd->shutdown);
flush_workqueue(irqfd_cleanup_wq);

}

@@ -621,8 +622,23 @@ void kvm_irq_routing_update(struct kvm *kvm)
spin_unlock_irq(&kvm->irqfds.lock);
}

/*
* create a host-wide workqueue for issuing deferred shutdown requests
* aggregated from all vm* instances. We need our own isolated
* queue to ease flushing work items when a VM exits.
*/
int kvm_irqfd_init(void)
{
irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
if (!irqfd_cleanup_wq)
return -ENOMEM;

return 0;
}

void kvm_irqfd_exit(void)
{
destroy_workqueue(irqfd_cleanup_wq);
}
#endif

@@ -3846,7 +3846,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
* kvm_arch_init makes sure there's at most one caller
* for architectures that support multiple implementations,
* like intel and amd on x86.
* kvm_arch_init must be called before kvm_irqfd_init to avoid creating
* conflicts in case kvm is already setup for another implementation.
*/
r = kvm_irqfd_init();
if (r)
goto out_irqfd;

if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
@@ -3928,6 +3933,7 @@ out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
kvm_irqfd_exit();
out_irqfd:
kvm_arch_exit();
out_fail:
return r;