2020-03-24 17:41:52 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
/*
|
|
|
|
* Kernel-based Virtual Machine driver for Linux
|
|
|
|
*
|
|
|
|
* AMD SVM support
|
|
|
|
*
|
|
|
|
* Copyright (C) 2006 Qumranet, Inc.
|
|
|
|
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Yaniv Kamay <yaniv@qumranet.com>
|
|
|
|
* Avi Kivity <avi@qumranet.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __SVM_SVM_H
|
|
|
|
#define __SVM_SVM_H
|
|
|
|
|
|
|
|
#include <linux/kvm_types.h>
|
|
|
|
#include <linux/kvm_host.h>
|
2020-12-11 01:09:47 +08:00
|
|
|
#include <linux/bits.h>
|
2020-03-24 17:41:52 +08:00
|
|
|
|
|
|
|
#include <asm/svm.h>
|
2021-04-27 19:16:35 +08:00
|
|
|
#include <asm/sev-common.h>
|
2020-03-24 17:41:52 +08:00
|
|
|
|
2023-08-16 04:36:51 +08:00
|
|
|
#include "cpuid.h"
|
2022-03-05 00:10:32 +08:00
|
|
|
#include "kvm_cache_regs.h"
|
|
|
|
|
2020-12-11 01:10:04 +08:00
|
|
|
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
|
|
|
|
|
2021-04-13 05:56:05 +08:00
|
|
|
/*
 * Sizes, in bytes, of the I/O and MSR permission maps, expressed as a whole
 * number of pages.  The replacement lists are parenthesized so the macros
 * expand safely inside larger expressions (e.g. "x / IOPM_SIZE").
 */
#define IOPM_SIZE	(PAGE_SIZE * 3)
#define MSRPM_SIZE	(PAGE_SIZE * 2)
|
|
|
|
|
KVM: SEV: Do not intercept accesses to MSR_IA32_XSS for SEV-ES guests
When intercepts are enabled for MSR_IA32_XSS, the host will swap in/out
the guest-defined values while context-switching to/from guest mode.
However, in the case of SEV-ES, vcpu->arch.guest_state_protected is set,
so the guest-defined value is effectively ignored when switching to
guest mode with the understanding that the VMSA will handle swapping
in/out this register state.
However, SVM is still configured to intercept these accesses for SEV-ES
guests, so the values in the initial MSR_IA32_XSS are effectively
read-only, and a guest will experience undefined behavior if it actually
tries to write to this MSR. Fortunately, only CET/shadowstack makes use
of this register on SEV-ES-capable systems currently, which isn't yet
widely used, but this may become more of an issue in the future.
Additionally, enabling intercepts of MSR_IA32_XSS results in #VC
exceptions in the guest in certain paths that can lead to unexpected #VC
nesting levels. One example is SEV-SNP guests when handling #VC
exceptions for CPUID instructions involving leaf 0xD, subleaf 0x1, since
they will access MSR_IA32_XSS as part of servicing the CPUID #VC, then
generate another #VC when accessing MSR_IA32_XSS, which can lead to
guest crashes if an NMI occurs at that point in time. Running perf on a
guest while it is issuing such a sequence is one example where these can
be problematic.
Address this by disabling intercepts of MSR_IA32_XSS for SEV-ES guests
if the host/guest configuration allows it. If the host/guest
configuration doesn't allow for MSR_IA32_XSS, leave it intercepted so
that it can be caught by the existing checks in
kvm_{set,get}_msr_common() if the guest still attempts to access it.
Fixes: 376c6d285017 ("KVM: SVM: Provide support for SEV-ES vCPU creation/loading")
Cc: Alexey Kardashevskiy <aik@amd.com>
Suggested-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-Id: <20231016132819.1002933-4-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2023-10-16 21:27:32 +08:00
|
|
|
#define MAX_DIRECT_ACCESS_MSRS 47
|
2022-05-19 18:26:59 +08:00
|
|
|
#define MSRPM_OFFSETS 32
|
2020-03-24 17:41:52 +08:00
|
|
|
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
|
|
|
|
extern bool npt_enabled;
|
2023-08-25 09:36:19 +08:00
|
|
|
extern int nrips;
|
2022-03-23 01:24:49 +08:00
|
|
|
extern int vgif;
|
2021-07-07 20:51:00 +08:00
|
|
|
extern bool intercept_smi;
|
2023-01-06 09:12:44 +08:00
|
|
|
extern bool x2avic_enabled;
|
KVM: x86: Add support for SVM's Virtual NMI
Add support for SVM's Virtual NMIs implementation, which adds proper
tracking of virtual NMI blocking, and an intr_ctrl flag that software can
set to mark a virtual NMI as pending. Pending virtual NMIs are serviced
by hardware if/when virtual NMIs become unblocked, i.e. act more or less
like real NMIs.
Introduce two new kvm_x86_ops callbacks to support SVM's vNMI, as KVM
needs to treat a pending vNMI as partially injected. Specifically, if
two NMIs (for L1) arrive concurrently in KVM's software model, KVM's ABI
is to inject one and pend the other. Without vNMI, KVM manually tracks
the pending NMI and uses NMI windows to detect when the NMI should be
injected.
With vNMI, the pending NMI is simply stuffed into the VMCB and handed
off to hardware. This means that KVM needs to be able to set a vNMI
pending on-demand, and also query if a vNMI is pending, e.g. to honor the
"at most one NMI pending" rule and to preserve all NMIs across save and
restore.
Warn if KVM attempts to open an NMI window when vNMI is fully enabled,
as the above logic should prevent KVM from ever getting to
kvm_check_and_inject_events() with two NMIs pending _in software_, and
the "at most one NMI pending" logic should prevent having an NMI pending
in hardware and an NMI pending in software if NMIs are also blocked, i.e.
if KVM can't immediately inject the second NMI.
Signed-off-by: Santosh Shukla <Santosh.Shukla@amd.com>
Co-developed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Link: https://lore.kernel.org/r/20230227084016.3368-11-santosh.shukla@amd.com
[sean: rewrite shortlog and changelog, massage code comments]
Signed-off-by: Sean Christopherson <seanjc@google.com>
2023-02-27 16:40:15 +08:00
|
|
|
extern bool vnmi;
|
2022-05-19 18:26:55 +08:00
|
|
|
|
2021-06-03 23:14:37 +08:00
|
|
|
/*
 * VMCB clean bits.  Each enumerator is a bit position in the VMCB "clean"
 * field; the comment above it lists the VMCB fields covered by that bit.
 *
 * VMCB_ALL_CLEAN_MASK might also need to be updated whenever this enum is
 * modified.
 */
enum {
	/* Intercept vectors, TSC offset, pause filter count */
	VMCB_INTERCEPTS,
	/* IOPM Base and MSRPM Base */
	VMCB_PERM_MAP,
	/* ASID */
	VMCB_ASID,
	/* int_ctl, int_vector */
	VMCB_INTR,
	/* npt_en, nCR3, gPAT */
	VMCB_NPT,
	/* CR0, CR3, CR4, EFER */
	VMCB_CR,
	/* DR6, DR7 */
	VMCB_DR,
	/* GDT, IDT */
	VMCB_DT,
	/* CS, DS, SS, ES, CPL */
	VMCB_SEG,
	/* CR2 only */
	VMCB_CR2,
	/* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_LBR,
	/*
	 * AVIC APIC_BAR, AVIC APIC_BACKING_PAGE, AVIC PHYSICAL_TABLE pointer,
	 * AVIC LOGICAL_TABLE pointer
	 */
	VMCB_AVIC,
	/* Reserved for hypervisor/software use */
	VMCB_SW = 31,
};

/* The union of every clean bit named by the enum above. */
#define VMCB_ALL_CLEAN_MASK (		\
	(1U << VMCB_INTERCEPTS)	|	\
	(1U << VMCB_PERM_MAP)	|	\
	(1U << VMCB_ASID)	|	\
	(1U << VMCB_INTR)	|	\
	(1U << VMCB_NPT)	|	\
	(1U << VMCB_CR)		|	\
	(1U << VMCB_DR)		|	\
	(1U << VMCB_DT)		|	\
	(1U << VMCB_SEG)	|	\
	(1U << VMCB_CR2)	|	\
	(1U << VMCB_LBR)	|	\
	(1U << VMCB_AVIC)	|	\
	(1U << VMCB_SW))

/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))
|
|
|
|
|
|
|
|
struct kvm_sev_info {
|
|
|
|
bool active; /* SEV enabled guest */
|
2020-12-11 01:09:38 +08:00
|
|
|
bool es_active; /* SEV-ES enabled guest */
|
2020-03-24 17:41:52 +08:00
|
|
|
unsigned int asid; /* ASID used for this guest */
|
|
|
|
unsigned int handle; /* SEV firmware handle */
|
|
|
|
int fd; /* SEV device fd */
|
|
|
|
unsigned long pages_locked; /* Number of pages locked */
|
|
|
|
struct list_head regions_list; /* List of registered regions */
|
2020-12-16 01:44:07 +08:00
|
|
|
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
|
2021-04-09 06:32:14 +08:00
|
|
|
struct kvm *enc_context_owner; /* Owner of copied encryption context */
|
2022-02-12 03:36:34 +08:00
|
|
|
struct list_head mirror_vms; /* List of VMs mirroring */
|
|
|
|
struct list_head mirror_entry; /* Use as a list entry of mirrors */
|
2021-03-30 12:42:06 +08:00
|
|
|
struct misc_cg *misc_cg; /* For misc cgroup accounting */
|
2021-10-22 01:43:00 +08:00
|
|
|
atomic_t migration_in_progress;
|
2020-03-24 17:41:52 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct kvm_svm {
|
|
|
|
struct kvm kvm;
|
|
|
|
|
|
|
|
/* Struct members for AVIC */
|
|
|
|
u32 avic_vm_id;
|
|
|
|
struct page *avic_logical_id_table_page;
|
|
|
|
struct page *avic_physical_id_table_page;
|
|
|
|
struct hlist_node hnode;
|
|
|
|
|
|
|
|
struct kvm_sev_info sev_info;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct kvm_vcpu;
|
|
|
|
|
2021-01-13 20:07:52 +08:00
|
|
|
struct kvm_vmcb_info {
|
|
|
|
struct vmcb *ptr;
|
|
|
|
unsigned long pa;
|
2021-01-13 00:43:12 +08:00
|
|
|
int cpu;
|
2021-01-13 00:43:13 +08:00
|
|
|
uint64_t asid_generation;
|
2021-01-13 20:07:52 +08:00
|
|
|
};
|
|
|
|
|
2021-11-03 22:05:22 +08:00
|
|
|
struct vmcb_save_area_cached {
|
|
|
|
u64 efer;
|
|
|
|
u64 cr4;
|
|
|
|
u64 cr3;
|
|
|
|
u64 cr0;
|
|
|
|
u64 dr7;
|
|
|
|
u64 dr6;
|
|
|
|
};
|
|
|
|
|
2021-11-03 22:05:26 +08:00
|
|
|
struct vmcb_ctrl_area_cached {
|
|
|
|
u32 intercepts[MAX_INTERCEPT];
|
|
|
|
u16 pause_filter_thresh;
|
|
|
|
u16 pause_filter_count;
|
|
|
|
u64 iopm_base_pa;
|
|
|
|
u64 msrpm_base_pa;
|
|
|
|
u64 tsc_offset;
|
|
|
|
u32 asid;
|
|
|
|
u8 tlb_ctl;
|
|
|
|
u32 int_ctl;
|
|
|
|
u32 int_vector;
|
|
|
|
u32 int_state;
|
|
|
|
u32 exit_code;
|
|
|
|
u32 exit_code_hi;
|
|
|
|
u64 exit_info_1;
|
|
|
|
u64 exit_info_2;
|
|
|
|
u32 exit_int_info;
|
|
|
|
u32 exit_int_info_err;
|
|
|
|
u64 nested_ctl;
|
|
|
|
u32 event_inj;
|
|
|
|
u32 event_inj_err;
|
2022-05-02 06:07:25 +08:00
|
|
|
u64 next_rip;
|
2021-11-03 22:05:26 +08:00
|
|
|
u64 nested_cr3;
|
|
|
|
u64 virt_ext;
|
2022-02-02 17:51:00 +08:00
|
|
|
u32 clean;
|
2022-11-01 22:53:41 +08:00
|
|
|
union {
|
2023-12-05 18:36:30 +08:00
|
|
|
#if IS_ENABLED(CONFIG_HYPERV) || IS_ENABLED(CONFIG_KVM_HYPERV)
|
2022-11-01 22:53:42 +08:00
|
|
|
struct hv_vmcb_enlightenments hv_enlightenments;
|
2023-12-05 18:36:30 +08:00
|
|
|
#endif
|
2022-11-01 22:53:41 +08:00
|
|
|
u8 reserved_sw[32];
|
|
|
|
};
|
2021-11-03 22:05:26 +08:00
|
|
|
};
|
|
|
|
|
2020-06-25 16:03:22 +08:00
|
|
|
struct svm_nested_state {
|
2021-01-13 20:07:52 +08:00
|
|
|
struct kvm_vmcb_info vmcb02;
|
2020-03-24 17:41:52 +08:00
|
|
|
u64 hsave_msr;
|
|
|
|
u64 vm_cr_msr;
|
2020-08-28 01:11:39 +08:00
|
|
|
u64 vmcb12_gpa;
|
2021-03-02 04:08:44 +08:00
|
|
|
u64 last_vmcb12_gpa;
|
2020-03-24 17:41:52 +08:00
|
|
|
|
|
|
|
/* These are the merged vectors */
|
|
|
|
u32 *msrpm;
|
|
|
|
|
2020-04-24 01:22:27 +08:00
|
|
|
/* A VMRUN has started but has not yet been performed, so
|
|
|
|
* we cannot inject a nested vmexit yet. */
|
|
|
|
bool nested_run_pending;
|
|
|
|
|
2020-05-14 01:16:12 +08:00
|
|
|
/* cache for control fields of the guest */
|
2021-11-03 22:05:26 +08:00
|
|
|
struct vmcb_ctrl_area_cached ctl;
|
2020-10-01 19:29:54 +08:00
|
|
|
|
2021-11-03 22:05:22 +08:00
|
|
|
/*
|
|
|
|
* Note: this struct is not kept up-to-date while L2 runs; it is only
|
|
|
|
* valid within nested_svm_vmrun.
|
|
|
|
*/
|
|
|
|
struct vmcb_save_area_cached save;
|
|
|
|
|
2020-10-01 19:29:54 +08:00
|
|
|
bool initialized;
|
2022-02-02 17:50:57 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Indicates whether MSR bitmap for L2 needs to be rebuilt due to
|
|
|
|
* changes in MSR bitmap for L1 or switching to a different L2. Note,
|
|
|
|
* this flag can only be used reliably in conjunction with a paravirt L1
|
|
|
|
* which informs L0 whether any changes to MSR bitmap for L2 were done
|
|
|
|
* on its side.
|
|
|
|
*/
|
|
|
|
bool force_msr_bitmap_recalc;
|
2020-03-24 17:41:52 +08:00
|
|
|
};
|
|
|
|
|
2021-10-22 01:42:59 +08:00
|
|
|
struct vcpu_sev_es_state {
|
|
|
|
/* SEV-ES support */
|
2022-04-06 02:27:43 +08:00
|
|
|
struct sev_es_save_area *vmsa;
|
2021-10-22 01:42:59 +08:00
|
|
|
struct ghcb *ghcb;
|
2023-08-05 00:42:45 +08:00
|
|
|
u8 valid_bitmap[16];
|
2021-10-22 01:42:59 +08:00
|
|
|
struct kvm_host_map ghcb_map;
|
|
|
|
bool received_first_sipi;
|
|
|
|
|
|
|
|
/* SEV-ES scratch area support */
|
2023-08-05 00:42:45 +08:00
|
|
|
u64 sw_scratch;
|
2021-10-22 01:42:59 +08:00
|
|
|
void *ghcb_sa;
|
2021-11-11 23:52:26 +08:00
|
|
|
u32 ghcb_sa_len;
|
2021-10-22 01:42:59 +08:00
|
|
|
bool ghcb_sa_sync;
|
|
|
|
bool ghcb_sa_free;
|
|
|
|
};
|
|
|
|
|
2020-03-24 17:41:52 +08:00
|
|
|
struct vcpu_svm {
|
|
|
|
struct kvm_vcpu vcpu;
|
2021-04-07 01:18:10 +08:00
|
|
|
/* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
|
2020-03-24 17:41:52 +08:00
|
|
|
struct vmcb *vmcb;
|
2021-01-13 20:07:52 +08:00
|
|
|
struct kvm_vmcb_info vmcb01;
|
|
|
|
struct kvm_vmcb_info *current_vmcb;
|
2020-10-12 02:48:17 +08:00
|
|
|
u32 asid;
|
2021-04-01 19:19:28 +08:00
|
|
|
u32 sysenter_esp_hi;
|
|
|
|
u32 sysenter_eip_hi;
|
2020-03-24 17:41:52 +08:00
|
|
|
uint64_t tsc_aux;
|
|
|
|
|
|
|
|
u64 msr_decfg;
|
|
|
|
|
|
|
|
u64 next_rip;
|
|
|
|
|
|
|
|
u64 spec_ctrl;
|
2021-09-14 23:48:24 +08:00
|
|
|
|
|
|
|
u64 tsc_ratio_msr;
|
2020-03-24 17:41:52 +08:00
|
|
|
/*
|
|
|
|
* Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
|
|
|
|
* translated into the appropriate L2_CFG bits on the host to
|
|
|
|
* perform speculative control.
|
|
|
|
*/
|
|
|
|
u64 virt_spec_ctrl;
|
|
|
|
|
|
|
|
u32 *msrpm;
|
|
|
|
|
|
|
|
ulong nmi_iret_rip;
|
|
|
|
|
2020-06-25 16:03:22 +08:00
|
|
|
struct svm_nested_state nested;
|
2020-03-24 17:41:52 +08:00
|
|
|
|
2023-01-31 09:20:03 +08:00
|
|
|
/* NMI mask value, used when vNMI is not enabled */
|
|
|
|
bool nmi_masked;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* True when NMIs are still masked but guest IRET was just intercepted
|
|
|
|
* and KVM is waiting for RIP to change, which will signal that the
|
|
|
|
* intercepted IRET was retired and thus NMI can be unmasked.
|
|
|
|
*/
|
|
|
|
bool awaiting_iret_completion;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set when KVM is awaiting IRET completion and needs to inject NMIs as
|
|
|
|
* soon as the IRET completes (e.g. NMI is pending injection). KVM
|
|
|
|
* temporarily steals RFLAGS.TF to single-step the guest in this case
|
|
|
|
* in order to regain control as soon as the NMI-blocking condition
|
|
|
|
* goes away.
|
|
|
|
*/
|
2020-03-24 17:41:52 +08:00
|
|
|
bool nmi_singlestep;
|
|
|
|
u64 nmi_singlestep_guest_rflags;
|
2023-01-31 09:20:03 +08:00
|
|
|
|
2022-05-02 06:07:34 +08:00
|
|
|
bool nmi_l1_to_l2;
|
2020-03-24 17:41:52 +08:00
|
|
|
|
KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction
Re-inject INT3/INTO instead of retrying the instruction if the CPU
encountered an intercepted exception while vectoring the software
exception, e.g. if vectoring INT3 encounters a #PF and KVM is using
shadow paging. Retrying the instruction is architecturally wrong, e.g.
will result in a spurious #DB if there's a code breakpoint on the INT3/O,
and lack of re-injection also breaks nested virtualization, e.g. if L1
injects a software exception and vectoring the injected exception
encounters an exception that is intercepted by L0 but not L1.
Due to, ahem, deficiencies in the SVM architecture, acquiring the next
RIP may require flowing through the emulator even if NRIPS is supported,
as the CPU clears next_rip if the VM-Exit is due to an exception other
than "exceptions caused by the INT3, INTO, and BOUND instructions". To
deal with this, "skip" the instruction to calculate next_rip (if it's
not already known), and then unwind the RIP write and any side effects
(RFLAGS updates).
Save the computed next_rip and use it to re-stuff next_rip if injection
doesn't complete. This allows KVM to do the right thing if next_rip was
known prior to injection, e.g. if L1 injects a soft event into L2, and
there is no backing INTn instruction, e.g. if L1 is injecting an
arbitrary event.
Note, it's impossible to guarantee architectural correctness given SVM's
architectural flaws. E.g. if the guest executes INTn (no KVM injection),
an exit occurs while vectoring the INTn, and the guest modifies the code
stream while the exit is being handled, KVM will compute the incorrect
next_rip due to "skipping" the wrong instruction. A future enhancement
to make this less awful would be for KVM to detect that the decoded
instruction is not the correct INTn and drop the to-be-injected soft
event (retrying is a lesser evil compared to shoving the wrong RIP on the
exception stack).
Reported-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <65cb88deab40bc1649d509194864312a89bbe02e.1651440202.git.maciej.szmigiero@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-05-02 06:07:29 +08:00
|
|
|
unsigned long soft_int_csbase;
|
|
|
|
unsigned long soft_int_old_rip;
|
|
|
|
unsigned long soft_int_next_rip;
|
|
|
|
bool soft_int_injected;
|
2020-03-24 17:41:52 +08:00
|
|
|
|
|
|
|
u32 ldr_reg;
|
|
|
|
u32 dfr_reg;
|
|
|
|
struct page *avic_backing_page;
|
|
|
|
u64 *avic_physical_id_cache;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Per-vcpu list of struct amd_svm_iommu_ir:
|
|
|
|
* This is used mainly to store interrupt remapping information used
|
|
|
|
* when update the vcpu affinity. This avoids the need to scan for
|
|
|
|
* IRTE and try to match ga_tag in the IOMMU driver.
|
|
|
|
*/
|
|
|
|
struct list_head ir_list;
|
|
|
|
spinlock_t ir_list_lock;
|
2020-09-25 22:34:19 +08:00
|
|
|
|
|
|
|
/* Save desired MSR intercept (read: pass-through) state */
|
|
|
|
struct {
|
|
|
|
DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
|
|
|
|
DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
|
|
|
|
} shadow_msr_intercept;
|
2020-12-11 01:09:40 +08:00
|
|
|
|
2021-10-22 01:42:59 +08:00
|
|
|
struct vcpu_sev_es_state sev_es;
|
2021-02-03 03:01:26 +08:00
|
|
|
|
|
|
|
bool guest_state_loaded;
|
2022-05-19 18:27:09 +08:00
|
|
|
|
|
|
|
bool x2avic_msrs_intercepted;
|
2023-01-31 08:32:53 +08:00
|
|
|
|
|
|
|
/* Guest GIF value, used when vGIF is not enabled */
|
|
|
|
bool guest_gif;
|
2020-03-24 17:41:52 +08:00
|
|
|
};
|
|
|
|
|
2020-03-24 17:41:54 +08:00
|
|
|
struct svm_cpu_data {
|
|
|
|
u64 asid_generation;
|
|
|
|
u32 max_asid;
|
|
|
|
u32 next_asid;
|
|
|
|
u32 min_asid;
|
|
|
|
|
|
|
|
struct page *save_area;
|
2022-11-07 16:49:59 +08:00
|
|
|
unsigned long save_area_pa;
|
|
|
|
|
2020-03-24 17:41:54 +08:00
|
|
|
struct vmcb *current_vmcb;
|
|
|
|
|
|
|
|
/* index = sev_asid, value = vmcb pointer */
|
|
|
|
struct vmcb **sev_vmcbs;
|
|
|
|
};
|
|
|
|
|
2022-11-09 22:07:55 +08:00
|
|
|
DECLARE_PER_CPU(struct svm_cpu_data, svm_data);
|
2020-03-24 17:41:54 +08:00
|
|
|
|
2020-03-24 17:41:52 +08:00
|
|
|
void recalc_intercepts(struct vcpu_svm *svm);
|
|
|
|
|
2021-06-24 17:41:03 +08:00
|
|
|
static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
|
2020-04-01 00:17:38 +08:00
|
|
|
{
|
|
|
|
return container_of(kvm, struct kvm_svm, kvm);
|
|
|
|
}
|
|
|
|
|
2021-06-24 17:41:03 +08:00
|
|
|
/*
 * Return true if @kvm is an SEV enabled guest.  Always false when the kernel
 * is built without CONFIG_KVM_AMD_SEV.
 */
static __always_inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	return to_kvm_svm(kvm)->sev_info.active;
#else
	return false;
#endif
}
|
|
|
|
|
2021-06-24 17:41:03 +08:00
|
|
|
/*
 * Return true if @kvm is an SEV-ES enabled guest.  Always false when the
 * kernel is built without CONFIG_KVM_AMD_SEV.
 */
static __always_inline bool sev_es_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	struct kvm_sev_info *info = &to_kvm_svm(kvm)->sev_info;

	if (!info->es_active)
		return false;

	/* es_active without active is unexpected: warn once, report false. */
	return !WARN_ON_ONCE(!info->active);
#else
	return false;
#endif
}
|
|
|
|
|
2020-06-25 16:03:23 +08:00
|
|
|
static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
|
2020-03-24 17:41:52 +08:00
|
|
|
{
|
|
|
|
vmcb->control.clean = 0;
|
|
|
|
}
|
|
|
|
|
2020-06-25 16:03:23 +08:00
|
|
|
static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
|
2020-03-24 17:41:52 +08:00
|
|
|
{
|
2021-06-03 23:14:37 +08:00
|
|
|
vmcb->control.clean = VMCB_ALL_CLEAN_MASK
|
2020-03-24 17:41:52 +08:00
|
|
|
& ~VMCB_ALWAYS_DIRTY_MASK;
|
|
|
|
}
|
|
|
|
|
2020-06-25 16:03:23 +08:00
|
|
|
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
|
2020-03-24 17:41:52 +08:00
|
|
|
{
|
|
|
|
vmcb->control.clean &= ~(1 << bit);
|
|
|
|
}
|
|
|
|
|
2021-03-02 04:08:44 +08:00
|
|
|
static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
|
|
|
|
{
|
|
|
|
return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
|
|
|
|
}
|
|
|
|
|
2021-06-24 17:41:06 +08:00
|
|
|
static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
|
2020-03-24 17:41:52 +08:00
|
|
|
{
|
|
|
|
return container_of(vcpu, struct vcpu_svm, vcpu);
|
|
|
|
}
|
|
|
|
|
2021-11-26 20:00:15 +08:00
|
|
|
/*
|
|
|
|
* Only the PDPTRs are loaded on demand into the shadow MMU. All other
|
2022-01-28 08:52:07 +08:00
|
|
|
* fields are synchronized on VM-Exit, because accessing the VMCB is cheap.
|
2021-11-26 20:00:15 +08:00
|
|
|
*
|
|
|
|
* CR3 might be out of date in the VMCB but it is not marked dirty; instead,
|
|
|
|
* KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3
|
|
|
|
* is changed. svm_load_mmu_pgd() then syncs the new CR3 value into the VMCB.
|
|
|
|
*/
|
|
|
|
#define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
|
|
|
|
|
2020-09-12 03:27:58 +08:00
|
|
|
/*
 * Set intercept bit @bit in @control.  Warns once if @bit lies outside the
 * intercept array (MAX_INTERCEPT words of 32 bits each).
 */
static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
{
	unsigned long *map = (unsigned long *)&control->intercepts;

	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	__set_bit(bit, map);
}
|
|
|
|
|
|
|
|
/*
 * Clear intercept bit @bit in @control.  Warns once if @bit lies outside the
 * intercept array (MAX_INTERCEPT words of 32 bits each).
 */
static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
{
	unsigned long *map = (unsigned long *)&control->intercepts;

	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	__clear_bit(bit, map);
}
|
|
|
|
|
|
|
|
/*
 * Return true if intercept bit @bit is set in @control.  Warns once if @bit
 * lies outside the intercept array.
 */
static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
{
	unsigned long *map = (unsigned long *)&control->intercepts;

	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	return test_bit(bit, map);
}
|
|
|
|
|
2021-11-03 22:05:26 +08:00
|
|
|
/*
 * Same as vmcb_is_intercept(), but for the cached control area: return true
 * if intercept bit @bit is set in @control.
 */
static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
{
	unsigned long *map = (unsigned long *)&control->intercepts;

	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	return test_bit(bit, map);
}
|
|
|
|
|
2020-09-12 03:28:20 +08:00
|
|
|
/*
 * Enable interception of exception vector @bit in vmcb01, then recompute
 * the effective intercepts.  Warns once if @bit is not below 32.
 */
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
	struct vmcb_control_area *control = &svm->vmcb01.ptr->control;

	WARN_ON_ONCE(bit >= 32);
	vmcb_set_intercept(control, INTERCEPT_EXCEPTION_OFFSET + bit);

	recalc_intercepts(svm);
}
|
|
|
|
|
2020-09-12 03:28:20 +08:00
|
|
|
/*
 * Disable interception of exception vector @bit in vmcb01, then recompute
 * the effective intercepts.  Warns once if @bit is not below 32.
 */
static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
	struct vmcb_control_area *control = &svm->vmcb01.ptr->control;

	WARN_ON_ONCE(bit >= 32);
	vmcb_clr_intercept(control, INTERCEPT_EXCEPTION_OFFSET + bit);

	recalc_intercepts(svm);
}
|
|
|
|
|
2020-06-25 16:03:24 +08:00
|
|
|
static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
|
2020-03-24 17:41:52 +08:00
|
|
|
{
|
2021-01-13 20:07:52 +08:00
|
|
|
struct vmcb *vmcb = svm->vmcb01.ptr;
|
2020-03-24 17:41:52 +08:00
|
|
|
|
2020-09-12 03:28:28 +08:00
|
|
|
vmcb_set_intercept(&vmcb->control, bit);
|
2020-03-24 17:41:52 +08:00
|
|
|
|
|
|
|
recalc_intercepts(svm);
|
|
|
|
}
|
|
|
|
|
2020-06-25 16:03:24 +08:00
|
|
|
static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
|
2020-03-24 17:41:52 +08:00
|
|
|
{
|
2021-01-13 20:07:52 +08:00
|
|
|
struct vmcb *vmcb = svm->vmcb01.ptr;
|
2020-03-24 17:41:52 +08:00
|
|
|
|
2020-09-12 03:28:28 +08:00
|
|
|
vmcb_clr_intercept(&vmcb->control, bit);
|
2020-03-24 17:41:52 +08:00
|
|
|
|
|
|
|
recalc_intercepts(svm);
|
|
|
|
}
|
|
|
|
|
2020-06-25 16:03:24 +08:00
|
|
|
static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
|
2020-03-24 17:41:52 +08:00
|
|
|
{
|
2020-09-12 03:28:28 +08:00
|
|
|
return vmcb_is_intercept(&svm->vmcb->control, bit);
|
2020-03-24 17:41:52 +08:00
|
|
|
}
|
|
|
|
|
2022-03-23 01:40:48 +08:00
|
|
|
static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
|
|
|
|
{
|
2023-08-16 04:36:51 +08:00
|
|
|
return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
|
|
|
|
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
|
2022-03-23 01:40:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
|
|
|
|
{
|
|
|
|
if (!vgif)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (is_guest_mode(&svm->vcpu) && !nested_vgif_enabled(svm))
|
|
|
|
return svm->nested.vmcb02.ptr;
|
|
|
|
else
|
|
|
|
return svm->vmcb01.ptr;
|
|
|
|
}
|
|
|
|
|
2020-03-24 17:41:52 +08:00
|
|
|
static inline void enable_gif(struct vcpu_svm *svm)
|
|
|
|
{
|
2022-03-23 01:40:48 +08:00
|
|
|
struct vmcb *vmcb = get_vgif_vmcb(svm);
|
|
|
|
|
|
|
|
if (vmcb)
|
|
|
|
vmcb->control.int_ctl |= V_GIF_MASK;
|
2020-03-24 17:41:52 +08:00
|
|
|
else
|
2023-01-31 08:32:53 +08:00
|
|
|
svm->guest_gif = true;
|
2020-03-24 17:41:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void disable_gif(struct vcpu_svm *svm)
|
|
|
|
{
|
2022-03-23 01:40:48 +08:00
|
|
|
struct vmcb *vmcb = get_vgif_vmcb(svm);
|
|
|
|
|
|
|
|
if (vmcb)
|
|
|
|
vmcb->control.int_ctl &= ~V_GIF_MASK;
|
2020-03-24 17:41:52 +08:00
|
|
|
else
|
2023-01-31 08:32:53 +08:00
|
|
|
svm->guest_gif = false;
|
2020-03-24 17:41:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool gif_set(struct vcpu_svm *svm)
|
|
|
|
{
|
2022-03-23 01:40:48 +08:00
|
|
|
struct vmcb *vmcb = get_vgif_vmcb(svm);
|
|
|
|
|
|
|
|
if (vmcb)
|
|
|
|
return !!(vmcb->control.int_ctl & V_GIF_MASK);
|
2020-03-24 17:41:52 +08:00
|
|
|
else
|
2023-01-31 08:32:53 +08:00
|
|
|
return svm->guest_gif;
|
2020-03-24 17:41:52 +08:00
|
|
|
}
|
|
|
|
|
2022-03-01 22:36:46 +08:00
|
|
|
static inline bool nested_npt_enabled(struct vcpu_svm *svm)
|
|
|
|
{
|
|
|
|
return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
|
|
|
|
}
|
|
|
|
|
2023-02-27 16:40:16 +08:00
|
|
|
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
|
|
|
|
{
|
2023-08-16 04:36:52 +08:00
|
|
|
return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
|
2023-02-27 16:40:16 +08:00
|
|
|
(svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
|
|
|
|
}
|
|
|
|
|
2022-05-19 18:27:02 +08:00
|
|
|
/*
 * Return true if the u32 at index @offset of the MSR permission map covers
 * MSRs in the range [APIC_BASE_MSR, APIC_BASE_MSR + 0x100).
 */
static inline bool is_x2apic_msrpm_offset(u32 offset)
{
	/* 4 msrs per u8, and 4 u8 in u32: each u32 covers 16 MSRs */
	const u32 first_msr = offset * 16;

	if (first_msr < APIC_BASE_MSR)
		return false;

	return first_msr < (APIC_BASE_MSR + 0x100);
}
|
|
|
|
|
KVM: x86: Add support for SVM's Virtual NMI
Add support for SVM's Virtual NMIs implementation, which adds proper
tracking of virtual NMI blocking, and an intr_ctrl flag that software can
set to mark a virtual NMI as pending. Pending virtual NMIs are serviced
by hardware if/when virtual NMIs become unblocked, i.e. act more or less
like real NMIs.
Introduce two new kvm_x86_ops callbacks to support SVM's vNMI, as KVM
needs to treat a pending vNMI as partially injected. Specifically, if
two NMIs (for L1) arrive concurrently in KVM's software model, KVM's ABI
is to inject one and pend the other. Without vNMI, KVM manually tracks
the pending NMI and uses NMI windows to detect when the NMI should be
injected.
With vNMI, the pending NMI is simply stuffed into the VMCB and handed
off to hardware. This means that KVM needs to be able to set a vNMI
pending on-demand, and also query if a vNMI is pending, e.g. to honor the
"at most one NMI pending" rule and to preserve all NMIs across save and
restore.
Warn if KVM attempts to open an NMI window when vNMI is fully enabled,
as the above logic should prevent KVM from ever getting to
kvm_check_and_inject_events() with two NMIs pending _in software_, and
the "at most one NMI pending" logic should prevent having an NMI pending
in hardware and an NMI pending in software if NMIs are also blocked, i.e.
if KVM can't immediately inject the second NMI.
Signed-off-by: Santosh Shukla <Santosh.Shukla@amd.com>
Co-developed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Link: https://lore.kernel.org/r/20230227084016.3368-11-santosh.shukla@amd.com
[sean: rewrite shortlog and changelog, massage code comments]
Signed-off-by: Sean Christopherson <seanjc@google.com>
2023-02-27 16:40:15 +08:00
|
|
|
static inline struct vmcb *get_vnmi_vmcb_l1(struct vcpu_svm *svm)
|
|
|
|
{
|
|
|
|
if (!vnmi)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (is_guest_mode(&svm->vcpu))
|
|
|
|
return NULL;
|
|
|
|
else
|
|
|
|
return svm->vmcb01.ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
|
|
|
|
{
|
|
|
|
struct vmcb *vmcb = get_vnmi_vmcb_l1(svm);
|
|
|
|
|
|
|
|
if (vmcb)
|
|
|
|
return !!(vmcb->control.int_ctl & V_NMI_ENABLE_MASK);
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-03-24 17:41:52 +08:00
|
|
|
/* svm.c */
|
2020-07-08 08:39:56 +08:00
|
|
|
#define MSR_INVALID 0xffffffffU
|
2020-03-24 17:41:52 +08:00
|
|
|
|
2022-03-23 01:40:46 +08:00
|
|
|
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
|
|
|
|
|
2020-12-11 01:09:47 +08:00
|
|
|
extern bool dump_invalid_vmcb;
|
2020-12-11 01:09:38 +08:00
|
|
|
|
2020-03-24 17:41:52 +08:00
|
|
|
u32 svm_msrpm_offset(u32 msr);
|
2020-10-01 19:29:54 +08:00
|
|
|
u32 *svm_vcpu_alloc_msrpm(void);
|
|
|
|
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
|
|
|
|
void svm_vcpu_free_msrpm(u32 *msrpm);
|
2022-03-23 01:40:45 +08:00
|
|
|
void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
|
|
|
|
void svm_update_lbrv(struct kvm_vcpu *vcpu);
|
2020-10-01 19:29:54 +08:00
|
|
|
|
2020-10-01 19:29:53 +08:00
|
|
|
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
|
2020-03-24 17:41:52 +08:00
|
|
|
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
|
2020-10-07 09:44:15 +08:00
|
|
|
void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
|
2020-03-24 17:41:52 +08:00
|
|
|
void disable_nmi_singlestep(struct vcpu_svm *svm);
|
2020-04-24 02:19:26 +08:00
|
|
|
bool svm_smi_blocked(struct kvm_vcpu *vcpu);
|
|
|
|
bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
|
|
|
|
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
|
2020-05-23 00:18:27 +08:00
|
|
|
void svm_set_gif(struct vcpu_svm *svm, bool value);
|
2021-03-03 03:40:39 +08:00
|
|
|
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
|
2020-12-11 01:10:06 +08:00
|
|
|
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
|
|
|
|
int read, int write);
|
2022-05-19 18:27:03 +08:00
|
|
|
void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool disable);
|
2022-02-08 19:48:42 +08:00
|
|
|
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
|
|
|
|
int trig_mode, int vec);
|
2020-03-24 17:41:52 +08:00
|
|
|
|
|
|
|
/* nested.c */
|
|
|
|
|
|
|
|
#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
|
|
|
|
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
|
|
|
|
#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
|
|
|
|
|
2020-06-25 16:03:25 +08:00
|
|
|
static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
|
2020-03-24 17:41:52 +08:00
|
|
|
{
|
2020-05-14 01:28:23 +08:00
|
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
|
|
|
|
|
|
|
return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK);
|
2020-03-24 17:41:52 +08:00
|
|
|
}
|
|
|
|
|
2020-04-23 20:17:28 +08:00
|
|
|
static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
|
|
|
|
{
|
2021-11-03 22:05:26 +08:00
|
|
|
return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
|
2020-04-23 20:17:28 +08:00
|
|
|
}
|
|
|
|
|
2020-04-24 06:02:45 +08:00
|
|
|
static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
|
|
|
|
{
|
2021-11-03 22:05:26 +08:00
|
|
|
return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
|
2020-04-24 06:02:45 +08:00
|
|
|
}
|
|
|
|
|
2020-04-23 20:06:43 +08:00
|
|
|
static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
|
|
|
|
{
|
2021-11-03 22:05:26 +08:00
|
|
|
return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
|
2020-04-23 20:06:43 +08:00
|
|
|
}
|
|
|
|
|
2021-09-13 22:09:51 +08:00
|
|
|
int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
|
|
|
|
u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
|
KVM: x86: Forcibly leave nested virt when SMM state is toggled
Forcibly leave nested virtualization operation if userspace toggles SMM
state via KVM_SET_VCPU_EVENTS or KVM_SYNC_X86_EVENTS. If userspace
forces the vCPU out of SMM while it's post-VMXON and then injects an SMI,
vmx_enter_smm() will overwrite vmx->nested.smm.vmxon and end up with both
vmxon=false and smm.vmxon=false, but all other nVMX state allocated.
Don't attempt to gracefully handle the transition as (a) most transitions
are nonsensical, e.g. forcing SMM while L2 is running, (b) there isn't
sufficient information to handle all transitions, e.g. SVM wants access
to the SMRAM save state, and (c) KVM_SET_VCPU_EVENTS must precede
KVM_SET_NESTED_STATE during state restore as the latter disallows putting
the vCPU into L2 if SMM is active, and disallows tagging the vCPU as
being post-VMXON in SMM if SMM is not active.
Abuse of KVM_SET_VCPU_EVENTS manifests as a WARN and memory leak in nVMX
due to failure to free vmcs01's shadow VMCS, but the bug goes far beyond
just a memory leak, e.g. toggling SMM on while L2 is active puts the vCPU
in an architecturally impossible state.
WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
Modules linked in:
CPU: 1 PID: 3606 Comm: syz-executor725 Not tainted 5.17.0-rc1-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
RIP: 0010:free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
Code: <0f> 0b eb b3 e8 8f 4d 9f 00 e9 f7 fe ff ff 48 89 df e8 92 4d 9f 00
Call Trace:
<TASK>
kvm_arch_vcpu_destroy+0x72/0x2f0 arch/x86/kvm/x86.c:11123
kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline]
kvm_destroy_vcpus+0x11f/0x290 arch/x86/kvm/../../../virt/kvm/kvm_main.c:460
kvm_free_vcpus arch/x86/kvm/x86.c:11564 [inline]
kvm_arch_destroy_vm+0x2e8/0x470 arch/x86/kvm/x86.c:11676
kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1217 [inline]
kvm_put_kvm+0x4fa/0xb00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1250
kvm_vm_release+0x3f/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1273
__fput+0x286/0x9f0 fs/file_table.c:311
task_work_run+0xdd/0x1a0 kernel/task_work.c:164
exit_task_work include/linux/task_work.h:32 [inline]
do_exit+0xb29/0x2a30 kernel/exit.c:806
do_group_exit+0xd2/0x2f0 kernel/exit.c:935
get_signal+0x4b0/0x28c0 kernel/signal.c:2862
arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:868
handle_signal_work kernel/entry/common.c:148 [inline]
exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207
__syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline]
syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300
do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
entry_SYSCALL_64_after_hwframe+0x44/0xae
</TASK>
Cc: stable@vger.kernel.org
Reported-by: syzbot+8112db3ab20e70d50c31@syzkaller.appspotmail.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220125220358.2091737-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-01-26 06:03:58 +08:00
|
|
|
void svm_leave_nested(struct kvm_vcpu *vcpu);
|
2020-10-01 19:29:54 +08:00
|
|
|
void svm_free_nested(struct vcpu_svm *svm);
|
|
|
|
int svm_allocate_nested(struct vcpu_svm *svm);
|
2021-03-03 03:40:39 +08:00
|
|
|
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
|
2021-07-19 17:03:22 +08:00
|
|
|
void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
|
|
|
|
struct vmcb_save_area *from_save);
|
|
|
|
void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
|
2020-03-24 17:41:52 +08:00
|
|
|
int nested_svm_vmexit(struct vcpu_svm *svm);
|
2021-03-03 01:45:15 +08:00
|
|
|
|
|
|
|
/*
 * Store @exit_code in the control area of svm->vmcb, zero both exit_info
 * fields, then call nested_svm_vmexit() and pass its return value back.
 */
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
{
	struct vmcb_control_area *ctl = &svm->vmcb->control;

	ctl->exit_code = exit_code;
	ctl->exit_info_1 = 0;
	ctl->exit_info_2 = 0;

	return nested_svm_vmexit(svm);
}
|
|
|
|
|
2020-03-24 17:41:52 +08:00
|
|
|
int nested_svm_exit_handled(struct vcpu_svm *svm);
|
2021-03-03 03:40:39 +08:00
|
|
|
int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
|
2020-03-24 17:41:52 +08:00
|
|
|
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
|
|
|
|
bool has_error_code, u32 error_code);
|
|
|
|
int nested_svm_exit_special(struct vcpu_svm *svm);
|
2021-09-14 23:48:24 +08:00
|
|
|
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
|
2023-07-29 09:15:52 +08:00
|
|
|
void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
|
2021-11-03 22:05:23 +08:00
|
|
|
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
|
|
|
|
struct vmcb_control_area *control);
|
2021-11-03 22:05:22 +08:00
|
|
|
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
|
|
|
|
struct vmcb_save_area *save);
|
2020-11-17 18:15:41 +08:00
|
|
|
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
|
2021-01-13 20:07:52 +08:00
|
|
|
void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
|
|
|
|
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
|
2020-03-24 17:41:52 +08:00
|
|
|
|
2020-04-17 22:24:18 +08:00
|
|
|
extern struct kvm_x86_nested_ops svm_nested_ops;
|
|
|
|
|
2020-04-01 00:17:38 +08:00
|
|
|
/* avic.c */
|
2023-01-06 09:13:04 +08:00
|
|
|
/*
 * Bitmask formed by OR-ing BIT() of every APICv inhibit reason listed below.
 * NOTE(review): presumably the set of inhibit reasons the AVIC code has to
 * honour - confirm against the users in avic.c.
 */
#define AVIC_REQUIRED_APICV_INHIBITS				\
	(BIT(APICV_INHIBIT_REASON_DISABLE)			\
	 | BIT(APICV_INHIBIT_REASON_ABSENT)			\
	 | BIT(APICV_INHIBIT_REASON_HYPERV)			\
	 | BIT(APICV_INHIBIT_REASON_NESTED)			\
	 | BIT(APICV_INHIBIT_REASON_IRQWIN)			\
	 | BIT(APICV_INHIBIT_REASON_PIT_REINJ)			\
	 | BIT(APICV_INHIBIT_REASON_BLOCKIRQ)			\
	 | BIT(APICV_INHIBIT_REASON_SEV)			\
	 | BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED)	\
	 | BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED)		\
	 | BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED)		\
	 | BIT(APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED))
|
2020-04-01 00:17:38 +08:00
|
|
|
|
2022-11-09 19:59:52 +08:00
|
|
|
bool avic_hardware_setup(void);
|
2020-04-01 00:17:38 +08:00
|
|
|
int avic_ga_log_notifier(u32 ga_tag);
|
|
|
|
void avic_vm_destroy(struct kvm *kvm);
|
|
|
|
int avic_vm_init(struct kvm *kvm);
|
2022-03-23 01:24:43 +08:00
|
|
|
void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
|
2021-03-03 03:40:39 +08:00
|
|
|
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
|
|
|
|
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
|
2020-04-01 00:17:38 +08:00
|
|
|
int avic_init_vcpu(struct vcpu_svm *svm);
|
2022-06-07 02:08:29 +08:00
|
|
|
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
|
|
|
|
void avic_vcpu_put(struct kvm_vcpu *vcpu);
|
2022-01-28 08:52:04 +08:00
|
|
|
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
|
|
|
|
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
|
|
|
|
int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
|
|
|
|
uint32_t guest_irq, bool set);
|
2021-12-08 09:52:33 +08:00
|
|
|
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
|
|
|
|
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
|
2022-02-08 19:48:42 +08:00
|
|
|
void avic_ring_doorbell(struct kvm_vcpu *vcpu);
|
2022-03-23 01:40:50 +08:00
|
|
|
unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
|
2023-01-06 09:12:40 +08:00
|
|
|
void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu);
|
2022-05-19 18:27:01 +08:00
|
|
|
|
2020-04-01 00:17:38 +08:00
|
|
|
|
2020-03-24 17:41:54 +08:00
|
|
|
/* sev.c */
|
|
|
|
|
2021-04-27 19:16:35 +08:00
|
|
|
/*
 * Upper and lower GHCB version bounds; both are currently 1.
 * NOTE(review): presumably the range of GHCB protocol versions accepted from
 * a guest - confirm against the users in sev.c.
 */
#define GHCB_VERSION_MAX	1ULL
#define GHCB_VERSION_MIN	1ULL
|
|
|
|
|
2020-12-11 01:09:51 +08:00
|
|
|
|
2020-03-24 17:41:54 +08:00
|
|
|
extern unsigned int max_sev_asid;
|
|
|
|
|
|
|
|
void sev_vm_destroy(struct kvm *kvm);
|
2022-01-28 08:52:06 +08:00
|
|
|
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp);
|
|
|
|
int sev_mem_enc_register_region(struct kvm *kvm,
|
|
|
|
struct kvm_enc_region *range);
|
|
|
|
int sev_mem_enc_unregister_region(struct kvm *kvm,
|
|
|
|
struct kvm_enc_region *range);
|
|
|
|
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
|
|
|
|
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
|
2022-04-21 11:14:07 +08:00
|
|
|
void sev_guest_memory_reclaimed(struct kvm *kvm);
|
|
|
|
|
2020-03-24 17:41:54 +08:00
|
|
|
void pre_sev_run(struct vcpu_svm *svm, int cpu);
|
2021-04-22 10:11:15 +08:00
|
|
|
void __init sev_set_cpu_caps(void);
|
2020-12-11 01:09:38 +08:00
|
|
|
void __init sev_hardware_setup(void);
|
2022-01-28 08:52:07 +08:00
|
|
|
void sev_hardware_unsetup(void);
|
2021-04-22 10:11:22 +08:00
|
|
|
int sev_cpu_init(struct svm_cpu_data *sd);
|
2022-06-24 01:34:06 +08:00
|
|
|
void sev_init_vmcb(struct vcpu_svm *svm);
|
KVM: SVM: Fix TSC_AUX virtualization setup
The checks for virtualizing TSC_AUX occur during the vCPU reset processing
path. However, at the time of initial vCPU reset processing, when the vCPU
is first created, not all of the guest CPUID information has been set. In
this case the RDTSCP and RDPID feature support for the guest is not in
place and so TSC_AUX virtualization is not established.
This continues for each vCPU created for the guest. On the first boot of
an AP, vCPU reset processing is executed as a result of an APIC INIT
event, this time with all of the guest CPUID information set, resulting
in TSC_AUX virtualization being enabled, but only for the APs. The BSP
always sees a TSC_AUX value of 0 which probably went unnoticed because,
at least for Linux, the BSP TSC_AUX value is 0.
Move the TSC_AUX virtualization enablement out of the init_vmcb() path and
into the vcpu_after_set_cpuid() path to allow for proper initialization of
the support after the guest CPUID information has been set.
With the TSC_AUX virtualization support now in the vcpu_after_set_cpuid()
path, the intercepts must be either cleared or set based on the guest
CPUID input.
Fixes: 296d5a17e793 ("KVM: SEV-ES: Use V_TSC_AUX if available instead of RDTSC/MSR_TSC_AUX intercepts")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <4137fbcb9008951ab5f0befa74a0399d2cce809a.1694811272.git.thomas.lendacky@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2023-09-16 04:54:30 +08:00
|
|
|
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
|
2020-12-11 01:09:40 +08:00
|
|
|
void sev_free_vcpu(struct kvm_vcpu *vcpu);
|
2021-03-03 03:40:39 +08:00
|
|
|
int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
|
2020-12-11 01:09:54 +08:00
|
|
|
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
|
2021-09-21 08:03:02 +08:00
|
|
|
void sev_es_vcpu_reset(struct vcpu_svm *svm);
|
KVM: SVM: Add support for booting APs in an SEV-ES guest
Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.
Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.
First AP boot (first INIT-SIPI-SIPI sequence):
Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
support. It is up to the guest to transfer control of the AP to the
proper location.
Subsequent AP boot:
KVM will expect to receive an AP Reset Hold exit event indicating that
the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
awaken it. When the AP Reset Hold exit event is received, KVM will place
the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
sequence, KVM will make the vCPU runnable. It is again up to the guest
to then transfer control of the AP to the proper location.
To differentiate between an actual HLT and an AP Reset Hold, a new MP
state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is
placed in upon receiving the AP Reset Hold exit event. Additionally, to
communicate the AP Reset Hold exit event up to userspace (if needed), a
new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD.
A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order
to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the
original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds
a new function that, for non SEV-ES guests, invokes the original SIPI
delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests,
implements the logic above.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <e8fbebe8eb161ceaabdad7c01a5859a78b424d5e.1609791600.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-01-05 04:20:01 +08:00
|
|
|
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
|
2022-04-06 02:27:43 +08:00
|
|
|
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
|
2021-05-07 04:14:41 +08:00
|
|
|
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
|
2020-03-24 17:41:54 +08:00
|
|
|
|
2020-12-11 01:10:08 +08:00
|
|
|
/* vmenter.S */
|
|
|
|
|
KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly
Restoration of the host IA32_SPEC_CTRL value is probably too late
with respect to the return thunk training sequence.
With respect to the user/kernel boundary, AMD says, "If software chooses
to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel
exit), software should set STIBP to 1 before executing the return thunk
training sequence." I assume the same requirements apply to the guest/host
boundary. The return thunk training sequence is in vmenter.S, quite close
to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's
IA32_SPEC_CTRL value is not restored until much later.
To avoid this, move the restoration of host SPEC_CTRL to assembly and,
for consistency, move the restoration of the guest SPEC_CTRL as well.
This is not particularly difficult, apart from some care to cover both
32- and 64-bit, and to share code between SEV-ES and normal vmentry.
Cc: stable@vger.kernel.org
Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk")
Suggested-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-10-01 02:24:40 +08:00
|
|
|
void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
|
|
|
|
void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
|
2020-12-11 01:10:08 +08:00
|
|
|
|
2023-08-05 00:42:45 +08:00
|
|
|
/*
 * DEFINE_KVM_GHCB_ACCESSORS(field) emits two helpers for one GHCB save-area
 * field:
 *
 *   kvm_ghcb_<field>_is_valid(svm)
 *	tests bit GHCB_BITMAP_IDX(field) in svm->sev_es.valid_bitmap.
 *
 *   kvm_ghcb_get_<field>_if_valid(svm, ghcb)
 *	returns ghcb->save.<field> when that bit is set, otherwise 0.
 *
 * The bitmap address is cast to const unsigned long * so that the const
 * qualifier on @svm is not discarded on the way into test_bit().
 *
 * The last line of the macro deliberately has no line continuation: the
 * definition ends at the closing brace and cannot absorb the line after it.
 */
#define DEFINE_KVM_GHCB_ACCESSORS(field)						\
static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm)	\
{											\
	return test_bit(GHCB_BITMAP_IDX(field),						\
			(const unsigned long *)&svm->sev_es.valid_bitmap);		\
}											\
											\
static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm,	\
							    struct ghcb *ghcb)		\
{											\
	return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0;			\
}
|
|
|
|
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(cpl)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(rax)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(rcx)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(rdx)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(rbx)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(rsi)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(sw_scratch)
|
|
|
|
DEFINE_KVM_GHCB_ACCESSORS(xcr0)
|
|
|
|
|
2020-03-24 17:41:52 +08:00
|
|
|
#endif
|