mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-09-22 04:31:58 +08:00
ARM:
* Plug a race in the stage-2 mapping code where the IPA and the PA would end up being out of sync * Make better use of the bitmap API (bitmap_zero, bitmap_zalloc...) * FP/SVE/SME documentation update, in the hope that this field becomes clearer... * Add workaround for Apple SEIS brokenness to a new SoC * Random comment fixes x86: * add MSR_IA32_TSX_CTRL into msrs_to_save * fixes for XCR0 handling in SGX enclaves Generic: * Fix vcpu_array[0] races * Fix race between starting a VM and "reboot -f" -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmRp0WIUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroPqVwf+OFayNPpURAFqfrOuISYW7hoCL24+ sCtXyVv4Ei0np1vGekit2h/m8GmxO12xEBibcFeYj+YQItIqu9HvC08fRxAKaMeE N3p9iLuS1zcM3cEuZpg0r6QN+pKybttdadl70yho43CtagEM4FmB7dgyAo9AhyXk pZUaVfoO6beBQ/J6A6V/Q5xlue1LvHk1+K4rmNcYVTYn6ZOd+yYgvqng1nv5/h9b 0HgW0aUWkEHAB67/sSnUUro707loMNTowsZlMCtgDk4Fzf8RwQ7qc8lClLk1UPjJ DHB6Hif9F0Q5mkrwn+c7xyVlKARaY6/FOshS2Q620q19+4fq5fUD9HgjrQ== =ARzH -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "ARM: - Plug a race in the stage-2 mapping code where the IPA and the PA would end up being out of sync - Make better use of the bitmap API (bitmap_zero, bitmap_zalloc...) - FP/SVE/SME documentation update, in the hope that this field becomes clearer... - Add workaround for Apple SEIS brokenness to a new SoC - Random comment fixes x86: - add MSR_IA32_TSX_CTRL into msrs_to_save - fixes for XCR0 handling in SGX enclaves Generic: - Fix vcpu_array[0] races - Fix race between starting a VM and 'reboot -f'" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: VMX: add MSR_IA32_TSX_CTRL into msrs_to_save KVM: x86: Don't adjust guest's CPUID.0x12.1 (allowed SGX enclave XFRM) KVM: VMX: Don't rely _only_ on CPUID to enforce XCR0 restrictions for ECREATE KVM: Fix vcpu_array[0] races KVM: VMX: Fix header file dependency of asm/vmx.h KVM: Don't enable hardware after a restart/shutdown is initiated KVM: Use syscore_ops instead of reboot_notifier to hook restart/shutdown KVM: arm64: vgic: Add Apple M2 PRO/MAX cpus to the list of broken SEIS implementations KVM: arm64: Clarify host SME state management KVM: arm64: Restructure check for SVE support in FP trap handler KVM: arm64: Document check for TIF_FOREIGN_FPSTATE KVM: arm64: Fix repeated words in comments KVM: arm64: Constify start/end/phys fields of the pgtable walker data KVM: arm64: Infer PA offset from VA in hyp map walker KVM: arm64: Infer the PA offset from IPA in stage-2 map walker KVM: arm64: Use the bitmap API to allocate bitmaps KVM: arm64: Slightly optimize flush_context()
This commit is contained in:
commit
a35747c310
@ -126,6 +126,10 @@
|
||||
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
|
||||
#define APPLE_CPU_PART_M2_BLIZZARD 0x032
|
||||
#define APPLE_CPU_PART_M2_AVALANCHE 0x033
|
||||
#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
|
||||
#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
|
||||
#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
|
||||
#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
|
||||
|
||||
#define AMPERE_CPU_PART_AMPERE1 0xAC3
|
||||
|
||||
@ -181,6 +185,10 @@
|
||||
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
|
||||
#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
|
||||
#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
|
||||
#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
|
||||
#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
|
||||
#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
|
||||
#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
|
||||
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
|
||||
|
||||
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
|
||||
|
@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx {
|
||||
kvm_pte_t old;
|
||||
void *arg;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
u64 start;
|
||||
u64 addr;
|
||||
u64 end;
|
||||
u32 level;
|
||||
|
@ -81,26 +81,34 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
|
||||
fpsimd_kvm_prepare();
|
||||
|
||||
/*
|
||||
* We will check TIF_FOREIGN_FPSTATE just before entering the
|
||||
* guest in kvm_arch_vcpu_ctxflush_fp() and override this to
|
||||
* FP_STATE_FREE if the flag set.
|
||||
*/
|
||||
vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
|
||||
|
||||
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
|
||||
vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
|
||||
|
||||
/*
|
||||
* We don't currently support SME guests but if we leave
|
||||
* things in streaming mode then when the guest starts running
|
||||
* FPSIMD or SVE code it may generate SME traps so as a
|
||||
* special case if we are in streaming mode we force the host
|
||||
* state to be saved now and exit streaming mode so that we
|
||||
* don't have to handle any SME traps for valid guest
|
||||
* operations. Do this for ZA as well for now for simplicity.
|
||||
*/
|
||||
if (system_supports_sme()) {
|
||||
vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
|
||||
vcpu_set_flag(vcpu, HOST_SME_ENABLED);
|
||||
|
||||
/*
|
||||
* If PSTATE.SM is enabled then save any pending FP
|
||||
* state and disable PSTATE.SM. If we leave PSTATE.SM
|
||||
* enabled and the guest does not enable SME via
|
||||
* CPACR_EL1.SMEN then operations that should be valid
|
||||
* may generate SME traps from EL1 to EL1 which we
|
||||
* can't intercept and which would confuse the guest.
|
||||
*
|
||||
* Do the same for PSTATE.ZA in the case where there
|
||||
* is state in the registers which has not already
|
||||
* been saved, this is very unlikely to happen.
|
||||
*/
|
||||
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
|
||||
vcpu->arch.fp_state = FP_STATE_FREE;
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
|
@ -177,9 +177,17 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
sve_guest = vcpu_has_sve(vcpu);
|
||||
esr_ec = kvm_vcpu_trap_get_class(vcpu);
|
||||
|
||||
/* Don't handle SVE traps for non-SVE vcpus here: */
|
||||
if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
|
||||
/* Only handle traps the vCPU can support here: */
|
||||
switch (esr_ec) {
|
||||
case ESR_ELx_EC_FP_ASIMD:
|
||||
break;
|
||||
case ESR_ELx_EC_SVE:
|
||||
if (!sve_guest)
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Valid trap. Switch the context: */
|
||||
|
||||
|
@ -58,8 +58,9 @@
|
||||
struct kvm_pgtable_walk_data {
|
||||
struct kvm_pgtable_walker *walker;
|
||||
|
||||
const u64 start;
|
||||
u64 addr;
|
||||
u64 end;
|
||||
const u64 end;
|
||||
};
|
||||
|
||||
static bool kvm_phys_is_valid(u64 phys)
|
||||
@ -201,6 +202,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
|
||||
.old = READ_ONCE(*ptep),
|
||||
.arg = data->walker->arg,
|
||||
.mm_ops = mm_ops,
|
||||
.start = data->start,
|
||||
.addr = data->addr,
|
||||
.end = data->end,
|
||||
.level = level,
|
||||
@ -293,6 +295,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
struct kvm_pgtable_walker *walker)
|
||||
{
|
||||
struct kvm_pgtable_walk_data walk_data = {
|
||||
.start = ALIGN_DOWN(addr, PAGE_SIZE),
|
||||
.addr = ALIGN_DOWN(addr, PAGE_SIZE),
|
||||
.end = PAGE_ALIGN(walk_data.addr + size),
|
||||
.walker = walker,
|
||||
@ -349,7 +352,7 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
|
||||
}
|
||||
|
||||
struct hyp_map_data {
|
||||
u64 phys;
|
||||
const u64 phys;
|
||||
kvm_pte_t attr;
|
||||
};
|
||||
|
||||
@ -407,13 +410,12 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
|
||||
static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct hyp_map_data *data)
|
||||
{
|
||||
u64 phys = data->phys + (ctx->addr - ctx->start);
|
||||
kvm_pte_t new;
|
||||
u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
|
||||
|
||||
if (!kvm_block_mapping_supported(ctx, phys))
|
||||
return false;
|
||||
|
||||
data->phys += granule;
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
|
||||
if (ctx->old == new)
|
||||
return true;
|
||||
@ -576,7 +578,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
|
||||
}
|
||||
|
||||
struct stage2_map_data {
|
||||
u64 phys;
|
||||
const u64 phys;
|
||||
kvm_pte_t attr;
|
||||
u8 owner_id;
|
||||
|
||||
@ -794,20 +796,43 @@ static bool stage2_pte_executable(kvm_pte_t pte)
|
||||
return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
|
||||
}
|
||||
|
||||
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
const struct stage2_map_data *data)
|
||||
{
|
||||
u64 phys = data->phys;
|
||||
|
||||
/*
|
||||
* Stage-2 walks to update ownership data are communicated to the map
|
||||
* walker using an invalid PA. Avoid offsetting an already invalid PA,
|
||||
* which could overflow and make the address valid again.
|
||||
*/
|
||||
if (!kvm_phys_is_valid(phys))
|
||||
return phys;
|
||||
|
||||
/*
|
||||
* Otherwise, work out the correct PA based on how far the walk has
|
||||
* gotten.
|
||||
*/
|
||||
return phys + (ctx->addr - ctx->start);
|
||||
}
|
||||
|
||||
static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
u64 phys = stage2_map_walker_phys_addr(ctx, data);
|
||||
|
||||
if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
|
||||
return false;
|
||||
|
||||
return kvm_block_mapping_supported(ctx, data->phys);
|
||||
return kvm_block_mapping_supported(ctx, phys);
|
||||
}
|
||||
|
||||
static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
kvm_pte_t new;
|
||||
u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
|
||||
u64 phys = stage2_map_walker_phys_addr(ctx, data);
|
||||
u64 granule = kvm_granule_size(ctx->level);
|
||||
struct kvm_pgtable *pgt = data->mmu->pgt;
|
||||
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
|
||||
|
||||
@ -841,8 +866,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
|
||||
stage2_make_pte(ctx, new);
|
||||
|
||||
if (kvm_phys_is_valid(phys))
|
||||
data->phys += granule;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -204,7 +204,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
|
||||
* Size Fault at level 0, as if exceeding PARange.
|
||||
*
|
||||
* Non-LPAE guests will only get the external abort, as there
|
||||
* is no way to to describe the ASF.
|
||||
* is no way to describe the ASF.
|
||||
*/
|
||||
if (vcpu_el1_is_32bit(vcpu) &&
|
||||
!(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
|
||||
|
@ -616,6 +616,10 @@ static const struct midr_range broken_seis[] = {
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
|
||||
{},
|
||||
};
|
||||
|
||||
|
@ -47,7 +47,7 @@ static void flush_context(void)
|
||||
int cpu;
|
||||
u64 vmid;
|
||||
|
||||
bitmap_clear(vmid_map, 0, NUM_USER_VMIDS);
|
||||
bitmap_zero(vmid_map, NUM_USER_VMIDS);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0);
|
||||
@ -182,8 +182,7 @@ int __init kvm_arm_vmid_alloc_init(void)
|
||||
*/
|
||||
WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus());
|
||||
atomic64_set(&vmid_generation, VMID_FIRST_VERSION);
|
||||
vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS),
|
||||
sizeof(*vmid_map), GFP_KERNEL);
|
||||
vmid_map = bitmap_zalloc(NUM_USER_VMIDS, GFP_KERNEL);
|
||||
if (!vmid_map)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -192,5 +191,5 @@ int __init kvm_arm_vmid_alloc_init(void)
|
||||
|
||||
void __init kvm_arm_vmid_alloc_free(void)
|
||||
{
|
||||
kfree(vmid_map);
|
||||
bitmap_free(vmid_map);
|
||||
}
|
||||
|
@ -13,7 +13,9 @@
|
||||
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <uapi/asm/vmx.h>
|
||||
#include <asm/vmxfeatures.h>
|
||||
|
||||
|
@ -253,7 +253,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
|
||||
int nent)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
|
||||
|
||||
best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
|
||||
if (best) {
|
||||
@ -292,21 +291,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
|
||||
vcpu->arch.ia32_misc_enable_msr &
|
||||
MSR_IA32_MISC_ENABLE_MWAIT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
|
||||
* the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
|
||||
* requested XCR0 value. The enclave's XFRM must be a subset of XCRO
|
||||
* at the time of EENTER, thus adjust the allowed XFRM by the guest's
|
||||
* supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
|
||||
* '1' even on CPUs that don't support XSAVE.
|
||||
*/
|
||||
best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
|
||||
if (best) {
|
||||
best->ecx &= guest_supported_xcr0 & 0xffffffff;
|
||||
best->edx &= guest_supported_xcr0 >> 32;
|
||||
best->ecx |= XFEATURE_MASK_FPSSE;
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
|
||||
|
@ -170,12 +170,19 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
|
||||
/*
|
||||
* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note
|
||||
* that the allowed XFRM (XFeature Request Mask) isn't strictly bound
|
||||
* by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE
|
||||
* is unsupported, i.e. even if XCR0 itself is completely unsupported.
|
||||
*/
|
||||
if ((u32)miscselect & ~sgx_12_0->ebx ||
|
||||
(u32)attributes & ~sgx_12_1->eax ||
|
||||
(u32)(attributes >> 32) & ~sgx_12_1->ebx ||
|
||||
(u32)xfrm & ~sgx_12_1->ecx ||
|
||||
(u32)(xfrm >> 32) & ~sgx_12_1->edx) {
|
||||
(u32)(xfrm >> 32) & ~sgx_12_1->edx ||
|
||||
xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
|
||||
(xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
|
@ -1446,7 +1446,7 @@ static const u32 msrs_to_save_base[] = {
|
||||
#endif
|
||||
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
|
||||
MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
|
||||
MSR_IA32_SPEC_CTRL,
|
||||
MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL,
|
||||
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
|
||||
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
|
||||
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
|
||||
@ -7155,6 +7155,10 @@ static void kvm_probe_msr_to_save(u32 msr_index)
|
||||
if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
|
||||
return;
|
||||
break;
|
||||
case MSR_IA32_TSX_CTRL:
|
||||
if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR))
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -3962,18 +3962,19 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
|
||||
}
|
||||
|
||||
vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
|
||||
r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
|
||||
BUG_ON(r == -EBUSY);
|
||||
r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT);
|
||||
if (r)
|
||||
goto unlock_vcpu_destroy;
|
||||
|
||||
/* Now it's all set up, let userspace reach it */
|
||||
kvm_get_kvm(kvm);
|
||||
r = create_vcpu_fd(vcpu);
|
||||
if (r < 0) {
|
||||
xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
|
||||
kvm_put_kvm_no_destroy(kvm);
|
||||
goto unlock_vcpu_destroy;
|
||||
if (r < 0)
|
||||
goto kvm_put_xa_release;
|
||||
|
||||
if (KVM_BUG_ON(!!xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
|
||||
r = -EINVAL;
|
||||
goto kvm_put_xa_release;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3988,6 +3989,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
|
||||
kvm_create_vcpu_debugfs(vcpu);
|
||||
return r;
|
||||
|
||||
kvm_put_xa_release:
|
||||
kvm_put_kvm_no_destroy(kvm);
|
||||
xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
|
||||
unlock_vcpu_destroy:
|
||||
mutex_unlock(&kvm->lock);
|
||||
kvm_dirty_ring_free(&vcpu->dirty_ring);
|
||||
@ -5184,7 +5188,20 @@ static void hardware_disable_all(void)
|
||||
static int hardware_enable_all(void)
|
||||
{
|
||||
atomic_t failed = ATOMIC_INIT(0);
|
||||
int r = 0;
|
||||
int r;
|
||||
|
||||
/*
|
||||
* Do not enable hardware virtualization if the system is going down.
|
||||
* If userspace initiated a forced reboot, e.g. reboot -f, then it's
|
||||
* possible for an in-flight KVM_CREATE_VM to trigger hardware enabling
|
||||
* after kvm_reboot() is called. Note, this relies on system_state
|
||||
* being set _before_ kvm_reboot(), which is why KVM uses a syscore ops
|
||||
* hook instead of registering a dedicated reboot notifier (the latter
|
||||
* runs before system_state is updated).
|
||||
*/
|
||||
if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
|
||||
system_state == SYSTEM_RESTART)
|
||||
return -EBUSY;
|
||||
|
||||
/*
|
||||
* When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
|
||||
@ -5197,6 +5214,8 @@ static int hardware_enable_all(void)
|
||||
cpus_read_lock();
|
||||
mutex_lock(&kvm_lock);
|
||||
|
||||
r = 0;
|
||||
|
||||
kvm_usage_count++;
|
||||
if (kvm_usage_count == 1) {
|
||||
on_each_cpu(hardware_enable_nolock, &failed, 1);
|
||||
@ -5213,26 +5232,24 @@ static int hardware_enable_all(void)
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
|
||||
void *v)
|
||||
static void kvm_shutdown(void)
|
||||
{
|
||||
/*
|
||||
* Some (well, at least mine) BIOSes hang on reboot if
|
||||
* in vmx root mode.
|
||||
*
|
||||
* And Intel TXT required VMX off for all cpu when system shutdown.
|
||||
* Disable hardware virtualization and set kvm_rebooting to indicate
|
||||
* that KVM has asynchronously disabled hardware virtualization, i.e.
|
||||
* that relevant errors and exceptions aren't entirely unexpected.
|
||||
* Some flavors of hardware virtualization need to be disabled before
|
||||
* transferring control to firmware (to perform shutdown/reboot), e.g.
|
||||
* on x86, virtualization can block INIT interrupts, which are used by
|
||||
* firmware to pull APs back under firmware control. Note, this path
|
||||
* is used for both shutdown and reboot scenarios, i.e. neither name is
|
||||
* 100% comprehensive.
|
||||
*/
|
||||
pr_info("kvm: exiting hardware virtualization\n");
|
||||
kvm_rebooting = true;
|
||||
on_each_cpu(hardware_disable_nolock, NULL, 1);
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block kvm_reboot_notifier = {
|
||||
.notifier_call = kvm_reboot,
|
||||
.priority = 0,
|
||||
};
|
||||
|
||||
static int kvm_suspend(void)
|
||||
{
|
||||
/*
|
||||
@ -5263,6 +5280,7 @@ static void kvm_resume(void)
|
||||
static struct syscore_ops kvm_syscore_ops = {
|
||||
.suspend = kvm_suspend,
|
||||
.resume = kvm_resume,
|
||||
.shutdown = kvm_shutdown,
|
||||
};
|
||||
#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
|
||||
static int hardware_enable_all(void)
|
||||
@ -5967,7 +5985,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
register_reboot_notifier(&kvm_reboot_notifier);
|
||||
register_syscore_ops(&kvm_syscore_ops);
|
||||
#endif
|
||||
|
||||
@ -6039,7 +6056,6 @@ err_cpu_kick_mask:
|
||||
err_vcpu_cache:
|
||||
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
|
||||
unregister_syscore_ops(&kvm_syscore_ops);
|
||||
unregister_reboot_notifier(&kvm_reboot_notifier);
|
||||
cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
|
||||
#endif
|
||||
return r;
|
||||
@ -6065,7 +6081,6 @@ void kvm_exit(void)
|
||||
kvm_async_pf_deinit();
|
||||
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
|
||||
unregister_syscore_ops(&kvm_syscore_ops);
|
||||
unregister_reboot_notifier(&kvm_reboot_notifier);
|
||||
cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
|
||||
#endif
|
||||
kvm_irqfd_exit();
|
||||
|
Loading…
Reference in New Issue
Block a user