Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* Nuke hw_compat_4_0_1 and pc_compat_4_0_1 (Greg)
* Static analysis fixes (Igor, Lidong)
* X86 Hyper-V CPUID improvements (Vitaly)
* X86 nested virt migration (Liran)
* New MSR-based features (Xiaoyao)

# gpg: Signature made Fri 21 Jun 2019 12:25:42 BST
# gpg:                using RSA key BFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (25 commits)
  hw: Nuke hw_compat_4_0_1 and pc_compat_4_0_1
  util/main-loop: Fix incorrect assertion
  sd: Fix out-of-bounds assertions
  target/i386: kvm: Add nested migration blocker only when kernel lacks required capabilities
  target/i386: kvm: Add support for KVM_CAP_EXCEPTION_PAYLOAD
  target/i386: kvm: Add support for save and restore nested state
  vmstate: Add support for kernel integer types
  linux-headers: sync with latest KVM headers from Linux 5.2
  target/i386: kvm: Block migration for vCPUs exposed with nested virtualization
  target/i386: kvm: Re-inject #DB to guest with updated DR6
  target/i386: kvm: Use symbolic constant for #DB/#BP exception constants
  KVM: Introduce kvm_arch_destroy_vcpu()
  target/i386: kvm: Delete VMX migration blocker on vCPU init failure
  target/i386: define a new MSR based feature word - FEAT_CORE_CAPABILITY
  i386/kvm: add support for Direct Mode for Hyper-V synthetic timers
  i386/kvm: hv-evmcs requires hv-vapic
  i386/kvm: hv-tlbflush/ipi require hv-vpindex
  i386/kvm: hv-stimer requires hv-time and hv-synic
  i386/kvm: implement 'hv-passthrough' mode
  i386/kvm: document existing Hyper-V enlightenments
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Commit 68d7ff0cff by Peter Maydell, 2019-06-21 13:32:10 +01:00
27 changed files with 1526 additions and 349 deletions

accel/kvm/kvm-all.c

@ -87,6 +87,7 @@ struct KVMState
#ifdef KVM_CAP_SET_GUEST_DEBUG
QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints;
#endif
int max_nested_state_len;
int many_ioeventfds;
int intx_set_mask;
bool sync_mmu;
@ -291,6 +292,11 @@ int kvm_destroy_vcpu(CPUState *cpu)
DPRINTF("kvm_destroy_vcpu\n");
ret = kvm_arch_destroy_vcpu(cpu);
if (ret < 0) {
goto err;
}
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
ret = mmap_size;
@ -863,8 +869,8 @@ static void kvm_mem_ioeventfd_add(MemoryListener *listener,
data, true, int128_get64(section->size),
match_data);
if (r < 0) {
fprintf(stderr, "%s: error adding ioeventfd: %s\n",
__func__, strerror(-r));
fprintf(stderr, "%s: error adding ioeventfd: %s (%d)\n",
__func__, strerror(-r), -r);
abort();
}
}
@ -881,6 +887,8 @@ static void kvm_mem_ioeventfd_del(MemoryListener *listener,
data, false, int128_get64(section->size),
match_data);
if (r < 0) {
fprintf(stderr, "%s: error deleting ioeventfd: %s (%d)\n",
__func__, strerror(-r), -r);
abort();
}
}
@ -897,8 +905,8 @@ static void kvm_io_ioeventfd_add(MemoryListener *listener,
data, true, int128_get64(section->size),
match_data);
if (r < 0) {
fprintf(stderr, "%s: error adding ioeventfd: %s\n",
__func__, strerror(-r));
fprintf(stderr, "%s: error adding ioeventfd: %s (%d)\n",
__func__, strerror(-r), -r);
abort();
}
}
@ -916,6 +924,8 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener,
data, false, int128_get64(section->size),
match_data);
if (r < 0) {
fprintf(stderr, "%s: error deleting ioeventfd: %s (%d)\n",
__func__, strerror(-r), -r);
abort();
}
}
@ -1672,6 +1682,8 @@ static int kvm_init(MachineState *ms)
s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif
s->max_nested_state_len = kvm_check_extension(s, KVM_CAP_NESTED_STATE);
#ifdef KVM_CAP_IRQ_ROUTING
kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
#endif
@ -2239,6 +2251,11 @@ int kvm_has_debugregs(void)
return kvm_state->debugregs;
}
int kvm_max_nested_state_length(void)
{
return kvm_state->max_nested_state_len;
}
int kvm_has_many_ioeventfds(void)
{
if (!kvm_enabled()) {

cpus.c

@ -1594,7 +1594,6 @@ static void *qemu_hax_cpu_thread_fn(void *arg)
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
cpu->halted = 0;
current_cpu = cpu;
hax_init_vcpu(cpu);

docs/hyperv.txt (new file)

@ -0,0 +1,201 @@
Hyper-V Enlightenments
======================
1. Description
===============
In some cases, implementing a hardware interface in software is slow, so KVM
implements its own paravirtualized interfaces. This works well for Linux as
guest support for such features is added simultaneously with the feature itself.
It may, however, be hard or even impossible to add support for these interfaces
to proprietary OSes, most notably Microsoft Windows.
KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features
make Windows and Hyper-V guests think they're running on top of a Hyper-V
compatible hypervisor and use Hyper-V specific features.
2. Setup
=========
No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In
QEMU, individual enlightenments can be enabled through CPU flags, e.g.:
qemu-system-x86_64 --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ...
Some enlightenments depend on others; QEMU checks that the supplied
configuration is sane.
When any of the Hyper-V enlightenments is enabled, QEMU changes the hypervisor
identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification
and features are kept in leaves 0x40000100..0x40000101.
3. Existing enlightenments
===========================
3.1. hv-relaxed
================
This feature tells the guest OS to disable watchdog timeouts as it is running
on a hypervisor. It is known that some Windows versions will do this even when
they see the 'hypervisor' CPUID flag.
3.2. hv-vapic
==============
Provides the so-called VP Assist page MSR to the guest, allowing it to work
with the APIC more efficiently. In particular, this enlightenment allows
paravirtualized (exit-less) EOI processing.
3.3. hv-spinlocks=xxx
======================
Enables paravirtualized spinlocks. The parameter indicates how many times
spinlock acquisition should be attempted before indicating the situation to the
hypervisor. A special value of 0xffffffff indicates "never retry".
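For example (an illustrative command line, not taken from this series), the
following asks the guest to retry spinlock acquisition 8191 times before
notifying the hypervisor:
qemu-system-x86_64 --enable-kvm --cpu host,hv-relaxed,hv-spinlocks=0x1fff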
3.4. hv-vpindex
================
Provides the HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest, which holds the
virtual processor index. This enlightenment makes sense in conjunction with
hv-synic, hv-stimer and other enlightenments which require the guest to know its
virtual processor indices (e.g. when a VP index needs to be passed in a
hypercall).
3.5. hv-runtime
================
Provides the HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps
the virtual processor run time in 100ns units. This gives the guest operating
system an idea of how much time was 'stolen' from it (when the virtual CPU was
preempted to perform some other work).
3.6. hv-crash
==============
Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P4 (0x40000100..0x40000104) and
HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to
by the guest when it crashes; the HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P4 MSRs
contain additional crash information. This information is output to the QEMU
log and through QAPI.
Note: unlike under genuine Hyper-V, a write to HV_X64_MSR_CRASH_CTL causes the
guest to shut down. This effectively blocks crash dump generation by Windows.
3.7. hv-time
=============
Enables two Hyper-V-specific clocksources available to the guest: the MSR-based
Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and the Reference
TSC page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both
clocksources are per-guest; the Reference TSC page clocksource allows for
exit-less time stamp readings. Using this enlightenment leads to a significant
speedup of all timestamp-related operations.
3.8. hv-synic
==============
Enables the Hyper-V Synthetic interrupt controller, an extension of the local
APIC. When enabled, this enlightenment provides additional communication
facilities to the guest: SynIC messages and events. This is a prerequisite for
implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment
is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs
HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and
HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F).
Requires: hv-vpindex
3.9. hv-stimer
===============
Enables Hyper-V synthetic timers. There are four synthetic timers per virtual
CPU, controlled through the HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STIMER3_COUNT
(0x400000B0..0x400000B7) MSRs. These timers can work either in single-shot or
periodic mode. It is known that certain Windows versions revert to using the
HPET (or even the RTC when the HPET is unavailable) extensively when this
enlightenment is not provided; this can lead to significant CPU consumption,
even when the virtual CPU is idle.
Requires: hv-vpindex, hv-synic, hv-time
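As an illustration of how the dependency chain composes (an example invocation,
assuming a host kernel that exposes all of these features), hv-stimer can be
enabled together with its prerequisites:
qemu-system-x86_64 --enable-kvm --cpu host,hv-vpindex,hv-synic,hv-time,hv-stimer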
3.10. hv-tlbflush
==================
Enables the paravirtualized TLB shoot-down mechanism. On x86, a remote TLB
flush requires sending IPIs and waiting for the other CPUs to perform a local
TLB flush. In a virtualized environment, some virtual CPUs may not even be
scheduled at the time of the call and may not require flushing (or the flush
may be postponed until the virtual CPU is scheduled). The hv-tlbflush
enlightenment implements TLB shoot-down through the hypervisor, enabling this
optimization.
Requires: hv-vpindex
3.11. hv-ipi
=============
Enables the paravirtualized IPI send mechanism. The
HvCallSendSyntheticClusterIpi hypercall may target more than 64 virtual CPUs
simultaneously; doing the same through the APIC requires more than one access
(and thus more than one exit to the hypervisor).
Requires: hv-vpindex
3.12. hv-vendor-id=xxx
=======================
This changes the Hyper-V identification in CPUID 0x40000000.EBX-EDX from the
default "Microsoft Hv". The parameter should be no longer than 12 characters.
According to the specification, guests shouldn't use this information and it is
unknown whether any Windows version acts differently.
Note: hv-vendor-id is not an enlightenment and thus doesn't enable Hyper-V
identification when specified without some other enlightenment.
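A hypothetical example (the 12-character vendor string below is made up purely
for illustration):
qemu-system-x86_64 --enable-kvm --cpu host,hv-relaxed,hv-vendor-id=MyHyperVisor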
3.13. hv-reset
===============
Provides the HV_X64_MSR_RESET (0x40000003) MSR to the guest, allowing it to
reset itself by writing to it. Even when this MSR is enabled, it is not the
recommended way for Windows to perform a system reboot and thus it may not be
used.
3.14. hv-frequencies
=====================
Provides HV_X64_MSR_TSC_FREQUENCY (0x40000022) and HV_X64_MSR_APIC_FREQUENCY
(0x40000023), allowing the guest to get its TSC/APIC frequencies without doing
measurements.
3.15. hv-reenlightenment
=========================
This enlightenment is nested-specific: it targets Hyper-V on KVM guests. When
enabled, it provides the HV_X64_MSR_REENLIGHTENMENT_CONTROL (0x40000106),
HV_X64_MSR_TSC_EMULATION_CONTROL (0x40000107) and HV_X64_MSR_TSC_EMULATION_STATUS
(0x40000108) MSRs, allowing the guest to get notified when the TSC frequency
changes (which only happens on migration) and to keep using the old frequency
(through emulation in the hypervisor) until it is ready to switch to the new
one. This, in conjunction with hv-frequencies, allows Hyper-V on KVM to pass a
stable clocksource (the Reference TSC page) to its own guests.
Recommended: hv-frequencies
3.16. hv-evmcs
===============
This enlightenment is nested-specific: it targets Hyper-V on KVM guests. When
enabled, it provides the Enlightened VMCS feature to the guest. The feature
implements a paravirtualized protocol between the L0 (KVM) and L1 (Hyper-V)
hypervisors, making L2 exits to the hypervisor faster. The feature is Intel-only.
Note: some virtualization features (e.g. Posted Interrupts) are disabled when
hv-evmcs is enabled. It may make sense to measure your nested workload with and
without the feature to find out if enabling it is beneficial.
Requires: hv-vapic
3.17. hv-stimer-direct
=======================
The Hyper-V specification allows synthetic timers to operate in two modes:
"classic", where the expiration event is delivered as a SynIC message, and
"direct", where the event is delivered via a normal interrupt. It is known that
nested Hyper-V can only use synthetic timers in direct mode, and thus
'hv-stimer-direct' needs to be enabled.
Requires: hv-vpindex, hv-synic, hv-time, hv-stimer
4. Development features
========================
In some cases (e.g. during development) it may make sense to use QEMU in
'pass-through' mode and give Windows guests all enlightenments currently
supported by KVM. This pass-through mode is enabled by the "hv-passthrough" CPU
flag.
Note: enabling this flag effectively prevents migration, as the set of
supported enlightenments may differ between the source and destination hosts.
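A minimal development invocation might look like this (illustrative; note that
it blocks migration as described above):
qemu-system-x86_64 --enable-kvm --cpu host,hv-passthrough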
5. Useful links
================
Hyper-V Top Level Functional specification and other information:
https://github.com/MicrosoftDocs/Virtualization-Documentation

hw/core/machine.c

@ -24,16 +24,13 @@
#include "hw/pci/pci.h"
#include "hw/mem/nvdimm.h"
GlobalProperty hw_compat_4_0_1[] = {
GlobalProperty hw_compat_4_0[] = {
{ "VGA", "edid", "false" },
{ "secondary-vga", "edid", "false" },
{ "bochs-display", "edid", "false" },
{ "virtio-vga", "edid", "false" },
{ "virtio-gpu-pci", "edid", "false" },
};
const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1);
GlobalProperty hw_compat_4_0[] = {};
const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0);
GlobalProperty hw_compat_3_1[] = {

hw/i386/pc.c

@ -111,9 +111,6 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
/* Physical Address of PVH entry point read from kernel ELF NOTE */
static size_t pvh_start_addr;
GlobalProperty pc_compat_4_0_1[] = {};
const size_t pc_compat_4_0_1_len = G_N_ELEMENTS(pc_compat_4_0_1);
GlobalProperty pc_compat_4_0[] = {};
const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);
@ -2386,7 +2383,8 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
}
cpu->thread_id = topo.smt_id;
if (cpu->hyperv_vpindex && !kvm_hv_vpindex_settable()) {
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) &&
!kvm_hv_vpindex_settable()) {
error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX");
return;
}

hw/i386/pc_q35.c

@ -378,8 +378,13 @@ static void pc_q35_4_0_1_machine_options(MachineClass *m)
{
pc_q35_4_1_machine_options(m);
m->alias = NULL;
compat_props_add(m->compat_props, hw_compat_4_0_1, hw_compat_4_0_1_len);
compat_props_add(m->compat_props, pc_compat_4_0_1, pc_compat_4_0_1_len);
/*
* This is the default machine for the 4.0-stable branch. It is basically
* a 4.0 that doesn't use split irqchip by default. It MUST hence apply the
* 4.0 compat props.
*/
compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
}
DEFINE_Q35_MACHINE(v4_0_1, "pc-q35-4.0.1", NULL,
@ -390,8 +395,7 @@ static void pc_q35_4_0_machine_options(MachineClass *m)
pc_q35_4_0_1_machine_options(m);
m->default_kernel_irqchip_split = true;
m->alias = NULL;
compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
/* Compat props are applied by the 4.0.1 machine */
}
DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL,

hw/sd/sd.c

@ -145,7 +145,7 @@ static const char *sd_state_name(enum SDCardStates state)
if (state == sd_inactive_state) {
return "inactive";
}
assert(state <= ARRAY_SIZE(state_name));
assert(state < ARRAY_SIZE(state_name));
return state_name[state];
}
@ -166,7 +166,7 @@ static const char *sd_response_name(sd_rsp_type_t rsp)
if (rsp == sd_r1b) {
rsp = sd_r1;
}
assert(rsp <= ARRAY_SIZE(response_name));
assert(rsp < ARRAY_SIZE(response_name));
return response_name[rsp];
}

include/hw/boards.h

@ -293,9 +293,6 @@ struct MachineState {
} \
type_init(machine_initfn##_register_types)
extern GlobalProperty hw_compat_4_0_1[];
extern const size_t hw_compat_4_0_1_len;
extern GlobalProperty hw_compat_4_0[];
extern const size_t hw_compat_4_0_len;

include/hw/i386/pc.h

@ -293,9 +293,6 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
int e820_get_num_entries(void);
bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
extern GlobalProperty pc_compat_4_0_1[];
extern const size_t pc_compat_4_0_1_len;
extern GlobalProperty pc_compat_4_0[];
extern const size_t pc_compat_4_0_len;

include/migration/vmstate.h

@ -797,6 +797,19 @@ extern const VMStateInfo vmstate_info_qtailq;
#define VMSTATE_UINT64_V(_f, _s, _v) \
VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
#ifdef CONFIG_LINUX
#define VMSTATE_U8_V(_f, _s, _v) \
VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint8, __u8)
#define VMSTATE_U16_V(_f, _s, _v) \
VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16, __u16)
#define VMSTATE_U32_V(_f, _s, _v) \
VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32, __u32)
#define VMSTATE_U64_V(_f, _s, _v) \
VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, __u64)
#endif
#define VMSTATE_BOOL(_f, _s) \
VMSTATE_BOOL_V(_f, _s, 0)
@ -818,6 +831,19 @@ extern const VMStateInfo vmstate_info_qtailq;
#define VMSTATE_UINT64(_f, _s) \
VMSTATE_UINT64_V(_f, _s, 0)
#ifdef CONFIG_LINUX
#define VMSTATE_U8(_f, _s) \
VMSTATE_U8_V(_f, _s, 0)
#define VMSTATE_U16(_f, _s) \
VMSTATE_U16_V(_f, _s, 0)
#define VMSTATE_U32(_f, _s) \
VMSTATE_U32_V(_f, _s, 0)
#define VMSTATE_U64(_f, _s) \
VMSTATE_U64_V(_f, _s, 0)
#endif
#define VMSTATE_UINT8_EQUAL(_f, _s, _err_hint) \
VMSTATE_SINGLE_FULL(_f, _s, 0, 0, \
vmstate_info_uint8_equal, uint8_t, _err_hint)

include/sysemu/kvm.h

@ -210,6 +210,7 @@ bool kvm_has_sync_mmu(void);
int kvm_has_vcpu_events(void);
int kvm_has_robust_singlestep(void);
int kvm_has_debugregs(void);
int kvm_max_nested_state_length(void);
int kvm_has_pit_state2(void);
int kvm_has_many_ioeventfds(void);
int kvm_has_gsi_routing(void);
@ -371,6 +372,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level);
int kvm_arch_init(MachineState *ms, KVMState *s);
int kvm_arch_init_vcpu(CPUState *cpu);
int kvm_arch_destroy_vcpu(CPUState *cpu);
bool kvm_vcpu_id_is_valid(int vcpu_id);

linux-headers/asm-x86/kvm.h

@ -383,16 +383,26 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
struct kvm_vmx_nested_state {
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
};
struct kvm_vmx_nested_state_hdr {
__u64 vmxon_pa;
__u64 vmcs_pa;
__u64 vmcs12_pa;
struct {
__u16 flags;
@ -401,24 +411,25 @@ struct kvm_vmx_nested_state {
/* for KVM_CAP_NESTED_STATE */
struct kvm_nested_state {
/* KVM_STATE_* flags */
__u16 flags;
/* 0 for VMX, 1 for SVM. */
__u16 format;
/* 128 for SVM, 128 + VMCS size for VMX. */
__u32 size;
union {
/* VMXON, VMCS */
struct kvm_vmx_nested_state vmx;
struct kvm_vmx_nested_state_hdr vmx;
/* Pad the header to 128 bytes. */
__u8 pad[120];
};
} hdr;
__u8 data[0];
/*
* Define data region as 0 bytes to preserve backwards-compatibility
* with the old definition of kvm_nested_state in order to avoid changing
* KVM_{GET,PUT}_NESTED_STATE ioctl values.
*/
union {
struct kvm_vmx_nested_state_data vmx[0];
} data;
};
#endif /* _ASM_X86_KVM_H */

target/arm/kvm32.c

@ -240,6 +240,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
return kvm_arm_init_cpreg_list(cpu);
}
int kvm_arch_destroy_vcpu(CPUState *cs)
{
return 0;
}
typedef struct Reg {
uint64_t id;
int offset;

target/arm/kvm64.c

@ -654,6 +654,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
return kvm_arm_init_cpreg_list(cpu);
}
int kvm_arch_destroy_vcpu(CPUState *cs)
{
return 0;
}
bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
/* Return true if the regidx is a register we should synchronize

target/i386/cpu.c

@ -1085,7 +1085,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, "spec-ctrl", "stibp",
NULL, "arch-capabilities", NULL, "ssbd",
NULL, "arch-capabilities", "core-capability", "ssbd",
},
.cpuid = {
.eax = 7,
@ -1203,6 +1203,26 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
}
},
},
[FEAT_CORE_CAPABILITY] = {
.type = MSR_FEATURE_WORD,
.feat_names = {
NULL, NULL, NULL, NULL,
NULL, "split-lock-detect", NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
},
.msr = {
.index = MSR_IA32_CORE_CAPABILITY,
.cpuid_dep = {
FEAT_7_0_EDX,
CPUID_7_0_EDX_CORE_CAPABILITY,
},
},
},
};
typedef struct X86RegisterInfo32 {
@ -4799,7 +4819,11 @@ static void x86_cpu_reset(CPUState *s)
memset(env->mtrr_fixed, 0, sizeof(env->mtrr_fixed));
env->interrupt_injected = -1;
env->exception_injected = -1;
env->exception_nr = -1;
env->exception_pending = 0;
env->exception_injected = 0;
env->exception_has_payload = false;
env->exception_payload = 0;
env->nmi_injected = false;
#if !defined(CONFIG_USER_ONLY)
/* We hard-wire the BSP to the first CPU. */
@ -5195,12 +5219,6 @@ static int x86_cpu_filter_features(X86CPU *cpu)
return rv;
}
#define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \
(env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \
(env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3)
#define IS_AMD_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_AMD_1 && \
(env)->cpuid_vendor2 == CPUID_VENDOR_AMD_2 && \
(env)->cpuid_vendor3 == CPUID_VENDOR_AMD_3)
static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
{
CPUState *cs = CPU(dev);
@ -5853,21 +5871,40 @@ static Property x86_cpu_properties[] = {
#endif
DEFINE_PROP_INT32("node-id", X86CPU, node_id, CPU_UNSET_NUMA_NODE_ID),
DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false),
{ .name = "hv-spinlocks", .info = &qdev_prop_spinlocks },
DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false),
DEFINE_PROP_BOOL("hv-vapic", X86CPU, hyperv_vapic, false),
DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false),
DEFINE_PROP_BOOL("hv-crash", X86CPU, hyperv_crash, false),
DEFINE_PROP_BOOL("hv-reset", X86CPU, hyperv_reset, false),
DEFINE_PROP_BOOL("hv-vpindex", X86CPU, hyperv_vpindex, false),
DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false),
DEFINE_PROP_BOOL("hv-synic", X86CPU, hyperv_synic, false),
DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false),
DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false),
DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false),
DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false),
DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false),
DEFINE_PROP_BIT64("hv-relaxed", X86CPU, hyperv_features,
HYPERV_FEAT_RELAXED, 0),
DEFINE_PROP_BIT64("hv-vapic", X86CPU, hyperv_features,
HYPERV_FEAT_VAPIC, 0),
DEFINE_PROP_BIT64("hv-time", X86CPU, hyperv_features,
HYPERV_FEAT_TIME, 0),
DEFINE_PROP_BIT64("hv-crash", X86CPU, hyperv_features,
HYPERV_FEAT_CRASH, 0),
DEFINE_PROP_BIT64("hv-reset", X86CPU, hyperv_features,
HYPERV_FEAT_RESET, 0),
DEFINE_PROP_BIT64("hv-vpindex", X86CPU, hyperv_features,
HYPERV_FEAT_VPINDEX, 0),
DEFINE_PROP_BIT64("hv-runtime", X86CPU, hyperv_features,
HYPERV_FEAT_RUNTIME, 0),
DEFINE_PROP_BIT64("hv-synic", X86CPU, hyperv_features,
HYPERV_FEAT_SYNIC, 0),
DEFINE_PROP_BIT64("hv-stimer", X86CPU, hyperv_features,
HYPERV_FEAT_STIMER, 0),
DEFINE_PROP_BIT64("hv-frequencies", X86CPU, hyperv_features,
HYPERV_FEAT_FREQUENCIES, 0),
DEFINE_PROP_BIT64("hv-reenlightenment", X86CPU, hyperv_features,
HYPERV_FEAT_REENLIGHTENMENT, 0),
DEFINE_PROP_BIT64("hv-tlbflush", X86CPU, hyperv_features,
HYPERV_FEAT_TLBFLUSH, 0),
DEFINE_PROP_BIT64("hv-evmcs", X86CPU, hyperv_features,
HYPERV_FEAT_EVMCS, 0),
DEFINE_PROP_BIT64("hv-ipi", X86CPU, hyperv_features,
HYPERV_FEAT_IPI, 0),
DEFINE_PROP_BIT64("hv-stimer-direct", X86CPU, hyperv_features,
HYPERV_FEAT_STIMER_DIRECT, 0),
DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false),
DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),

target/i386/cpu.h

@ -345,6 +345,7 @@ typedef enum X86Seg {
#define MSR_IA32_SPEC_CTRL 0x48
#define MSR_VIRT_SSBD 0xc001011f
#define MSR_IA32_PRED_CMD 0x49
#define MSR_IA32_CORE_CAPABILITY 0xcf
#define MSR_IA32_ARCH_CAPABILITIES 0x10a
#define MSR_IA32_TSCDEADLINE 0x6e0
@ -496,6 +497,7 @@ typedef enum FeatureWord {
FEAT_XSAVE_COMP_LO, /* CPUID[EAX=0xd,ECX=0].EAX */
FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */
FEAT_ARCH_CAPABILITIES,
FEAT_CORE_CAPABILITY,
FEATURE_WORDS,
} FeatureWord;
@ -687,6 +689,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */
#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Speculation Control */
#define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) /*Arch Capabilities*/
#define CPUID_7_0_EDX_CORE_CAPABILITY (1U << 30) /*Core Capability*/
#define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */
#define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) /* Write back and
@ -719,6 +722,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_VENDOR_HYGON "HygonGenuine"
#define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \
(env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \
(env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3)
#define IS_AMD_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_AMD_1 && \
(env)->cpuid_vendor2 == CPUID_VENDOR_AMD_2 && \
(env)->cpuid_vendor3 == CPUID_VENDOR_AMD_3)
#define CPUID_MWAIT_IBE (1U << 1) /* Interrupts can exit capability */
#define CPUID_MWAIT_EMX (1U << 0) /* enumeration supported */
@ -734,6 +744,25 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3)
#define MSR_ARCH_CAP_SSB_NO (1U << 4)
#define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5)
/* Supported Hyper-V Enlightenments */
#define HYPERV_FEAT_RELAXED 0
#define HYPERV_FEAT_VAPIC 1
#define HYPERV_FEAT_TIME 2
#define HYPERV_FEAT_CRASH 3
#define HYPERV_FEAT_RESET 4
#define HYPERV_FEAT_VPINDEX 5
#define HYPERV_FEAT_RUNTIME 6
#define HYPERV_FEAT_SYNIC 7
#define HYPERV_FEAT_STIMER 8
#define HYPERV_FEAT_FREQUENCIES 9
#define HYPERV_FEAT_REENLIGHTENMENT 10
#define HYPERV_FEAT_TLBFLUSH 11
#define HYPERV_FEAT_EVMCS 12
#define HYPERV_FEAT_IPI 13
#define HYPERV_FEAT_STIMER_DIRECT 14
#ifndef HYPERV_SPINLOCK_NEVER_RETRY
#define HYPERV_SPINLOCK_NEVER_RETRY 0xFFFFFFFF
#endif
@ -1319,10 +1348,14 @@ typedef struct CPUX86State {
/* For KVM */
uint32_t mp_state;
int32_t exception_injected;
int32_t exception_nr;
int32_t interrupt_injected;
uint8_t soft_interrupt;
uint8_t exception_pending;
uint8_t exception_injected;
uint8_t has_error_code;
uint8_t exception_has_payload;
uint64_t exception_payload;
uint32_t ins_len;
uint32_t sipi_vector;
bool tsc_valid;
@ -1331,6 +1364,9 @@ typedef struct CPUX86State {
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
void *xsave_buf;
#endif
#if defined(CONFIG_KVM)
struct kvm_nested_state *nested_state;
#endif
#if defined(CONFIG_HVF)
HVFX86EmulatorState *hvf_emul;
#endif
@ -1370,23 +1406,12 @@ struct X86CPU {
CPUNegativeOffsetState neg;
CPUX86State env;
bool hyperv_vapic;
bool hyperv_relaxed_timing;
int hyperv_spinlock_attempts;
char *hyperv_vendor_id;
bool hyperv_time;
bool hyperv_crash;
bool hyperv_reset;
bool hyperv_vpindex;
bool hyperv_runtime;
bool hyperv_synic;
bool hyperv_synic_kvm_only;
bool hyperv_stimer;
bool hyperv_frequencies;
bool hyperv_reenlightenment;
bool hyperv_tlbflush;
bool hyperv_evmcs;
bool hyperv_ipi;
uint64_t hyperv_features;
bool hyperv_passthrough;
bool check_cpuid;
bool enforce_cpuid;
bool expose_kvm;
@ -1837,6 +1862,11 @@ static inline int32_t x86_get_a20_mask(CPUX86State *env)
}
}
static inline bool cpu_has_vmx(CPUX86State *env)
{
return env->features[FEAT_1_ECX] & CPUID_EXT_VMX;
}
/* fpu_helper.c */
void update_fp_status(CPUX86State *env);
void update_mxcsr_status(CPUX86State *env);
@ -1906,4 +1936,9 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const X86XSaveArea *buf);
void x86_cpu_xsave_all_areas(X86CPU *cpu, X86XSaveArea *buf);
void x86_update_hflags(CPUX86State* env);
static inline bool hyperv_feat_enabled(X86CPU *cpu, int feat)
{
return !!(cpu->hyperv_features & BIT(feat));
}
#endif /* I386_CPU_H */

target/i386/hax-all.c

@ -471,13 +471,35 @@ static int hax_vcpu_hax_exec(CPUArchState *env)
return 0;
}
cpu->halted = 0;
if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
apic_poll_irq(x86_cpu->apic_state);
}
/* After a vcpu is halted (either because it is an AP and has just been
* reset, or because it has executed the HLT instruction), it will not be
* run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
* for events that may change the halted state of this vcpu:
* a) Maskable interrupt, when RFLAGS.IF is 1;
* Note: env->eflags may not reflect the current RFLAGS state, because
* it is not updated after each hax_vcpu_run(). We cannot afford
* to fail to recognize any unhalt-by-maskable-interrupt event
* (in which case the vcpu will halt forever), and yet we cannot
* afford the overhead of hax_vcpu_sync_state(). The current
* solution is to err on the side of caution and have the HLT
* handler (see case HAX_EXIT_HLT below) unconditionally set the
* IF_MASK bit in env->eflags, which, in effect, disables the
* RFLAGS.IF check.
* b) NMI;
* c) INIT signal;
* d) SIPI signal.
*/
if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
(env->eflags & IF_MASK)) ||
(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
cpu->halted = 0;
}
if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
cpu->cpu_index);
@ -493,6 +515,16 @@ static int hax_vcpu_hax_exec(CPUArchState *env)
hax_vcpu_sync_state(env, 1);
}
if (cpu->halted) {
/* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
* break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
* That way, this vcpu thread will be trapped in qemu_wait_io_event(),
* until the vcpu is unhalted.
*/
cpu->exception_index = EXCP_HLT;
return 0;
}
do {
int hax_ret;

target/i386/hvf/hvf.c

@ -605,7 +605,9 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
X86CPU *x86_cpu = X86_CPU(cpu);
CPUX86State *env = &x86_cpu->env;
env->exception_injected = -1;
env->exception_nr = -1;
env->exception_pending = 0;
env->exception_injected = 0;
env->interrupt_injected = -1;
env->nmi_injected = false;
if (idtvec_info & VMCS_IDT_VEC_VALID) {
@ -619,7 +621,8 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
break;
case VMCS_IDT_VEC_HWEXCEPTION:
case VMCS_IDT_VEC_SWEXCEPTION:
env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
env->exception_injected = 1;
break;
case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
default:
@ -912,7 +915,8 @@ int hvf_vcpu_exec(CPUState *cpu)
macvm_set_rip(cpu, rip + ins_len);
break;
case VMX_REASON_VMCALL:
env->exception_injected = EXCP0D_GPF;
env->exception_nr = EXCP0D_GPF;
env->exception_injected = 1;
env->has_error_code = true;
env->error_code = 0;
break;

target/i386/hvf/x86hvf.c

@ -362,8 +362,8 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
if (env->interrupt_injected != -1) {
vector = env->interrupt_injected;
intr_type = VMCS_INTR_T_SWINTR;
} else if (env->exception_injected != -1) {
vector = env->exception_injected;
} else if (env->exception_nr != -1) {
vector = env->exception_nr;
if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
intr_type = VMCS_INTR_T_SWEXCEPTION;
} else {

target/i386/hyperv-proto.h

@ -49,6 +49,7 @@
#define HV_GUEST_IDLE_STATE_AVAILABLE (1u << 5)
#define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8)
#define HV_GUEST_CRASH_MSR_AVAILABLE (1u << 10)
#define HV_STIMER_DIRECT_MODE_AVAILABLE (1u << 19)
/*
* HV_CPUID_ENLIGHTMENT_INFO.EAX bits

target/i386/hyperv.c

@ -52,7 +52,7 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
switch (exit->type) {
case KVM_EXIT_HYPERV_SYNIC:
if (!cpu->hyperv_synic) {
if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
return -1;
}

target/i386/kvm.c (diff suppressed because it is too large)

target/i386/machine.c

@ -231,6 +231,50 @@ static int cpu_pre_save(void *opaque)
env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
}
#ifdef CONFIG_KVM
/* Verify we have nested virtualization state from kernel if required */
if (kvm_enabled() && cpu_has_vmx(env) && !env->nested_state) {
error_report("Guest enabled nested virtualization but kernel "
"does not support saving of nested state");
return -EINVAL;
}
#endif
/*
* When a vCPU is running L2 and an exception is still pending,
* it can potentially be intercepted by the L1 hypervisor,
* in contrast to an injected exception, which cannot be
* intercepted anymore.
*
* Furthermore, when an L2 exception is intercepted by the L1
* hypervisor, its exception payload (CR2/DR6 on #PF/#DB)
* should not be set yet in the respective vCPU register.
* Thus, in case an exception is pending, it is
* important to save the exception payload separately.
*
* Therefore, if an exception is not in a pending state
* or the vCPU is not in guest-mode, it is not important to
* distinguish between a pending and injected exception
* and we don't need to store the exception payload separately.
*
* In order to preserve better backwards-compatible migration,
* convert a pending exception to an injected exception in
* case it is not important to distinguish between them
* as described above.
*/
if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) {
env->exception_pending = 0;
env->exception_injected = 1;
if (env->exception_has_payload) {
if (env->exception_nr == EXCP01_DB) {
env->dr[6] = env->exception_payload;
} else if (env->exception_nr == EXCP0E_PAGE) {
env->cr[2] = env->exception_payload;
}
}
}
return 0;
}
@ -278,6 +322,33 @@ static int cpu_post_load(void *opaque, int version_id)
env->hflags &= ~HF_CPL_MASK;
env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
#ifdef CONFIG_KVM
if ((env->hflags & HF_GUEST_MASK) &&
(!env->nested_state ||
!(env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE))) {
error_report("vCPU set in guest-mode inconsistent with "
"migrated kernel nested state");
return -EINVAL;
}
#endif
/*
* There are cases where we can get a valid exception_nr with both
* exception_pending and exception_injected being cleared.
* This can happen in one of the following scenarios:
* 1) Source is an older QEMU without KVM_CAP_EXCEPTION_PAYLOAD support.
* 2) Source is running on a kernel without KVM_CAP_EXCEPTION_PAYLOAD support.
* 3) "cpu/exception_info" subsection not sent because there is no exception
*    pending or the guest wasn't running L2 (see comment in cpu_pre_save()).
*
* In those cases, we can just deduce that a valid exception_nr means
* we can treat the exception as already injected.
*/
if ((env->exception_nr != -1) &&
!env->exception_pending && !env->exception_injected) {
env->exception_injected = 1;
}
env->fpstt = (env->fpus_vmstate >> 11) & 7;
env->fpus = env->fpus_vmstate & ~0x3800;
env->fptag_vmstate ^= 0xff;
@ -323,6 +394,35 @@ static bool steal_time_msr_needed(void *opaque)
return cpu->env.steal_time_msr != 0;
}
static bool exception_info_needed(void *opaque)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
/*
* It is important to save exception info only in case
* we need to distinguish between a pending and injected
* exception, which is only required in case there is a
* pending exception and the vCPU is running L2.
* For more info, refer to the comment in cpu_pre_save().
*/
return env->exception_pending && (env->hflags & HF_GUEST_MASK);
}
static const VMStateDescription vmstate_exception_info = {
.name = "cpu/exception_info",
.version_id = 1,
.minimum_version_id = 1,
.needed = exception_info_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(env.exception_pending, X86CPU),
VMSTATE_UINT8(env.exception_injected, X86CPU),
VMSTATE_UINT8(env.exception_has_payload, X86CPU),
VMSTATE_UINT64(env.exception_payload, X86CPU),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_steal_time_msr = {
.name = "cpu/steal_time_msr",
.version_id = 1,
@ -634,7 +734,7 @@ static bool hyperv_runtime_enable_needed(void *opaque)
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
if (!cpu->hyperv_runtime) {
if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_RUNTIME)) {
return false;
}
@ -851,6 +951,182 @@ static const VMStateDescription vmstate_tsc_khz = {
}
};
#ifdef CONFIG_KVM
static bool vmx_vmcs12_needed(void *opaque)
{
struct kvm_nested_state *nested_state = opaque;
return (nested_state->size >
offsetof(struct kvm_nested_state, data.vmx[0].vmcs12));
}
static const VMStateDescription vmstate_vmx_vmcs12 = {
.name = "cpu/kvm_nested_state/vmx/vmcs12",
.version_id = 1,
.minimum_version_id = 1,
.needed = vmx_vmcs12_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8_ARRAY(data.vmx[0].vmcs12,
struct kvm_nested_state,
KVM_STATE_NESTED_VMX_VMCS_SIZE),
VMSTATE_END_OF_LIST()
}
};
static bool vmx_shadow_vmcs12_needed(void *opaque)
{
struct kvm_nested_state *nested_state = opaque;
return (nested_state->size >
offsetof(struct kvm_nested_state, data.vmx[0].shadow_vmcs12));
}
static const VMStateDescription vmstate_vmx_shadow_vmcs12 = {
.name = "cpu/kvm_nested_state/vmx/shadow_vmcs12",
.version_id = 1,
.minimum_version_id = 1,
.needed = vmx_shadow_vmcs12_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8_ARRAY(data.vmx[0].shadow_vmcs12,
struct kvm_nested_state,
KVM_STATE_NESTED_VMX_VMCS_SIZE),
VMSTATE_END_OF_LIST()
}
};
static bool vmx_nested_state_needed(void *opaque)
{
struct kvm_nested_state *nested_state = opaque;
return ((nested_state->format == KVM_STATE_NESTED_FORMAT_VMX) &&
((nested_state->hdr.vmx.vmxon_pa != -1ull) ||
(nested_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)));
}
static const VMStateDescription vmstate_vmx_nested_state = {
.name = "cpu/kvm_nested_state/vmx",
.version_id = 1,
.minimum_version_id = 1,
.needed = vmx_nested_state_needed,
.fields = (VMStateField[]) {
VMSTATE_U64(hdr.vmx.vmxon_pa, struct kvm_nested_state),
VMSTATE_U64(hdr.vmx.vmcs12_pa, struct kvm_nested_state),
VMSTATE_U16(hdr.vmx.smm.flags, struct kvm_nested_state),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
&vmstate_vmx_vmcs12,
&vmstate_vmx_shadow_vmcs12,
NULL,
}
};
static bool svm_nested_state_needed(void *opaque)
{
struct kvm_nested_state *nested_state = opaque;
return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM);
}
static const VMStateDescription vmstate_svm_nested_state = {
.name = "cpu/kvm_nested_state/svm",
.version_id = 1,
.minimum_version_id = 1,
.needed = svm_nested_state_needed,
.fields = (VMStateField[]) {
VMSTATE_END_OF_LIST()
}
};
static bool nested_state_needed(void *opaque)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
return (env->nested_state &&
(vmx_nested_state_needed(env->nested_state) ||
svm_nested_state_needed(env->nested_state)));
}
static int nested_state_post_load(void *opaque, int version_id)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
struct kvm_nested_state *nested_state = env->nested_state;
int min_nested_state_len = offsetof(struct kvm_nested_state, data);
int max_nested_state_len = kvm_max_nested_state_length();
/*
* If our kernel doesn't support setting nested state
* and we have received nested state from the migration stream,
* we need to fail migration.
*/
if (max_nested_state_len <= 0) {
error_report("Received nested state when kernel cannot restore it");
return -EINVAL;
}
/*
* Verify that the size of the received nested_state struct
* at least covers the required header and is not larger
* than the max size that our kernel supports.
*/
if (nested_state->size < min_nested_state_len) {
error_report("Received nested state size less than min: "
"len=%d, min=%d",
nested_state->size, min_nested_state_len);
return -EINVAL;
}
if (nested_state->size > max_nested_state_len) {
error_report("Recieved unsupported nested state size: "
"nested_state->size=%d, max=%d",
nested_state->size, max_nested_state_len);
return -EINVAL;
}
/* Verify format is valid */
if ((nested_state->format != KVM_STATE_NESTED_FORMAT_VMX) &&
(nested_state->format != KVM_STATE_NESTED_FORMAT_SVM)) {
error_report("Received invalid nested state format: %d",
nested_state->format);
return -EINVAL;
}
return 0;
}
static const VMStateDescription vmstate_kvm_nested_state = {
.name = "cpu/kvm_nested_state",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_U16(flags, struct kvm_nested_state),
VMSTATE_U16(format, struct kvm_nested_state),
VMSTATE_U32(size, struct kvm_nested_state),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
&vmstate_vmx_nested_state,
&vmstate_svm_nested_state,
NULL
}
};
static const VMStateDescription vmstate_nested_state = {
.name = "cpu/nested_state",
.version_id = 1,
.minimum_version_id = 1,
.needed = nested_state_needed,
.post_load = nested_state_post_load,
.fields = (VMStateField[]) {
VMSTATE_STRUCT_POINTER(env.nested_state, X86CPU,
vmstate_kvm_nested_state,
struct kvm_nested_state),
VMSTATE_END_OF_LIST()
}
};
#endif
static bool mcg_ext_ctl_needed(void *opaque)
{
X86CPU *cpu = opaque;
@ -1056,7 +1332,7 @@ VMStateDescription vmstate_x86_cpu = {
VMSTATE_INT32(env.interrupt_injected, X86CPU),
VMSTATE_UINT32(env.mp_state, X86CPU),
VMSTATE_UINT64(env.tsc, X86CPU),
VMSTATE_INT32(env.exception_injected, X86CPU),
VMSTATE_INT32(env.exception_nr, X86CPU),
VMSTATE_UINT8(env.soft_interrupt, X86CPU),
VMSTATE_UINT8(env.nmi_injected, X86CPU),
VMSTATE_UINT8(env.nmi_pending, X86CPU),
@ -1080,6 +1356,7 @@ VMStateDescription vmstate_x86_cpu = {
/* The above list is not sorted /wrt version numbers, watch out! */
},
.subsections = (const VMStateDescription*[]) {
&vmstate_exception_info,
&vmstate_async_pf_msr,
&vmstate_pv_eoi_msr,
&vmstate_steal_time_msr,
@ -1112,6 +1389,9 @@ VMStateDescription vmstate_x86_cpu = {
&vmstate_svm_npt,
#ifndef TARGET_X86_64
&vmstate_efer32,
#endif
#ifdef CONFIG_KVM
&vmstate_nested_state,
#endif
NULL
}

target/mips/kvm.c

@ -91,6 +91,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
return ret;
}
int kvm_arch_destroy_vcpu(CPUState *cs)
{
return 0;
}
void kvm_mips_reset_vcpu(MIPSCPU *cpu)
{
CPUMIPSState *env = &cpu->env;

target/ppc/kvm.c

@ -521,6 +521,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
return ret;
}
int kvm_arch_destroy_vcpu(CPUState *cs)
{
return 0;
}
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
CPUPPCState *env = &cpu->env;

target/s390x/kvm.c

@ -368,6 +368,16 @@ int kvm_arch_init_vcpu(CPUState *cs)
return 0;
}
int kvm_arch_destroy_vcpu(CPUState *cs)
{
S390CPU *cpu = S390_CPU(cs);
g_free(cpu->irqstate);
cpu->irqstate = NULL;
return 0;
}
void kvm_s390_reset_vcpu(S390CPU *cpu)
{
CPUState *cs = CPU(cpu);

util/main-loop.c

@ -422,7 +422,7 @@ static int os_host_main_loop_wait(int64_t timeout)
g_main_context_prepare(context, &max_priority);
n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
poll_fds, ARRAY_SIZE(poll_fds));
g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));
g_assert(n_poll_fds + w->num <= ARRAY_SIZE(poll_fds));
for (i = 0; i < w->num; i++) {
poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];