* virtio-blk: remove SCSI passthrough functionality

* require x86-64-v2 baseline ISA
 * SEV-SNP host support
 * fix xsave.flat with TCG
 * fixes for CPUID checks done by TCG
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmZgKVYUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroPKYgf/QkWrNXdjjD3yAsv5LbJFVTVyCYW3
 b4Iax29kEDy8k9wbzfLxOfIk9jXIjmbOMO5ZN9LFiHK6VJxbXslsMh6hm50M3xKe
 49X1Rvf9YuVA7KZX+dWkEuqLYI6Tlgj3HaCilYWfXrjyo6hY3CxzkPV/ChmaeYlV
 Ad4Y8biifoUuuEK8OTeTlcDWLhOHlFXylG3AXqULsUsXp0XhWJ9juXQ60eATv/W4
 eCEH7CSmRhYFu2/rV+IrWFYMnskLRTk1OC1/m6yXGPKOzgnOcthuvQfiUgPkbR/d
 llY6Ni5Aaf7+XX3S7Avcyvoq8jXzaaMzOrzL98rxYGDR1sYBYO+4h4ZToA==
 =qQeP
 -----END PGP SIGNATURE-----

Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* virtio-blk: remove SCSI passthrough functionality
* require x86-64-v2 baseline ISA
* SEV-SNP host support
* fix xsave.flat with TCG
* fixes for CPUID checks done by TCG

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmZgKVYUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroPKYgf/QkWrNXdjjD3yAsv5LbJFVTVyCYW3
# b4Iax29kEDy8k9wbzfLxOfIk9jXIjmbOMO5ZN9LFiHK6VJxbXslsMh6hm50M3xKe
# 49X1Rvf9YuVA7KZX+dWkEuqLYI6Tlgj3HaCilYWfXrjyo6hY3CxzkPV/ChmaeYlV
# Ad4Y8biifoUuuEK8OTeTlcDWLhOHlFXylG3AXqULsUsXp0XhWJ9juXQ60eATv/W4
# eCEH7CSmRhYFu2/rV+IrWFYMnskLRTk1OC1/m6yXGPKOzgnOcthuvQfiUgPkbR/d
# llY6Ni5Aaf7+XX3S7Avcyvoq8jXzaaMzOrzL98rxYGDR1sYBYO+4h4ZToA==
# =qQeP
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 05 Jun 2024 02:01:10 AM PDT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (46 commits)
  hw/i386: Add support for loading BIOS using guest_memfd
  hw/i386/sev: Use guest_memfd for legacy ROMs
  memory: Introduce memory_region_init_ram_guest_memfd()
  i386/sev: Allow measured direct kernel boot on SNP
  i386/sev: Reorder struct declarations
  i386/sev: Extract build_kernel_loader_hashes
  i386/sev: Enable KVM_HC_MAP_GPA_RANGE hcall for SNP guests
  i386/kvm: Add KVM_EXIT_HYPERCALL handling for KVM_HC_MAP_GPA_RANGE
  i386/sev: Invoke launch_updata_data() for SNP class
  i386/sev: Invoke launch_updata_data() for SEV class
  hw/i386/sev: Add support to encrypt BIOS when SEV-SNP is enabled
  i386/sev: Add support for SNP CPUID validation
  i386/sev: Add support for populating OVMF metadata pages
  hw/i386/sev: Add function to get SEV metadata from OVMF header
  i386/sev: Set CPU state to protected once SNP guest payload is finalized
  i386/sev: Add handling to encrypt/finalize guest launch data
  i386/sev: Add the SNP launch start context
  i386/sev: Update query-sev QAPI format to handle SEV-SNP
  i386/sev: Add a class method to determine KVM VM type for SNP guests
  i386/sev: Don't return launch measurements for SEV-SNP guests
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2024-06-05 07:45:23 -07:00
commit f1572ab947
51 changed files with 1849 additions and 614 deletions

View File

@ -280,16 +280,6 @@ Device options
Emulated device options
'''''''''''''''''''''''
``-device virtio-blk,scsi=on|off`` (since 5.0)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The virtio-blk SCSI passthrough feature is a legacy VIRTIO feature. VIRTIO 1.0
and later do not support it because the virtio-scsi device was introduced for
full SCSI support. Use virtio-scsi instead when SCSI passthrough is required.
Note this also applies to ``-device virtio-blk-pci,scsi=on|off``, which is an
alias.
``-device nvme-ns,eui64-default=on|off`` (since 7.1)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -510,6 +510,14 @@ than zero.
Removed along with the ``compression`` migration capability.
``-device virtio-blk,scsi=on|off`` (since 9.1)
''''''''''''''''''''''''''''''''''''''''''''''
The virtio-blk SCSI passthrough feature is a legacy VIRTIO feature. VIRTIO 1.0
and later do not support it because the virtio-scsi device was introduced for
full SCSI support. Use virtio-scsi instead when SCSI passthrough is required.
User-mode emulator command line arguments
-----------------------------------------

View File

@ -25,7 +25,7 @@ A detailed command line would be:
-object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 -numa node,nodeid=1,cpus=1,memdev=ram-node1
-device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd -device e1000,bus=bridge1,addr=0x4,netdev=nd
-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8 -device e1000,bus=bridge2,addr=0x3
-device pxb,id=bridge3,bus=pci.0,bus_nr=40 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1
-device pxb,id=bridge3,bus=pci.0,bus_nr=40 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,bus=bridge3,addr=1
Here you have:
- 2 NUMA nodes for the guest, 0 and 1. (both mapped to the same NUMA node in host, but you can and should put them in different host NUMA nodes)

View File

@ -336,7 +336,7 @@ In case a pSeries machine is emulated, use the following command line:
-tpmdev emulator,id=tpm0,chardev=chrtpm \
-device tpm-spapr,tpmdev=tpm0 \
-device spapr-vscsi,id=scsi0,reg=0x00002000 \
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \
-device virtio-blk-pci,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \
-drive file=test.img,format=raw,if=none,id=drive-virtio-disk0
In case an Arm virt machine is emulated, use the following command line:

View File

@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs
are about to occur. This allows the guest to selectively share information with
the hypervisor to satisfy the requested function.
Launching
---------
Launching (SEV and SEV-ES)
--------------------------
Boot images (such as bios) must be encrypted before a guest can be booted. The
``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``,
@ -161,6 +161,72 @@ The value of GCTX.LD is
If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for
``kernel_hashes_blob`` and ``vmsas_blob`` as needed.
Launching (SEV-SNP)
-------------------
Boot images (such as bios) must be encrypted before a guest can be booted. The
``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images:
``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These
three commands communicate with the SEV-SNP firmware to generate a fresh memory
encryption key for the VM and to encrypt the boot images for a successful
launch. For more details on the SEV-SNP firmware interfaces used by these
commands please see the SEV-SNP Firmware ABI.
``SNP_LAUNCH_START`` is called first to create a cryptographic launch context
within the firmware. To create this context, the guest owner must provide a
guest policy and other parameters as described in the SEV-SNP firmware
specification. The launch parameters should be specified as described in the
QAPI schema for the sev-snp-guest object.
The ``SNP_LAUNCH_START`` uses the following parameters, which can be configured
by the corresponding parameters documented in the QAPI schema for the
'sev-snp-guest' object.
+---------------------------+--------+---------+-------------------------------+
| key                       | type   | default | meaning                       |
+---------------------------+--------+---------+-------------------------------+
| policy                    | hex    | 0x30000 | a 64-bit guest policy         |
+---------------------------+--------+---------+-------------------------------+
| guest-visible-workarounds | string | 0       | 16-byte base64 encoded string |
|                           |        |         | for guest OS visible          |
|                           |        |         | workarounds.                  |
+---------------------------+--------+---------+-------------------------------+
``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic context
created via the ``SNP_LAUNCH_START`` command. If required, this command can be
called multiple times to encrypt different memory regions. The command also
calculates the measurement of the memory contents as it encrypts.
``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while
finalizing the launch the firmware can perform checks on the launch digest
computed through ``SNP_LAUNCH_UPDATE``. To perform the check the user must
supply the id block, authentication blob and host data that should be included
in the attestation report. See the SEV-SNP spec for further details.
The ``SNP_LAUNCH_FINISH`` uses the following parameters, which can be configured
by the corresponding parameters documented in the QAPI schema for the
'sev-snp-guest' object.
+--------------------+-------+----------+-------------------------------------+
| key                | type  | default  | meaning                             |
+--------------------+-------+----------+-------------------------------------+
| id-block           | string| none     | base64 encoded ID block             |
+--------------------+-------+----------+-------------------------------------+
| id-auth            | string| none     | base64 encoded authentication       |
|                    |       |          | information                         |
+--------------------+-------+----------+-------------------------------------+
| author-key-enabled | bool  | 0        | auth block contains author key      |
+--------------------+-------+----------+-------------------------------------+
| host-data          | string| none     | host provided data                  |
+--------------------+-------+----------+-------------------------------------+
To launch a SEV-SNP guest (additional parameters are documented in the QAPI
schema for the 'sev-snp-guest' object)::
# ${QEMU} \
-machine ...,confidential-guest-support=sev0 \
-object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1
Debugging
---------

View File

@ -9,14 +9,10 @@
/* Digested version of <cpuid.h> */
#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
#define CPUINFO_CMOV (1u << 1)
#define CPUINFO_MOVBE (1u << 2)
#define CPUINFO_LZCNT (1u << 3)
#define CPUINFO_POPCNT (1u << 4)
#define CPUINFO_BMI1 (1u << 5)
#define CPUINFO_BMI2 (1u << 6)
#define CPUINFO_SSE2 (1u << 7)
#define CPUINFO_SSE4 (1u << 8)
#define CPUINFO_AVX1 (1u << 9)
#define CPUINFO_AVX2 (1u << 10)
#define CPUINFO_AVX512F (1u << 11)

View File

@ -172,57 +172,6 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
virtio_blk_free_request(req);
}
#ifdef __linux__
typedef struct {
VirtIOBlockReq *req;
struct sg_io_hdr hdr;
} VirtIOBlockIoctlReq;
static void virtio_blk_ioctl_complete(void *opaque, int status)
{
VirtIOBlockIoctlReq *ioctl_req = opaque;
VirtIOBlockReq *req = ioctl_req->req;
VirtIOBlock *s = req->dev;
VirtIODevice *vdev = VIRTIO_DEVICE(s);
struct virtio_scsi_inhdr *scsi;
struct sg_io_hdr *hdr;
scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
if (status) {
status = VIRTIO_BLK_S_UNSUPP;
virtio_stl_p(vdev, &scsi->errors, 255);
goto out;
}
hdr = &ioctl_req->hdr;
/*
* From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
* clear the masked_status field [hence status gets cleared too, see
* block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
* status has occurred. However they do set DRIVER_SENSE in driver_status
* field. Also a (sb_len_wr > 0) indicates there is a sense buffer.
*/
if (hdr->status == 0 && hdr->sb_len_wr > 0) {
hdr->status = CHECK_CONDITION;
}
virtio_stl_p(vdev, &scsi->errors,
hdr->status | (hdr->msg_status << 8) |
(hdr->host_status << 16) | (hdr->driver_status << 24));
virtio_stl_p(vdev, &scsi->residual, hdr->resid);
virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr);
virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
out:
virtio_blk_req_complete(req, status);
virtio_blk_free_request(req);
g_free(ioctl_req);
}
#endif
static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq)
{
VirtIOBlockReq *req = virtqueue_pop(vq, sizeof(VirtIOBlockReq));
@ -233,20 +182,14 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq)
return req;
}
static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
int status = VIRTIO_BLK_S_OK;
struct virtio_scsi_inhdr *scsi = NULL;
int status;
struct virtio_scsi_inhdr *scsi;
VirtIOBlock *blk = req->dev;
VirtIODevice *vdev = VIRTIO_DEVICE(blk);
VirtQueueElement *elem = &req->elem;
#ifdef __linux__
int i;
VirtIOBlockIoctlReq *ioctl_req;
BlockAIOCB *acb;
#endif
/*
* We require at least one output segment each for the virtio_blk_outhdr
* and the SCSI command block.
@ -262,95 +205,16 @@ static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
/*
* The scsi inhdr is placed in the second-to-last input segment, just
* before the regular inhdr.
*
* Just put anything nonzero so that the ioctl fails in the guest.
*/
scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) {
status = VIRTIO_BLK_S_UNSUPP;
goto fail;
}
/*
* No support for bidirection commands yet.
*/
if (elem->out_num > 2 && elem->in_num > 3) {
status = VIRTIO_BLK_S_UNSUPP;
goto fail;
}
#ifdef __linux__
ioctl_req = g_new0(VirtIOBlockIoctlReq, 1);
ioctl_req->req = req;
ioctl_req->hdr.interface_id = 'S';
ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len;
ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base;
ioctl_req->hdr.dxfer_len = 0;
if (elem->out_num > 2) {
/*
* If there are more than the minimally required 2 output segments
* there is write payload starting from the third iovec.
*/
ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV;
ioctl_req->hdr.iovec_count = elem->out_num - 2;
for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
}
ioctl_req->hdr.dxferp = elem->out_sg + 2;
} else if (elem->in_num > 3) {
/*
* If we have more than 3 input segments the guest wants to actually
* read data.
*/
ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV;
ioctl_req->hdr.iovec_count = elem->in_num - 3;
for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len;
}
ioctl_req->hdr.dxferp = elem->in_sg;
} else {
/*
* Some SCSI commands don't actually transfer any data.
*/
ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE;
}
ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
virtio_blk_ioctl_complete, ioctl_req);
if (!acb) {
g_free(ioctl_req);
status = VIRTIO_BLK_S_UNSUPP;
goto fail;
}
return -EINPROGRESS;
#else
abort();
#endif
virtio_stl_p(vdev, &scsi->errors, 255);
status = VIRTIO_BLK_S_UNSUPP;
fail:
/* Just put anything nonzero so that the ioctl fails in the guest. */
if (scsi) {
virtio_stl_p(vdev, &scsi->errors, 255);
}
return status;
}
static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
int status;
status = virtio_blk_handle_scsi_req(req);
if (status != -EINPROGRESS) {
virtio_blk_req_complete(req, status);
virtio_blk_free_request(req);
}
virtio_blk_req_complete(req, status);
virtio_blk_free_request(req);
}
static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
@ -1379,13 +1243,9 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) {
error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
return 0;
}
} else {
if (!virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT);
/* Added for historical reasons, removing it could break migration. */
virtio_add_feature(&features, VIRTIO_BLK_F_SCSI);
}
@ -2132,10 +1992,6 @@ static Property virtio_blk_properties[] = {
DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features,
VIRTIO_BLK_F_CONFIG_WCE, true),
#ifdef __linux__
DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features,
VIRTIO_BLK_F_SCSI, false),
#endif
DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
true),
DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues,

View File

@ -265,8 +265,6 @@ GlobalProperty hw_compat_2_5[] = {
const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5);
GlobalProperty hw_compat_2_4[] = {
/* Optional because the 'scsi' property is Linux-only */
{ "virtio-blk-device", "scsi", "true", .optional = true },
{ "e1000", "extra_mac_registers", "off" },
{ "virtio-pci", "x-disable-pcie", "on" },
{ "virtio-pci", "migrate-extra", "off" },
@ -1218,7 +1216,7 @@ bool machine_mem_merge(MachineState *machine)
bool machine_require_guest_memfd(MachineState *machine)
{
return machine->require_guest_memfd;
return machine->cgs && machine->cgs->require_guest_memfd;
}
static char *cpu_slot_to_string(const CPUArchId *cpu)

View File

@ -62,6 +62,7 @@
#include "hw/mem/memory-device.h"
#include "e820_memory_layout.h"
#include "trace.h"
#include "sev.h"
#include CONFIG_DEVICES
#ifdef CONFIG_XEN_EMU
@ -1022,10 +1023,15 @@ void pc_memory_init(PCMachineState *pcms,
pc_system_firmware_init(pcms, rom_memory);
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
&error_fatal);
if (pcmc->pci_enabled) {
memory_region_set_readonly(option_rom_mr, true);
if (machine_require_guest_memfd(machine)) {
memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
PC_ROM_SIZE, &error_fatal);
} else {
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
&error_fatal);
if (pcmc->pci_enabled) {
memory_region_set_readonly(option_rom_mr, true);
}
}
memory_region_add_subregion_overlap(rom_memory,
PC_ROM_MIN_VGA,

View File

@ -40,8 +40,8 @@
#define FLASH_SECTOR_SIZE 4096
static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
MemoryRegion *flash_mem)
static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios,
MemoryRegion *rom_memory, MemoryRegion *flash_mem)
{
int isa_bios_size;
uint64_t flash_size;
@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
/* map the last 128KB of the BIOS in ISA space */
isa_bios_size = MIN(flash_size, 128 * KiB);
memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
&error_fatal);
if (machine_require_guest_memfd(MACHINE(pcms))) {
memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
isa_bios_size, &error_fatal);
} else {
memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
&error_fatal);
}
memory_region_add_subregion_overlap(rom_memory,
0x100000 - isa_bios_size,
isa_bios,
@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
isa_bios_size);
memory_region_set_readonly(isa_bios, true);
if (!machine_require_guest_memfd(current_machine)) {
memory_region_set_readonly(isa_bios, true);
}
}
static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
@ -148,6 +155,8 @@ static void pc_system_flash_map(PCMachineState *pcms,
assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled);
for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) {
hwaddr gpa;
system_flash = pcms->flash[i];
blk = pflash_cfi01_get_blk(system_flash);
if (!blk) {
@ -177,11 +186,11 @@ static void pc_system_flash_map(PCMachineState *pcms,
}
total_size += size;
gpa = 0x100000000ULL - total_size; /* where the flash is mapped */
qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks",
size / FLASH_SECTOR_SIZE);
sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0,
0x100000000ULL - total_size);
sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa);
if (i == 0) {
flash_mem = pflash_cfi01_get_memory(system_flash);
@ -189,14 +198,14 @@ static void pc_system_flash_map(PCMachineState *pcms,
x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem,
true);
} else {
pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem);
}
/* Encrypt the pflash boot ROM */
if (sev_enabled()) {
flash_ptr = memory_region_get_ram_ptr(flash_mem);
flash_size = memory_region_size(flash_mem);
x86_firmware_configure(flash_ptr, flash_size);
x86_firmware_configure(gpa, flash_ptr, flash_size);
}
}
}
@ -249,7 +258,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
pc_system_flash_cleanup_unused(pcms);
}
void x86_firmware_configure(void *ptr, int size)
void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
{
int ret;
@ -260,12 +269,16 @@ void x86_firmware_configure(void *ptr, int size)
pc_system_parse_ovmf_flash(ptr, size);
if (sev_enabled()) {
/* Copy the SEV metadata table (if it exists) */
pc_system_parse_sev_metadata(ptr, size);
ret = sev_es_save_reset_vector(ptr, size);
if (ret) {
error_report("failed to locate and/or save reset vector");
exit(1);
}
sev_encrypt_flash(ptr, size, &error_fatal);
sev_encrypt_flash(gpa, ptr, size, &error_fatal);
}
}

View File

@ -1001,8 +1001,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
(bios_size % 65536) != 0) {
goto bios_error;
}
memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
&error_fatal);
if (machine_require_guest_memfd(MACHINE(x86ms))) {
memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
} else {
memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
}
if (sev_enabled()) {
/*
* The concept of a "reset" simply doesn't exist for
@ -1013,7 +1018,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
*/
void *ptr = memory_region_get_ram_ptr(&x86ms->bios);
load_image_size(filename, ptr, bios_size);
x86_firmware_configure(ptr, bios_size);
x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size);
} else {
memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw);
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
@ -1023,9 +1028,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
}
g_free(filename);
/* map the last 128KB of the BIOS in ISA space */
x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
!isapc_ram_fw);
if (!machine_require_guest_memfd(MACHINE(x86ms))) {
/* map the last 128KB of the BIOS in ISA space */
x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
!isapc_ram_fw);
}
/* map all the bios at the top of memory */
memory_region_add_subregion(rom_memory,

View File

@ -21,7 +21,7 @@
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
#include "hw/isa/isa.h"
#include "standard-headers/linux/pvpanic.h"
#include "standard-headers/misc/pvpanic.h"
#include "hw/acpi/acpi_aml_interface.h"
OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE)

View File

@ -21,7 +21,7 @@
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
#include "hw/pci/pci_device.h"
#include "standard-headers/linux/pvpanic.h"
#include "standard-headers/misc/pvpanic.h"
OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE)

View File

@ -21,7 +21,7 @@
#include "hw/qdev-properties.h"
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
#include "standard-headers/linux/pvpanic.h"
#include "standard-headers/misc/pvpanic.h"
static void handle_event(int event)
{

View File

@ -31,6 +31,11 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
struct ConfidentialGuestSupport {
Object parent;
/*
* True if the machine should use guest_memfd for RAM.
*/
bool require_guest_memfd;
/*
* ready: flag set by CGS initialization code once it's ready to
* start executing instructions in a potentially-secure

View File

@ -1638,6 +1638,12 @@ bool memory_region_init_ram(MemoryRegion *mr,
uint64_t size,
Error **errp);
bool memory_region_init_ram_guest_memfd(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
Error **errp);
/**
* memory_region_init_rom: Initialize a ROM memory region.
*

View File

@ -375,7 +375,6 @@ struct MachineState {
char *dt_compatible;
bool dump_guest_core;
bool mem_merge;
bool require_guest_memfd;
bool usb;
bool usb_disabled;
char *firmware;

View File

@ -164,6 +164,34 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
typedef enum {
SEV_DESC_TYPE_UNDEF,
/* The section contains the region that must be validated by the VMM. */
SEV_DESC_TYPE_SNP_SEC_MEM,
/* The section contains the SNP secrets page */
SEV_DESC_TYPE_SNP_SECRETS,
/* The section contains address that can be used as a CPUID page */
SEV_DESC_TYPE_CPUID,
/* The section contains the region for kernel hashes for measured direct boot */
SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10,
} ovmf_sev_metadata_desc_type;
typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc {
uint32_t base;
uint32_t len;
ovmf_sev_metadata_desc_type type;
} OvmfSevMetadataDesc;
typedef struct __attribute__((__packed__)) OvmfSevMetadata {
uint8_t signature[4];
uint32_t len;
uint32_t version;
uint32_t num_desc;
OvmfSevMetadataDesc descs[];
} OvmfSevMetadata;
OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void);
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
MemoryRegion *pci_address_space);

View File

@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent);
DeviceState *ioapic_init_secondary(GSIState *gsi_state);
/* pc_sysfw.c */
void x86_firmware_configure(void *ptr, int size);
void x86_firmware_configure(hwaddr gpa, void *ptr, int size);
#endif

View File

@ -0,0 +1,38 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __LINUX_KVM_PARA_H
#define __LINUX_KVM_PARA_H
/*
* This header file provides a method for making a hypercall to the host
* Architectures should define:
* - kvm_hypercall0, kvm_hypercall1...
* - kvm_arch_para_features
* - kvm_para_available
*/
/* Return values for hypercalls */
#define KVM_ENOSYS 1000
#define KVM_EFAULT EFAULT
#define KVM_EINVAL EINVAL
#define KVM_E2BIG E2BIG
#define KVM_EPERM EPERM
#define KVM_EOPNOTSUPP 95
#define KVM_HC_VAPIC_POLL_IRQ 1
#define KVM_HC_MMU_OP 2
#define KVM_HC_FEATURES 3
#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
#define KVM_HC_KICK_CPU 5
#define KVM_HC_MIPS_GET_CLOCK_FREQ 6
#define KVM_HC_MIPS_EXIT_VM 7
#define KVM_HC_MIPS_CONSOLE_OUTPUT 8
#define KVM_HC_CLOCK_PAIRING 9
#define KVM_HC_SEND_IPI 10
#define KVM_HC_SCHED_YIELD 11
#define KVM_HC_MAP_GPA_RANGE 12
/*
* hypercalls use architecture specific
*/
#endif /* __LINUX_KVM_PARA_H */

View File

@ -17,6 +17,8 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_DIRTY_LOG_PAGE_OFFSET 64
#define KVM_GUESTDBG_USE_SW_BP 0x00010000
/*
* for KVM_GET_REGS and KVM_SET_REGS
*/
@ -72,6 +74,8 @@ struct kvm_fpu {
#define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1)
#define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2)
/* Debugging: Special instruction for software breakpoint */
#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3)
#define LOONGARCH_REG_SHIFT 3
#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))

View File

@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZFA,
KVM_RISCV_ISA_EXT_ZTSO,
KVM_RISCV_ISA_EXT_ZACAS,
KVM_RISCV_ISA_EXT_SSCOFPMF,
KVM_RISCV_ISA_EXT_MAX,
};

View File

@ -695,6 +695,11 @@ enum sev_cmd_id {
/* Second time is the charm; improved versions of the above ioctls. */
KVM_SEV_INIT2,
/* SNP-specific commands */
KVM_SEV_SNP_LAUNCH_START = 100,
KVM_SEV_SNP_LAUNCH_UPDATE,
KVM_SEV_SNP_LAUNCH_FINISH,
KVM_SEV_NR_MAX,
};
@ -709,7 +714,9 @@ struct kvm_sev_cmd {
struct kvm_sev_init {
__u64 vmsa_features;
__u32 flags;
__u32 pad[9];
__u16 ghcb_version;
__u16 pad1;
__u32 pad2[8];
};
struct kvm_sev_launch_start {
@ -820,6 +827,48 @@ struct kvm_sev_receive_update_data {
__u32 pad2;
};
struct kvm_sev_snp_launch_start {
__u64 policy;
__u8 gosvw[16];
__u16 flags;
__u8 pad0[6];
__u64 pad1[4];
};
/* Kept in sync with firmware values for simplicity. */
#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
struct kvm_sev_snp_launch_update {
__u64 gfn_start;
__u64 uaddr;
__u64 len;
__u8 type;
__u8 pad0;
__u16 flags;
__u32 pad1;
__u64 pad2[4];
};
#define KVM_SEV_SNP_ID_BLOCK_SIZE 96
#define KVM_SEV_SNP_ID_AUTH_SIZE 4096
#define KVM_SEV_SNP_FINISH_DATA_SIZE 32
struct kvm_sev_snp_launch_finish {
__u64 id_block_uaddr;
__u64 id_auth_uaddr;
__u8 id_block_en;
__u8 auth_key_en;
__u8 vcek_disabled;
__u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
__u8 pad0[3];
__u16 flags;
__u64 pad1[4];
};
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
@ -870,5 +919,6 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SW_PROTECTED_VM 1
#define KVM_X86_SEV_VM 2
#define KVM_X86_SEV_ES_VM 3
#define KVM_X86_SNP_VM 4
#endif /* _ASM_X86_KVM_H */

View File

@ -0,0 +1 @@
#include "standard-headers/asm-x86/kvm_para.h"

View File

@ -0,0 +1,2 @@
#include "standard-headers/linux/kvm_para.h"
#include <asm/kvm_para.h>

View File

@ -179,12 +179,6 @@
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
/* Get the count of all virtqueues */
#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
/* Get the number of virtqueue groups. */
#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
/* Get the number of address spaces. */
#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
@ -228,10 +222,17 @@
#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
struct vhost_vring_state)
/* Get the count of all virtqueues */
#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
/* Get the number of virtqueue groups. */
#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
/* Get the queue size of a specific virtqueue.
* userspace set the vring index in vhost_vring_state.index
* kernel set the queue size in vhost_vring_state.num
*/
#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \
#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
struct vhost_vring_state)
#endif

View File

@ -336,9 +336,13 @@ if host_arch == 'i386' and not cc.links('''
qemu_common_flags = ['-march=i486'] + qemu_common_flags
endif
# ??? Only extremely old AMD cpus do not have cmpxchg16b.
# If we truly care, we should simply detect this case at
# runtime and generate the fallback to serial emulation.
# Assume x86-64-v2 (minus CMPXCHG16B for 32-bit code)
if host_arch == 'i386'
qemu_common_flags = ['-mfpmath=sse'] + qemu_common_flags
endif
if host_arch in ['i386', 'x86_64']
qemu_common_flags = ['-mpopcnt', '-msse4.2'] + qemu_common_flags
endif
if host_arch == 'x86_64'
qemu_common_flags = ['-mcx16'] + qemu_common_flags
endif

View File

@ -47,6 +47,50 @@
'send-update', 'receive-update' ],
'if': 'TARGET_I386' }
##
# @SevGuestType:
#
# An enumeration indicating the type of SEV guest being run.
#
# @sev: The guest is a legacy SEV or SEV-ES guest.
#
# @sev-snp: The guest is an SEV-SNP guest.
#
# Since: 9.1
##
{ 'enum': 'SevGuestType',
'data': [ 'sev', 'sev-snp' ],
'if': 'TARGET_I386' }
##
# @SevGuestInfo:
#
# Information specific to legacy SEV/SEV-ES guests.
#
# @policy: SEV policy value
#
# @handle: SEV firmware handle
#
# Since: 2.12
##
{ 'struct': 'SevGuestInfo',
'data': { 'policy': 'uint32',
'handle': 'uint32' },
'if': 'TARGET_I386' }
##
# @SevSnpGuestInfo:
#
# Information specific to SEV-SNP guests.
#
# @snp-policy: SEV-SNP policy value
#
# Since: 9.1
##
{ 'struct': 'SevSnpGuestInfo',
'data': { 'snp-policy': 'uint64' },
'if': 'TARGET_I386' }
##
# @SevInfo:
#
@ -60,25 +104,25 @@
#
# @build-id: SEV FW build id
#
# @policy: SEV policy value
#
# @state: SEV guest state
#
# @handle: SEV firmware handle
# @sev-type: Type of SEV guest being run
#
# Since: 2.12
##
{ 'struct': 'SevInfo',
'data': { 'enabled': 'bool',
'api-major': 'uint8',
'api-minor' : 'uint8',
'build-id' : 'uint8',
'policy' : 'uint32',
'state' : 'SevState',
'handle' : 'uint32'
},
'if': 'TARGET_I386'
}
{ 'union': 'SevInfo',
'base': { 'enabled': 'bool',
'api-major': 'uint8',
'api-minor' : 'uint8',
'build-id' : 'uint8',
'state' : 'SevState',
'sev-type' : 'SevGuestType' },
'discriminator': 'sev-type',
'data': {
'sev': 'SevGuestInfo',
'sev-snp': 'SevSnpGuestInfo' },
'if': 'TARGET_I386' }
##
# @query-sev:

View File

@ -875,20 +875,12 @@
'data': { '*filename': 'str' } }
##
# @SevGuestProperties:
# @SevCommonProperties:
#
# Properties for sev-guest objects.
# Properties common to objects that are derivatives of sev-common.
#
# @sev-device: SEV device to use (default: "/dev/sev")
#
# @dh-cert-file: guest owners DH certificate (encoded with base64)
#
# @session-file: guest owners session parameters (encoded with base64)
#
# @policy: SEV policy value (default: 0x1)
#
# @handle: SEV firmware handle (default: 0)
#
# @cbitpos: C-bit location in page table entry (default: 0)
#
# @reduced-phys-bits: number of bits in physical addresses that become
@ -898,6 +890,27 @@
# designated guest firmware page for measured boot with -kernel
# (default: false) (since 6.2)
#
# Since: 9.1
##
{ 'struct': 'SevCommonProperties',
'data': { '*sev-device': 'str',
'*cbitpos': 'uint32',
'reduced-phys-bits': 'uint32',
'*kernel-hashes': 'bool' } }
##
# @SevGuestProperties:
#
# Properties for sev-guest objects.
#
# @dh-cert-file: guest owners DH certificate (encoded with base64)
#
# @session-file: guest owners session parameters (encoded with base64)
#
# @policy: SEV policy value (default: 0x1)
#
# @handle: SEV firmware handle (default: 0)
#
# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
# The newer KVM_SEV_INIT2 interface syncs additional vCPU
# state when initializing the VMSA structures, which will
@ -909,16 +922,69 @@
# Since: 2.12
##
{ 'struct': 'SevGuestProperties',
'data': { '*sev-device': 'str',
'*dh-cert-file': 'str',
'base': 'SevCommonProperties',
'data': { '*dh-cert-file': 'str',
'*session-file': 'str',
'*policy': 'uint32',
'*handle': 'uint32',
'*cbitpos': 'uint32',
'reduced-phys-bits': 'uint32',
'*kernel-hashes': 'bool',
'*legacy-vm-type': 'bool' } }
##
# @SevSnpGuestProperties:
#
# Properties for sev-snp-guest objects. Most of these are direct
# arguments for the KVM_SNP_* interfaces documented in the Linux
# kernel source under
# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn
# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands
# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9).
#
# More usage information is also available in the QEMU source tree
# under docs/amd-memory-encryption.
#
# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as
# defined in the SEV-SNP firmware ABI (default: 0x30000)
#
# @guest-visible-workarounds: 16-byte, base64-encoded blob to report
# hypervisor-defined workarounds, corresponding to the 'GOSVW'
# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP
# firmware ABI (default: all-zero)
#
# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block'
# structure for the SNP_LAUNCH_FINISH command defined in the
# SEV-SNP firmware ABI (default: all-zero)
#
# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID
# Authentication Information Structure' for the SNP_LAUNCH_FINISH
# command defined in the SEV-SNP firmware ABI (default: all-zero)
#
# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY'
# field defined in the SEV-SNP firmware ABI (default: false)
#
# @host-data: 32-byte, base64-encoded, user-defined blob to provide to
# the guest, as documented for the 'HOST_DATA' parameter of the
# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default:
# all-zero)
#
# @vcek-disabled: Guests are by default allowed to choose between VLEK
# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip
# Endorsement Key) when requesting attestation reports from
# firmware. Set this to true to disable the use of VCEK.
# (default: false) (since: 9.1)
#
# Since: 9.1
##
{ 'struct': 'SevSnpGuestProperties',
'base': 'SevCommonProperties',
'data': {
'*policy': 'uint64',
'*guest-visible-workarounds': 'str',
'*id-block': 'str',
'*id-auth': 'str',
'*author-key-enabled': 'bool',
'*host-data': 'str',
'*vcek-disabled': 'bool' } }
##
# @ThreadContextProperties:
#
@ -997,6 +1063,7 @@
{ 'name': 'secret_keyring',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest',
'sev-snp-guest',
'thread-context',
's390-pv-guest',
'throttle-group',
@ -1067,6 +1134,7 @@
'secret_keyring': { 'type': 'SecretKeyringProperties',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest': 'SevGuestProperties',
'sev-snp-guest': 'SevSnpGuestProperties',
'thread-context': 'ThreadContextProperties',
'throttle-group': 'ThrottleGroupProperties',
'tls-creds-anon': 'TlsCredsAnonProperties',

View File

@ -63,6 +63,7 @@ cp_portable() {
-e 'linux/kernel' \
-e 'linux/sysinfo' \
-e 'asm/setup_data.h' \
-e 'asm/kvm_para.h' \
> /dev/null
then
echo "Unexpected #include in input file $f".
@ -70,6 +71,15 @@ cp_portable() {
fi
header=$(basename "$f");
if test -z "$arch"; then
# Let users of include/standard-headers/linux/ headers pick the
# asm-* header that they care about
arch_cmd='/<asm\/\([^>]*\)>/d'
else
arch_cmd='s/<asm\/\([^>]*\)>/"standard-headers\/asm-'$arch'\/\1"/'
fi
sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \
-e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \
-e 's/u\([0-9][0-9]*\)/uint\1_t/g' \
@ -78,7 +88,7 @@ cp_portable() {
-e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \
-e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \
-e 's/<linux\/\([^>]*\)>/"standard-headers\/linux\/\1"/' \
-e 's/<asm\/\([^>]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \
-e "$arch_cmd" \
-e 's/__bitwise//' \
-e 's/__attribute__((packed))/QEMU_PACKED/' \
-e 's/__inline__/inline/' \
@ -118,7 +128,14 @@ for arch in $ARCHLIST; do
rm -rf "$output/linux-headers/asm-$arch"
mkdir -p "$output/linux-headers/asm-$arch"
for header in kvm.h unistd.h bitsperlong.h mman.h; do
cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch"
if test -f "$hdrdir/include/asm/$header"; then
cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch"
elif test -f "$hdrdir/include/asm-generic/$header"; then
# not installed as <asm/$header>, but used as such in kernel sources
cat <<EOF >$output/linux-headers/asm-$arch/$header
#include <asm-generic/$header>
EOF
fi
done
if [ $arch = mips ]; then
@ -151,7 +168,12 @@ for arch in $ARCHLIST; do
cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/"
cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/"
cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch"
cat <<EOF >$output/linux-headers/asm-$arch/kvm_para.h
#include "standard-headers/asm-$arch/kvm_para.h"
EOF
# Remove everything except the macros from bootparam.h avoiding the
# unnecessary import of several video/ist/etc headers
sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \
@ -201,6 +223,10 @@ if [ -d "$linux/LICENSES" ]; then
done
fi
cat <<EOF >$output/linux-headers/linux/kvm_para.h
#include "standard-headers/linux/kvm_para.h"
#include <asm/kvm_para.h>
EOF
cat <<EOF >$output/linux-headers/linux/virtio_config.h
#include "standard-headers/linux/virtio_config.h"
EOF
@ -223,11 +249,14 @@ for i in "$hdrdir"/include/linux/*virtio*.h \
"$hdrdir/include/linux/ethtool.h" \
"$hdrdir/include/linux/const.h" \
"$hdrdir/include/linux/kernel.h" \
"$hdrdir/include/linux/kvm_para.h" \
"$hdrdir/include/linux/vhost_types.h" \
"$hdrdir/include/linux/sysinfo.h" \
"$hdrdir/include/misc/pvpanic.h"; do
"$hdrdir/include/linux/sysinfo.h"; do
cp_portable "$i" "$output/include/standard-headers/linux"
done
mkdir -p "$output/include/standard-headers/misc"
cp_portable "$hdrdir/include/misc/pvpanic.h" \
"$output/include/standard-headers/misc"
mkdir -p "$output/include/standard-headers/drm"
cp_portable "$hdrdir/include/drm/drm_fourcc.h" \
"$output/include/standard-headers/drm"

View File

@ -3649,6 +3649,30 @@ bool memory_region_init_ram(MemoryRegion *mr,
return true;
}
/*
 * Initialize @mr as RAM flagged RAM_GUEST_MEMFD and register it with
 * vmstate_register_ram().
 *
 * @mr, @owner, @name, @size and @errp are forwarded unchanged to
 * memory_region_init_ram_flags_nomigrate().
 *
 * Returns: true if that initialization succeeded (the region is then
 * registered for migration), false otherwise (nothing is registered).
 */
bool memory_region_init_ram_guest_memfd(MemoryRegion *mr,
                                        Object *owner,
                                        const char *name,
                                        uint64_t size,
                                        Error **errp)
{
    bool ok = memory_region_init_ram_flags_nomigrate(mr, owner, name, size,
                                                     RAM_GUEST_MEMFD, errp);

    if (ok) {
        /*
         * DEVICE() will assert if owner is neither NULL nor a DeviceState.
         * The owner is only needed here to derive a unique name for
         * migration. TODO: Ideally there would be a naming scheme for
         * Objects which are not DeviceStates, in which case this
         * restriction could be relaxed.
         */
        vmstate_register_ram(mr, DEVICE(owner));
    }

    return ok;
}
bool memory_region_init_rom(MemoryRegion *mr,
Object *owner,
const char *name,

View File

@ -6979,6 +6979,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (sev_enabled()) {
*eax = 0x2;
*eax |= sev_es_enabled() ? 0x8 : 0;
*eax |= sev_snp_enabled() ? 0x10 : 0;
*ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
*ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
}

View File

@ -21,6 +21,7 @@
#include <sys/syscall.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include "standard-headers/asm-x86/kvm_para.h"
#include "hw/xen/interface/arch-x86/cpuid.h"
@ -167,6 +168,7 @@ static const char *vm_type_name[] = {
[KVM_X86_DEFAULT_VM] = "default",
[KVM_X86_SEV_VM] = "SEV",
[KVM_X86_SEV_ES_VM] = "SEV-ES",
[KVM_X86_SNP_VM] = "SEV-SNP",
};
bool kvm_is_vm_type_supported(int type)
@ -208,6 +210,13 @@ int kvm_get_vm_type(MachineState *ms)
return kvm_type;
}
/*
 * Enable KVM_CAP_EXIT_HYPERCALL on the current accelerator's KVM state,
 * passing @enable_mask as the capability argument.
 *
 * Returns: true when kvm_vm_enable_cap() reports 0, false otherwise.
 */
bool kvm_enable_hypercall(uint64_t enable_mask)
{
    KVMState *state = KVM_STATE(current_accel());

    return kvm_vm_enable_cap(state, KVM_CAP_EXIT_HYPERCALL, 0,
                             enable_mask) == 0;
}
bool kvm_has_smm(void)
{
return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
@ -5321,6 +5330,50 @@ static bool host_supports_vmx(void)
return ecx & CPUID_EXT_VMX;
}
/*
* Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE
* to service guest-initiated memory attribute update requests so that
* KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be
* backed by the private memory pool provided by guest_memfd, and as such
* is only applicable to guest_memfd-backed guests (e.g. SNP/TDX).
*
* Other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live
* migration, are not implemented here currently.
*
* For the guest_memfd use-case, these exits will generally be synthesized
* by KVM based on platform-specific hypercalls, like GHCB requests in the
* case of SEV-SNP, and not issued directly within the guest through the
* KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is
* not actually advertised to guests via the KVM CPUID feature bit, as
* opposed to SEV live migration where it would be. Since it is unlikely the
* SEV live migration use-case would be useful for guest-memfd backed guests,
* because private/shared page tracking is already provided through other
* means, these 2 use-cases should be treated as being mutually-exclusive.
*/
static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
{
    uint64_t start, length, attrs;

    /* Only machines that require guest_memfd are serviced here. */
    if (!machine_require_guest_memfd(current_machine)) {
        return -EINVAL;
    }

    /* args[0] is the GPA, args[1] a count of target pages, args[2] flags. */
    start = run->hypercall.args[0];
    length = run->hypercall.args[1] * TARGET_PAGE_SIZE;
    attrs = run->hypercall.args[2];

    trace_kvm_hc_map_gpa_range(start, length, attrs, run->hypercall.flags);

    /*
     * Only the ENCRYPTED attribute bit is forwarded; presumably non-zero
     * requests conversion to private memory -- confirm against
     * kvm_convert_memory().
     */
    return kvm_convert_memory(start, length,
                              attrs & KVM_MAP_GPA_RANGE_ENCRYPTED);
}
/*
 * Dispatch a KVM_EXIT_HYPERCALL exit by hypercall number.
 *
 * Only KVM_HC_MAP_GPA_RANGE is handled; every other number yields -EINVAL.
 */
static int kvm_handle_hypercall(struct kvm_run *run)
{
    switch (run->hypercall.nr) {
    case KVM_HC_MAP_GPA_RANGE:
        return kvm_handle_hc_map_gpa_range(run);
    default:
        return -EINVAL;
    }
}
#define VMX_INVALID_GUEST_STATE 0x80000021
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
@ -5415,6 +5468,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
ret = kvm_xen_handle_exit(cpu, &run->xen);
break;
#endif
case KVM_EXIT_HYPERCALL:
ret = kvm_handle_hypercall(run);
break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1;

View File

@ -33,6 +33,7 @@
bool kvm_has_smm(void);
bool kvm_enable_x2apic(void);
bool kvm_hv_vpindex_settable(void);
bool kvm_enable_hypercall(uint64_t enable_mask);
bool kvm_enable_sgx_provisioning(KVMState *s);
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);

View File

@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %"
kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d"
kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d"
kvm_x86_update_msi_routes(int num) "Updated %d MSI routes"
kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64
# xen-emu.c
kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64

View File

@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret,
error_setg(errp, "SEV is not available in this QEMU");
}
int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
{
g_assert_not_reached();
}
@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict)
{
monitor_printf(mon, "SEV is not available in this QEMU\n");
}
/*
 * No-op stub: the flash contents are not inspected and nothing is recorded.
 *
 * NOTE(review): the neighbouring stubs report "SEV is not available in this
 * QEMU", so this is presumably the variant built without SEV support --
 * confirm against the build configuration.
 */
void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size)
{
}

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,10 @@
#include "exec/confidential-guest-support.h"
#define TYPE_SEV_COMMON "sev-common"
#define TYPE_SEV_GUEST "sev-guest"
#define TYPE_SEV_SNP_GUEST "sev-snp-guest"
#define SEV_POLICY_NODBG 0x1
#define SEV_POLICY_NOKS 0x2
#define SEV_POLICY_ES 0x4
@ -27,6 +31,9 @@
#define SEV_POLICY_DOMAIN 0x10
#define SEV_POLICY_SEV 0x20
#define SEV_SNP_POLICY_SMT 0x10000
#define SEV_SNP_POLICY_DBG 0x80000
typedef struct SevKernelLoaderContext {
char *setup_data;
size_t setup_size;
@ -41,20 +48,24 @@ typedef struct SevKernelLoaderContext {
#ifdef CONFIG_SEV
bool sev_enabled(void);
bool sev_es_enabled(void);
bool sev_snp_enabled(void);
#else
#define sev_enabled() 0
#define sev_es_enabled() 0
#define sev_snp_enabled() 0
#endif
uint32_t sev_get_cbit_position(void);
uint32_t sev_get_reduced_phys_bits(void);
bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp);
int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp);
int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp);
int sev_inject_launch_secret(const char *hdr, const char *secret,
uint64_t gpa, Error **errp);
int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
void sev_es_set_reset_vector(CPUState *cpu);
void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size);
#endif

View File

@ -202,6 +202,7 @@
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,
#define op0_Mw .special = X86_SPECIAL_Op0_Mw,
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
@ -1576,9 +1577,10 @@ static const X86OpEntry opcodes_root[256] = {
[0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None),
[0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None),
[0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None),
[0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None),
/* Missing in Table A-2: memory destination is always 16-bit. */
[0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None, op0_Mw),
[0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg),
[0x8E] = X86_OP_ENTRY3(MOV, S,w, E,v, None, None),
[0x8E] = X86_OP_ENTRY3(MOV, S,w, E,w, None, None),
[0x8F] = X86_OP_GROUPw(group1A, E,v),
[0x98] = X86_OP_ENTRY1(CBW, 0,v), /* rAX */
@ -2041,9 +2043,9 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
case X86_FEAT_PCLMULQDQ:
return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
case X86_FEAT_SSE:
return (s->cpuid_ext_features & CPUID_SSE);
return (s->cpuid_features & CPUID_SSE);
case X86_FEAT_SSE2:
return (s->cpuid_ext_features & CPUID_SSE2);
return (s->cpuid_features & CPUID_SSE2);
case X86_FEAT_SSE3:
return (s->cpuid_ext_features & CPUID_EXT_SSE3);
case X86_FEAT_SSSE3:
@ -2514,6 +2516,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
s->override = -1;
break;
case X86_SPECIAL_Op0_Mw:
assert(decode.op[0].unit == X86_OP_INT);
if (decode.op[0].has_ea) {
decode.op[0].ot = MO_16;
}
break;
default:
break;
}

View File

@ -203,6 +203,9 @@ typedef enum X86InsnSpecial {
/* When loaded into s->T0, register operand 1 is zero/sign extended. */
X86_SPECIAL_SExtT0,
X86_SPECIAL_ZExtT0,
/* Memory operand size of MOV from segment register is MO_16 */
X86_SPECIAL_Op0_Mw,
} X86InsnSpecial;
/*

View File

@ -3142,6 +3142,11 @@ void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
goto do_gpf;
}
/* SSE can be disabled, but only if AVX is disabled too. */
if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) {
goto do_gpf;
}
/* Disallow enabling unimplemented features. */
cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
ena = ((uint64_t)ena_hi << 32) | ena_lo;

View File

@ -3199,7 +3199,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
goto illegal_op;
}
if (s->prefix & PREFIX_REPZ) {
if (!(s->cpuid_ext_features & CPUID_7_0_ECX_RDPID)) {
if (!(s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_RDPID)) {
goto illegal_op;
}
gen_helper_rdpid(s->T0, tcg_env);

View File

@ -11,3 +11,6 @@ kvm_sev_launch_measurement(const char *value) "data %s"
kvm_sev_launch_finish(void) ""
kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d"
kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s"
kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s"
kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)"
kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s"

View File

@ -157,12 +157,6 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
#define SOFTMMU_RESERVE_REGS \
(tcg_use_softmmu ? (1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0)
/* For 64-bit, we always know that CMOV is available. */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov true
#else
# define have_cmov (cpuinfo & CPUINFO_CMOV)
#endif
#define have_bmi2 (cpuinfo & CPUINFO_BMI2)
#define have_lzcnt (cpuinfo & CPUINFO_LZCNT)
@ -1815,14 +1809,7 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
static void tcg_out_cmov(TCGContext *s, int jcc, int rexw,
TCGReg dest, TCGReg v1)
{
if (have_cmov) {
tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1);
} else {
TCGLabel *over = gen_new_label();
tcg_out_jxx(s, jcc ^ 1, over, 1);
tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
tcg_out_label(s, over);
}
tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1);
}
static void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond,

View File

@ -111,7 +111,6 @@ typedef enum {
#endif
#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
#define have_avx1 (cpuinfo & CPUINFO_AVX1)
#define have_avx2 (cpuinfo & CPUINFO_AVX2)
#define have_movbe (cpuinfo & CPUINFO_MOVBE)
@ -143,7 +142,7 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_clz_i32 1
#define TCG_TARGET_HAS_ctz_i32 1
#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
#define TCG_TARGET_HAS_ctpop_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_extract_i32 1
#define TCG_TARGET_HAS_sextract_i32 1
@ -178,7 +177,7 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_clz_i64 1
#define TCG_TARGET_HAS_ctz_i64 1
#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
#define TCG_TARGET_HAS_ctpop_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_extract_i64 1
#define TCG_TARGET_HAS_sextract_i64 0

View File

@ -32,7 +32,7 @@ class IntelIOMMU(LinuxTest):
def set_up_boot(self):
path = self.download_boot()
self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,scsi=off,' +
self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,' +
'drive=drv0,id=virtio-disk0,bootindex=1,'
'werror=stop,rerror=stop' + self.IOMMU_ADDON)
self.vm.add_args('-device', 'virtio-gpu-pci' + self.IOMMU_ADDON)

View File

@ -32,7 +32,7 @@ class SMMU(LinuxTest):
def set_up_boot(self):
path = self.download_boot()
self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,scsi=off,' +
self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,' +
'drive=drv0,id=virtio-disk0,bootindex=1,'
'werror=stop,rerror=stop' + self.IOMMU_ADDON)
self.vm.add_args('-drive',

View File

@ -235,7 +235,7 @@ class TuxRunBaselineTest(QemuSystemTest):
self.vm.add_args('-drive', 'file=' + qcow2.name +
',format=qcow2,if=none,id='
'drive-virtio-disk1',
'-device', 'virtio-blk-pci,scsi=off,bus=pci.0,'
'-device', 'virtio-blk-pci,bus=pci.0,'
'addr=0xb,drive=drive-virtio-disk1,id=virtio-disk1'
',bootindex=2')
self.common_tuxrun(csums=sums, drive="scsi-hd")

View File

@ -188,14 +188,14 @@ static biz_accel_fn const accel_table[] = {
static unsigned best_accel(void)
{
#ifdef CONFIG_AVX2_OPT
unsigned info = cpuinfo_init();
#ifdef CONFIG_AVX2_OPT
if (info & CPUINFO_AVX2) {
return 2;
}
#endif
return info & CPUINFO_SSE2 ? 1 : 0;
return 1;
}
#elif defined(__aarch64__) && defined(__ARM_NEON)

View File

@ -34,15 +34,11 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
if (max >= 1) {
__cpuid(1, a, b, c, d);
info |= (d & bit_CMOV ? CPUINFO_CMOV : 0);
info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0);
info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
/* Our AES support requires PSHUFB as well. */
info |= ((c & bit_AES) && (c & bit_SSSE3) ? CPUINFO_AES : 0);
/* NOTE: our AES support requires SSSE3 (PSHUFB) as well. */
info |= (c & bit_AES) ? CPUINFO_AES : 0;
/* For AVX features, we must check available and usable. */
if ((c & bit_AVX) && (c & bit_OSXSAVE)) {