mirror of
https://github.com/qemu/qemu.git
synced 2024-11-24 03:13:44 +08:00
Hi,
"Host Memory Backends" and "Memory devices" queue ("mem"): - Support and document VM templating with R/O files using a new "rom" parameter for memory-backend-file - Some cleanups and fixes around NVDIMMs and R/O file handling for guest RAM - Optimize ioeventfd updates by skipping address spaces that are not applicable -----BEGIN PGP SIGNATURE----- iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUJdykRHGRhdmlkQHJl ZGhhdC5jb20ACgkQTd4Q9wD/g1pf2w//akOUoYMuamySGjXtKLVyMKZkjIys+Ama k2C0xzsWAHBP572ezwHi8uxf5j9kzAjsw6GxDZ7FAamD9MhiohkEvkecloBx6f/c q3fVHblBNkG7v2urtf4+6PJtJvhzOST2SFXfWeYhO/vaA04AYCDgexv82JN3gA6B OS8WyOX62b8wILPSY2GLZ8IqpE9XnOYZwzVBn6YB1yo7ZkYEfXO6cA8nykNuNcOE vppqDo7uVIX6317FWj8ygxmzFfOaj0WT2MT2XFzEIDfg8BInQN8HC4mTn0hcVKMa N1y+eZH733CQKT+uNBRZ5YOeljOi4d6gEEyvkkA/L7e5D3Qg9hIdvHb4uryCFSWX Vt07OP1XLBwCZFobOC6sg+2gtTZJxxYK89e6ZzEd0454S24w5bnEteRAaCGOP0XL ww9xYULqhtZs55UC4rvZHJwdUAk1fIY4VqynwkeQXegvz6BxedNeEkJiiEU0Tizx N2VpsxAJ7H/LLSFeZoCRESo4azrH6U4n7S/eS1tkCniFqibfe2yIQCDoJVfb42ec gfg/vThCrDwHkIHzkMmoV8NndA7Q7SIkyMfYeEEBeZMeg8JzYll4DJEw/jQCacxh KRUa+AZvGlTJUq0mkvyOVfLki+iaehoIUuY1yvMrmdWijPO8n3YybmP9Ljhr8VdR 9MSYZe+I2v8= =iraT -----END PGP SIGNATURE----- Merge tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu into staging Hi, "Host Memory Backends" and "Memory devices" queue ("mem"): - Support and document VM templating with R/O files using a new "rom" parameter for memory-backend-file - Some cleanups and fixes around NVDIMMs and R/O file handling for guest RAM - Optimize ioeventfd updates by skipping address spaces that are not applicable # -----BEGIN PGP SIGNATURE----- # # iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUJdykRHGRhdmlkQHJl # ZGhhdC5jb20ACgkQTd4Q9wD/g1pf2w//akOUoYMuamySGjXtKLVyMKZkjIys+Ama # k2C0xzsWAHBP572ezwHi8uxf5j9kzAjsw6GxDZ7FAamD9MhiohkEvkecloBx6f/c # q3fVHblBNkG7v2urtf4+6PJtJvhzOST2SFXfWeYhO/vaA04AYCDgexv82JN3gA6B # OS8WyOX62b8wILPSY2GLZ8IqpE9XnOYZwzVBn6YB1yo7ZkYEfXO6cA8nykNuNcOE # 
vppqDo7uVIX6317FWj8ygxmzFfOaj0WT2MT2XFzEIDfg8BInQN8HC4mTn0hcVKMa # N1y+eZH733CQKT+uNBRZ5YOeljOi4d6gEEyvkkA/L7e5D3Qg9hIdvHb4uryCFSWX # Vt07OP1XLBwCZFobOC6sg+2gtTZJxxYK89e6ZzEd0454S24w5bnEteRAaCGOP0XL # ww9xYULqhtZs55UC4rvZHJwdUAk1fIY4VqynwkeQXegvz6BxedNeEkJiiEU0Tizx # N2VpsxAJ7H/LLSFeZoCRESo4azrH6U4n7S/eS1tkCniFqibfe2yIQCDoJVfb42ec # gfg/vThCrDwHkIHzkMmoV8NndA7Q7SIkyMfYeEEBeZMeg8JzYll4DJEw/jQCacxh # KRUa+AZvGlTJUq0mkvyOVfLki+iaehoIUuY1yvMrmdWijPO8n3YybmP9Ljhr8VdR # 9MSYZe+I2v8= # =iraT # -----END PGP SIGNATURE----- # gpg: Signature made Tue 19 Sep 2023 06:25:45 EDT # gpg: using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A # gpg: issuer "david@redhat.com" # gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown] # gpg: aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full] # gpg: aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. 
# Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D FCCA 4DDE 10F7 00FF 835A * tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu: memory: avoid updating ioeventfds for some address_space machine: Improve error message when using default RAM backend id softmmu/physmem: Hint that "readonly=on,rom=off" exists when opening file R/W for private mapping fails docs: Start documenting VM templating docs: Don't mention "-mem-path" in multi-process.rst softmmu/physmem: Never return directories from file_ram_open() softmmu/physmem: Fail creation of new files in file_ram_open() with readonly=true softmmu/physmem: Bail out early in ram_block_discard_range() with readonly files softmmu/physmem: Remap with proper protection in qemu_ram_remap() backends/hostmem-file: Add "rom" property to support VM templating with R/O files softmmu/physmem: Distinguish between file access mode and mmap protection nvdimm: Reject writing label data to ROM instead of crashing QEMU Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
4907644841
@ -2961,6 +2961,7 @@ M: Igor Mammedov <imammedo@redhat.com>
|
||||
S: Maintained
|
||||
F: backends/hostmem*.c
|
||||
F: include/sysemu/hostmem.h
|
||||
F: docs/system/vm-templating.rst
|
||||
T: git https://gitlab.com/ehabkost/qemu.git machine-next
|
||||
|
||||
Cryptodev Backends
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include "sysemu/hostmem.h"
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "qom/object.h"
|
||||
#include "qapi/visitor.h"
|
||||
#include "qapi/qapi-visit-common.h"
|
||||
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendFile, MEMORY_BACKEND_FILE)
|
||||
|
||||
@ -31,6 +33,7 @@ struct HostMemoryBackendFile {
|
||||
bool discard_data;
|
||||
bool is_pmem;
|
||||
bool readonly;
|
||||
OnOffAuto rom;
|
||||
};
|
||||
|
||||
static void
|
||||
@ -53,15 +56,39 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
switch (fb->rom) {
|
||||
case ON_OFF_AUTO_AUTO:
|
||||
/* Traditionally, opening the file readonly always resulted in ROM. */
|
||||
fb->rom = fb->readonly ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
|
||||
break;
|
||||
case ON_OFF_AUTO_ON:
|
||||
if (!fb->readonly) {
|
||||
error_setg(errp, "property 'rom' = 'on' is not supported with"
|
||||
" 'readonly' = 'off'");
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case ON_OFF_AUTO_OFF:
|
||||
if (fb->readonly && backend->share) {
|
||||
error_setg(errp, "property 'rom' = 'off' is incompatible with"
|
||||
" 'readonly' = 'on' and 'share' = 'on'");
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
|
||||
ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
|
||||
ram_flags |= RAM_NAMED_FILE;
|
||||
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
|
||||
backend->size, fb->align, ram_flags,
|
||||
fb->mem_path, fb->offset, fb->readonly,
|
||||
errp);
|
||||
fb->mem_path, fb->offset, errp);
|
||||
g_free(name);
|
||||
#endif
|
||||
}
|
||||
@ -201,6 +228,32 @@ static void file_memory_backend_set_readonly(Object *obj, bool value,
|
||||
fb->readonly = value;
|
||||
}
|
||||
|
||||
/*
 * Getter registered for the "rom" property of the file memory backend.
 *
 * Reads the backend's current OnOffAuto setting and hands it to the
 * visitor @v under the property name @name; any visitor failure is
 * reported through @errp. The value is visited through a local copy
 * rather than through fb->rom itself. @opaque is unused.
 */
static void file_memory_backend_get_rom(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
Error **errp)
|
||||
{
|
||||
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
|
||||
OnOffAuto rom = fb->rom;
|
||||
|
|
||||
visit_type_OnOffAuto(v, name, &rom, errp);
|
||||
}
|
||||
|
||||
/*
 * Setter registered for the "rom" property of the file memory backend.
 *
 * If host_memory_backend_mr_inited() reports that the backend's memory
 * region is already initialized, the change is refused and an error is
 * stored in @errp. Otherwise the OnOffAuto value supplied by the visitor
 * @v is written straight into fb->rom. @opaque is unused.
 */
static void file_memory_backend_set_rom(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
Error **errp)
|
||||
{
|
||||
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
|
||||
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
|
||||
|
|
||||
/* Refuse to change the property once the memory region exists. */
if (host_memory_backend_mr_inited(backend)) {
|
||||
error_setg(errp, "cannot change property '%s' of %s.", name,
|
||||
object_get_typename(obj));
|
||||
return;
|
||||
}
|
||||
|
|
||||
visit_type_OnOffAuto(v, name, &fb->rom, errp);
|
||||
}
|
||||
|
||||
static void file_backend_unparent(Object *obj)
|
||||
{
|
||||
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
|
||||
@ -243,6 +296,10 @@ file_backend_class_init(ObjectClass *oc, void *data)
|
||||
object_class_property_add_bool(oc, "readonly",
|
||||
file_memory_backend_get_readonly,
|
||||
file_memory_backend_set_readonly);
|
||||
object_class_property_add(oc, "rom", "OnOffAuto",
|
||||
file_memory_backend_get_rom, file_memory_backend_set_rom, NULL, NULL);
|
||||
object_class_property_set_description(oc, "rom",
|
||||
"Whether to create Read Only Memory (ROM)");
|
||||
}
|
||||
|
||||
static void file_backend_instance_finalize(Object *o)
|
||||
|
@ -409,8 +409,9 @@ the initial messages sent to the emulation process is a guest memory
|
||||
table. Each entry in this table consists of a file descriptor and size
|
||||
that the emulation process can ``mmap()`` to directly access guest
|
||||
memory, similar to ``vhost_user_set_mem_table()``. Note guest memory
|
||||
must be backed by file descriptors, such as when QEMU is given the
|
||||
*-mem-path* command line option.
|
||||
must be backed by shared file-backed memory, for example, using
|
||||
*-object memory-backend-file,share=on* and setting that memory backend
|
||||
as RAM for the machine.
|
||||
|
||||
IOMMU operations
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
@ -38,3 +38,4 @@ or Hypervisor.Framework.
|
||||
security
|
||||
multi-process
|
||||
confidential-guest-support
|
||||
vm-templating
|
||||
|
125
docs/system/vm-templating.rst
Normal file
125
docs/system/vm-templating.rst
Normal file
@ -0,0 +1,125 @@
|
||||
QEMU VM templating
|
||||
==================
|
||||
|
||||
This document explains how to use VM templating in QEMU.
|
||||
|
||||
For now, the focus is on VM memory aspects, and not on how to save and
|
||||
restore other VM state (i.e., migrate-to-file with ``x-ignore-shared``).
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
With VM templating, a single template VM serves as the starting point for
|
||||
new VMs. This allows for fast and efficient replication of VMs, resulting
|
||||
in fast startup times and reduced memory consumption.
|
||||
|
||||
Conceptually, the VM state is frozen, to then be used as a basis for new
|
||||
VMs. The Copy-On-Write mechanism in the operating system makes sure that
|
||||
new VMs are able to read template VM memory; however, any modifications
|
||||
stay private and don't modify the original template VM or any other
|
||||
created VM.
|
||||
|
||||
!!! Security Alert !!!
|
||||
----------------------
|
||||
|
||||
When effectively cloning VMs by VM templating, hardware identifiers
|
||||
(such as UUIDs and NIC MAC addresses), and similar data in the guest OS
|
||||
(such as machine IDs, SSH keys, certificates) that are supposed to be
|
||||
*unique* are no longer unique, which can be a security concern.
|
||||
|
||||
Please be aware of these implications and how to mitigate them for your
|
||||
use case, which might involve vmgenid, hot(un)plug of NIC, etc.
|
||||
|
||||
Memory configuration
|
||||
--------------------
|
||||
|
||||
In order to create the template VM, we have to make sure that VM memory
|
||||
ends up in a file, from where it can be reused for the new VMs:
|
||||
|
||||
Supply VM RAM via memory-backend-file, with ``share=on`` (modifications go
|
||||
to the file) and ``readonly=off`` (open the file writable). Note that
|
||||
``readonly=off`` is implicit.
|
||||
|
||||
In the following command-line example, a 2GB VM is created, whereby VM RAM
|
||||
is to be stored in the ``template`` file.
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
|qemu_system| [...] -m 2g \\
|
||||
-object memory-backend-file,id=pc.ram,mem-path=template,size=2g,share=on,... \\
|
||||
-machine q35,memory-backend=pc.ram
|
||||
|
||||
If multiple memory backends are used (vNUMA, DIMMs), configure all
|
||||
memory backends accordingly.
|
||||
|
||||
Once the VM is in the desired state, stop the VM and save other VM state,
|
||||
leaving the current state of VM RAM residing in the file.
|
||||
|
||||
In order to have a new VM be based on a template VM, we have to
|
||||
configure VM RAM to be based on a template VM RAM file; however, the VM
|
||||
should not be able to modify file content.
|
||||
|
||||
Supply VM RAM via memory-backend-file, with ``share=off`` (modifications
|
||||
stay private), ``readonly=on`` (open the file readonly) and ``rom=off``
|
||||
(don't make the memory readonly for the VM). Note that ``share=off`` is
|
||||
implicit and that other VM state has to be restored separately.
|
||||
|
||||
In the following command-line example, a 2GB VM is created based on the
|
||||
existing 2GB file ``template``.
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
|qemu_system| [...] -m 2g \\
|
||||
-object memory-backend-file,id=pc.ram,mem-path=template,size=2g,readonly=on,rom=off,... \\
|
||||
-machine q35,memory-backend=pc.ram
|
||||
|
||||
If multiple memory backends are used (vNUMA, DIMMs), configure all
|
||||
memory backends accordingly.
|
||||
|
||||
Note that ``-mem-path`` cannot be used for VM templating when creating the
|
||||
template VM or when starting new VMs based on a template VM.
|
||||
|
||||
Incompatible features
|
||||
---------------------
|
||||
|
||||
Some features are incompatible with VM templating, as the underlying file
|
||||
cannot be modified to discard VM RAM, or to actually share memory with
|
||||
another process.
|
||||
|
||||
vhost-user and multi-process QEMU
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
vhost-user and multi-process QEMU are incompatible with VM templating.
|
||||
These technologies rely on shared memory; however, the template VMs
|
||||
don't actually share memory (``share=off``), even though they are
|
||||
file-based.
|
||||
|
||||
virtio-balloon
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
virtio-balloon inflation and "free page reporting" cannot discard VM RAM
|
||||
and will repeatedly report errors. While virtio-balloon can be used
|
||||
for template VMs (e.g., report VM RAM stats), "free page reporting"
|
||||
should be disabled and the balloon should not be inflated.
|
||||
|
||||
virtio-mem
|
||||
~~~~~~~~~~
|
||||
|
||||
virtio-mem cannot discard VM RAM that is managed by the virtio-mem
|
||||
device. virtio-mem will fail early when realizing the device. To use
|
||||
VM templating with virtio-mem, either hotplug virtio-mem devices to the
|
||||
new VM, or don't supply any memory to the template VM using virtio-mem
|
||||
(requested-size=0), not using a template VM file as memory backend for the
|
||||
virtio-mem device.
|
||||
|
||||
VM migration
|
||||
~~~~~~~~~~~~
|
||||
|
||||
For VM migration, "x-release-ram" similarly relies on discarding of VM
|
||||
RAM on the migration source to free up migrated RAM, and will
|
||||
repeatedly report errors.
|
||||
|
||||
Postcopy live migration fails discarding VM RAM on the migration
|
||||
destination early and refuses to activate postcopy live migration. Note
|
||||
that postcopy live migration usually only works on selected filesystems
|
||||
(shmem/tmpfs, hugetlbfs) either way.
|
@ -670,7 +670,8 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
|
||||
}
|
||||
|
||||
static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
|
||||
uint32_t offset, uint32_t length)
|
||||
uint32_t offset, uint32_t length,
|
||||
bool is_write)
|
||||
{
|
||||
uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID;
|
||||
|
||||
@ -690,6 +691,10 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (is_write && nvdimm->readonly) {
|
||||
return NVDIMM_DSM_RET_STATUS_UNSUPPORT;
|
||||
}
|
||||
|
||||
return NVDIMM_DSM_RET_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@ -713,7 +718,7 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
|
||||
get_label_data->length);
|
||||
|
||||
status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset,
|
||||
get_label_data->length);
|
||||
get_label_data->length, false);
|
||||
if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
|
||||
nvdimm_dsm_no_payload(status, dsm_mem_addr);
|
||||
return;
|
||||
@ -752,7 +757,7 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
|
||||
set_label_data->length);
|
||||
|
||||
status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset,
|
||||
set_label_data->length);
|
||||
set_label_data->length, true);
|
||||
if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
|
||||
nvdimm_dsm_no_payload(status, dsm_mem_addr);
|
||||
return;
|
||||
|
@ -1359,6 +1359,7 @@ out:
|
||||
|
||||
void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp)
|
||||
{
|
||||
ERRP_GUARD();
|
||||
MachineClass *machine_class = MACHINE_GET_CLASS(machine);
|
||||
ObjectClass *oc = object_class_by_name(machine->cpu_type);
|
||||
CPUClass *cc;
|
||||
@ -1387,9 +1388,13 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
|
||||
numa_uses_legacy_mem()) {
|
||||
if (object_property_find(object_get_objects_root(),
|
||||
machine_class->default_ram_id)) {
|
||||
error_setg(errp, "object name '%s' is reserved for the default"
|
||||
" RAM backend, it can't be used for any other purposes."
|
||||
" Change the object's 'id' to something else",
|
||||
error_setg(errp, "object's id '%s' is reserved for the default"
|
||||
" RAM backend, it can't be used for any other purposes",
|
||||
machine_class->default_ram_id);
|
||||
error_append_hint(errp,
|
||||
"Change the object's 'id' to something else or disable"
|
||||
" automatic creation of the default RAM backend by setting"
|
||||
" 'memory-backend=%s' with '-machine'.\n",
|
||||
machine_class->default_ram_id);
|
||||
return;
|
||||
}
|
||||
|
@ -154,6 +154,9 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
|
||||
object_get_canonical_path_component(OBJECT(hostmem)));
|
||||
return;
|
||||
}
|
||||
if (memory_region_is_rom(mr)) {
|
||||
nvdimm->readonly = true;
|
||||
}
|
||||
|
||||
nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
|
||||
memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
|
||||
@ -207,15 +210,16 @@ static void nvdimm_unrealize(PCDIMMDevice *dimm)
|
||||
* label read/write functions.
|
||||
*/
|
||||
static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
|
||||
uint64_t offset)
|
||||
uint64_t offset, bool is_write)
|
||||
{
|
||||
assert((nvdimm->label_size >= size + offset) && (offset + size > offset));
|
||||
assert(!is_write || !nvdimm->readonly);
|
||||
}
|
||||
|
||||
static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf,
|
||||
uint64_t size, uint64_t offset)
|
||||
{
|
||||
nvdimm_validate_rw_label_data(nvdimm, size, offset);
|
||||
nvdimm_validate_rw_label_data(nvdimm, size, offset, false);
|
||||
|
||||
memcpy(buf, nvdimm->label_data + offset, size);
|
||||
}
|
||||
@ -229,7 +233,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
|
||||
"pmem", NULL);
|
||||
uint64_t backend_offset;
|
||||
|
||||
nvdimm_validate_rw_label_data(nvdimm, size, offset);
|
||||
nvdimm_validate_rw_label_data(nvdimm, size, offset, true);
|
||||
|
||||
if (!is_pmem) {
|
||||
memcpy(nvdimm->label_data + offset, buf, size);
|
||||
|
@ -320,7 +320,8 @@ static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
|
||||
|
||||
nvdimm = NVDIMM(drc->dev);
|
||||
if ((offset + len < offset) ||
|
||||
(nvdimm->label_size < len + offset)) {
|
||||
(nvdimm->label_size < len + offset) ||
|
||||
nvdimm->readonly) {
|
||||
return H_P2;
|
||||
}
|
||||
|
||||
|
@ -235,6 +235,12 @@ typedef struct IOMMUTLBEvent {
|
||||
/* RAM is an mmap-ed named file */
|
||||
#define RAM_NAMED_FILE (1 << 9)
|
||||
|
||||
/* RAM is mmap-ed read-only */
|
||||
#define RAM_READONLY (1 << 10)
|
||||
|
||||
/* RAM FD is opened read-only */
|
||||
#define RAM_READONLY_FD (1 << 11)
|
||||
|
||||
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
||||
IOMMUNotifierFlag flags,
|
||||
hwaddr start, hwaddr end,
|
||||
@ -1089,6 +1095,7 @@ struct AddressSpace {
|
||||
struct FlatView *current_map;
|
||||
|
||||
int ioeventfd_nb;
|
||||
int ioeventfd_notifiers;
|
||||
struct MemoryRegionIoeventfd *ioeventfds;
|
||||
QTAILQ_HEAD(, MemoryListener) listeners;
|
||||
QTAILQ_ENTRY(AddressSpace) address_spaces_link;
|
||||
@ -1331,10 +1338,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
|
||||
* @align: alignment of the region base address; if 0, the default alignment
|
||||
* (getpagesize()) will be used.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
* RAM_READONLY_FD
|
||||
* @path: the path in which to allocate the RAM.
|
||||
* @offset: offset within the file referenced by path
|
||||
* @readonly: true to open @path for reading, false for read/write.
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
*
|
||||
* Note that this function does not do anything to cause the data in the
|
||||
@ -1348,7 +1355,6 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
|
||||
uint32_t ram_flags,
|
||||
const char *path,
|
||||
ram_addr_t offset,
|
||||
bool readonly,
|
||||
Error **errp);
|
||||
|
||||
/**
|
||||
@ -1360,7 +1366,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
|
||||
* @name: the name of the region.
|
||||
* @size: size of the region.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED.
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
* RAM_READONLY_FD
|
||||
* @fd: the fd to mmap.
|
||||
* @offset: offset within the file referenced by fd
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
|
@ -108,10 +108,10 @@ long qemu_maxrampagesize(void);
|
||||
* @size: the size in bytes of the ram block
|
||||
* @mr: the memory region where the ram block is
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE.
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
* RAM_READONLY_FD
|
||||
* @mem_path or @fd: specify the backing file or device
|
||||
* @offset: Offset into target file
|
||||
* @readonly: true to open @path for reading, false for read/write.
|
||||
* @errp: pointer to Error*, to store an error if it happens
|
||||
*
|
||||
* Return:
|
||||
@ -120,10 +120,10 @@ long qemu_maxrampagesize(void);
|
||||
*/
|
||||
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
|
||||
uint32_t ram_flags, const char *mem_path,
|
||||
off_t offset, bool readonly, Error **errp);
|
||||
off_t offset, Error **errp);
|
||||
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
uint32_t ram_flags, int fd, off_t offset,
|
||||
bool readonly, Error **errp);
|
||||
Error **errp);
|
||||
|
||||
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
|
||||
MemoryRegion *mr, Error **errp);
|
||||
|
@ -77,6 +77,12 @@ struct NVDIMMDevice {
|
||||
*/
|
||||
bool unarmed;
|
||||
|
||||
/*
|
||||
* Whether our DIMM is backed by ROM, and even label data cannot be
|
||||
* written. If set, implies that "unarmed" is also set.
|
||||
*/
|
||||
bool readonly;
|
||||
|
||||
/*
|
||||
* The PPC64 - spapr requires each nvdimm device have a uuid.
|
||||
*/
|
||||
|
@ -668,6 +668,20 @@
|
||||
# @readonly: if true, the backing file is opened read-only; if false,
|
||||
# it is opened read-write. (default: false)
|
||||
#
|
||||
# @rom: whether to create Read Only Memory (ROM) that cannot be modified
|
||||
# by the VM. Any write attempts to such ROM will be denied. Most
|
||||
# use cases want writable RAM instead of ROM. However, selected use
|
||||
# cases, like R/O NVDIMMs, can benefit from ROM. If set to 'on',
|
||||
# create ROM; if set to 'off', create writable RAM; if set to
|
||||
# 'auto', the value of the @readonly property is used. This
|
||||
# property is primarily helpful when we want to have proper RAM in
|
||||
# configurations that would traditionally create ROM before this
|
||||
# property was introduced: VM templating, where we want to open a
|
||||
# file readonly (@readonly set to true) and mark the memory to be
|
||||
# private for QEMU (@share set to false). For this use case, we need
|
||||
# writable RAM instead of ROM, and want to set this property to 'off'.
|
||||
# (default: auto, since 8.2)
|
||||
#
|
||||
# Since: 2.1
|
||||
##
|
||||
{ 'struct': 'MemoryBackendFileProperties',
|
||||
@ -677,7 +691,8 @@
|
||||
'*discard-data': 'bool',
|
||||
'mem-path': 'str',
|
||||
'*pmem': { 'type': 'bool', 'if': 'CONFIG_LIBPMEM' },
|
||||
'*readonly': 'bool' } }
|
||||
'*readonly': 'bool',
|
||||
'*rom': 'OnOffAuto' } }
|
||||
|
||||
##
|
||||
# @MemoryBackendMemfdProperties:
|
||||
|
@ -5063,7 +5063,7 @@ SRST
|
||||
they are specified. Note that the 'id' property must be set. These
|
||||
objects are placed in the '/objects' path.
|
||||
|
||||
``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off``
|
||||
``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off,rom=on|off|auto``
|
||||
Creates a memory file backend object, which can be used to back
|
||||
the guest RAM with huge pages.
|
||||
|
||||
@ -5153,6 +5153,20 @@ SRST
|
||||
The ``readonly`` option specifies whether the backing file is opened
|
||||
read-only or read-write (default).
|
||||
|
||||
The ``rom`` option specifies whether to create Read Only Memory
|
||||
(ROM) that cannot be modified by the VM. Any write attempts to such
|
||||
ROM will be denied. Most use cases want proper RAM instead of ROM.
|
||||
However, selected use cases, like R/O NVDIMMs, can benefit from
|
||||
ROM. If set to ``on``, create ROM; if set to ``off``, create
|
||||
writable RAM; if set to ``auto`` (default), the value of the
|
||||
``readonly`` option is used. This option is primarily helpful when
|
||||
we want to have writable RAM in configurations that would
|
||||
traditionally create ROM before the ``rom`` option was introduced:
|
||||
VM templating, where we want to open a file readonly
|
||||
(``readonly=on``) and mark the memory to be private for QEMU
|
||||
(``share=off``). For this use case, we need writable RAM instead
|
||||
of ROM, and want to also set ``rom=off``.
|
||||
|
||||
``-object memory-backend-ram,id=id,merge=on|off,dump=on|off,share=on|off,prealloc=on|off,size=size,host-nodes=host-nodes,policy=default|preferred|bind|interleave``
|
||||
Creates a memory backend object, which can be used to back the
|
||||
guest RAM. Memory backend objects offer more control than the
|
||||
|
@ -842,6 +842,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
|
||||
AddrRange tmp;
|
||||
unsigned i;
|
||||
|
||||
if (!as->ioeventfd_notifiers) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is likely that the number of ioeventfds hasn't changed much, so use
|
||||
* the previous size as the starting value, with some headroom to avoid
|
||||
@ -1620,18 +1624,17 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
|
||||
uint32_t ram_flags,
|
||||
const char *path,
|
||||
ram_addr_t offset,
|
||||
bool readonly,
|
||||
Error **errp)
|
||||
{
|
||||
Error *err = NULL;
|
||||
memory_region_init(mr, owner, name, size);
|
||||
mr->ram = true;
|
||||
mr->readonly = readonly;
|
||||
mr->readonly = !!(ram_flags & RAM_READONLY);
|
||||
mr->terminates = true;
|
||||
mr->destructor = memory_region_destructor_ram;
|
||||
mr->align = align;
|
||||
mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path,
|
||||
offset, readonly, &err);
|
||||
offset, &err);
|
||||
if (err) {
|
||||
mr->size = int128_zero();
|
||||
object_unparent(OBJECT(mr));
|
||||
@ -1651,10 +1654,11 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
|
||||
Error *err = NULL;
|
||||
memory_region_init(mr, owner, name, size);
|
||||
mr->ram = true;
|
||||
mr->readonly = !!(ram_flags & RAM_READONLY);
|
||||
mr->terminates = true;
|
||||
mr->destructor = memory_region_destructor_ram;
|
||||
mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset,
|
||||
false, &err);
|
||||
&err);
|
||||
if (err) {
|
||||
mr->size = int128_zero();
|
||||
object_unparent(OBJECT(mr));
|
||||
@ -3075,6 +3079,10 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *as)
|
||||
}
|
||||
|
||||
listener_add_address_space(listener, as);
|
||||
|
||||
if (listener->eventfd_add || listener->eventfd_del) {
|
||||
as->ioeventfd_notifiers++;
|
||||
}
|
||||
}
|
||||
|
||||
void memory_listener_unregister(MemoryListener *listener)
|
||||
@ -3083,6 +3091,10 @@ void memory_listener_unregister(MemoryListener *listener)
|
||||
return;
|
||||
}
|
||||
|
||||
if (listener->eventfd_add || listener->eventfd_del) {
|
||||
listener->address_space->ioeventfd_notifiers--;
|
||||
}
|
||||
|
||||
listener_del_address_space(listener, listener->address_space);
|
||||
QTAILQ_REMOVE(&memory_listeners, listener, link);
|
||||
QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
|
||||
|
@ -1288,8 +1288,7 @@ static int64_t get_file_align(int fd)
|
||||
static int file_ram_open(const char *path,
|
||||
const char *region_name,
|
||||
bool readonly,
|
||||
bool *created,
|
||||
Error **errp)
|
||||
bool *created)
|
||||
{
|
||||
char *filename;
|
||||
char *sanitized_name;
|
||||
@ -1300,10 +1299,33 @@ static int file_ram_open(const char *path,
|
||||
for (;;) {
|
||||
fd = open(path, readonly ? O_RDONLY : O_RDWR);
|
||||
if (fd >= 0) {
|
||||
/*
|
||||
* open(O_RDONLY) won't fail with EISDIR. Check manually if we
|
||||
* opened a directory and fail similarly to how we fail ENOENT
|
||||
* in readonly mode. Note that mkstemp() would imply O_RDWR.
|
||||
*/
|
||||
if (readonly) {
|
||||
struct stat file_stat;
|
||||
|
||||
if (fstat(fd, &file_stat)) {
|
||||
close(fd);
|
||||
if (errno == EINTR) {
|
||||
continue;
|
||||
}
|
||||
return -errno;
|
||||
} else if (S_ISDIR(file_stat.st_mode)) {
|
||||
close(fd);
|
||||
return -EISDIR;
|
||||
}
|
||||
}
|
||||
/* @path names an existing file, use it */
|
||||
break;
|
||||
}
|
||||
if (errno == ENOENT) {
|
||||
if (readonly) {
|
||||
/* Refuse to create new, readonly files. */
|
||||
return -ENOENT;
|
||||
}
|
||||
/* @path names a file that doesn't exist, create it */
|
||||
fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
|
||||
if (fd >= 0) {
|
||||
@ -1333,10 +1355,7 @@ static int file_ram_open(const char *path,
|
||||
g_free(filename);
|
||||
}
|
||||
if (errno != EEXIST && errno != EINTR) {
|
||||
error_setg_errno(errp, errno,
|
||||
"can't open backing store %s for guest RAM",
|
||||
path);
|
||||
return -1;
|
||||
return -errno;
|
||||
}
|
||||
/*
|
||||
* Try again on EINTR and EEXIST. The latter happens when
|
||||
@ -1350,7 +1369,6 @@ static int file_ram_open(const char *path,
|
||||
static void *file_ram_alloc(RAMBlock *block,
|
||||
ram_addr_t memory,
|
||||
int fd,
|
||||
bool readonly,
|
||||
bool truncate,
|
||||
off_t offset,
|
||||
Error **errp)
|
||||
@ -1408,7 +1426,7 @@ static void *file_ram_alloc(RAMBlock *block,
|
||||
perror("ftruncate");
|
||||
}
|
||||
|
||||
qemu_map_flags = readonly ? QEMU_MAP_READONLY : 0;
|
||||
qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0;
|
||||
qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
|
||||
qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
|
||||
qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
|
||||
@ -1876,7 +1894,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
#ifdef CONFIG_POSIX
|
||||
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
uint32_t ram_flags, int fd, off_t offset,
|
||||
bool readonly, Error **errp)
|
||||
Error **errp)
|
||||
{
|
||||
RAMBlock *new_block;
|
||||
Error *local_err = NULL;
|
||||
@ -1884,7 +1902,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
|
||||
/* Just support these ram flags by now. */
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
|
||||
RAM_PROTECTED | RAM_NAMED_FILE)) == 0);
|
||||
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
|
||||
RAM_READONLY_FD)) == 0);
|
||||
|
||||
if (xen_enabled()) {
|
||||
error_setg(errp, "-mem-path not supported with Xen");
|
||||
@ -1919,8 +1938,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
new_block->used_length = size;
|
||||
new_block->max_length = size;
|
||||
new_block->flags = ram_flags;
|
||||
new_block->host = file_ram_alloc(new_block, size, fd, readonly,
|
||||
!file_size, offset, errp);
|
||||
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
|
||||
errp);
|
||||
if (!new_block->host) {
|
||||
g_free(new_block);
|
||||
return NULL;
|
||||
@ -1939,20 +1958,40 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
|
||||
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
|
||||
uint32_t ram_flags, const char *mem_path,
|
||||
off_t offset, bool readonly, Error **errp)
|
||||
off_t offset, Error **errp)
|
||||
{
|
||||
int fd;
|
||||
bool created;
|
||||
RAMBlock *block;
|
||||
|
||||
fd = file_ram_open(mem_path, memory_region_name(mr), readonly, &created,
|
||||
errp);
|
||||
fd = file_ram_open(mem_path, memory_region_name(mr),
|
||||
!!(ram_flags & RAM_READONLY_FD), &created);
|
||||
if (fd < 0) {
|
||||
error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM",
|
||||
mem_path);
|
||||
if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) &&
|
||||
fd == -EACCES) {
|
||||
/*
|
||||
* If we can open the file R/O (note: will never create a new file)
|
||||
* and we are dealing with a private mapping, there are still ways
|
||||
* to consume such files and get RAM instead of ROM.
|
||||
*/
|
||||
fd = file_ram_open(mem_path, memory_region_name(mr), true,
|
||||
&created);
|
||||
if (fd < 0) {
|
||||
return NULL;
|
||||
}
|
||||
assert(!created);
|
||||
close(fd);
|
||||
error_append_hint(errp, "Consider opening the backing store"
|
||||
" read-only but still creating writable RAM using"
|
||||
" '-object memory-backend-file,readonly=on,rom=off...'"
|
||||
" (see \"VM templating\" documentation)\n");
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, readonly,
|
||||
errp);
|
||||
block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp);
|
||||
if (!block) {
|
||||
if (created) {
|
||||
unlink(mem_path);
|
||||
@ -2070,6 +2109,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
|
||||
ram_addr_t offset;
|
||||
int flags;
|
||||
void *area, *vaddr;
|
||||
int prot;
|
||||
|
||||
RAMBLOCK_FOREACH(block) {
|
||||
offset = addr - block->offset;
|
||||
@ -2084,13 +2124,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
|
||||
flags |= block->flags & RAM_SHARED ?
|
||||
MAP_SHARED : MAP_PRIVATE;
|
||||
flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
|
||||
prot = PROT_READ;
|
||||
prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
|
||||
if (block->fd >= 0) {
|
||||
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
|
||||
flags, block->fd, offset + block->fd_offset);
|
||||
area = mmap(vaddr, length, prot, flags, block->fd,
|
||||
offset + block->fd_offset);
|
||||
} else {
|
||||
flags |= MAP_ANONYMOUS;
|
||||
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
|
||||
flags, -1, 0);
|
||||
area = mmap(vaddr, length, prot, flags, -1, 0);
|
||||
}
|
||||
if (area != vaddr) {
|
||||
error_report("Could not remap addr: "
|
||||
@ -3480,6 +3521,16 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
|
||||
* so a userfault will trigger.
|
||||
*/
|
||||
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
|
||||
/*
|
||||
* fallocate() will fail with readonly files. Let's print a
|
||||
* proper error message.
|
||||
*/
|
||||
if (rb->flags & RAM_READONLY_FD) {
|
||||
error_report("ram_block_discard_range: Discarding RAM"
|
||||
" with readonly files is not supported");
|
||||
goto err;
|
||||
|
||||
}
|
||||
/*
|
||||
* We'll discard data from the actual file, even though we only
|
||||
* have a MAP_PRIVATE mapping, possibly messing with other
|
||||
|
Loading…
Reference in New Issue
Block a user