"Host Memory Backends" and "Memory devices" queue ("mem"):
 - Support and document VM templating with R/O files using a new "rom"
   parameter for memory-backend-file
 - Some cleanups and fixes around NVDIMMs and R/O file handling for guest
   RAM
 - Optimize ioeventfd updates by skipping address spaces that are not
   applicable
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUJdykRHGRhdmlkQHJl
 ZGhhdC5jb20ACgkQTd4Q9wD/g1pf2w//akOUoYMuamySGjXtKLVyMKZkjIys+Ama
 k2C0xzsWAHBP572ezwHi8uxf5j9kzAjsw6GxDZ7FAamD9MhiohkEvkecloBx6f/c
 q3fVHblBNkG7v2urtf4+6PJtJvhzOST2SFXfWeYhO/vaA04AYCDgexv82JN3gA6B
 OS8WyOX62b8wILPSY2GLZ8IqpE9XnOYZwzVBn6YB1yo7ZkYEfXO6cA8nykNuNcOE
 vppqDo7uVIX6317FWj8ygxmzFfOaj0WT2MT2XFzEIDfg8BInQN8HC4mTn0hcVKMa
 N1y+eZH733CQKT+uNBRZ5YOeljOi4d6gEEyvkkA/L7e5D3Qg9hIdvHb4uryCFSWX
 Vt07OP1XLBwCZFobOC6sg+2gtTZJxxYK89e6ZzEd0454S24w5bnEteRAaCGOP0XL
 ww9xYULqhtZs55UC4rvZHJwdUAk1fIY4VqynwkeQXegvz6BxedNeEkJiiEU0Tizx
 N2VpsxAJ7H/LLSFeZoCRESo4azrH6U4n7S/eS1tkCniFqibfe2yIQCDoJVfb42ec
 gfg/vThCrDwHkIHzkMmoV8NndA7Q7SIkyMfYeEEBeZMeg8JzYll4DJEw/jQCacxh
 KRUa+AZvGlTJUq0mkvyOVfLki+iaehoIUuY1yvMrmdWijPO8n3YybmP9Ljhr8VdR
 9MSYZe+I2v8=
 =iraT
 -----END PGP SIGNATURE-----

Merge tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu into staging

Hi,

"Host Memory Backends" and "Memory devices" queue ("mem"):
- Support and document VM templating with R/O files using a new "rom"
  parameter for memory-backend-file
- Some cleanups and fixes around NVDIMMs and R/O file handling for guest
  RAM
- Optimize ioeventfd updates by skipping address spaces that are not
  applicable

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUJdykRHGRhdmlkQHJl
# ZGhhdC5jb20ACgkQTd4Q9wD/g1pf2w//akOUoYMuamySGjXtKLVyMKZkjIys+Ama
# k2C0xzsWAHBP572ezwHi8uxf5j9kzAjsw6GxDZ7FAamD9MhiohkEvkecloBx6f/c
# q3fVHblBNkG7v2urtf4+6PJtJvhzOST2SFXfWeYhO/vaA04AYCDgexv82JN3gA6B
# OS8WyOX62b8wILPSY2GLZ8IqpE9XnOYZwzVBn6YB1yo7ZkYEfXO6cA8nykNuNcOE
# vppqDo7uVIX6317FWj8ygxmzFfOaj0WT2MT2XFzEIDfg8BInQN8HC4mTn0hcVKMa
# N1y+eZH733CQKT+uNBRZ5YOeljOi4d6gEEyvkkA/L7e5D3Qg9hIdvHb4uryCFSWX
# Vt07OP1XLBwCZFobOC6sg+2gtTZJxxYK89e6ZzEd0454S24w5bnEteRAaCGOP0XL
# ww9xYULqhtZs55UC4rvZHJwdUAk1fIY4VqynwkeQXegvz6BxedNeEkJiiEU0Tizx
# N2VpsxAJ7H/LLSFeZoCRESo4azrH6U4n7S/eS1tkCniFqibfe2yIQCDoJVfb42ec
# gfg/vThCrDwHkIHzkMmoV8NndA7Q7SIkyMfYeEEBeZMeg8JzYll4DJEw/jQCacxh
# KRUa+AZvGlTJUq0mkvyOVfLki+iaehoIUuY1yvMrmdWijPO8n3YybmP9Ljhr8VdR
# 9MSYZe+I2v8=
# =iraT
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 19 Sep 2023 06:25:45 EDT
# gpg:                using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A
# gpg:                issuer "david@redhat.com"
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown]
# gpg:                 aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]
# gpg:                 aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D  FCCA 4DDE 10F7 00FF 835A

* tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu:
  memory: avoid updating ioeventfds for some address_space
  machine: Improve error message when using default RAM backend id
  softmmu/physmem: Hint that "readonly=on,rom=off" exists when opening file R/W for private mapping fails
  docs: Start documenting VM templating
  docs: Don't mention "-mem-path" in multi-process.rst
  softmmu/physmem: Never return directories from file_ram_open()
  softmmu/physmem: Fail creation of new files in file_ram_open() with readonly=true
  softmmu/physmem: Bail out early in ram_block_discard_range() with readonly files
  softmmu/physmem: Remap with proper protection in qemu_ram_remap()
  backends/hostmem-file: Add "rom" property to support VM templating with R/O files
  softmmu/physmem: Distinguish between file access mode and mmap protection
  nvdimm: Reject writing label data to ROM instead of crashing QEMU

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2023-09-19 13:22:18 -04:00
commit 4907644841
16 changed files with 354 additions and 49 deletions

View File

@ -2961,6 +2961,7 @@ M: Igor Mammedov <imammedo@redhat.com>
S: Maintained
F: backends/hostmem*.c
F: include/sysemu/hostmem.h
F: docs/system/vm-templating.rst
T: git https://gitlab.com/ehabkost/qemu.git machine-next
Cryptodev Backends

View File

@ -18,6 +18,8 @@
#include "sysemu/hostmem.h"
#include "qom/object_interfaces.h"
#include "qom/object.h"
#include "qapi/visitor.h"
#include "qapi/qapi-visit-common.h"
OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendFile, MEMORY_BACKEND_FILE)
@ -31,6 +33,7 @@ struct HostMemoryBackendFile {
bool discard_data;
bool is_pmem;
bool readonly;
OnOffAuto rom;
};
static void
@ -53,15 +56,39 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
return;
}
switch (fb->rom) {
case ON_OFF_AUTO_AUTO:
/* Traditionally, opening the file readonly always resulted in ROM. */
fb->rom = fb->readonly ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
break;
case ON_OFF_AUTO_ON:
if (!fb->readonly) {
error_setg(errp, "property 'rom' = 'on' is not supported with"
" 'readonly' = 'off'");
return;
}
break;
case ON_OFF_AUTO_OFF:
if (fb->readonly && backend->share) {
error_setg(errp, "property 'rom' = 'off' is incompatible with"
" 'readonly' = 'on' and 'share' = 'on'");
return;
}
break;
default:
assert(false);
}
name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
ram_flags |= RAM_NAMED_FILE;
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
backend->size, fb->align, ram_flags,
fb->mem_path, fb->offset, fb->readonly,
errp);
fb->mem_path, fb->offset, errp);
g_free(name);
#endif
}
@ -201,6 +228,32 @@ static void file_memory_backend_set_readonly(Object *obj, bool value,
fb->readonly = value;
}
static void file_memory_backend_get_rom(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
OnOffAuto rom = fb->rom;
visit_type_OnOffAuto(v, name, &rom, errp);
}
static void file_memory_backend_set_rom(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
if (host_memory_backend_mr_inited(backend)) {
error_setg(errp, "cannot change property '%s' of %s.", name,
object_get_typename(obj));
return;
}
visit_type_OnOffAuto(v, name, &fb->rom, errp);
}
static void file_backend_unparent(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@ -243,6 +296,10 @@ file_backend_class_init(ObjectClass *oc, void *data)
object_class_property_add_bool(oc, "readonly",
file_memory_backend_get_readonly,
file_memory_backend_set_readonly);
object_class_property_add(oc, "rom", "OnOffAuto",
file_memory_backend_get_rom, file_memory_backend_set_rom, NULL, NULL);
object_class_property_set_description(oc, "rom",
"Whether to create Read Only Memory (ROM)");
}
static void file_backend_instance_finalize(Object *o)

View File

@ -409,8 +409,9 @@ the initial messages sent to the emulation process is a guest memory
table. Each entry in this table consists of a file descriptor and size
that the emulation process can ``mmap()`` to directly access guest
memory, similar to ``vhost_user_set_mem_table()``. Note guest memory
must be backed by file descriptors, such as when QEMU is given the
*-mem-path* command line option.
must be backed by shared file-backed memory, for example, using
*-object memory-backend-file,share=on* and setting that memory backend
as RAM for the machine.
IOMMU operations
^^^^^^^^^^^^^^^^

View File

@ -38,3 +38,4 @@ or Hypervisor.Framework.
security
multi-process
confidential-guest-support
vm-templating

View File

@ -0,0 +1,125 @@
QEMU VM templating
==================
This document explains how to use VM templating in QEMU.
For now, the focus is on VM memory aspects, and not about how to save and
restore other VM state (i.e., migrate-to-file with ``x-ignore-shared``).
Overview
--------
With VM templating, a single template VM serves as the starting point for
new VMs. This allows for fast and efficient replication of VMs, resulting
in fast startup times and reduced memory consumption.
Conceptually, the VM state is frozen, to then be used as a basis for new
VMs. The Copy-On-Write mechanism in the operating systems makes sure that
new VMs are able to read template VM memory; however, any modifications
stay private and don't modify the original template VM or any other
created VM.
!!! Security Alert !!!
----------------------
When effectively cloning VMs by VM templating, hardware identifiers
(such as UUIDs and NIC MAC addresses), and similar data in the guest OS
(such as machine IDs, SSH keys, certificates) that are supposed to be
*unique* are no longer unique, which can be a security concern.
Please be aware of these implications and how to mitigate them for your
use case, which might involve vmgenid, hot(un)plug of NIC, etc..
Memory configuration
--------------------
In order to create the template VM, we have to make sure that VM memory
ends up in a file, from where it can be reused for the new VMs:
Supply VM RAM via memory-backend-file, with ``share=on`` (modifications go
to the file) and ``readonly=off`` (open the file writable). Note that
``readonly=off`` is implicit.
In the following command-line example, a 2GB VM is created, whereby VM RAM
is to be stored in the ``template`` file.
.. parsed-literal::
|qemu_system| [...] -m 2g \\
-object memory-backend-file,id=pc.ram,mem-path=template,size=2g,share=on,... \\
-machine q35,memory-backend=pc.ram
If multiple memory backends are used (vNUMA, DIMMs), configure all
memory backends accordingly.
Once the VM is in the desired state, stop the VM and save other VM state,
leaving the current state of VM RAM reside in the file.
In order to have a new VM be based on a template VM, we have to
configure VM RAM to be based on a template VM RAM file; however, the VM
should not be able to modify file content.
Supply VM RAM via memory-backend-file, with ``share=off`` (modifications
stay private), ``readonly=on`` (open the file readonly) and ``rom=off``
(don't make the memory readonly for the VM). Note that ``share=off`` is
implicit and that other VM state has to be restored separately.
In the following command-line example, a 2GB VM is created based on the
existing 2GB file ``template``.
.. parsed-literal::
|qemu_system| [...] -m 2g \\
-object memory-backend-file,id=pc.ram,mem-path=template,size=2g,readonly=on,rom=off,... \\
-machine q35,memory-backend=pc.ram
If multiple memory backends are used (vNUMA, DIMMs), configure all
memory backends accordingly.
Note that ``-mem-path`` cannot be used for VM templating when creating the
template VM or when starting new VMs based on a template VM.
Incompatible features
---------------------
Some features are incompatible with VM templating, as the underlying file
cannot be modified to discard VM RAM, or to actually share memory with
another process.
vhost-user and multi-process QEMU
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
vhost-user and multi-process QEMU are incompatible with VM templating.
These technologies rely on shared memory, however, the template VMs
don't actually share memory (``share=off``), even though they are
file-based.
virtio-balloon
~~~~~~~~~~~~~~
virtio-balloon inflation and "free page reporting" cannot discard VM RAM
and will repeatedly report errors. While virtio-balloon can be used
for template VMs (e.g., report VM RAM stats), "free page reporting"
should be disabled and the balloon should not be inflated.
virtio-mem
~~~~~~~~~~
virtio-mem cannot discard VM RAM that is managed by the virtio-mem
device. virtio-mem will fail early when realizing the device. To use
VM templating with virtio-mem, either hotplug virtio-mem devices to the
new VM, or don't supply any memory to the template VM using virtio-mem
(requested-size=0), not using a template VM file as memory backend for the
virtio-mem device.
VM migration
~~~~~~~~~~~~
For VM migration, "x-release-ram" similarly relies on discarding of VM
RAM on the migration source to free up migrated RAM, and will
repeatedly report errors.
Postcopy live migration fails discarding VM RAM on the migration
destination early and refuses to activate postcopy live migration. Note
that postcopy live migration usually only works on selected filesystems
(shmem/tmpfs, hugetlbfs) either way.

View File

@ -670,7 +670,8 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
}
static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
uint32_t offset, uint32_t length)
uint32_t offset, uint32_t length,
bool is_write)
{
uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID;
@ -690,6 +691,10 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
return ret;
}
if (is_write && nvdimm->readonly) {
return NVDIMM_DSM_RET_STATUS_UNSUPPORT;
}
return NVDIMM_DSM_RET_STATUS_SUCCESS;
}
@ -713,7 +718,7 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
get_label_data->length);
status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset,
get_label_data->length);
get_label_data->length, false);
if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
nvdimm_dsm_no_payload(status, dsm_mem_addr);
return;
@ -752,7 +757,7 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
set_label_data->length);
status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset,
set_label_data->length);
set_label_data->length, true);
if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
nvdimm_dsm_no_payload(status, dsm_mem_addr);
return;

View File

@ -1359,6 +1359,7 @@ out:
void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp)
{
ERRP_GUARD();
MachineClass *machine_class = MACHINE_GET_CLASS(machine);
ObjectClass *oc = object_class_by_name(machine->cpu_type);
CPUClass *cc;
@ -1387,9 +1388,13 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
numa_uses_legacy_mem()) {
if (object_property_find(object_get_objects_root(),
machine_class->default_ram_id)) {
error_setg(errp, "object name '%s' is reserved for the default"
" RAM backend, it can't be used for any other purposes."
" Change the object's 'id' to something else",
error_setg(errp, "object's id '%s' is reserved for the default"
" RAM backend, it can't be used for any other purposes",
machine_class->default_ram_id);
error_append_hint(errp,
"Change the object's 'id' to something else or disable"
" automatic creation of the default RAM backend by setting"
" 'memory-backend=%s' with '-machine'.\n",
machine_class->default_ram_id);
return;
}

View File

@ -154,6 +154,9 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
object_get_canonical_path_component(OBJECT(hostmem)));
return;
}
if (memory_region_is_rom(mr)) {
nvdimm->readonly = true;
}
nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
@ -207,15 +210,16 @@ static void nvdimm_unrealize(PCDIMMDevice *dimm)
* label read/write functions.
*/
static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
uint64_t offset)
uint64_t offset, bool is_write)
{
assert((nvdimm->label_size >= size + offset) && (offset + size > offset));
assert(!is_write || !nvdimm->readonly);
}
static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf,
uint64_t size, uint64_t offset)
{
nvdimm_validate_rw_label_data(nvdimm, size, offset);
nvdimm_validate_rw_label_data(nvdimm, size, offset, false);
memcpy(buf, nvdimm->label_data + offset, size);
}
@ -229,7 +233,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
"pmem", NULL);
uint64_t backend_offset;
nvdimm_validate_rw_label_data(nvdimm, size, offset);
nvdimm_validate_rw_label_data(nvdimm, size, offset, true);
if (!is_pmem) {
memcpy(nvdimm->label_data + offset, buf, size);

View File

@ -320,7 +320,8 @@ static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
nvdimm = NVDIMM(drc->dev);
if ((offset + len < offset) ||
(nvdimm->label_size < len + offset)) {
(nvdimm->label_size < len + offset) ||
nvdimm->readonly) {
return H_P2;
}

View File

@ -235,6 +235,12 @@ typedef struct IOMMUTLBEvent {
/* RAM is an mmap-ed named file */
#define RAM_NAMED_FILE (1 << 9)
/* RAM is mmap-ed read-only */
#define RAM_READONLY (1 << 10)
/* RAM FD is opened read-only */
#define RAM_READONLY_FD (1 << 11)
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
IOMMUNotifierFlag flags,
hwaddr start, hwaddr end,
@ -1089,6 +1095,7 @@ struct AddressSpace {
struct FlatView *current_map;
int ioeventfd_nb;
int ioeventfd_notifiers;
struct MemoryRegionIoeventfd *ioeventfds;
QTAILQ_HEAD(, MemoryListener) listeners;
QTAILQ_ENTRY(AddressSpace) address_spaces_link;
@ -1331,10 +1338,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
* @align: alignment of the region base address; if 0, the default alignment
* (getpagesize()) will be used.
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
* RAM_NORESERVE,
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
* RAM_READONLY_FD
* @path: the path in which to allocate the RAM.
* @offset: offset within the file referenced by path
* @readonly: true to open @path for reading, false for read/write.
* @errp: pointer to Error*, to store an error if it happens.
*
* Note that this function does not do anything to cause the data in the
@ -1348,7 +1355,6 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
uint32_t ram_flags,
const char *path,
ram_addr_t offset,
bool readonly,
Error **errp);
/**
@ -1360,7 +1366,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
* @name: the name of the region.
* @size: size of the region.
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
* RAM_NORESERVE, RAM_PROTECTED.
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
* RAM_READONLY_FD
* @fd: the fd to mmap.
* @offset: offset within the file referenced by fd
* @errp: pointer to Error*, to store an error if it happens.

View File

@ -108,10 +108,10 @@ long qemu_maxrampagesize(void);
* @size: the size in bytes of the ram block
* @mr: the memory region where the ram block is
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
* RAM_NORESERVE.
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
* RAM_READONLY_FD
* @mem_path or @fd: specify the backing file or device
* @offset: Offset into target file
* @readonly: true to open @path for reading, false for read/write.
* @errp: pointer to Error*, to store an error if it happens
*
* Return:
@ -120,10 +120,10 @@ long qemu_maxrampagesize(void);
*/
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
uint32_t ram_flags, const char *mem_path,
off_t offset, bool readonly, Error **errp);
off_t offset, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
uint32_t ram_flags, int fd, off_t offset,
bool readonly, Error **errp);
Error **errp);
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr, Error **errp);

View File

@ -77,6 +77,12 @@ struct NVDIMMDevice {
*/
bool unarmed;
/*
* Whether our DIMM is backed by ROM, and even label data cannot be
* written. If set, implies that "unarmed" is also set.
*/
bool readonly;
/*
* The PPC64 - spapr requires each nvdimm device have a uuid.
*/

View File

@ -668,6 +668,20 @@
# @readonly: if true, the backing file is opened read-only; if false,
# it is opened read-write. (default: false)
#
# @rom: whether to create Read Only Memory (ROM) that cannot be modified
# by the VM. Any write attempts to such ROM will be denied. Most
# use cases want writable RAM instead of ROM. However, selected use
# cases, like R/O NVDIMMs, can benefit from ROM. If set to 'on',
# create ROM; if set to 'off', create writable RAM; if set to
# 'auto', the value of the @readonly property is used. This
# property is primarily helpful when we want to have proper RAM in
# configurations that would traditionally create ROM before this
# property was introduced: VM templating, where we want to open a
# file readonly (@readonly set to true) and mark the memory to be
# private for QEMU (@share set to false). For this use case, we need
# writable RAM instead of ROM, and want to set this property to 'off'.
# (default: auto, since 8.2)
#
# Since: 2.1
##
{ 'struct': 'MemoryBackendFileProperties',
@ -677,7 +691,8 @@
'*discard-data': 'bool',
'mem-path': 'str',
'*pmem': { 'type': 'bool', 'if': 'CONFIG_LIBPMEM' },
'*readonly': 'bool' } }
'*readonly': 'bool',
'*rom': 'OnOffAuto' } }
##
# @MemoryBackendMemfdProperties:

View File

@ -5063,7 +5063,7 @@ SRST
they are specified. Note that the 'id' property must be set. These
objects are placed in the '/objects' path.
``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off``
``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off,rom=on|off|auto``
Creates a memory file backend object, which can be used to back
the guest RAM with huge pages.
@ -5153,6 +5153,20 @@ SRST
The ``readonly`` option specifies whether the backing file is opened
read-only or read-write (default).
The ``rom`` option specifies whether to create Read Only Memory
(ROM) that cannot be modified by the VM. Any write attempts to such
ROM will be denied. Most use cases want proper RAM instead of ROM.
However, selected use cases, like R/O NVDIMMs, can benefit from
ROM. If set to ``on``, create ROM; if set to ``off``, create
writable RAM; if set to ``auto`` (default), the value of the
``readonly`` option is used. This option is primarily helpful when
we want to have writable RAM in configurations that would
traditionally create ROM before the ``rom`` option was introduced:
VM templating, where we want to open a file readonly
(``readonly=on``) and mark the memory to be private for QEMU
(``share=off``). For this use case, we need writable RAM instead
of ROM, and want to also set ``rom=off``.
``-object memory-backend-ram,id=id,merge=on|off,dump=on|off,share=on|off,prealloc=on|off,size=size,host-nodes=host-nodes,policy=default|preferred|bind|interleave``
Creates a memory backend object, which can be used to back the
guest RAM. Memory backend objects offer more control than the

View File

@ -842,6 +842,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
AddrRange tmp;
unsigned i;
if (!as->ioeventfd_notifiers) {
return;
}
/*
* It is likely that the number of ioeventfds hasn't changed much, so use
* the previous size as the starting value, with some headroom to avoid
@ -1620,18 +1624,17 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
uint32_t ram_flags,
const char *path,
ram_addr_t offset,
bool readonly,
Error **errp)
{
Error *err = NULL;
memory_region_init(mr, owner, name, size);
mr->ram = true;
mr->readonly = readonly;
mr->readonly = !!(ram_flags & RAM_READONLY);
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->align = align;
mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path,
offset, readonly, &err);
offset, &err);
if (err) {
mr->size = int128_zero();
object_unparent(OBJECT(mr));
@ -1651,10 +1654,11 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
Error *err = NULL;
memory_region_init(mr, owner, name, size);
mr->ram = true;
mr->readonly = !!(ram_flags & RAM_READONLY);
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset,
false, &err);
&err);
if (err) {
mr->size = int128_zero();
object_unparent(OBJECT(mr));
@ -3075,6 +3079,10 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *as)
}
listener_add_address_space(listener, as);
if (listener->eventfd_add || listener->eventfd_del) {
as->ioeventfd_notifiers++;
}
}
void memory_listener_unregister(MemoryListener *listener)
@ -3083,6 +3091,10 @@ void memory_listener_unregister(MemoryListener *listener)
return;
}
if (listener->eventfd_add || listener->eventfd_del) {
listener->address_space->ioeventfd_notifiers--;
}
listener_del_address_space(listener, listener->address_space);
QTAILQ_REMOVE(&memory_listeners, listener, link);
QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);

View File

@ -1288,8 +1288,7 @@ static int64_t get_file_align(int fd)
static int file_ram_open(const char *path,
const char *region_name,
bool readonly,
bool *created,
Error **errp)
bool *created)
{
char *filename;
char *sanitized_name;
@ -1300,10 +1299,33 @@ static int file_ram_open(const char *path,
for (;;) {
fd = open(path, readonly ? O_RDONLY : O_RDWR);
if (fd >= 0) {
/*
* open(O_RDONLY) won't fail with EISDIR. Check manually if we
* opened a directory and fail similarly to how we fail ENOENT
* in readonly mode. Note that mkstemp() would imply O_RDWR.
*/
if (readonly) {
struct stat file_stat;
if (fstat(fd, &file_stat)) {
close(fd);
if (errno == EINTR) {
continue;
}
return -errno;
} else if (S_ISDIR(file_stat.st_mode)) {
close(fd);
return -EISDIR;
}
}
/* @path names an existing file, use it */
break;
}
if (errno == ENOENT) {
if (readonly) {
/* Refuse to create new, readonly files. */
return -ENOENT;
}
/* @path names a file that doesn't exist, create it */
fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd >= 0) {
@ -1333,10 +1355,7 @@ static int file_ram_open(const char *path,
g_free(filename);
}
if (errno != EEXIST && errno != EINTR) {
error_setg_errno(errp, errno,
"can't open backing store %s for guest RAM",
path);
return -1;
return -errno;
}
/*
* Try again on EINTR and EEXIST. The latter happens when
@ -1350,7 +1369,6 @@ static int file_ram_open(const char *path,
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
int fd,
bool readonly,
bool truncate,
off_t offset,
Error **errp)
@ -1408,7 +1426,7 @@ static void *file_ram_alloc(RAMBlock *block,
perror("ftruncate");
}
qemu_map_flags = readonly ? QEMU_MAP_READONLY : 0;
qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0;
qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
@ -1876,7 +1894,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
#ifdef CONFIG_POSIX
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
uint32_t ram_flags, int fd, off_t offset,
bool readonly, Error **errp)
Error **errp)
{
RAMBlock *new_block;
Error *local_err = NULL;
@ -1884,7 +1902,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
/* Just support these ram flags by now. */
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
RAM_PROTECTED | RAM_NAMED_FILE)) == 0);
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
RAM_READONLY_FD)) == 0);
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
@ -1919,8 +1938,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
new_block->used_length = size;
new_block->max_length = size;
new_block->flags = ram_flags;
new_block->host = file_ram_alloc(new_block, size, fd, readonly,
!file_size, offset, errp);
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
errp);
if (!new_block->host) {
g_free(new_block);
return NULL;
@ -1939,20 +1958,40 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
uint32_t ram_flags, const char *mem_path,
off_t offset, bool readonly, Error **errp)
off_t offset, Error **errp)
{
int fd;
bool created;
RAMBlock *block;
fd = file_ram_open(mem_path, memory_region_name(mr), readonly, &created,
errp);
fd = file_ram_open(mem_path, memory_region_name(mr),
!!(ram_flags & RAM_READONLY_FD), &created);
if (fd < 0) {
error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM",
mem_path);
if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) &&
fd == -EACCES) {
/*
* If we can open the file R/O (note: will never create a new file)
* and we are dealing with a private mapping, there are still ways
* to consume such files and get RAM instead of ROM.
*/
fd = file_ram_open(mem_path, memory_region_name(mr), true,
&created);
if (fd < 0) {
return NULL;
}
assert(!created);
close(fd);
error_append_hint(errp, "Consider opening the backing store"
" read-only but still creating writable RAM using"
" '-object memory-backend-file,readonly=on,rom=off...'"
" (see \"VM templating\" documentation)\n");
}
return NULL;
}
block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, readonly,
errp);
block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp);
if (!block) {
if (created) {
unlink(mem_path);
@ -2070,6 +2109,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
ram_addr_t offset;
int flags;
void *area, *vaddr;
int prot;
RAMBLOCK_FOREACH(block) {
offset = addr - block->offset;
@ -2084,13 +2124,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
flags |= block->flags & RAM_SHARED ?
MAP_SHARED : MAP_PRIVATE;
flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
prot = PROT_READ;
prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
if (block->fd >= 0) {
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
flags, block->fd, offset + block->fd_offset);
area = mmap(vaddr, length, prot, flags, block->fd,
offset + block->fd_offset);
} else {
flags |= MAP_ANONYMOUS;
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
flags, -1, 0);
area = mmap(vaddr, length, prot, flags, -1, 0);
}
if (area != vaddr) {
error_report("Could not remap addr: "
@ -3480,6 +3521,16 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
* so a userfault will trigger.
*/
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
/*
* fallocate() will fail with readonly files. Let's print a
* proper error message.
*/
if (rb->flags & RAM_READONLY_FD) {
error_report("ram_block_discard_range: Discarding RAM"
" with readonly files is not supported");
goto err;
}
/*
* We'll discard data from the actual file, even though we only
* have a MAP_PRIVATE mapping, possibly messing with other