Merge branch kvm-arm64/dirty-ring into kvmarm-master/next
* kvm-arm64/dirty-ring:
  : .
  : Add support for the "per-vcpu dirty-ring tracking with a bitmap
  : and sprinkles on top", courtesy of Gavin Shan.
  :
  : This branch drags the kvmarm-fixes-6.1-3 tag which was already
  : merged in 6.1-rc4 so that the branch is in a working state.
  : .
  KVM: Push dirty information unconditionally to backup bitmap
  KVM: selftests: Automate choosing dirty ring size in dirty_log_test
  KVM: selftests: Clear dirty ring states between two modes in dirty_log_test
  KVM: selftests: Use host page size to map ring buffer in dirty_log_test
  KVM: arm64: Enable ring-based dirty memory tracking
  KVM: Support dirty ring in conjunction with bitmap
  KVM: Move declaration of kvm_cpu_dirty_log_size() to kvm_dirty_ring.h
  KVM: x86: Introduce KVM_REQ_DIRTY_RING_SOFT_FULL

Signed-off-by: Marc Zyngier <maz@kernel.org>
commit a937f37d85
@@ -7921,7 +7921,7 @@ regardless of what has actually been exposed through the CPUID leaf.
8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
----------------------------------------------------------

:Architectures: x86
:Architectures: x86, arm64
:Parameters: args[0] - size of the dirty log ring

KVM is capable of tracking dirty memory using ring buffers that are
@@ -8003,13 +8003,6 @@ flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one
needs to kick the vcpu out of KVM_RUN using a signal. The resulting
vmexit ensures that all dirty GFNs are flushed to the dirty rings.

NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding
ioctl KVM_RESET_DIRTY_RINGS are mutual exclusive to the existing ioctls
KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG. After enabling
KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
machine will switch to ring-buffer dirty page tracking and further
KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.

NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that
should be exposed by weakly ordered architecture, in order to indicate
the additional memory ordering requirements imposed on userspace when
@@ -8018,6 +8011,33 @@ Architecture with TSO-like ordering (such as x86) are allowed to
expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL
to userspace.

After enabling the dirty rings, the userspace needs to detect the
capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the
ring structures can be backed by per-slot bitmaps. With this capability
advertised, it means the architecture can dirty guest pages without
vcpu/ring context, so that some of the dirty information will still be
maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP
can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL
hasn't been enabled, or any memslot has been existing.

Note that the bitmap here is only a backup of the ring structure. The
use of the ring and bitmap combination is only beneficial if there is
only a very small amount of memory that is dirtied out of vcpu/ring
context. Otherwise, the stand-alone per-slot bitmap mechanism needs to
be considered.

To collect dirty bits in the backup bitmap, userspace can use the same
KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all
the generation of the dirty bits is done in a single pass. Collecting
the dirty bitmap should be the very last thing that the VMM does before
considering the state as complete. VMM needs to ensure that the dirty
state is final and avoid missing dirty pages from another ioctl ordered
after the bitmap collection.

NOTE: One example of using the backup bitmap is saving arm64 vgic/its
tables through KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} command on
KVM device "kvm-arm-vgic-its" when dirty ring is enabled.

8.30 KVM_CAP_XEN_HVM
--------------------
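For readers wanting the userspace side of the sequence documented above, here is a minimal, hypothetical sketch (not part of this commit); the fds, the ring size value and the lack of error handling are assumptions. It enables the acquire/release dirty-ring capability and then the ring+bitmap combination before any memslot is created, as required by the text above.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical helper: vm_fd is a freshly created VM with no memslots yet. */
static int enable_dirty_ring_with_bitmap(int vm_fd, __u64 ring_bytes)
{
        struct kvm_enable_cap ring = {
                .cap = KVM_CAP_DIRTY_LOG_RING_ACQ_REL,
                .args = { ring_bytes },  /* args[0]: per-vcpu ring size in bytes */
        };
        struct kvm_enable_cap bitmap = {
                .cap = KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP,
        };

        if (ioctl(vm_fd, KVM_ENABLE_CAP, &ring))
                return -1;
        /* Must happen before the first KVM_SET_USER_MEMORY_REGION. */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &bitmap);
}

Once saving is otherwise complete, the backup bitmap of a slot is harvested with the usual KVM_GET_DIRTY_LOG ioctl as the very last step, per the documentation above.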
@@ -52,7 +52,10 @@ KVM_DEV_ARM_VGIC_GRP_CTRL

KVM_DEV_ARM_ITS_SAVE_TABLES
save the ITS table data into guest RAM, at the location provisioned
by the guest in corresponding registers/table entries.
by the guest in corresponding registers/table entries. Should userspace
require a form of dirty tracking to identify which pages are modified
by the saving process, it should use a bitmap even if using another
mechanism to track the memory dirtied by the vCPUs.

The layout of the tables in guest memory defines an ABI. The entries
are laid out in little endian format as described in the last paragraph.
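As an illustration of that advice, a hypothetical sketch (not from this commit): its_fd is assumed to be the fd of a "kvm-arm-vgic-its" device created with KVM_CREATE_DEVICE, and slot/bitmap stand in for the memslot that holds the ITS tables.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Save the ITS tables, then harvest the pages dirtied by the save
 * from the slot's backup bitmap (dirty ring + bitmap mode assumed). */
static int save_its_and_collect(int vm_fd, int its_fd, __u32 slot, void *bitmap)
{
        struct kvm_device_attr save = {
                .group = KVM_DEV_ARM_VGIC_GRP_CTRL,
                .attr  = KVM_DEV_ARM_ITS_SAVE_TABLES,
        };
        struct kvm_dirty_log log = {
                .slot = slot,
                .dirty_bitmap = bitmap,
        };

        if (ioctl(its_fd, KVM_SET_DEVICE_ATTR, &save))
                return -1;
        return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}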
@@ -43,6 +43,7 @@
#define __KVM_HAVE_VCPU_EVENTS

#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_DIRTY_LOG_PAGE_OFFSET 64

#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -32,6 +32,8 @@ menuconfig KVM
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
select HAVE_KVM_DIRTY_RING_ACQ_REL
select NEED_KVM_DIRTY_RING_WITH_BITMAP
select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
@@ -746,6 +746,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)

if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
return kvm_vcpu_suspend(vcpu);

if (kvm_dirty_ring_check_request(vcpu))
return 0;
}

return 1;
@@ -2743,6 +2743,7 @@ static int vgic_its_has_attr(struct kvm_device *dev,
static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
{
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
struct vgic_dist *dist = &kvm->arch.vgic;
int ret = 0;

if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */
@@ -2762,7 +2763,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
vgic_its_reset(kvm, its);
break;
case KVM_DEV_ARM_ITS_SAVE_TABLES:
dist->save_its_tables_in_progress = true;
ret = abi->save_tables(its);
dist->save_its_tables_in_progress = false;
break;
case KVM_DEV_ARM_ITS_RESTORE_TABLES:
ret = abi->restore_tables(its);
@@ -2775,6 +2778,23 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
return ret;
}

/*
 * kvm_arch_allow_write_without_running_vcpu - allow writing guest memory
 * without the running VCPU when dirty ring is enabled.
 *
 * The running VCPU is required to track dirty guest pages when dirty ring
 * is enabled. Otherwise, the backup bitmap should be used to track the
 * dirty guest pages. When vgic/its tables are being saved, the backup
 * bitmap is used to track the dirty guest pages due to the missed running
 * VCPU in the period.
 */
bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;

return dist->save_its_tables_in_progress;
}

static int vgic_its_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
@@ -2090,8 +2090,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define GET_SMSTATE(type, buf, offset) \
(*(type *)((buf) + (offset) - 0x7e00))

int kvm_cpu_dirty_log_size(void);

int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);

#define KVM_CLOCK_VALID_FLAGS \
@@ -10515,20 +10515,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)

bool req_immediate_exit = false;

/* Forbid vmenter if vcpu dirty ring is soft-full */
if (unlikely(vcpu->kvm->dirty_ring_size &&
kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
trace_kvm_dirty_ring_exit(vcpu);
r = 0;
goto out;
}

if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
r = -EIO;
goto out;
}

if (kvm_dirty_ring_check_request(vcpu)) {
r = 0;
goto out;
}

if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
r = 0;
@@ -263,6 +263,7 @@ struct vgic_dist {
struct vgic_io_device dist_iodev;

bool has_its;
bool save_its_tables_in_progress;

/*
 * Contains the attributes and gpa of the LPI configuration table.
@@ -37,6 +37,11 @@ static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
return 0;
}

static inline bool kvm_use_dirty_bitmap(struct kvm *kvm)
{
return true;
}

static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
int index, u32 size)
{
@@ -49,7 +54,7 @@ static inline int kvm_dirty_ring_reset(struct kvm *kvm,
return 0;
}

static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring,
static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu,
u32 slot, u64 offset)
{
}
@@ -64,13 +69,11 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
{
}

static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
{
return true;
}

#else /* CONFIG_HAVE_KVM_DIRTY_RING */

int kvm_cpu_dirty_log_size(void);
bool kvm_use_dirty_bitmap(struct kvm *kvm);
bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm);
u32 kvm_dirty_ring_get_rsvd_entries(void);
int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);

@@ -84,13 +87,14 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
 * returns =0: successfully pushed
 *         <0: unable to push, need to wait
 */
void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset);
void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset);

bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu);

/* for use in vm_operations_struct */
struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);

void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);

#endif /* CONFIG_HAVE_KVM_DIRTY_RING */
@@ -153,10 +153,11 @@ static inline bool is_error_page(struct page *page)
 * Architecture-independent vcpu->requests bit members
 * Bits 3-7 are reserved for more arch-independent bits.
 */
#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_DIRTY_RING_SOFT_FULL 3
#define KVM_REQUEST_ARCH_BASE 8

/*
 * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to
@@ -778,6 +779,7 @@ struct kvm {
pid_t userspace_pid;
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
bool dirty_ring_with_bitmap;
bool vm_bugged;
bool vm_dead;
@@ -1178,6 +1178,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_ZPCI_OP 221
#define KVM_CAP_S390_CPU_TOPOLOGY 222
#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 224

#ifdef KVM_CAP_IRQ_ROUTING
@@ -24,6 +24,9 @@
#include "guest_modes.h"
#include "processor.h"

#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K 12

/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1

@@ -226,13 +229,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm)
}

static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages)
void *bitmap, uint32_t num_pages,
uint32_t *unused)
{
kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
}

static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages)
void *bitmap, uint32_t num_pages,
uint32_t *unused)
{
kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages);
@@ -271,6 +276,24 @@ static bool dirty_ring_supported(void)

static void dirty_ring_create_vm_done(struct kvm_vm *vm)
{
uint64_t pages;
uint32_t limit;

/*
 * We rely on vcpu exit due to full dirty ring state. Adjust
 * the ring buffer size to ensure we're able to reach the
 * full dirty ring state.
 */
pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
pages = vm_adjust_num_guest_pages(vm->mode, pages);
if (vm->page_size < getpagesize())
pages = vm_num_host_pages(vm->mode, pages);

limit = 1 << (31 - __builtin_clz(pages));
test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count));
test_dirty_ring_count = min(limit, test_dirty_ring_count);
pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count);

/*
 * Switch to dirty ring mode after VM creation but before any
 * of the vcpu creation.
@@ -329,10 +352,9 @@ static void dirty_ring_continue_vcpu(void)
}

static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages)
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx)
{
/* We only have one vcpu */
static uint32_t fetch_index = 0;
uint32_t count = 0, cleared;
bool continued_vcpu = false;

@@ -349,7 +371,8 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,

/* Only have one vcpu */
count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
slot, bitmap, num_pages, &fetch_index);
slot, bitmap, num_pages,
ring_buf_idx);

cleared = kvm_vm_reset_dirty_ring(vcpu->vm);

@@ -406,7 +429,8 @@ struct log_mode {
void (*create_vm_done)(struct kvm_vm *vm);
/* Hook to collect the dirty pages into the bitmap provided */
void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages);
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx);
/* Hook to call when after each vcpu run */
void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
void (*before_vcpu_join) (void);
@@ -471,13 +495,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm)
}

static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages)
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx)
{
struct log_mode *mode = &log_modes[host_log_mode];

TEST_ASSERT(mode->collect_dirty_pages != NULL,
"collect_dirty_pages() is required for any log mode!");
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages);
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
}

static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
@@ -681,9 +706,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
return vm;
}

#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K 12

struct test_params {
unsigned long iterations;
unsigned long interval;
@@ -696,6 +718,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
unsigned long *bmap;
uint32_t ring_buf_idx = 0;

if (!log_mode_supported()) {
print_skip("Log mode '%s' not supported",
@@ -771,6 +794,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
host_dirty_count = 0;
host_clear_count = 0;
host_track_next_count = 0;
WRITE_ONCE(dirty_ring_vcpu_ring_full, false);

pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);

@@ -778,7 +802,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Give the vcpu thread some time to dirty some pages */
usleep(p->interval * 1000);
log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
bmap, host_num_pages);
bmap, host_num_pages,
&ring_buf_idx);

/*
 * See vcpu_sync_stop_requested definition for details on why
@@ -823,7 +848,7 @@ static void help(char *name)
printf("usage: %s [-h] [-i iterations] [-I interval] "
"[-p offset] [-m mode]\n", name);
puts("");
printf(" -c: specify dirty ring size, in number of entries\n");
printf(" -c: hint to dirty ring size, in number of entries\n");
printf(" (only useful for dirty-ring test; default: %"PRIu32")\n",
TEST_DIRTY_RING_COUNT);
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
@@ -1527,7 +1527,7 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
uint32_t page_size = vcpu->vm->page_size;
uint32_t page_size = getpagesize();
uint32_t size = vcpu->vm->dirty_ring_size;

TEST_ASSERT(size > 0, "Should enable dirty ring first");
@@ -33,6 +33,12 @@ config HAVE_KVM_DIRTY_RING_ACQ_REL
bool
select HAVE_KVM_DIRTY_RING

# Allow enabling both the dirty bitmap and dirty ring. Only architectures
# that need to dirty memory outside of a vCPU context should select this.
config NEED_KVM_DIRTY_RING_WITH_BITMAP
bool
depends on HAVE_KVM_DIRTY_RING

config HAVE_KVM_EVENTFD
bool
select EVENTFD
@@ -21,12 +21,26 @@ u32 kvm_dirty_ring_get_rsvd_entries(void)
return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
}

bool kvm_use_dirty_bitmap(struct kvm *kvm)
{
lockdep_assert_held(&kvm->slots_lock);

return !kvm->dirty_ring_size || kvm->dirty_ring_with_bitmap;
}

#ifndef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
{
return false;
}
#endif

static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
{
return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
}

bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
static bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
{
return kvm_dirty_ring_used(ring) >= ring->soft_limit;
}
@@ -142,13 +156,19 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)

kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);

/*
 * The request KVM_REQ_DIRTY_RING_SOFT_FULL will be cleared
 * by the VCPU thread next time when it enters the guest.
 */

trace_kvm_dirty_ring_reset(ring);

return count;
}

void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)
{
struct kvm_dirty_ring *ring = &vcpu->dirty_ring;
struct kvm_dirty_gfn *entry;

/* It should never get full */
@@ -166,6 +186,28 @@ void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
kvm_dirty_gfn_set_dirtied(entry);
ring->dirty_index++;
trace_kvm_dirty_ring_push(ring, slot, offset);

if (kvm_dirty_ring_soft_full(ring))
kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu);
}

bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu)
{
/*
 * The VCPU isn't runnable when the dirty ring becomes soft full.
 * The KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent
 * the VCPU from running until the dirty pages are harvested and
 * the dirty ring is reset by userspace.
 */
if (kvm_check_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu) &&
kvm_dirty_ring_soft_full(&vcpu->dirty_ring)) {
kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu);
vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
trace_kvm_dirty_ring_exit(vcpu);
return true;
}

return false;
}

struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
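The userspace counterpart of the soft-full handling above is the harvest/reset loop. The following is a hypothetical sketch and not part of this commit: vm_fd, the mapping of the per-vcpu ring (mmap()ed at page offset KVM_DIRTY_LOG_PAGE_OFFSET of the vcpu fd), the entry count and the cursor are assumptions, and error handling is omitted. On a KVM_EXIT_DIRTY_RING_FULL exit (or on demand), the VMM walks the ring, marks harvested entries for reset, and then calls KVM_RESET_DIRTY_RINGS.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Hypothetical harvest step: 'ring' points at the mmap()ed array of
 * struct kvm_dirty_gfn, 'nr_entries' is ring bytes / sizeof(*ring),
 * and 'fetch' is the VMM's private cursor into the ring. */
static uint32_t harvest_dirty_ring(int vm_fd, struct kvm_dirty_gfn *ring,
                                   uint32_t nr_entries, uint32_t *fetch)
{
        uint32_t harvested = 0;

        for (;;) {
                struct kvm_dirty_gfn *e = &ring[*fetch % nr_entries];

                /* Acquire pairs with the kernel's release (ACQ_REL capability). */
                if (!(__atomic_load_n(&e->flags, __ATOMIC_ACQUIRE) &
                      KVM_DIRTY_GFN_F_DIRTY))
                        break;

                /* e->slot / e->offset identify the dirtied page here. */

                __atomic_store_n(&e->flags, KVM_DIRTY_GFN_F_RESET,
                                 __ATOMIC_RELEASE);
                (*fetch)++;
                harvested++;
        }

        /* Let KVM recycle the harvested entries. */
        ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);
        return harvested;
}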
@@ -1617,7 +1617,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
new->dirty_bitmap = NULL;
else if (old && old->dirty_bitmap)
new->dirty_bitmap = old->dirty_bitmap;
else if (!kvm->dirty_ring_size) {
else if (kvm_use_dirty_bitmap(kvm)) {
r = kvm_alloc_dirty_bitmap(new);
if (r)
return r;
@@ -2060,8 +2060,8 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
unsigned long n;
unsigned long any = 0;

/* Dirty ring tracking is exclusive to dirty log tracking */
if (kvm->dirty_ring_size)
/* Dirty ring tracking may be exclusive to dirty log tracking */
if (!kvm_use_dirty_bitmap(kvm))
return -ENXIO;

*memslot = NULL;
@@ -2125,8 +2125,8 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
unsigned long *dirty_bitmap_buffer;
bool flush;

/* Dirty ring tracking is exclusive to dirty log tracking */
if (kvm->dirty_ring_size)
/* Dirty ring tracking may be exclusive to dirty log tracking */
if (!kvm_use_dirty_bitmap(kvm))
return -ENXIO;

as_id = log->slot >> 16;
@@ -2237,8 +2237,8 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
unsigned long *dirty_bitmap_buffer;
bool flush;

/* Dirty ring tracking is exclusive to dirty log tracking */
if (kvm->dirty_ring_size)
/* Dirty ring tracking may be exclusive to dirty log tracking */
if (!kvm_use_dirty_bitmap(kvm))
return -ENXIO;

as_id = log->slot >> 16;
@@ -3305,18 +3305,19 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

#ifdef CONFIG_HAVE_KVM_DIRTY_RING
if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm))
return;

WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm));
#endif

if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
u32 slot = (memslot->as_id << 16) | memslot->id;

if (kvm->dirty_ring_size)
kvm_dirty_ring_push(&vcpu->dirty_ring,
slot, rel_gfn);
else
if (kvm->dirty_ring_size && vcpu)
kvm_dirty_ring_push(vcpu, slot, rel_gfn);
else if (memslot->dirty_bitmap)
set_bit_le(rel_gfn, memslot->dirty_bitmap);
}
}
@@ -4483,6 +4484,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
#else
return 0;
#endif
#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP:
#endif
case KVM_CAP_BINARY_STATS_FD:
case KVM_CAP_SYSTEM_EVENT_DATA:
@@ -4559,6 +4563,20 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return -EINVAL;
}

static bool kvm_are_all_memslots_empty(struct kvm *kvm)
{
int i;

lockdep_assert_held(&kvm->slots_lock);

for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
if (!kvm_memslots_empty(__kvm_memslots(kvm, i)))
return false;
}

return true;
}

static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
@@ -4589,6 +4607,29 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
return -EINVAL;

return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: {
int r = -EINVAL;

if (!IS_ENABLED(CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP) ||
!kvm->dirty_ring_size || cap->flags)
return r;

mutex_lock(&kvm->slots_lock);

/*
 * For simplicity, allow enabling ring+bitmap if and only if
 * there are no memslots, e.g. to ensure all memslots allocate
 * a bitmap after the capability is enabled.
 */
if (kvm_are_all_memslots_empty(kvm)) {
kvm->dirty_ring_with_bitmap = true;
r = 0;
}

mutex_unlock(&kvm->slots_lock);

return r;
}
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);
}