diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 1fc860c007a3..69b090fdd2e7 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -20,10 +20,10 @@ On x86:
 
 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
 
-- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is
-  taken inside kvm->arch.mmu_lock, and cannot be taken without already
-  holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
-  there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
+- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
+  kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
+  cannot be taken without already holding kvm->arch.mmu_lock (typically with
+  ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).
 
 Everything else is a leaf: no other lock is taken inside the critical
 sections.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fbd55c682d5e..5e5f06acba55 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -987,6 +987,13 @@ struct kvm_arch {
 	struct list_head lpage_disallowed_mmu_pages;
 	struct kvm_page_track_notifier_node mmu_sp_tracker;
 	struct kvm_page_track_notifier_head track_notifier_head;
+	/*
+	 * Protects marking pages unsync during page faults, as TDP MMU page
+	 * faults only take mmu_lock for read. For simplicity, the unsync
+	 * pages lock is always taken when marking pages unsync regardless of
+	 * whether mmu_lock is held for read or write.
+	 */
+	spinlock_t mmu_unsync_pages_lock;
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ed469ddf2074..9d3783800c8c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2454,6 +2454,7 @@ bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 			    bool can_unsync)
 {
 	struct kvm_mmu_page *sp;
+	bool locked = false;
 
 	if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
 		return true;
@@ -2465,9 +2466,34 @@ bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 		if (sp->unsync)
 			continue;
 
+		/*
+		 * TDP MMU page faults require an additional spinlock as they
+		 * run with mmu_lock held for read, not write, and the unsync
+		 * logic is not thread safe. Take the spinlock regardless of
+		 * the MMU type to avoid extra conditionals/parameters; there's
+		 * no meaningful penalty if mmu_lock is held for write.
+		 */
+		if (!locked) {
+			locked = true;
+			spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+
+			/*
+			 * Recheck after taking the spinlock, a different vCPU
+			 * may have since marked the page unsync. A false
+			 * positive on the unprotected check above is not
+			 * possible as clearing sp->unsync _must_ hold mmu_lock
+			 * for write, i.e. unsync cannot transition from 1->0
+			 * while this CPU holds mmu_lock for read (or write).
+			 */
+			if (READ_ONCE(sp->unsync))
+				continue;
+		}
+
 		WARN_ON(sp->role.level != PG_LEVEL_4K);
 		kvm_unsync_page(vcpu, sp);
 	}
+	if (locked)
+		spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
 
 	/*
 	 * We need to ensure that the marking of unsync pages is visible
@@ -5514,6 +5540,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
+	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
+
 	kvm_mmu_init_tdp_mmu(kvm);
 
 	node->track_write = kvm_mmu_pte_write;
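
The heart of the patch is a double-checked locking idiom: an unprotected
sp->unsync test, a lazily taken inner spinlock, then a recheck under that
lock. What follows is a minimal, self-contained userspace sketch of the
same idiom, not the kernel code: a pthread rwlock stands in for mmu_lock
and a pthread mutex for mmu_unsync_pages_lock, and every identifier in it
(outer_lock, unsync_lock, struct page, mark_unsync_pages) is hypothetical,
chosen only for illustration.

/*
 * Sketch of the lazy, double-checked inner-lock idiom used above.
 * Userspace stand-ins: outer_lock ~ mmu_lock, unsync_lock ~
 * mmu_unsync_pages_lock. All names are hypothetical, not KVM APIs.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct page {
	atomic_bool unsync;
};

static pthread_rwlock_t outer_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t unsync_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Caller holds outer_lock for read (or write). Marks pages unsync,
 * taking the inner lock lazily, only once marking is actually needed.
 */
static void mark_unsync_pages(struct page *pages, int nr)
{
	bool locked = false;
	int i;

	for (i = 0; i < nr; i++) {
		struct page *p = &pages[i];

		/*
		 * Unprotected fast path. A false positive is impossible:
		 * clearing unsync requires outer_lock held for write, which
		 * our read (or write) hold excludes, so unsync cannot go
		 * 1->0 underneath us.
		 */
		if (atomic_load(&p->unsync))
			continue;

		if (!locked) {
			locked = true;
			pthread_mutex_lock(&unsync_lock);

			/*
			 * Recheck under the inner lock: another thread, also
			 * holding outer_lock for read, may have marked this
			 * page in the window before we took unsync_lock.
			 * Once we hold the lock nobody else can set unsync,
			 * so later iterations need only the fast path.
			 */
			if (atomic_load(&p->unsync))
				continue;
		}

		atomic_store(&p->unsync, true);
	}

	if (locked)
		pthread_mutex_unlock(&unsync_lock);
}

int main(void)
{
	static struct page pages[4];	/* zero-initialized: none unsync */

	pthread_rwlock_rdlock(&outer_lock);	/* the "read-locked fault" path */
	mark_unsync_pages(pages, 4);
	pthread_rwlock_unlock(&outer_lock);

	printf("page[0].unsync = %d\n", atomic_load(&pages[0].unsync));
	return 0;
}

Build with something like "cc -pthread sketch.c". The design point from the
kernel comment carries over: taking unsync_lock unconditionally, rather than
only on the read-locked path, keeps the function free of extra conditionals
and parameters, and costs little when the outer lock is already held for
write since there is no contention to speak of in that case.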