2020-10-15 02:26:43 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
|
|
|
|
#ifndef __KVM_X86_MMU_TDP_MMU_H
|
|
|
|
#define __KVM_X86_MMU_TDP_MMU_H
|
|
|
|
|
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
|
2020-10-15 02:26:44 +08:00
|
|
|
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
|
|
|
|
|
2021-04-02 07:37:30 +08:00
|
|
|
__must_check static inline bool kvm_tdp_mmu_get_root(struct kvm *kvm,
|
|
|
|
struct kvm_mmu_page *root)
|
2021-04-02 07:37:25 +08:00
|
|
|
{
|
2021-04-02 07:37:35 +08:00
|
|
|
if (root->role.invalid)
|
|
|
|
return false;
|
|
|
|
|
2021-04-02 07:37:30 +08:00
|
|
|
return refcount_inc_not_zero(&root->tdp_mmu_root_count);
|
2021-04-02 07:37:25 +08:00
|
|
|
}
|
|
|
|
|
2021-04-02 07:37:32 +08:00
|
|
|
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
|
|
|
|
bool shared);
|
2021-04-02 07:37:25 +08:00
|
|
|
|
2021-03-26 10:19:44 +08:00
|
|
|
bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
|
2021-08-11 04:52:36 +08:00
|
|
|
gfn_t end, bool can_yield, bool flush);
|
2021-03-26 10:19:44 +08:00
|
|
|
/*
 * Zap a gfn range in address space @as_id with yielding permitted.
 *
 * Thin wrapper around __kvm_tdp_mmu_zap_gfn_range() that forwards @kvm,
 * @as_id, @start, @end and @flush unchanged and always passes
 * can_yield = true.  Returns the value returned by the underlying helper.
 */
static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id,
					     gfn_t start, gfn_t end, bool flush)
{
	const bool can_yield = true;

	return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, can_yield,
					   flush);
}
|
|
|
|
static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|
|
|
{
|
KVM: x86/mmu: Fix TDP MMU page table level
TDP MMU iterator's level is identical to page table's actual level. For
instance, for the last level page table (whose entry points to one 4K
page), iter->level is 1 (PG_LEVEL_4K), and in case of 5 level paging,
the iter->level is mmu->shadow_root_level, which is 5. However, struct
kvm_mmu_page's level currently is not set correctly when it is allocated
in kvm_tdp_mmu_map(). When iterator hits non-present SPTE and needs to
allocate a new child page table, currently iter->level, which is the
level of the page table where the non-present SPTE belongs to, is used.
This results in struct kvm_mmu_page's level always having its parent's
level (excpet root table's level, which is initialized explicitly using
mmu->shadow_root_level).
This is kinda wrong, and not consistent with existing non TDP MMU code.
Fortuantely sp->role.level is only used in handle_removed_tdp_mmu_page()
and kvm_tdp_mmu_zap_sp(), and they are already aware of this and behave
correctly. However to make it consistent with legacy MMU code (and fix
the issue that both root page table and its child page table have
shadow_root_level), use iter->level - 1 in kvm_tdp_mmu_map(), and change
handle_removed_tdp_mmu_page() and kvm_tdp_mmu_zap_sp() accordingly.
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <bcb6569b6e96cb78aaa7b50640e6e6b53291a74e.1623717884.git.kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-06-15 08:57:11 +08:00
|
|
|
gfn_t end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level + 1);
|
KVM: x86/mmu: Don't allow TDP MMU to yield when recovering NX pages
Prevent the TDP MMU from yielding when zapping a gfn range during NX
page recovery. If a flush is pending from a previous invocation of the
zapping helper, either in the TDP MMU or the legacy MMU, but the TDP MMU
has not accumulated a flush for the current invocation, then yielding
will release mmu_lock with stale TLB entries.
That being said, this isn't technically a bug fix in the current code, as
the TDP MMU will never yield in this case. tdp_mmu_iter_cond_resched()
will yield if and only if it has made forward progress, as defined by the
current gfn vs. the last yielded (or starting) gfn. Because zapping a
single shadow page is guaranteed to (a) find that page and (b) step
sideways at the level of the shadow page, the TDP iter will break its loop
before getting a chance to yield.
But that is all very, very subtle, and will break at the slightest sneeze,
e.g. zapping while holding mmu_lock for read would break as the TDP MMU
wouldn't be guaranteed to see the present shadow page, and thus could step
sideways at a lower level.
Cc: Ben Gardon <bgardon@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210325200119.1359384-4-seanjc@google.com>
[Add lockdep assertion. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-03-26 04:01:19 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Don't allow yielding, as the caller may have a flush pending. Note,
|
|
|
|
* if mmu_lock is held for write, zapping will never yield in this case,
|
|
|
|
* but explicitly disallow it for safety. The TDP MMU does not yield
|
|
|
|
* until it has made forward progress (steps sideways), and when zapping
|
|
|
|
* a single shadow page that it's guaranteed to see (thus the mmu_lock
|
|
|
|
* requirement), its "step sideways" will always step beyond the bounds
|
|
|
|
* of the shadow page's gfn range and stop iterating before yielding.
|
|
|
|
*/
|
|
|
|
lockdep_assert_held_write(&kvm->mmu_lock);
|
2021-03-26 10:19:44 +08:00
|
|
|
return __kvm_tdp_mmu_zap_gfn_range(kvm, kvm_mmu_page_as_id(sp),
|
2021-08-11 04:52:36 +08:00
|
|
|
sp->gfn, end, false, false);
|
KVM: x86/mmu: Don't allow TDP MMU to yield when recovering NX pages
Prevent the TDP MMU from yielding when zapping a gfn range during NX
page recovery. If a flush is pending from a previous invocation of the
zapping helper, either in the TDP MMU or the legacy MMU, but the TDP MMU
has not accumulated a flush for the current invocation, then yielding
will release mmu_lock with stale TLB entries.
That being said, this isn't technically a bug fix in the current code, as
the TDP MMU will never yield in this case. tdp_mmu_iter_cond_resched()
will yield if and only if it has made forward progress, as defined by the
current gfn vs. the last yielded (or starting) gfn. Because zapping a
single shadow page is guaranteed to (a) find that page and (b) step
sideways at the level of the shadow page, the TDP iter will break its loop
before getting a chance to yield.
But that is all very, very subtle, and will break at the slightest sneeze,
e.g. zapping while holding mmu_lock for read would break as the TDP MMU
wouldn't be guaranteed to see the present shadow page, and thus could step
sideways at a lower level.
Cc: Ben Gardon <bgardon@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210325200119.1359384-4-seanjc@google.com>
[Add lockdep assertion. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-03-26 04:01:19 +08:00
|
|
|
}
|
2021-04-02 07:37:35 +08:00
|
|
|
|
2020-10-15 02:26:47 +08:00
|
|
|
void kvm_tdp_mmu_zap_all(struct kvm *kvm);
|
2021-04-02 07:37:35 +08:00
|
|
|
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm);
|
2021-04-02 07:37:36 +08:00
|
|
|
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm);
|
2020-10-15 02:26:50 +08:00
|
|
|
|
|
|
|
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
|
|
|
int map_writable, int max_level, kvm_pfn_t pfn,
|
|
|
|
bool prefault);
|
2020-10-15 02:26:52 +08:00
|
|
|
|
KVM: Move x86's MMU notifier memslot walkers to generic code
Move the hva->gfn lookup for MMU notifiers into common code. Every arch
does a similar lookup, and some arch code is all but identical across
multiple architectures.
In addition to consolidating code, this will allow introducing
optimizations that will benefit all architectures without incurring
multiple walks of the memslots, e.g. by taking mmu_lock if and only if a
relevant range exists in the memslots.
The use of __always_inline to avoid indirect call retpolines, as done by
x86, may also benefit other architectures.
Consolidating the lookups also fixes a wart in x86, where the legacy MMU
and TDP MMU each do their own memslot walks.
Lastly, future enhancements to the memslot implementation, e.g. to add an
interval tree to track host address, will need to touch far less arch
specific code.
MIPS, PPC, and arm64 will be converted one at a time in future patches.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210402005658.3024832-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-04-02 08:56:50 +08:00
|
|
|
bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
|
|
|
|
bool flush);
|
|
|
|
bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
|
|
|
|
bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
|
|
|
|
bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
|
2020-10-15 02:26:55 +08:00
|
|
|
|
2021-07-13 10:33:38 +08:00
|
|
|
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
|
|
|
|
const struct kvm_memory_slot *slot, int min_level);
|
2020-10-15 02:26:55 +08:00
|
|
|
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
|
2021-07-13 10:33:38 +08:00
|
|
|
const struct kvm_memory_slot *slot);
|
2020-10-15 02:26:55 +08:00
|
|
|
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
|
|
|
|
struct kvm_memory_slot *slot,
|
|
|
|
gfn_t gfn, unsigned long mask,
|
|
|
|
bool wrprot);
|
2021-03-26 10:19:42 +08:00
|
|
|
bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
2021-04-02 07:37:24 +08:00
|
|
|
const struct kvm_memory_slot *slot,
|
|
|
|
bool flush);
|
2020-10-15 02:26:57 +08:00
|
|
|
|
|
|
|
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
|
2021-04-29 11:41:14 +08:00
|
|
|
struct kvm_memory_slot *slot, gfn_t gfn,
|
|
|
|
int min_level);
|
2020-10-15 02:26:58 +08:00
|
|
|
|
2021-07-14 06:09:54 +08:00
|
|
|
/*
 * Begin a lockless walk of the TDP MMU page tables.
 *
 * Takes the RCU read lock; the matching kvm_tdp_mmu_walk_lockless_end()
 * releases it.
 */
static inline void kvm_tdp_mmu_walk_lockless_begin(void)
{
	rcu_read_lock();
}
|
|
|
|
|
|
|
|
/*
 * End a lockless walk of the TDP MMU page tables.
 *
 * Drops the RCU read lock taken by kvm_tdp_mmu_walk_lockless_begin().
 */
static inline void kvm_tdp_mmu_walk_lockless_end(void)
{
	rcu_read_unlock();
}
|
|
|
|
|
2020-12-18 08:31:37 +08:00
|
|
|
int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
|
|
|
|
int *root_level);
|
2021-07-14 06:09:55 +08:00
|
|
|
u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
|
|
|
|
u64 *spte);
|
2020-12-18 08:31:37 +08:00
|
|
|
|
2021-02-06 22:53:33 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
2021-05-19 01:34:14 +08:00
|
|
|
bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
|
2021-02-06 22:53:33 +08:00
|
|
|
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
|
|
|
|
/* Report the per-VM TDP MMU enable flag, kvm->arch.tdp_mmu_enabled. */
static inline bool is_tdp_mmu_enabled(struct kvm *kvm)
{
	return kvm->arch.tdp_mmu_enabled;
}
|
|
|
|
/* Report whether @sp is flagged as a TDP MMU page (sp->tdp_mmu_page). */
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp)
{
	return sp->tdp_mmu_page;
}
|
|
|
|
|
2021-06-18 07:19:47 +08:00
|
|
|
static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
|
2021-02-06 22:53:33 +08:00
|
|
|
{
|
|
|
|
struct kvm_mmu_page *sp;
|
2021-06-18 07:19:47 +08:00
|
|
|
hpa_t hpa = mmu->root_hpa;
|
2021-02-06 22:53:33 +08:00
|
|
|
|
|
|
|
if (WARN_ON(!VALID_PAGE(hpa)))
|
|
|
|
return false;
|
|
|
|
|
2021-06-22 15:24:54 +08:00
|
|
|
/*
|
|
|
|
* A NULL shadow page is legal when shadowing a non-paging guest with
|
|
|
|
* PAE paging, as the MMU will be direct with root_hpa pointing at the
|
|
|
|
* pae_root page, not a shadow page.
|
|
|
|
*/
|
2021-02-06 22:53:33 +08:00
|
|
|
sp = to_shadow_page(hpa);
|
2021-06-22 15:24:54 +08:00
|
|
|
return sp && is_tdp_mmu_page(sp) && sp->root_count;
|
2021-02-06 22:53:33 +08:00
|
|
|
}
|
2021-06-18 18:42:10 +08:00
|
|
|
#else
|
|
|
|
/*
 * Stub for builds without CONFIG_X86_64: there is no TDP MMU to set up,
 * so always report false.
 */
static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
	return false;
}
|
|
|
|
/* Stub for builds without CONFIG_X86_64: nothing to tear down. */
static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
{
}
|
|
|
|
/* Stub for builds without CONFIG_X86_64: the TDP MMU is never enabled. */
static inline bool is_tdp_mmu_enabled(struct kvm *kvm)
{
	return false;
}
|
|
|
|
/* Stub for builds without CONFIG_X86_64: no page is a TDP MMU page. */
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp)
{
	return false;
}
|
2021-06-18 07:19:47 +08:00
|
|
|
/* Stub for builds without CONFIG_X86_64: no MMU is backed by the TDP MMU. */
static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
{
	return false;
}
|
2021-06-18 18:42:10 +08:00
|
|
|
#endif
|
2021-02-06 22:53:33 +08:00
|
|
|
|
2020-10-15 02:26:43 +08:00
|
|
|
#endif /* __KVM_X86_MMU_TDP_MMU_H */
|