mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-03 12:24:45 +08:00
7cca2d0b7e
In order to enable concurrent modifications to the paging structures in
the TDP MMU, threads must be able to safely remove pages of page table
memory while other threads are traversing the same memory. To ensure
threads do not access PT memory after it is freed, protect PT memory
with RCU.
Protecting concurrent accesses to page table memory from use-after-free
bugs could also have been accomplished using
walk_shadow_page_lockless_begin/end() and READING_SHADOW_PAGE_TABLES,
coupling with the barriers in a TLB flush. The use of RCU for this case
has several distinct advantages over that approach.
1. Disabling interrupts for long running operations is not desirable.
Future commits will allow operations besides page faults to operate
without the exclusive protection of the MMU lock and those operations
are too long to disable interrupts for their duration.
2. The use of RCU here avoids long blocking / spinning operations in
performance critical paths. By freeing memory with an asynchronous
RCU API we avoid the longer wait times TLB flushes experience when
overlapping with a thread in walk_shadow_page_lockless_begin/end().
3. RCU provides a separation of concerns when removing memory from the
paging structure. Because the RCU callback to free memory can be
scheduled immediately after a TLB flush, there's no need for the
thread to manually free a queue of pages later, as commit_zap_pages
does.
Fixes: 95fb5b0258
("kvm: x86/mmu: Support MMIO in the TDP MMU")
Reviewed-by: Peter Feiner <pfeiner@google.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210202185734.1680553-18-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
167 lines
4.7 KiB
C
167 lines
4.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include "mmu_internal.h"
|
|
#include "tdp_iter.h"
|
|
#include "spte.h"
|
|
|
|
/*
|
|
* Recalculates the pointer to the SPTE for the current GFN and level and
|
|
* reread the SPTE.
|
|
*/
|
|
static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
|
|
{
|
|
iter->sptep = iter->pt_path[iter->level - 1] +
|
|
SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
|
|
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
|
|
}
|
|
|
|
/*
 * Round @gfn down to a multiple of KVM_PAGES_PER_HPAGE(@level) by clearing
 * its low bits (this relies on that value being a power of two).
 */
static gfn_t round_gfn_for_level(gfn_t gfn, int level)
{
	/*
	 * ~(x - 1) is the two's complement negation of x; the mask is computed
	 * in the macro's own type before being applied to gfn.
	 */
	const gfn_t level_mask = ~(KVM_PAGES_PER_HPAGE(level) - 1);

	return gfn & level_mask;
}
|
|
|
|
/*
|
|
* Sets a TDP iterator to walk a pre-order traversal of the paging structure
|
|
* rooted at root_pt, starting with the walk to translate next_last_level_gfn.
|
|
*/
|
|
void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
|
|
int min_level, gfn_t next_last_level_gfn)
|
|
{
|
|
WARN_ON(root_level < 1);
|
|
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
|
|
|
|
iter->next_last_level_gfn = next_last_level_gfn;
|
|
iter->yielded_gfn = iter->next_last_level_gfn;
|
|
iter->root_level = root_level;
|
|
iter->min_level = min_level;
|
|
iter->level = root_level;
|
|
iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
|
|
|
|
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
|
|
tdp_iter_refresh_sptep(iter);
|
|
|
|
iter->valid = true;
|
|
}
|
|
|
|
/*
|
|
* Given an SPTE and its level, returns a pointer containing the host virtual
|
|
* address of the child page table referenced by the SPTE. Returns null if
|
|
* there is no such entry.
|
|
*/
|
|
tdp_ptep_t spte_to_child_pt(u64 spte, int level)
|
|
{
|
|
/*
|
|
* There's no child entry if this entry isn't present or is a
|
|
* last-level entry.
|
|
*/
|
|
if (!is_shadow_present_pte(spte) || is_last_spte(spte, level))
|
|
return NULL;
|
|
|
|
return (tdp_ptep_t)__va(spte_to_pfn(spte) << PAGE_SHIFT);
|
|
}
|
|
|
|
/*
|
|
* Steps down one level in the paging structure towards the goal GFN. Returns
|
|
* true if the iterator was able to step down a level, false otherwise.
|
|
*/
|
|
static bool try_step_down(struct tdp_iter *iter)
|
|
{
|
|
tdp_ptep_t child_pt;
|
|
|
|
if (iter->level == iter->min_level)
|
|
return false;
|
|
|
|
/*
|
|
* Reread the SPTE before stepping down to avoid traversing into page
|
|
* tables that are no longer linked from this entry.
|
|
*/
|
|
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
|
|
|
|
child_pt = spte_to_child_pt(iter->old_spte, iter->level);
|
|
if (!child_pt)
|
|
return false;
|
|
|
|
iter->level--;
|
|
iter->pt_path[iter->level - 1] = child_pt;
|
|
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
|
|
tdp_iter_refresh_sptep(iter);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Steps to the next entry in the current page table, at the current page table
|
|
* level. The next entry could point to a page backing guest memory or another
|
|
* page table, or it could be non-present. Returns true if the iterator was
|
|
* able to step to the next entry in the page table, false if the iterator was
|
|
* already at the end of the current page table.
|
|
*/
|
|
static bool try_step_side(struct tdp_iter *iter)
|
|
{
|
|
/*
|
|
* Check if the iterator is already at the end of the current page
|
|
* table.
|
|
*/
|
|
if (SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level) ==
|
|
(PT64_ENT_PER_PAGE - 1))
|
|
return false;
|
|
|
|
iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
|
|
iter->next_last_level_gfn = iter->gfn;
|
|
iter->sptep++;
|
|
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Tries to traverse back up a level in the paging structure so that the walk
|
|
* can continue from the next entry in the parent page table. Returns true on a
|
|
* successful step up, false if already in the root page.
|
|
*/
|
|
static bool try_step_up(struct tdp_iter *iter)
|
|
{
|
|
if (iter->level == iter->root_level)
|
|
return false;
|
|
|
|
iter->level++;
|
|
iter->gfn = round_gfn_for_level(iter->gfn, iter->level);
|
|
tdp_iter_refresh_sptep(iter);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Step to the next SPTE in a pre-order traversal of the paging structure.
|
|
* To get to the next SPTE, the iterator either steps down towards the goal
|
|
* GFN, if at a present, non-last-level SPTE, or over to a SPTE mapping a
|
|
* highter GFN.
|
|
*
|
|
* The basic algorithm is as follows:
|
|
* 1. If the current SPTE is a non-last-level SPTE, step down into the page
|
|
* table it points to.
|
|
* 2. If the iterator cannot step down, it will try to step to the next SPTE
|
|
* in the current page of the paging structure.
|
|
* 3. If the iterator cannot step to the next entry in the current page, it will
|
|
* try to step up to the parent paging structure page. In this case, that
|
|
* SPTE will have already been visited, and so the iterator must also step
|
|
* to the side again.
|
|
*/
|
|
void tdp_iter_next(struct tdp_iter *iter)
|
|
{
|
|
if (try_step_down(iter))
|
|
return;
|
|
|
|
do {
|
|
if (try_step_side(iter))
|
|
return;
|
|
} while (try_step_up(iter));
|
|
iter->valid = false;
|
|
}
|
|
|
|
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
|
|
{
|
|
return iter->pt_path[iter->root_level - 1];
|
|
}
|
|
|