KVM: x86/mmu: Don't allow TDP MMU to yield when recovering NX pages
Prevent the TDP MMU from yielding when zapping a gfn range during NX page recovery. If a flush is pending from a previous invocation of the zapping helper, either in the TDP MMU or the legacy MMU, but the TDP MMU has not accumulated a flush for the current invocation, then yielding will release mmu_lock with stale TLB entries.

That being said, this isn't technically a bug fix in the current code, as the TDP MMU will never yield in this case. tdp_mmu_iter_cond_resched() will yield if and only if it has made forward progress, as defined by the current gfn vs. the last yielded (or starting) gfn. Because zapping a single shadow page is guaranteed to (a) find that page and (b) step sideways at the level of the shadow page, the TDP iter will break its loop before getting a chance to yield.

But that is all very, very subtle, and will break at the slightest sneeze, e.g. zapping while holding mmu_lock for read would break as the TDP MMU wouldn't be guaranteed to see the present shadow page, and thus could step sideways at a lower level.

Cc: Ben Gardon <bgardon@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210325200119.1359384-4-seanjc@google.com>
[Add lockdep assertion. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent 048f49809c
commit 33a3164161
3 changed files with 27 additions and 6 deletions
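To make the forward-progress rule from the commit message concrete, here is a minimal, self-contained C sketch of a cond-resched check gated on forward progress. The struct layout and names are illustrative assumptions, not the kernel's actual tdp_mmu_iter_cond_resched(); in the kernel, the flush-then-yield step corresponds to kvm_flush_remote_tlbs() followed by a cond_resched on mmu_lock.

/*
 * Illustrative sketch only -- not the kernel's tdp_mmu_iter_cond_resched().
 * The walk may yield only after forward progress, i.e. only once the next
 * gfn to visit differs from the gfn at which it last yielded (or started).
 */
#include <stdbool.h>
#include <stdint.h>

struct iter_sketch {
	uint64_t next_last_level_gfn;	/* next gfn the walk will visit */
	uint64_t yielded_gfn;		/* gfn at the last yield, or the start gfn */
};

/* Returns true if the walk yielded and the caller must restart the walk. */
static bool iter_cond_resched_sketch(struct iter_sketch *iter, bool flush)
{
	/* No forward progress since the last yield: never drop mmu_lock. */
	if (iter->next_last_level_gfn == iter->yielded_gfn)
		return false;

	/*
	 * A pending flush must be performed before the lock is dropped,
	 * otherwise other CPUs could run with stale TLB entries.
	 */
	if (flush) {
		/* flush remote TLBs here, then reschedule */
	}

	iter->yielded_gfn = iter->next_last_level_gfn;
	return true;
}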
arch/x86/kvm/mmu/mmu.c
@@ -5885,7 +5885,6 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 	unsigned int ratio;
 	LIST_HEAD(invalid_list);
 	bool flush = false;
-	gfn_t gfn_end;
 	ulong to_zap;
 
 	rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5907,8 +5906,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 				      lpage_disallowed_link);
 		WARN_ON_ONCE(!sp->lpage_disallowed);
 		if (is_tdp_mmu_page(sp)) {
-			gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
-			flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end);
+			flush = kvm_tdp_mmu_zap_sp(kvm, sp);
 		} else {
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 			WARN_ON_ONCE(sp->lpage_disallowed);
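For context on the mmu.c hunk above: the NX recovery loop carries a flush indication across iterations, and a flush may already be owed from a previous zap (TDP MMU or legacy MMU) by the time the next shadow page is zapped. The sketch below, using hypothetical helper names rather than the actual kvm_recover_nx_lpages() body, illustrates why the TDP MMU helper called here must not be allowed to yield while such a flush is pending.

/*
 * Simplified sketch with hypothetical names -- not kvm_recover_nx_lpages().
 * "flush" can already be true from an earlier iteration when the TDP MMU
 * helper runs; if that helper could yield, mmu_lock would be dropped before
 * the owed flush happened, leaving stale TLB entries visible.
 */
#include <stdbool.h>

struct sp;					/* stand-in for struct kvm_mmu_page */

bool is_tdp_sp(struct sp *sp);			/* hypothetical */
bool zap_tdp_sp_no_yield(struct sp *sp);	/* hypothetical, never yields */
bool zap_legacy_sp(struct sp *sp);		/* hypothetical */
void flush_remote_tlbs(void);			/* hypothetical */

static void recovery_loop_sketch(struct sp **pages, int nr)
{
	bool flush = false;

	for (int i = 0; i < nr; i++) {
		if (is_tdp_sp(pages[i]))
			/* must not yield: "flush" may be pending from before */
			flush = zap_tdp_sp_no_yield(pages[i]) || flush;
		else
			flush = zap_legacy_sp(pages[i]) || flush;
	}

	if (flush)
		flush_remote_tlbs();
}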
arch/x86/kvm/mmu/tdp_mmu.c
@@ -723,13 +723,14 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+				 bool can_yield)
 {
 	struct kvm_mmu_page *root;
 	bool flush = false;
 
 	for_each_tdp_mmu_root_yield_safe(kvm, root)
-		flush = zap_gfn_range(kvm, root, start, end, true, flush);
+		flush = zap_gfn_range(kvm, root, start, end, can_yield, flush);
 
 	return flush;
 }
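The new can_yield parameter is simply threaded down to the per-root zapper, which consults it before any potential reschedule point. A simplified, illustrative sketch of that propagation (not the actual zap_gfn_range() body):

/*
 * Illustrative only -- not the actual zap_gfn_range().  The point is that
 * can_yield gates every potential reschedule, so a caller that passes false
 * is guaranteed the lock is never dropped mid-zap.
 */
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t gfn_t;

static bool zap_range_sketch(gfn_t start, gfn_t end, bool can_yield, bool flush)
{
	for (gfn_t gfn = start; gfn < end; gfn++) {
		if (can_yield /* && forward progress && resched needed */) {
			/* flush if owed, then drop and reacquire the lock */
			flush = false;
		}
		/* ... zap the SPTE covering this gfn, possibly setting flush ... */
	}
	return flush;
}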
arch/x86/kvm/mmu/tdp_mmu.h
@@ -8,7 +8,29 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+				 bool can_yield);
+static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start,
+					     gfn_t end)
+{
+	return __kvm_tdp_mmu_zap_gfn_range(kvm, start, end, true);
+}
+static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	gfn_t end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+
+	/*
+	 * Don't allow yielding, as the caller may have a flush pending.  Note,
+	 * if mmu_lock is held for write, zapping will never yield in this case,
+	 * but explicitly disallow it for safety.  The TDP MMU does not yield
+	 * until it has made forward progress (steps sideways), and when zapping
+	 * a single shadow page that it's guaranteed to see (thus the mmu_lock
+	 * requirement), its "step sideways" will always step beyond the bounds
+	 * of the shadow page's gfn range and stop iterating before yielding.
+	 */
+	lockdep_assert_held_write(&kvm->mmu_lock);
+	return __kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, end, false);
+}
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
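As a usage note, both wrappers return whether the caller now owes a TLB flush. Below is a hedged sketch of the expected calling pattern; it assumes kernel context (the usual KVM MMU headers) and a hypothetical caller holding mmu_lock for write, and is not a function from this commit.

/*
 * Hypothetical caller sketch: accumulate the flush owed by each zap and
 * perform one remote TLB flush before releasing mmu_lock.  The range zap
 * may yield (can_yield == true) and, per the parent commit, flushes any
 * accumulated flush before doing so; the per-SP zap never yields, so an
 * already-owed "flush" can never be left pending across a dropped lock.
 */
static void zap_and_flush_sketch(struct kvm *kvm, gfn_t start, gfn_t end,
				 struct kvm_mmu_page *sp)
{
	bool flush;

	lockdep_assert_held_write(&kvm->mmu_lock);

	flush = kvm_tdp_mmu_zap_gfn_range(kvm, start, end);	/* may yield */
	flush |= kvm_tdp_mmu_zap_sp(kvm, sp);			/* never yields */

	if (flush)
		kvm_flush_remote_tlbs(kvm);
}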