KVM: x86/mmu: Return unique RET_PF_* values if the fault was fixed
Introduce RET_PF_FIXED and RET_PF_SPURIOUS to provide unique return values instead of overloading RET_PF_RETRY. In the short term, the unique values add clarity to the code and RET_PF_SPURIOUS will be used by set_spte() to avoid unnecessary work for spurious faults.

In the long term, TDX will use RET_PF_FIXED to deterministically map memory during pre-boot. The page fault flow may bail early for benign reasons, e.g. if the mmu_notifier fires for an unrelated address. With only RET_PF_RETRY, it's impossible for the caller to distinguish between "cool, page is mapped" and "darn, need to try again", and thus cannot handle benign cases like the mmu_notifier retry.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200923220425.18402-4-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 83a2ba4cb2
commit c4371c2a68
2 changed files with 29 additions and 30 deletions
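As an illustration of what callers gain, here is a minimal userspace C sketch (not kernel code): the enum mirrors the values added in mmu.c below, while pf_outcome() and its strings are hypothetical helpers invented for this example.

#include <stdio.h>

enum {
        RET_PF_RETRY = 0,       /* let the CPU fault again on the address */
        RET_PF_EMULATE,         /* MMIO fault, emulate the instruction */
        RET_PF_INVALID,         /* SPTE invalid, take the real page fault path */
        RET_PF_FIXED,           /* the faulting entry has been fixed */
        RET_PF_SPURIOUS,        /* already fixed, e.g. by another vCPU */
};

/* Hypothetical helper: turn a fault result into a human-readable outcome. */
static const char *pf_outcome(int r)
{
        switch (r) {
        case RET_PF_FIXED:    return "page is mapped (this fault fixed it)";
        case RET_PF_SPURIOUS: return "page is mapped (already fixed elsewhere)";
        case RET_PF_RETRY:    return "not handled, let the CPU fault again";
        case RET_PF_EMULATE:  return "emulate the access";
        default:              return "invalid SPTE, take the slow path";
        }
}

int main(void)
{
        /* With only RET_PF_RETRY, the first three cases were indistinguishable. */
        printf("%s\n", pf_outcome(RET_PF_FIXED));
        printf("%s\n", pf_outcome(RET_PF_SPURIOUS));
        printf("%s\n", pf_outcome(RET_PF_RETRY));
        return 0;
}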
arch/x86/kvm/mmu/mmu.c

@@ -198,17 +198,20 @@ module_param(dbg, bool, 0644);
 #define PTE_LIST_EXT 3
 
 /*
- * Return values of handle_mmio_page_fault and mmu.page_fault:
+ * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
+ *
  * RET_PF_RETRY: let CPU fault again on the address.
  * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
- *
- * For handle_mmio_page_fault only:
  * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
+ * RET_PF_FIXED: The faulting entry has been fixed.
+ * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
  */
 enum {
        RET_PF_RETRY = 0,
-       RET_PF_EMULATE = 1,
-       RET_PF_INVALID = 2,
+       RET_PF_EMULATE,
+       RET_PF_INVALID,
+       RET_PF_FIXED,
+       RET_PF_SPURIOUS,
 };
 
 struct pte_list_desc {
@@ -3083,7 +3086,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        int was_rmapped = 0;
        int rmap_count;
        int set_spte_ret;
-       int ret = RET_PF_RETRY;
+       int ret = RET_PF_FIXED;
        bool flush = false;
 
        pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
@@ -3491,21 +3494,19 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
 }
 
 /*
- * Return value:
- * - true: let the vcpu to access on the same address again.
- * - false: let the real page fault path to fix it.
+ * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS.
  */
-static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-                           u32 error_code)
+static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+                          u32 error_code)
 {
        struct kvm_shadow_walk_iterator iterator;
        struct kvm_mmu_page *sp;
-       bool fault_handled = false;
+       int ret = RET_PF_INVALID;
        u64 spte = 0ull;
        uint retry_count = 0;
 
        if (!page_fault_can_be_fast(error_code))
-               return false;
+               return ret;
 
        walk_shadow_page_lockless_begin(vcpu);
 
@@ -3531,7 +3532,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                 * they are always ACC_ALL.
                 */
                if (is_access_allowed(error_code, spte)) {
-                       fault_handled = true;
+                       ret = RET_PF_SPURIOUS;
                        break;
                }
 
@@ -3574,11 +3575,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                 * since the gfn is not stable for indirect shadow page. See
                 * Documentation/virt/kvm/locking.rst to get more detail.
                 */
-               fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
-                                                       iterator.sptep, spte,
-                                                       new_spte);
-               if (fault_handled)
+               if (fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte,
+                                           new_spte)) {
+                       ret = RET_PF_FIXED;
                        break;
+               }
 
                if (++retry_count > 4) {
                        printk_once(KERN_WARNING
@@ -3589,10 +3590,10 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        } while (true);
 
        trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
-                             spte, fault_handled);
+                             spte, ret);
        walk_shadow_page_lockless_end(vcpu);
 
-       return fault_handled;
+       return ret;
 }
 
 static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -4104,8 +4105,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
        if (page_fault_handle_page_track(vcpu, error_code, gfn))
                return RET_PF_EMULATE;
 
-       if (fast_page_fault(vcpu, gpa, error_code))
-               return RET_PF_RETRY;
+       r = fast_page_fault(vcpu, gpa, error_code);
+       if (r != RET_PF_INVALID)
+               return r;
 
        r = mmu_topup_memory_caches(vcpu, false);
        if (r)
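The direct_page_fault() hunk above is where the extra information pays off: RET_PF_INVALID becomes the single "not handled by the fast path" signal, and every other value is final. A hedged userspace sketch of the old versus new caller logic (simplified; slow_path_stub() is a placeholder invented here, not a kernel function):

#include <stdio.h>

enum { RET_PF_RETRY, RET_PF_EMULATE, RET_PF_INVALID, RET_PF_FIXED, RET_PF_SPURIOUS };

/* Placeholder for "continue into the real page fault path". */
static int slow_path_stub(void)
{
        return RET_PF_RETRY;
}

/* Before: a handled fast fault was reported as RET_PF_RETRY, losing the outcome. */
static int direct_fault_before(int fast_handled)
{
        if (fast_handled)
                return RET_PF_RETRY;
        return slow_path_stub();
}

/* After: any result other than RET_PF_INVALID is final and propagates as-is. */
static int direct_fault_after(int fast_ret)
{
        if (fast_ret != RET_PF_INVALID)
                return fast_ret;        /* RET_PF_FIXED or RET_PF_SPURIOUS */
        return slow_path_stub();
}

int main(void)
{
        /* The same successful fast-path fix: indistinct before, precise after. */
        printf("before: %d, after: %d\n",
               direct_fault_before(1), direct_fault_after(RET_PF_FIXED));
        return 0;
}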
arch/x86/kvm/mmu/mmutrace.h

@@ -244,14 +244,11 @@ TRACE_EVENT(
                  __entry->access)
 );
 
-#define __spte_satisfied(__spte) \
-       (__entry->retry && is_writable_pte(__entry->__spte))
-
 TRACE_EVENT(
        fast_page_fault,
        TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
-                u64 *sptep, u64 old_spte, bool retry),
-       TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry),
+                u64 *sptep, u64 old_spte, int ret),
+       TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, ret),
 
        TP_STRUCT__entry(
                __field(int, vcpu_id)
@@ -260,7 +257,7 @@ TRACE_EVENT(
                __field(u64 *, sptep)
                __field(u64, old_spte)
                __field(u64, new_spte)
-               __field(bool, retry)
+               __field(int, ret)
        ),
 
        TP_fast_assign(
@@ -270,7 +267,7 @@ TRACE_EVENT(
                __entry->sptep = sptep;
                __entry->old_spte = old_spte;
                __entry->new_spte = *sptep;
-               __entry->retry = retry;
+               __entry->ret = ret;
        ),
 
        TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx"
@@ -278,7 +275,7 @@ TRACE_EVENT(
                  __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|",
                  kvm_mmu_trace_pferr_flags), __entry->sptep,
                  __entry->old_spte, __entry->new_spte,
-                 __spte_satisfied(old_spte), __spte_satisfied(new_spte)
+                 __entry->ret == RET_PF_SPURIOUS, __entry->ret == RET_PF_FIXED
        )
 );
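With the tracepoint change, the last two fields of the fast_page_fault trace line report the exact resolution instead of being inferred from SPTE writability. A minimal userspace sketch of the before/after derivation (is_writable_pte() here is a simplified stand-in that assumes bit 1 is the writable bit; the values are made up for illustration):

#include <stdbool.h>
#include <stdio.h>

enum { RET_PF_RETRY, RET_PF_EMULATE, RET_PF_INVALID, RET_PF_FIXED, RET_PF_SPURIOUS };

/* Simplified stand-in: treat bit 1 as the writable bit of an SPTE. */
static bool is_writable_pte(unsigned long long spte)
{
        return spte & 2;
}

int main(void)
{
        unsigned long long old_spte = 0x0, new_spte = 0x7;
        bool retry = true;              /* old bool result of the fast path */
        int ret = RET_PF_FIXED;         /* new int result of the fast path */

        /* Before: __spte_satisfied() inferred both fields from writability. */
        printf("old %d new %d\n",
               retry && is_writable_pte(old_spte),
               retry && is_writable_pte(new_spte));

        /* After: the trace records the exact resolution directly. */
        printf("spurious %d fixed %d\n",
               ret == RET_PF_SPURIOUS, ret == RET_PF_FIXED);
        return 0;
}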