KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot in HPT page fault handler
This makes the same changes in the page fault handler for HPT guests that commits 31c8b0d069
("KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot() in page fault handler", 2018-03-01), 71d29f43b6
("KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size", 2018-09-11) and 6579804c43
("KVM: PPC: Book3S HV: Avoid crash from THP collapse during radix page fault", 2018-10-04) made for the page fault handler for radix guests. In summary, where we used to call get_user_pages_fast() and then do special handling for VM_PFNMAP vmas, we now call __get_user_pages_fast() and then __gfn_to_pfn_memslot() if that fails, followed by reading the Linux PTE to get the host PFN, host page size and mapping attributes. This also brings in the change from SetPageDirty() to set_page_dirty_lock() which was done for the radix page fault handler in commit c3856aeb29
("KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler", 2018-02-23).

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
parent
1c482452d5
commit
cd758a9b57
1 changed files with 57 additions and 62 deletions
|
@ -485,18 +485,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||||
__be64 *hptep;
|
__be64 *hptep;
|
||||||
unsigned long mmu_seq, psize, pte_size;
|
unsigned long mmu_seq, psize, pte_size;
|
||||||
unsigned long gpa_base, gfn_base;
|
unsigned long gpa_base, gfn_base;
|
||||||
unsigned long gpa, gfn, hva, pfn;
|
unsigned long gpa, gfn, hva, pfn, hpa;
|
||||||
struct kvm_memory_slot *memslot;
|
struct kvm_memory_slot *memslot;
|
||||||
unsigned long *rmap;
|
unsigned long *rmap;
|
||||||
struct revmap_entry *rev;
|
struct revmap_entry *rev;
|
||||||
struct page *page, *pages[1];
|
struct page *page;
|
||||||
long index, ret, npages;
|
long index, ret;
|
||||||
bool is_ci;
|
bool is_ci;
|
||||||
unsigned int writing, write_ok;
|
bool writing, write_ok;
|
||||||
struct vm_area_struct *vma;
|
unsigned int shift;
|
||||||
unsigned long rcbits;
|
unsigned long rcbits;
|
||||||
long mmio_update;
|
long mmio_update;
|
||||||
struct mm_struct *mm;
|
pte_t pte, *ptep;
|
||||||
|
|
||||||
if (kvm_is_radix(kvm))
|
if (kvm_is_radix(kvm))
|
||||||
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
|
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
|
||||||
|
@ -570,59 +570,62 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||||
smp_rmb();
|
smp_rmb();
|
||||||
|
|
||||||
ret = -EFAULT;
|
ret = -EFAULT;
|
||||||
is_ci = false;
|
|
||||||
pfn = 0;
|
|
||||||
page = NULL;
|
page = NULL;
|
||||||
mm = kvm->mm;
|
|
||||||
pte_size = PAGE_SIZE;
|
|
||||||
writing = (dsisr & DSISR_ISSTORE) != 0;
|
writing = (dsisr & DSISR_ISSTORE) != 0;
|
||||||
/* If writing != 0, then the HPTE must allow writing, if we get here */
|
/* If writing != 0, then the HPTE must allow writing, if we get here */
|
||||||
write_ok = writing;
|
write_ok = writing;
|
||||||
hva = gfn_to_hva_memslot(memslot, gfn);
|
hva = gfn_to_hva_memslot(memslot, gfn);
|
||||||
npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
|
|
||||||
if (npages < 1) {
|
|
||||||
/* Check if it's an I/O mapping */
|
|
||||||
down_read(&mm->mmap_sem);
|
|
||||||
vma = find_vma(mm, hva);
|
|
||||||
if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
|
|
||||||
(vma->vm_flags & VM_PFNMAP)) {
|
|
||||||
pfn = vma->vm_pgoff +
|
|
||||||
((hva - vma->vm_start) >> PAGE_SHIFT);
|
|
||||||
pte_size = psize;
|
|
||||||
is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
|
|
||||||
write_ok = vma->vm_flags & VM_WRITE;
|
|
||||||
}
|
|
||||||
up_read(&mm->mmap_sem);
|
|
||||||
if (!pfn)
|
|
||||||
goto out_put;
|
|
||||||
} else {
|
|
||||||
page = pages[0];
|
|
||||||
pfn = page_to_pfn(page);
|
|
||||||
if (PageHuge(page)) {
|
|
||||||
page = compound_head(page);
|
|
||||||
pte_size <<= compound_order(page);
|
|
||||||
}
|
|
||||||
/* if the guest wants write access, see if that is OK */
|
|
||||||
if (!writing && hpte_is_writable(r)) {
|
|
||||||
pte_t *ptep, pte;
|
|
||||||
unsigned long flags;
|
|
||||||
/*
|
/*
|
||||||
* We need to protect against page table destruction
|
* Do a fast check first, since __gfn_to_pfn_memslot doesn't
|
||||||
* hugepage split and collapse.
|
* do it with !atomic && !async, which is how we call it.
|
||||||
|
* We always ask for write permission since the common case
|
||||||
|
* is that the page is writable.
|
||||||
*/
|
*/
|
||||||
local_irq_save(flags);
|
if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
|
||||||
ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
|
write_ok = true;
|
||||||
if (ptep) {
|
} else {
|
||||||
pte = kvmppc_read_update_linux_pte(ptep, 1);
|
/* Call KVM generic code to do the slow-path check */
|
||||||
if (__pte_write(pte))
|
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
|
||||||
write_ok = 1;
|
writing, &write_ok);
|
||||||
}
|
if (is_error_noslot_pfn(pfn))
|
||||||
local_irq_restore(flags);
|
return -EFAULT;
|
||||||
|
page = NULL;
|
||||||
|
if (pfn_valid(pfn)) {
|
||||||
|
page = pfn_to_page(pfn);
|
||||||
|
if (PageReserved(page))
|
||||||
|
page = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read the PTE from the process' radix tree and use that
|
||||||
|
* so we get the shift and attribute bits.
|
||||||
|
*/
|
||||||
|
local_irq_disable();
|
||||||
|
ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
|
||||||
|
/*
|
||||||
|
* If the PTE disappeared temporarily due to a THP
|
||||||
|
* collapse, just return and let the guest try again.
|
||||||
|
*/
|
||||||
|
if (!ptep) {
|
||||||
|
local_irq_enable();
|
||||||
|
if (page)
|
||||||
|
put_page(page);
|
||||||
|
return RESUME_GUEST;
|
||||||
|
}
|
||||||
|
pte = *ptep;
|
||||||
|
local_irq_enable();
|
||||||
|
hpa = pte_pfn(pte) << PAGE_SHIFT;
|
||||||
|
pte_size = PAGE_SIZE;
|
||||||
|
if (shift)
|
||||||
|
pte_size = 1ul << shift;
|
||||||
|
is_ci = pte_ci(pte);
|
||||||
|
|
||||||
if (psize > pte_size)
|
if (psize > pte_size)
|
||||||
goto out_put;
|
goto out_put;
|
||||||
|
if (pte_size > psize)
|
||||||
|
hpa |= hva & (pte_size - psize);
|
||||||
|
|
||||||
/* Check WIMG vs. the actual page we're accessing */
|
/* Check WIMG vs. the actual page we're accessing */
|
||||||
if (!hpte_cache_flags_ok(r, is_ci)) {
|
if (!hpte_cache_flags_ok(r, is_ci)) {
|
||||||
|
@ -636,14 +639,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set the HPTE to point to pfn.
|
* Set the HPTE to point to hpa.
|
||||||
* Since the pfn is at PAGE_SIZE granularity, make sure we
|
* Since the hpa is at PAGE_SIZE granularity, make sure we
|
||||||
* don't mask out lower-order bits if psize < PAGE_SIZE.
|
* don't mask out lower-order bits if psize < PAGE_SIZE.
|
||||||
*/
|
*/
|
||||||
if (psize < PAGE_SIZE)
|
if (psize < PAGE_SIZE)
|
||||||
psize = PAGE_SIZE;
|
psize = PAGE_SIZE;
|
||||||
r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
|
r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
|
||||||
((pfn << PAGE_SHIFT) & ~(psize - 1));
|
|
||||||
if (hpte_is_writable(r) && !write_ok)
|
if (hpte_is_writable(r) && !write_ok)
|
||||||
r = hpte_make_readonly(r);
|
r = hpte_make_readonly(r);
|
||||||
ret = RESUME_GUEST;
|
ret = RESUME_GUEST;
|
||||||
|
@ -708,20 +710,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||||
asm volatile("ptesync" : : : "memory");
|
asm volatile("ptesync" : : : "memory");
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
if (page && hpte_is_writable(r))
|
if (page && hpte_is_writable(r))
|
||||||
SetPageDirty(page);
|
set_page_dirty_lock(page);
|
||||||
|
|
||||||
out_put:
|
out_put:
|
||||||
trace_kvm_page_fault_exit(vcpu, hpte, ret);
|
trace_kvm_page_fault_exit(vcpu, hpte, ret);
|
||||||
|
|
||||||
if (page) {
|
if (page)
|
||||||
/*
|
put_page(page);
|
||||||
* We drop pages[0] here, not page because page might
|
|
||||||
* have been set to the head page of a compound, but
|
|
||||||
* we have to drop the reference on the correct tail
|
|
||||||
* page to match the get inside gup()
|
|
||||||
*/
|
|
||||||
put_page(pages[0]);
|
|
||||||
}
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
|
|
Loading…
Add table
Reference in a new issue