Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Fix TCR_EL2 configuration to not use the ASID in TTBR1_EL2 and not
     mess up T1SZ/PS by using the HCR_EL2.E2H==0 layout.

   - Bring back the VMID allocation to the vcpu_load phase, ensuring
     that we only set up VTTBR_EL2 once on VHE. This cures an ugly race
     that would lead to running with an unallocated VMID.

  RISC-V:

   - Fix hart status check in SBI HSM extension

   - Fix hart suspend_type usage in SBI HSM extension

   - Fix error returned by SBI IPI and TIME extensions for unsupported
     function IDs

   - Fix suspend_type usage in SBI SUSP extension

   - Remove unnecessary vcpu kick after injecting interrupt via IMSIC
     guest file

  x86:

   - Fix an nVMX bug where KVM fails to detect that, after nested
     VM-Exit, L1 has a pending IRQ (or NMI).

   - To avoid freeing the PIC while vCPUs are still around, which would
     cause a NULL pointer access with the previous patch, destroy vCPUs
     before any VM-level destruction.

   - Handle failures to create vhost_tasks"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: retry nx_huge_page_recovery_thread creation
  vhost: return task creation error instead of NULL
  KVM: nVMX: Process events on nested VM-Exit if injectable IRQ or NMI is pending
  KVM: x86: Free vCPUs before freeing VM state
  riscv: KVM: Remove unnecessary vcpu kick
  KVM: arm64: Ensure a VMID is allocated before programming VTTBR_EL2
  KVM: arm64: Fix tcr_el2 initialisation in hVHE mode
  riscv: KVM: Fix SBI sleep_type use
  riscv: KVM: Fix SBI TIME error generation
  riscv: KVM: Fix SBI IPI error generation
  riscv: KVM: Fix hart suspend_type use
  riscv: KVM: Fix hart suspend status check
Commit 209cd6f2ca by Linus Torvalds, 2025-03-01 08:48:53 -08:00
14 changed files with 94 additions and 62 deletions


@@ -119,7 +119,7 @@
 #define TCR_EL2_IRGN0_MASK	TCR_IRGN0_MASK
 #define TCR_EL2_T0SZ_MASK	0x3f
 #define TCR_EL2_MASK	(TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
-			 TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
+			 TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK)
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_DS	TCR_EL2_DS


@@ -1259,7 +1259,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
 extern unsigned int __ro_after_init kvm_arm_vmid_bits;
 int __init kvm_arm_vmid_alloc_init(void);
 void __init kvm_arm_vmid_alloc_free(void);
-bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
 void kvm_arm_vmid_clear_active(void);
 
 static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)


@@ -559,6 +559,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	mmu = vcpu->arch.hw_mmu;
 	last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
+	/*
+	 * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2,
+	 * which happens eagerly in VHE.
+	 *
+	 * Also, the VMID allocator only preserves VMIDs that are active at the
+	 * time of rollover, so KVM might need to grab a new VMID for the MMU if
+	 * this is called from kvm_sched_in().
+	 */
+	kvm_arm_vmid_update(&mmu->vmid);
+
 	/*
 	 * We guarantee that both TLBs and I-cache are private to each
 	 * vcpu. If detecting that a vcpu from the same VM has
@@ -1138,18 +1148,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		preempt_disable();
 
-		/*
-		 * The VMID allocator only tracks active VMIDs per
-		 * physical CPU, and therefore the VMID allocated may not be
-		 * preserved on VMID roll-over if the task was preempted,
-		 * making a thread's VMID inactive. So we need to call
-		 * kvm_arm_vmid_update() in non-premptible context.
-		 */
-		if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) &&
-		    has_vhe())
-			__load_stage2(vcpu->arch.hw_mmu,
-				      vcpu->arch.hw_mmu->arch);
-
 		kvm_pmu_flush_hwstate(vcpu);
 
 		local_irq_disable();
@@ -1980,7 +1978,7 @@ static int kvm_init_vector_slots(void)
 static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
 {
 	struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
-	unsigned long tcr, ips;
+	unsigned long tcr;
 
 	/*
 	 * Calculate the raw per-cpu offset without a translation from the
@@ -1994,19 +1992,18 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
 	params->mair_el2 = read_sysreg(mair_el1);
 
 	tcr = read_sysreg(tcr_el1);
-	ips = FIELD_GET(TCR_IPS_MASK, tcr);
 	if (cpus_have_final_cap(ARM64_KVM_HVHE)) {
+		tcr &= ~(TCR_HD | TCR_HA | TCR_A1 | TCR_T0SZ_MASK);
 		tcr |= TCR_EPD1_MASK;
 	} else {
+		unsigned long ips = FIELD_GET(TCR_IPS_MASK, tcr);
 		tcr &= TCR_EL2_MASK;
-		tcr |= TCR_EL2_RES1;
+		tcr |= TCR_EL2_RES1 | FIELD_PREP(TCR_EL2_PS_MASK, ips);
+		if (lpa2_is_enabled())
+			tcr |= TCR_EL2_DS;
 	}
 	tcr &= ~TCR_T0SZ_MASK;
 	tcr |= TCR_T0SZ(hyp_va_bits);
-	tcr &= ~TCR_EL2_PS_MASK;
-	tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips);
-	if (lpa2_is_enabled())
-		tcr |= TCR_EL2_DS;
 	params->tcr_el2 = tcr;
 
 	params->pgd_pa = kvm_mmu_get_httbr();


@@ -135,11 +135,10 @@ void kvm_arm_vmid_clear_active(void)
 	atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID);
 }
 
-bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
 {
 	unsigned long flags;
 	u64 vmid, old_active_vmid;
-	bool updated = false;
 
 	vmid = atomic64_read(&kvm_vmid->id);
@@ -157,21 +156,17 @@ bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
 	if (old_active_vmid != 0 && vmid_gen_match(vmid) &&
 	    0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
 					  old_active_vmid, vmid))
-		return false;
+		return;
 
 	raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
 
 	/* Check that our VMID belongs to the current generation. */
 	vmid = atomic64_read(&kvm_vmid->id);
-	if (!vmid_gen_match(vmid)) {
+	if (!vmid_gen_match(vmid))
 		vmid = new_vmid(kvm_vmid);
-		updated = true;
-	}
 
 	atomic64_set(this_cpu_ptr(&active_vmids), vmid);
 	raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
-
-	return updated;
 }
 
 /*


@@ -974,7 +974,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
 	if (imsic->vsfile_cpu >= 0) {
 		writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE);
-		kvm_vcpu_kick(vcpu);
 	} else {
 		eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)];
 		set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip);


@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
+#include <linux/wordpart.h>
 #include <asm/sbi.h>
 #include <asm/kvm_vcpu_sbi.h>
@@ -79,12 +80,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
 	target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
 	if (!target_vcpu)
 		return SBI_ERR_INVALID_PARAM;
-	if (!kvm_riscv_vcpu_stopped(target_vcpu))
-		return SBI_HSM_STATE_STARTED;
-	else if (vcpu->stat.generic.blocking)
+	if (kvm_riscv_vcpu_stopped(target_vcpu))
+		return SBI_HSM_STATE_STOPPED;
+	else if (target_vcpu->stat.generic.blocking)
 		return SBI_HSM_STATE_SUSPENDED;
 	else
-		return SBI_HSM_STATE_STOPPED;
+		return SBI_HSM_STATE_STARTED;
 }
 
 static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
@@ -109,7 +110,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		}
 		return 0;
 	case SBI_EXT_HSM_HART_SUSPEND:
-		switch (cp->a0) {
+		switch (lower_32_bits(cp->a0)) {
 		case SBI_HSM_SUSPEND_RET_DEFAULT:
 			kvm_riscv_vcpu_wfi(vcpu);
 			break;


@@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	u64 next_cycle;
 
 	if (cp->a6 != SBI_EXT_TIME_SET_TIMER) {
-		retdata->err_val = SBI_ERR_INVALID_PARAM;
+		retdata->err_val = SBI_ERR_NOT_SUPPORTED;
 		return 0;
 	}
@@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
 	unsigned long hmask = cp->a0;
 	unsigned long hbase = cp->a1;
+	unsigned long hart_bit = 0, sentmask = 0;
 
 	if (cp->a6 != SBI_EXT_IPI_SEND_IPI) {
-		retdata->err_val = SBI_ERR_INVALID_PARAM;
+		retdata->err_val = SBI_ERR_NOT_SUPPORTED;
 		return 0;
 	}
@@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		if (hbase != -1UL) {
 			if (tmp->vcpu_id < hbase)
 				continue;
-			if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+			hart_bit = tmp->vcpu_id - hbase;
+			if (hart_bit >= __riscv_xlen)
+				goto done;
+			if (!(hmask & (1UL << hart_bit)))
 				continue;
 		}
 		ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
 		if (ret < 0)
 			break;
+		sentmask |= 1UL << hart_bit;
 		kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD);
 	}
 
+done:
+	if (hbase != -1UL && (hmask ^ sentmask))
+		retdata->err_val = SBI_ERR_INVALID_PARAM;
+
 	return ret;
 }
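
A minimal, self-contained sketch of the hart-addressing rule behind the new sentmask bookkeeping above (not part of the patch; XLEN and hart_selected() are hypothetical stand-ins for __riscv_xlen and the in-loop checks): bit N of hmask selects hart hbase + N, a bit at or beyond xlen is not addressable, and any requested hart that did not actually receive an IPI now makes the call fail with SBI_ERR_INVALID_PARAM via the done: check.

#include <stdbool.h>
#include <stdio.h>

#define XLEN 64	/* stand-in for __riscv_xlen */

/* Return true if the (hbase, hmask) pair from the guest selects vcpu_id. */
static bool hart_selected(unsigned long hbase, unsigned long hmask,
                          unsigned long vcpu_id)
{
        unsigned long hart_bit;

        if (hbase == -1UL)      /* hart_mask_base == -1 means "all harts" */
                return true;
        if (vcpu_id < hbase)
                return false;
        hart_bit = vcpu_id - hbase;
        if (hart_bit >= XLEN)   /* bit not representable in an xlen-wide mask */
                return false;
        return hmask & (1UL << hart_bit);
}

int main(void)
{
        /* hbase = 2, hmask = 0xa (0b1010) selects harts 3 and 5. */
        for (unsigned long id = 0; id < 8; id++)
                printf("hart %lu: %s\n", id,
                       hart_selected(2, 0xa, id) ? "send IPI" : "skip");
        return 0;
}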


@@ -4,6 +4,7 @@
  */
 #include <linux/kvm_host.h>
+#include <linux/wordpart.h>
 #include <asm/kvm_vcpu_sbi.h>
 #include <asm/sbi.h>
@@ -19,7 +20,7 @@ static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	switch (funcid) {
 	case SBI_EXT_SUSP_SYSTEM_SUSPEND:
-		if (cp->a0 != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) {
+		if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) {
 			retdata->err_val = SBI_ERR_INVALID_PARAM;
 			return 0;
 		}


@@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
 	return true;
 }
 
-static void kvm_mmu_start_lpage_recovery(struct once *once)
+static int kvm_mmu_start_lpage_recovery(struct once *once)
 {
 	struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
 	struct kvm *kvm = container_of(ka, struct kvm, arch);
@@ -7471,13 +7471,14 @@ static void kvm_mmu_start_lpage_recovery(struct once *once)
 				      kvm_nx_huge_page_recovery_worker_kill,
 				      kvm, "kvm-nx-lpage-recovery");
 
-	if (!nx_thread)
-		return;
+	if (IS_ERR(nx_thread))
+		return PTR_ERR(nx_thread);
 
 	vhost_task_start(nx_thread);
 
 	/* Make the task visible only once it is fully started. */
 	WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread);
+	return 0;
 }
 
 int kvm_mmu_post_init_vm(struct kvm *kvm)
@@ -7485,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
 	if (nx_hugepage_mitigation_hard_disabled)
 		return 0;
 
-	call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
-	if (!kvm->arch.nx_huge_page_recovery_thread)
-		return -ENOMEM;
-	return 0;
+	return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
 }
 
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm)


@@ -5084,6 +5084,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 		load_vmcs12_host_state(vcpu, vmcs12);
 
+		/*
+		 * Process events if an injectable IRQ or NMI is pending, even
+		 * if the event is blocked (RFLAGS.IF is cleared on VM-Exit).
+		 * If an event became pending while L2 was active, KVM needs to
+		 * either inject the event or request an IRQ/NMI window. SMIs
+		 * don't need to be processed as SMM is mutually exclusive with
+		 * non-root mode. INIT/SIPI don't need to be checked as INIT
+		 * is blocked post-VMXON, and SIPIs are ignored.
+		 */
+		if (kvm_cpu_has_injectable_intr(vcpu) || vcpu->arch.nmi_pending)
+			kvm_make_request(KVM_REQ_EVENT, vcpu);
 		return;
 	}


@@ -12877,11 +12877,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		mutex_unlock(&kvm->slots_lock);
 	}
 	kvm_unload_vcpu_mmus(kvm);
+	kvm_destroy_vcpus(kvm);
 	kvm_x86_call(vm_destroy)(kvm);
 	kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
 	kvm_pic_destroy(kvm);
 	kvm_ioapic_destroy(kvm);
-	kvm_destroy_vcpus(kvm);
 	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 	kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
 	kvm_mmu_uninit_vm(kvm);


@@ -666,7 +666,7 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 	vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
 				 worker, name);
-	if (!vtsk)
+	if (IS_ERR(vtsk))
 		goto free_worker;
 
 	mutex_init(&worker->mutex);


@@ -26,20 +26,41 @@ do { \
 	__once_init((once), #once, &__key);	\
 } while (0)
 
-static inline void call_once(struct once *once, void (*cb)(struct once *))
+/*
+ * call_once - Ensure a function has been called exactly once
+ *
+ * @once: Tracking struct
+ * @cb: Function to be called
+ *
+ * If @once has never completed successfully before, call @cb and, if
+ * it returns a zero or positive value, mark @once as completed. Return
+ * the value returned by @cb.
+ *
+ * If @once has completed successfully before, return 0.
+ *
+ * The call to @cb is implicitly surrounded by a mutex, though for
+ * efficiency the function avoids taking it after the first call.
+ */
+static inline int call_once(struct once *once, int (*cb)(struct once *))
 {
-	/* Pairs with atomic_set_release() below.  */
-	if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
-		return;
+	int r, state;
 
-	guard(mutex)(&once->lock);
-	WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
-	if (atomic_read(&once->state) != ONCE_NOT_STARTED)
-		return;
+	/* Pairs with atomic_set_release() below.  */
+	if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+		return 0;
 
-	atomic_set(&once->state, ONCE_RUNNING);
-	cb(once);
-	atomic_set_release(&once->state, ONCE_COMPLETED);
+	guard(mutex)(&once->lock);
+	state = atomic_read(&once->state);
+	if (unlikely(state != ONCE_NOT_STARTED))
+		return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0;
+
+	atomic_set(&once->state, ONCE_RUNNING);
+	r = cb(once);
+	if (r < 0)
+		atomic_set(&once->state, ONCE_NOT_STARTED);
+	else
+		atomic_set_release(&once->state, ONCE_COMPLETED);
+	return r;
 }
 
 #endif /* _LINUX_CALL_ONCE_H */
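
For context, a hedged sketch of how a caller is expected to use the reworked, int-returning call_once(); my_subsystem, my_worker_fn and the other names are hypothetical, and the pattern simply mirrors kvm_mmu_post_init_vm() above: a negative return from the callback leaves the once retryable, a non-negative return marks it completed so later calls return 0 immediately.

#include <linux/call_once.h>
#include <linux/container_of.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>

struct my_subsystem {
        struct once once;               /* assumed initialized with the header's init macro */
        struct task_struct *worker;
};

static int my_worker_fn(void *data)
{
        while (!kthread_should_stop())
                msleep(1000);           /* placeholder for real background work */
        return 0;
}

/* call_once() callback: may fail, in which case the once stays NOT_STARTED. */
static int my_subsystem_start_worker(struct once *once)
{
        struct my_subsystem *s = container_of(once, struct my_subsystem, once);
        struct task_struct *t;

        t = kthread_run(my_worker_fn, s, "my-subsystem-worker");
        if (IS_ERR(t))
                return PTR_ERR(t);      /* propagated by call_once(), retried next time */

        s->worker = t;
        return 0;                       /* marks the once COMPLETED */
}

int my_subsystem_lazy_init(struct my_subsystem *s)
{
        /* Returns 0 right away once a previous call has succeeded. */
        return call_once(&s->once, my_subsystem_start_worker);
}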


@@ -133,7 +133,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *),
 	vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL);
 	if (!vtsk)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	init_completion(&vtsk->exited);
 	mutex_init(&vtsk->exit_mutex);
 	vtsk->data = arg;
@@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *),
 	tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args);
 	if (IS_ERR(tsk)) {
 		kfree(vtsk);
-		return NULL;
+		return ERR_PTR(PTR_ERR(tsk));
 	}
 
 	vtsk->task = tsk;
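
Finally, a hypothetical caller-side sketch (my_work_fn, my_work_killed and my_start_worker are invented names) of the contract change above: vhost_task_create() now reports failure through ERR_PTR() rather than NULL, so callers test IS_ERR() and can propagate the real errno, as vhost_worker_create() and kvm_mmu_start_lpage_recovery() now do.

#include <linux/err.h>
#include <linux/sched/vhost_task.h>

static bool my_work_fn(void *data)      /* hypothetical work function */
{
        return false;                   /* no more work queued */
}

static void my_work_killed(void *data)  /* hypothetical SIGKILL callback */
{
}

int my_start_worker(void *data, struct vhost_task **out)
{
        struct vhost_task *vtsk;

        vtsk = vhost_task_create(my_work_fn, my_work_killed, data, "my-worker");
        if (IS_ERR(vtsk))
                return PTR_ERR(vtsk);   /* e.g. -ENOMEM or a copy_process() error */

        vhost_task_start(vtsk);
        *out = vtsk;
        return 0;
}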