KVM/arm64 fixes for 6.14, take #1
- Correctly clean the BSS to the PoC before allowing EL2 to access it on nVHE/hVHE/protected configurations
- Propagate ownership of debug registers in protected mode after the rework that landed in 6.14-rc1
- Stop pretending that we can run protected mode without a GICv3 being present on the host
- Fix a use-after-free situation that can occur if a vcpu fails to initialise the NV shadow S2 MMU contexts
- Always evaluate the need to arm a background timer for fully emulated guest timers
- Fix the emulation of EL1 timers in the absence of FEAT_ECV
- Correctly handle the EL2 virtual timer, especially when HCR_EL2.E2H==0

Merge tag 'kvmarm-fixes-6.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
commit 5e21d0c5b9

5 changed files with 73 additions and 45 deletions
arch/arm64/kvm/arch_timer.c

@@ -471,10 +471,8 @@ static void timer_emulate(struct arch_timer_context *ctx)
 
         trace_kvm_timer_emulate(ctx, should_fire);
 
-        if (should_fire != ctx->irq.level) {
+        if (should_fire != ctx->irq.level)
                 kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
-                return;
-        }
 
         kvm_timer_update_status(ctx, should_fire);
 
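The hunk above drops the early return so the soft-timer decision at the end of timer_emulate() is always reached, even when the interrupt line level did not change. A minimal standalone sketch of that decision, assuming hypothetical stand-ins (struct emul_timer, needs_soft_timer) for the kernel's own state and helpers:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's emulated-timer state. */
struct emul_timer {
        bool should_fire;       /* timer condition already met */
        bool irq_level;         /* current level of the injected IRQ */
        bool irq_enabled;       /* timer enabled and not masked */
};

/* Decide whether a background (soft) timer must still be armed. */
static bool needs_soft_timer(const struct emul_timer *t)
{
        /* Already fired, or can never fire: nothing to schedule. */
        return !t->should_fire && t->irq_enabled;
}

int main(void)
{
        /*
         * The case the fix cares about: the line level already matches
         * should_fire, yet a soft timer is still needed so a future
         * expiry is noticed.  The old early return skipped this check.
         */
        struct emul_timer t = { .should_fire = false, .irq_level = false,
                                .irq_enabled = true };

        printf("soft timer needed: %s\n", needs_soft_timer(&t) ? "yes" : "no");
        return 0;
}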
@@ -761,21 +759,6 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
                                             timer_irq(map->direct_ptimer),
                                             &arch_timer_irq_ops);
                 WARN_ON_ONCE(ret);
-
-                /*
-                 * The virtual offset behaviour is "interesting", as it
-                 * always applies when HCR_EL2.E2H==0, but only when
-                 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
-                 * track E2H when putting the HV timer in "direct" mode.
-                 */
-                if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
-                        struct arch_timer_offset *offs = &map->direct_vtimer->offset;
-
-                        if (vcpu_el2_e2h_is_set(vcpu))
-                                offs->vcpu_offset = NULL;
-                        else
-                                offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
-                }
         }
 }
 
@@ -976,31 +959,21 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
          * which allows trapping of the timer registers even with NV2.
          * Still, this is still worse than FEAT_NV on its own. Meh.
          */
-        if (!vcpu_el2_e2h_is_set(vcpu)) {
-                if (cpus_have_final_cap(ARM64_HAS_ECV))
-                        return;
-
-                /*
-                 * A non-VHE guest hypervisor doesn't have any direct access
-                 * to its timers: the EL2 registers trap (and the HW is
-                 * fully emulated), while the EL0 registers access memory
-                 * despite the access being notionally direct. Boo.
-                 *
-                 * We update the hardware timer registers with the
-                 * latest value written by the guest to the VNCR page
-                 * and let the hardware take care of the rest.
-                 */
-                write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
-                write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
-                write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL);
-                write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
-        } else {
+        if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
                 /*
                  * For a VHE guest hypervisor, the EL2 state is directly
-                 * stored in the host EL1 timers, while the emulated EL0
+                 * stored in the host EL1 timers, while the emulated EL1
                  * state is stored in the VNCR page. The latter could have
                  * been updated behind our back, and we must reset the
                  * emulation of the timers.
+                 *
+                 * A non-VHE guest hypervisor doesn't have any direct access
+                 * to its timers: the EL2 registers trap despite being
+                 * notionally direct (we use the EL1 HW, as for VHE), while
+                 * the EL1 registers access memory.
+                 *
+                 * In both cases, process the emulated timers on each guest
+                 * exit. Boo.
+                 */
                 struct timer_map map;
                 get_timer_map(vcpu, &map);
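After this change, the only deciding factor for reprocessing the emulated timers on exit is the absence of FEAT_ECV, independently of the guest hypervisor's E2H setting. A small standalone model of that decision (must_resync_emulated_timers and its parameters are illustrative names, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative decision only: without FEAT_ECV, the guest hypervisor's
 * emulated timer state (held in the VNCR page) may change behind KVM's
 * back, so the emulated timers are reprocessed on every exit whether
 * the guest hypervisor runs with E2H set or not.
 */
static bool must_resync_emulated_timers(bool has_ecv, bool guest_hyp_is_vhe)
{
        (void)guest_hyp_is_vhe;         /* no longer part of the decision */
        return !has_ecv;
}

int main(void)
{
        for (int ecv = 0; ecv <= 1; ecv++)
                for (int vhe = 0; vhe <= 1; vhe++)
                        printf("ECV=%d E2H=%d -> resync on exit: %s\n", ecv, vhe,
                               must_resync_emulated_timers(ecv, vhe) ? "yes" : "no");
        return 0;
}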
arch/arm64/kvm/arm.c

@@ -2290,6 +2290,19 @@ static int __init init_subsystems(void)
                 break;
         case -ENODEV:
         case -ENXIO:
+                /*
+                 * No VGIC? No pKVM for you.
+                 *
+                 * Protected mode assumes that VGICv3 is present, so no point
+                 * in trying to hobble along if vgic initialization fails.
+                 */
+                if (is_protected_kvm_enabled())
+                        goto out;
+
+                /*
+                 * Otherwise, userspace could choose to implement a GIC for its
+                 * guest on non-cooperative hardware.
+                 */
                 vgic_present = false;
                 err = 0;
                 break;
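The added error handling treats a missing in-kernel VGIC as fatal only when protected mode is enabled. A standalone sketch of that policy, assuming a protected_mode flag and handle_vgic_init_result() as illustrative stand-ins for is_protected_kvm_enabled() and the switch above:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Returns 0 to continue (possibly without a vgic), or a fatal error. */
static int handle_vgic_init_result(int err, bool protected_mode, bool *vgic_present)
{
        switch (err) {
        case 0:
                *vgic_present = true;
                return 0;
        case -ENODEV:
        case -ENXIO:
                /* pKVM assumes a GICv3: no GIC hardware, no protected mode. */
                if (protected_mode)
                        return err;
                /* Otherwise userspace may still emulate a GIC itself. */
                *vgic_present = false;
                return 0;
        default:
                return err;
        }
}

int main(void)
{
        bool present = false;

        printf("pKVM, no GIC     -> %d\n",
               handle_vgic_init_result(-ENODEV, true, &present));
        printf("non-pKVM, no GIC -> %d (vgic_present=%d)\n",
               handle_vgic_init_result(-ENODEV, false, &present), present);
        return 0;
}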
@@ -2400,6 +2413,13 @@ static void kvm_hyp_init_symbols(void)
         kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
         kvm_nvhe_sym(__icache_flags) = __icache_flags;
         kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
+
+        /*
+         * Flush entire BSS since part of its data containing init symbols is read
+         * while the MMU is off.
+         */
+        kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start),
+                                kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start));
 }
 
 static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
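The new flush makes everything in the hypervisor BSS visible at the Point of Coherency before EL2, still running with its MMU off, reads from it. A minimal sketch of the by-range clean such a helper performs; CACHE_LINE, clean_range_to_poc() and the symbol values are assumptions for illustration, and the real work is done by kvm_flush_dcache_to_poc() with the line size taken from the CPU:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CACHE_LINE      64      /* assumed line size for this sketch only */

/* Clean a [start, start + size) range one cache line at a time. */
static void clean_range_to_poc(uintptr_t start, size_t size,
                               void (*clean_line)(uintptr_t))
{
        uintptr_t addr = start & ~(uintptr_t)(CACHE_LINE - 1);
        uintptr_t end = start + size;

        for (; addr < end; addr += CACHE_LINE)
                clean_line(addr);       /* would be a DC CVAC on arm64 */
}

static void print_line(uintptr_t addr)
{
        printf("clean line @ %#lx\n", (unsigned long)addr);
}

int main(void)
{
        /* Hypothetical stand-ins for __hyp_bss_start/__hyp_bss_end. */
        uintptr_t bss_start = 0x1010;
        uintptr_t bss_end = 0x1100;

        clean_range_to_poc(bss_start, bss_end - bss_start, print_line);
        return 0;
}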
arch/arm64/kvm/hyp/nvhe/hyp-main.c

@@ -91,11 +91,34 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
         *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
 }
 
+static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+        struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+        hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner;
+
+        if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
+                hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state;
+        else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
+                hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state;
+}
+
+static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+        struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+        if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
+                host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state;
+        else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
+                host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state;
+}
+
 static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
 {
         struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
 
         fpsimd_sve_flush();
+        flush_debug_state(hyp_vcpu);
 
         hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
 
@@ -123,6 +146,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
         unsigned int i;
 
         fpsimd_sve_sync(&hyp_vcpu->vcpu);
+        sync_debug_state(hyp_vcpu);
 
         host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
 
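flush_debug_state()/sync_debug_state() copy only the register file that matches the current debug owner: into the hyp view before running the vcpu, and back to the host view afterwards. A self-contained model of that ownership-based copy; the enum and struct names below are invented for illustration, the hypervisor itself relies on kvm_guest_owns_debug_regs()/kvm_host_owns_debug_regs():

#include <stdio.h>
#include <string.h>

enum dbg_owner { DBG_OWNER_NONE, DBG_OWNER_GUEST, DBG_OWNER_HOST };

struct dbg_regs { unsigned long bcr0; };

struct vcpu_model {
        enum dbg_owner owner;
        struct dbg_regs guest_dbg;      /* guest-owned breakpoint state */
        struct dbg_regs external_dbg;   /* host/external debugger state */
};

/* Entry: pull the relevant state from the host view into the hyp view. */
static void flush_debug(struct vcpu_model *hyp, const struct vcpu_model *host)
{
        hyp->owner = host->owner;
        if (hyp->owner == DBG_OWNER_GUEST)
                hyp->guest_dbg = host->guest_dbg;
        else if (hyp->owner == DBG_OWNER_HOST)
                hyp->external_dbg = host->external_dbg;
}

/* Exit: push the relevant state back so the host sees guest updates. */
static void sync_debug(const struct vcpu_model *hyp, struct vcpu_model *host)
{
        if (hyp->owner == DBG_OWNER_GUEST)
                host->guest_dbg = hyp->guest_dbg;
        else if (hyp->owner == DBG_OWNER_HOST)
                host->external_dbg = hyp->external_dbg;
}

int main(void)
{
        struct vcpu_model host = { .owner = DBG_OWNER_GUEST,
                                   .guest_dbg = { .bcr0 = 0x5 } };
        struct vcpu_model hyp;

        memset(&hyp, 0, sizeof(hyp));
        flush_debug(&hyp, &host);
        hyp.guest_dbg.bcr0 = 0x7;       /* guest modifies its debug regs */
        sync_debug(&hyp, &host);
        printf("host sees bcr0 = %#lx\n", host.guest_dbg.bcr0);
        return 0;
}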
arch/arm64/kvm/nested.c

@@ -67,26 +67,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
         if (!tmp)
                 return -ENOMEM;
 
+        swap(kvm->arch.nested_mmus, tmp);
+
         /*
          * If we went through a realocation, adjust the MMU back-pointers in
          * the previously initialised kvm_pgtable structures.
          */
         if (kvm->arch.nested_mmus != tmp)
                 for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
-                        tmp[i].pgt->mmu = &tmp[i];
+                        kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i];
 
         for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
-                ret = init_nested_s2_mmu(kvm, &tmp[i]);
+                ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]);
 
         if (ret) {
                 for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++)
-                        kvm_free_stage2_pgd(&tmp[i]);
+                        kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]);
 
                 return ret;
         }
 
         kvm->arch.nested_mmus_size = num_mmus;
-        kvm->arch.nested_mmus = tmp;
 
         return 0;
 }
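The fix publishes the reallocated array via swap() before anything can fail, so an early error return no longer leaves kvm->arch.nested_mmus pointing at memory the reallocation has already freed, and the error path cleans up through the published pointer. The same pattern in self-contained form; ctx, unit, grow_units() and unit_init() are invented names standing in for the kernel structures:

#include <stdio.h>
#include <stdlib.h>

struct unit {
        struct unit *self;      /* back-pointer, mirrors pgt->mmu */
        int ready;
};

struct ctx {
        struct unit *units;
        int nr_units;
};

/* Stand-in for init_nested_s2_mmu(); may fail in the real code. */
static int unit_init(struct unit *u)
{
        u->self = u;
        u->ready = 1;
        return 0;
}

/* Grow ctx->units to num entries, publishing the new array up front. */
static int grow_units(struct ctx *c, int num)
{
        struct unit *tmp;
        int ret = 0;

        tmp = realloc(c->units, num * sizeof(*tmp));
        if (!tmp)
                return -1;

        /* Publish first, then fix up back-pointers through the new array. */
        c->units = tmp;
        for (int i = 0; i < c->nr_units; i++)
                c->units[i].self = &c->units[i];

        for (int i = c->nr_units; !ret && i < num; i++)
                ret = unit_init(&c->units[i]);

        if (ret) {
                /* Error path tears down via the published pointer, never tmp. */
                for (int i = c->nr_units; i < num; i++)
                        c->units[i].ready = 0;
                return ret;
        }

        c->nr_units = num;
        return 0;
}

int main(void)
{
        struct ctx c = { 0 };

        if (!grow_units(&c, 4))
                printf("%d units, back-pointer valid: %d\n",
                       c.nr_units, c.units[0].self == &c.units[0]);
        free(c.units);
        return 0;
}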
arch/arm64/kvm/sys_regs.c

@@ -1452,6 +1452,16 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
         return true;
 }
 
+static bool access_hv_timer(struct kvm_vcpu *vcpu,
+                            struct sys_reg_params *p,
+                            const struct sys_reg_desc *r)
+{
+        if (!vcpu_el2_e2h_is_set(vcpu))
+                return undef_access(vcpu, p, r);
+
+        return access_arch_timer(vcpu, p, r);
+}
+
 static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
                                     s64 new, s64 cur)
 {
@@ -3103,9 +3113,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
         EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0),
         EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0),
 
-        { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer },
-        EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0),
-        EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0),
+        { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer },
+        EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0),
+        EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0),
 
         { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },
 
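The two sys_regs.c hunks above route the CNTHV_*_EL2 descriptors through access_hv_timer(), which rejects the access with UNDEF when the guest hypervisor runs with HCR_EL2.E2H==0 and otherwise falls back to the normal timer emulation path. A standalone sketch of that gate; trap_hv_timer(), handle_timer() and the bool flag are illustrative stand-ins, not kernel APIs:

#include <stdbool.h>
#include <stdio.h>

enum trap_result { TRAP_HANDLED, TRAP_UNDEF };

/* Stand-in for access_arch_timer(): the normal emulation path. */
static enum trap_result handle_timer(void)
{
        return TRAP_HANDLED;
}

/*
 * CNTHV_CTL_EL2/CNTHV_CVAL_EL2/CNTHV_TVAL_EL2 accesses are only honoured
 * for a VHE (E2H==1) guest hypervisor; otherwise the access UNDEFs.
 */
static enum trap_result trap_hv_timer(bool e2h)
{
        if (!e2h)
                return TRAP_UNDEF;
        return handle_timer();
}

int main(void)
{
        printf("E2H=0 -> %s\n", trap_hv_timer(false) == TRAP_UNDEF ? "UNDEF" : "handled");
        printf("E2H=1 -> %s\n", trap_hv_timer(true) == TRAP_UNDEF ? "UNDEF" : "handled");
        return 0;
}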