1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/arch/arm64/kvm/vgic/vgic-init.c
Marc Zyngier b3aa9283c0 KVM: arm64: vgic: Hoist SGI/PPI alloc from vgic_init() to kvm_create_vgic()
If userspace creates vcpus, then a vgic, we end-up in a situation
where irqchip_in_kernel() will return true, but no private interrupt
has been allocated for these vcpus. This situation will continue
until userspace initialises the vgic, at which point we fix the
early vcpus. Should a vcpu run or be initialised in the interval,
bad things may happen.

An obvious solution is to move this fix-up phase to the point where
the vgic is created. This ensures that from that point onwards,
all vcpus have their private interrupts, as new vcpus will directly
allocate them.

With that, we have the invariant that when irqchip_in_kernel() is
true, all vcpus have their private interrupts.

Reported-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250212182558.2865232-3-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-02-13 18:03:54 +00:00

697 lines
17 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2015, 2016 ARM Ltd.
*/
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include "vgic.h"
/*
* Initialization rules: there are multiple stages to the vgic
* initialization, both for the distributor and the CPU interfaces. The basic
* idea is that even though the VGIC is not functional or not requested from
* user space, the critical path of the run loop can still call VGIC functions
* that just won't do anything, without them having to check additional
* initialization flags to ensure they don't look at uninitialized data
* structures.
*
* Distributor:
*
* - kvm_vgic_early_init(): initialization of static data that doesn't
* depend on any sizing information or emulation type. No allocation
* is allowed there.
*
* - vgic_init(): allocation and initialization of the generic data
* structures that depend on sizing information (number of CPUs,
* number of interrupts). Also initializes the vcpu specific data
* structures. Can be executed lazily for GICv2.
*
* CPU Interface:
*
* - kvm_vgic_vcpu_init(): initialization of static data that doesn't depend
* on any sizing information. Private interrupts are allocated if not
* already allocated at vgic-creation time.
*/
/* EARLY INIT */
/**
* kvm_vgic_early_init() - Initialize static VGIC VCPU data structures
* @kvm: The VM whose VGIC districutor should be initialized
*
* Only do initialization of static structures that don't require any
* allocation or sizing information from userspace. vgic_init() called
* kvm_vgic_dist_init() which takes care of the rest.
*/
void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
/* CREATION */
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type);
/**
* kvm_vgic_create: triggered by the instantiation of the VGIC device by
* user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only)
* or through the generic KVM_CREATE_DEVICE API ioctl.
* irqchip_in_kernel() tells you if this function succeeded or not.
* @kvm: kvm struct pointer
* @type: KVM_DEV_TYPE_ARM_VGIC_V[23]
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
struct kvm_vcpu *vcpu;
unsigned long i;
int ret;
/*
* This function is also called by the KVM_CREATE_IRQCHIP handler,
* which had no chance yet to check the availability of the GICv2
* emulation. So check this here again. KVM_CREATE_DEVICE does
* the proper checks already.
*/
if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
/* Must be held to avoid race with vCPU creation */
lockdep_assert_held(&kvm->lock);
ret = -EBUSY;
if (!lock_all_vcpus(kvm))
return ret;
mutex_lock(&kvm->arch.config_lock);
if (irqchip_in_kernel(kvm)) {
ret = -EEXIST;
goto out_unlock;
}
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu_has_run_once(vcpu))
goto out_unlock;
}
ret = 0;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
kvm->max_vcpus = VGIC_V2_MAX_CPUS;
else
kvm->max_vcpus = VGIC_V3_MAX_CPUS;
if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) {
ret = -E2BIG;
goto out_unlock;
}
kvm_for_each_vcpu(i, vcpu, kvm) {
ret = vgic_allocate_private_irqs_locked(vcpu, type);
if (ret)
break;
}
if (ret) {
kvm_for_each_vcpu(i, vcpu, kvm) {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
kfree(vgic_cpu->private_irqs);
vgic_cpu->private_irqs = NULL;
}
goto out_unlock;
}
kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
else
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
unlock_all_vcpus(kvm);
return ret;
}
/* INIT/DESTROY */
/**
* kvm_vgic_dist_init: initialize the dist data structures
* @kvm: kvm struct pointer
* @nr_spis: number of spis, frozen by caller
*/
static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
int i;
dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
if (!dist->spis)
return -ENOMEM;
/*
* In the following code we do not take the irq struct lock since
* no other action on irq structs can happen while the VGIC is
* not initialized yet:
* If someone wants to inject an interrupt or does a MMIO access, we
* require prior initialization in case of a virtual GICv3 or trigger
* initialization when using a virtual GICv2.
*/
for (i = 0; i < nr_spis; i++) {
struct vgic_irq *irq = &dist->spis[i];
irq->intid = i + VGIC_NR_PRIVATE_IRQS;
INIT_LIST_HEAD(&irq->ap_list);
raw_spin_lock_init(&irq->irq_lock);
irq->vcpu = NULL;
irq->target_vcpu = vcpu0;
kref_init(&irq->refcount);
switch (dist->vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V2:
irq->targets = 0;
irq->group = 0;
break;
case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->mpidr = 0;
irq->group = 1;
break;
default:
kfree(dist->spis);
dist->spis = NULL;
return -EINVAL;
}
}
return 0;
}
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
int i;
lockdep_assert_held(&vcpu->kvm->arch.config_lock);
if (vgic_cpu->private_irqs)
return 0;
vgic_cpu->private_irqs = kcalloc(VGIC_NR_PRIVATE_IRQS,
sizeof(struct vgic_irq),
GFP_KERNEL_ACCOUNT);
if (!vgic_cpu->private_irqs)
return -ENOMEM;
/*
* Enable and configure all SGIs to be edge-triggered and
* configure all PPIs as level-triggered.
*/
for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
INIT_LIST_HEAD(&irq->ap_list);
raw_spin_lock_init(&irq->irq_lock);
irq->intid = i;
irq->vcpu = NULL;
irq->target_vcpu = vcpu;
kref_init(&irq->refcount);
if (vgic_irq_is_sgi(i)) {
/* SGIs */
irq->enabled = 1;
irq->config = VGIC_CONFIG_EDGE;
} else {
/* PPIs */
irq->config = VGIC_CONFIG_LEVEL;
}
switch (type) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->group = 1;
irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
break;
case KVM_DEV_TYPE_ARM_VGIC_V2:
irq->group = 0;
irq->targets = BIT(vcpu->vcpu_id);
break;
}
}
return 0;
}
static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu, u32 type)
{
int ret;
mutex_lock(&vcpu->kvm->arch.config_lock);
ret = vgic_allocate_private_irqs_locked(vcpu, type);
mutex_unlock(&vcpu->kvm->arch.config_lock);
return ret;
}
/**
* kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
* structures and register VCPU-specific KVM iodevs
*
* @vcpu: pointer to the VCPU being created and initialized
*
* Only do initialization, but do not actually enable the
* VGIC CPU interface
*/
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
int ret = 0;
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
raw_spin_lock_init(&vgic_cpu->ap_list_lock);
atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
ret = vgic_allocate_private_irqs(vcpu, dist->vgic_model);
if (ret)
return ret;
/*
* If we are creating a VCPU with a GICv3 we must also register the
* KVM io device for the redistributor that belongs to this VCPU.
*/
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
mutex_lock(&vcpu->kvm->slots_lock);
ret = vgic_register_redist_iodev(vcpu);
mutex_unlock(&vcpu->kvm->slots_lock);
}
return ret;
}
static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
{
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_enable(vcpu);
else
vgic_v3_enable(vcpu);
}
/*
* vgic_init: allocates and initializes dist and vcpu data structures
* depending on two dimensioning parameters:
* - the number of spis
* - the number of vcpus
* The function is generally called when nr_spis has been explicitly set
* by the guest through the KVM DEVICE API. If not nr_spis is set to 256.
* vgic_initialized() returns true when this function has succeeded.
*/
int vgic_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int ret = 0;
unsigned long idx;
lockdep_assert_held(&kvm->arch.config_lock);
if (vgic_initialized(kvm))
return 0;
/* Are we also in the middle of creating a VCPU? */
if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
return -EBUSY;
/* freeze the number of spis */
if (!dist->nr_spis)
dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
if (ret)
goto out;
/*
* If we have GICv4.1 enabled, unconditionally request enable the
* v4 support so that we get HW-accelerated vSGIs. Otherwise, only
* enable it if we present a virtual ITS to the guest.
*/
if (vgic_supports_direct_msis(kvm)) {
ret = vgic_v4_init(kvm);
if (ret)
goto out;
}
kvm_for_each_vcpu(idx, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
ret = kvm_vgic_setup_default_irq_routing(kvm);
if (ret)
goto out;
vgic_debug_init(kvm);
/*
* If userspace didn't set the GIC implementation revision,
* default to the latest and greatest. You know want it.
*/
if (!dist->implementation_rev)
dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST;
dist->initialized = true;
out:
return ret;
}
static void kvm_vgic_dist_destroy(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_redist_region *rdreg, *next;
dist->ready = false;
dist->initialized = false;
kfree(dist->spis);
dist->spis = NULL;
dist->nr_spis = 0;
dist->vgic_dist_base = VGIC_ADDR_UNDEF;
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
vgic_v3_free_redist_region(kvm, rdreg);
INIT_LIST_HEAD(&dist->rd_regions);
} else {
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
}
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
xa_destroy(&dist->lpi_xa);
}
static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
/*
* Retire all pending LPIs on this vcpu anyway as we're
* going to destroy it.
*/
vgic_flush_pending_lpis(vcpu);
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
kfree(vgic_cpu->private_irqs);
vgic_cpu->private_irqs = NULL;
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
/*
* If this vCPU is being destroyed because of a failed creation
* then unregister the redistributor to avoid leaving behind a
* dangling pointer to the vCPU struct.
*
* vCPUs that have been successfully created (i.e. added to
* kvm->vcpu_array) get unregistered in kvm_vgic_destroy(), as
* this function gets called while holding kvm->arch.config_lock
* in the VM teardown path and would otherwise introduce a lock
* inversion w.r.t. kvm->srcu.
*
* vCPUs that failed creation are torn down outside of the
* kvm->arch.config_lock and do not get unregistered in
* kvm_vgic_destroy(), meaning it is both safe and necessary to
* do so here.
*/
if (kvm_get_vcpu_by_id(vcpu->kvm, vcpu->vcpu_id) != vcpu)
vgic_unregister_redist_iodev(vcpu);
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
}
}
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
mutex_lock(&kvm->slots_lock);
__kvm_vgic_vcpu_destroy(vcpu);
mutex_unlock(&kvm->slots_lock);
}
void kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->arch.config_lock);
vgic_debug_destroy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
__kvm_vgic_vcpu_destroy(vcpu);
kvm_vgic_dist_destroy(kvm);
mutex_unlock(&kvm->arch.config_lock);
if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
kvm_for_each_vcpu(i, vcpu, kvm)
vgic_unregister_redist_iodev(vcpu);
mutex_unlock(&kvm->slots_lock);
}
/**
* vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
* is a GICv2. A GICv3 must be explicitly initialized by userspace using the
* KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
* @kvm: kvm struct pointer
*/
int vgic_lazy_init(struct kvm *kvm)
{
int ret = 0;
if (unlikely(!vgic_initialized(kvm))) {
/*
* We only provide the automatic initialization of the VGIC
* for the legacy case of a GICv2. Any other type must
* be explicitly initialized once setup with the respective
* KVM device call.
*/
if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
return -EBUSY;
mutex_lock(&kvm->arch.config_lock);
ret = vgic_init(kvm);
mutex_unlock(&kvm->arch.config_lock);
}
return ret;
}
/* RESOURCE MAPPING */
/**
* kvm_vgic_map_resources - map the MMIO regions
* @kvm: kvm struct pointer
*
* Map the MMIO regions depending on the VGIC model exposed to the guest
* called on the first VCPU run.
* Also map the virtual CPU interface into the VM.
* v2 calls vgic_init() if not already done.
* v3 and derivatives return an error if the VGIC is not initialized.
* vgic_ready() returns true if this function has succeeded.
*/
int kvm_vgic_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
enum vgic_type type;
gpa_t dist_base;
int ret = 0;
if (likely(vgic_ready(kvm)))
return 0;
mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->arch.config_lock);
if (vgic_ready(kvm))
goto out;
if (!irqchip_in_kernel(kvm))
goto out;
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
ret = vgic_v2_map_resources(kvm);
type = VGIC_V2;
} else {
ret = vgic_v3_map_resources(kvm);
type = VGIC_V3;
}
if (ret)
goto out;
dist_base = dist->vgic_dist_base;
mutex_unlock(&kvm->arch.config_lock);
ret = vgic_register_dist_iodev(kvm, dist_base, type);
if (ret) {
kvm_err("Unable to register VGIC dist MMIO regions\n");
goto out_slots;
}
/*
* kvm_io_bus_register_dev() guarantees all readers see the new MMIO
* registration before returning through synchronize_srcu(), which also
* implies a full memory barrier. As such, marking the distributor as
* 'ready' here is guaranteed to be ordered after all vCPUs having seen
* a completely configured distributor.
*/
dist->ready = true;
goto out_slots;
out:
mutex_unlock(&kvm->arch.config_lock);
out_slots:
if (ret)
kvm_vm_dead(kvm);
mutex_unlock(&kvm->slots_lock);
return ret;
}
/* GENERIC PROBE */
void kvm_vgic_cpu_up(void)
{
enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0);
}
void kvm_vgic_cpu_down(void)
{
disable_percpu_irq(kvm_vgic_global_state.maint_irq);
}
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
{
/*
* We cannot rely on the vgic maintenance interrupt to be
* delivered synchronously. This means we can only use it to
* exit the VM, and we perform the handling of EOIed
* interrupts on the exit path (see vgic_fold_lr_state).
*/
return IRQ_HANDLED;
}
static struct gic_kvm_info *gic_kvm_info;
void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
{
BUG_ON(gic_kvm_info != NULL);
gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL);
if (gic_kvm_info)
*gic_kvm_info = *info;
}
/**
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
*
* For a specific CPU, initialize the GIC VE hardware.
*/
void kvm_vgic_init_cpu_hardware(void)
{
BUG_ON(preemptible());
/*
* We want to make sure the list registers start out clear so that we
* only have the program the used registers.
*/
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_init_lrs();
else
kvm_call_hyp(__vgic_v3_init_lrs);
}
/**
* kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
* according to the host GIC model. Accordingly calls either
* vgic_v2/v3_probe which registers the KVM_DEVICE that can be
* instantiated by a guest later on .
*/
int kvm_vgic_hyp_init(void)
{
bool has_mask;
int ret;
if (!gic_kvm_info)
return -ENODEV;
has_mask = !gic_kvm_info->no_maint_irq_mask;
if (has_mask && !gic_kvm_info->maint_irq) {
kvm_err("No vgic maintenance irq\n");
return -ENXIO;
}
/*
* If we get one of these oddball non-GICs, taint the kernel,
* as we have no idea of how they *really* behave.
*/
if (gic_kvm_info->no_hw_deactivation) {
kvm_info("Non-architectural vgic, tainting kernel\n");
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
kvm_vgic_global_state.no_hw_deactivation = true;
}
switch (gic_kvm_info->type) {
case GIC_V2:
ret = vgic_v2_probe(gic_kvm_info);
break;
case GIC_V3:
ret = vgic_v3_probe(gic_kvm_info);
if (!ret) {
static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
kvm_info("GIC system register CPU interface enabled\n");
}
break;
default:
ret = -ENODEV;
}
kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
kfree(gic_kvm_info);
gic_kvm_info = NULL;
if (ret)
return ret;
if (!has_mask && !kvm_vgic_global_state.maint_irq)
return 0;
ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
vgic_maintenance_handler,
"vgic", kvm_get_running_vcpus());
if (ret) {
kvm_err("Cannot register interrupt %d\n",
kvm_vgic_global_state.maint_irq);
return ret;
}
kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
return 0;
}