
kvm: defer huge page recovery vhost task to later

Some libraries want to ensure they are single threaded before forking,
so making the kernel's kvm huge page recovery process a vhost task of
the user process breaks those. The minijail library used by crosvm is
one such affected application.

Defer the task to after the first KVM_RUN call, which occurs after the
parent process has forked all its jailed processes. This needs to happen
only once for the kvm instance, so introduce some general-purpose
infrastructure for that, too. It's similar in concept to pthread_once,
except that it is actually usable because the callback takes a parameter.
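
As a rough sketch of the intended usage pattern (struct my_dev and the
function names below are illustrative only, not part of this patch): a user
embeds a struct once in its own object, runs once_init() when the object is
created, and calls call_once() on any path that needs the deferred work done;
the callback recovers its context from the parameter.

    #include <linux/call_once.h>
    #include <linux/container_of.h>

    struct my_dev {
            struct once init_once;
            unsigned long expensive_state;
    };

    /* Runs at most once; the parameter lets the callback find its context. */
    static void my_dev_late_init(struct once *once)
    {
            struct my_dev *dev = container_of(once, struct my_dev, init_once);

            dev->expensive_state = 1;       /* one-time setup goes here */
    }

    static void my_dev_create(struct my_dev *dev)
    {
            once_init(&dev->init_once);     /* cheap, done at creation time */
    }

    static void my_dev_first_use(struct my_dev *dev)
    {
            /*
             * Only the first caller invokes my_dev_late_init(); later callers
             * see ONCE_COMPLETED and return after a single acquire load.
             */
            call_once(&dev->init_once, my_dev_late_init);
    }

In this patch the struct once is embedded in struct kvm_arch,
kvm_arch_post_init_vm() performs the once_init(), and kvm_mmu_post_init_vm(),
now invoked from the vcpu run path, performs the call_once().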

Cc: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Alyssa Ross <hi@alyssa.is>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Message-ID: <20250123153543.2769928-1-kbusch@meta.com>
[Move call_once API to include/linux. - Paolo]
Cc: stable@vger.kernel.org
Fixes: d96c77bd4e ("KVM: x86: switch hugepage recovery thread to vhost_task")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Author:    Keith Busch <kbusch@kernel.org>, 2025-01-23 07:35:43 -08:00
Committer: Paolo Bonzini <pbonzini@redhat.com>
Commit:    931656b9e2 (parent 86eb1aef72)

4 changed files with 66 additions and 6 deletions

arch/x86/include/asm/kvm_host.h

@@ -27,6 +27,7 @@
 #include <linux/hyperv.h>
 #include <linux/kfifo.h>
 #include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>
 
 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -1466,6 +1467,7 @@ struct kvm_arch {
 	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
 	struct vhost_task *nx_huge_page_recovery_thread;
 	u64 nx_huge_page_last;
+	struct once nx_once;
 
 #ifdef CONFIG_X86_64
 	/* The number of TDP MMU pages across all roots. */

arch/x86/kvm/mmu/mmu.c

@@ -7447,20 +7447,28 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
 	return true;
 }
 
-int kvm_mmu_post_init_vm(struct kvm *kvm)
+static void kvm_mmu_start_lpage_recovery(struct once *once)
 {
-	if (nx_hugepage_mitigation_hard_disabled)
-		return 0;
+	struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
+	struct kvm *kvm = container_of(ka, struct kvm, arch);
 
 	kvm->arch.nx_huge_page_last = get_jiffies_64();
 	kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
 		kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
 		kvm, "kvm-nx-lpage-recovery");
+	if (kvm->arch.nx_huge_page_recovery_thread)
+		vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+	if (nx_hugepage_mitigation_hard_disabled)
+		return 0;
+
+	call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
 	if (!kvm->arch.nx_huge_page_recovery_thread)
 		return -ENOMEM;
-
-	vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
 	return 0;
 }

arch/x86/kvm/x86.c

@@ -11471,6 +11471,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	struct kvm_run *kvm_run = vcpu->run;
 	int r;
 
+	r = kvm_mmu_post_init_vm(vcpu->kvm);
+	if (r)
+		return r;
+
 	vcpu_load(vcpu);
 	kvm_sigset_activate(vcpu);
 	kvm_run->flags = 0;
@@ -12748,7 +12752,8 @@ out:
 
 int kvm_arch_post_init_vm(struct kvm *kvm)
 {
-	return kvm_mmu_post_init_vm(kvm);
+	once_init(&kvm->arch.nx_once);
+	return 0;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)

include/linux/call_once.h (new file, 45 lines)

@@ -0,0 +1,45 @@
+#ifndef _LINUX_CALL_ONCE_H
+#define _LINUX_CALL_ONCE_H
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+#define ONCE_NOT_STARTED 0
+#define ONCE_RUNNING     1
+#define ONCE_COMPLETED   2
+
+struct once {
+	atomic_t state;
+	struct mutex lock;
+};
+
+static inline void __once_init(struct once *once, const char *name,
+			       struct lock_class_key *key)
+{
+	atomic_set(&once->state, ONCE_NOT_STARTED);
+	__mutex_init(&once->lock, name, key);
+}
+
+#define once_init(once)						\
+do {								\
+	static struct lock_class_key __key;			\
+	__once_init((once), #once, &__key);			\
+} while (0)
+
+static inline void call_once(struct once *once, void (*cb)(struct once *))
+{
+	/* Pairs with atomic_set_release() below. */
+	if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+		return;
+
+	guard(mutex)(&once->lock);
+	WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
+	if (atomic_read(&once->state) != ONCE_NOT_STARTED)
+		return;
+
+	atomic_set(&once->state, ONCE_RUNNING);
+	cb(once);
+	atomic_set_release(&once->state, ONCE_COMPLETED);
+}
+
+#endif /* _LINUX_CALL_ONCE_H */