kvm: defer huge page recovery vhost task to later
Some libraries want to ensure they are single-threaded before forking,
so making the kernel's kvm huge page recovery process a vhost task of
the user process breaks those. The minijail library used by crosvm is
one affected example.
Defer the task until after the first KVM_RUN call, which occurs after
the parent process has forked all its jailed processes. This needs to
happen only once per kvm instance, so introduce some general-purpose
infrastructure for that, too. It is similar in concept to pthread_once,
except that it is actually usable because the callback takes a parameter.
Cc: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Alyssa Ross <hi@alyssa.is>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Message-ID: <20250123153543.2769928-1-kbusch@meta.com>
[Move call_once API to include/linux. - Paolo]
Cc: stable@vger.kernel.org
Fixes: d96c77bd4e ("KVM: x86: switch hugepage recovery thread to vhost_task")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
commit 931656b9e2
parent 86eb1aef72

4 changed files with 66 additions and 6 deletions
arch/x86/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #include <linux/hyperv.h>
 #include <linux/kfifo.h>
 #include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>

 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -1466,6 +1467,7 @@ struct kvm_arch {
 	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
 	struct vhost_task *nx_huge_page_recovery_thread;
 	u64 nx_huge_page_last;
+	struct once nx_once;

 #ifdef CONFIG_X86_64
 	/* The number of TDP MMU pages across all roots. */
arch/x86/kvm/mmu/mmu.c
@@ -7447,20 +7447,28 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
 	return true;
 }

-int kvm_mmu_post_init_vm(struct kvm *kvm)
+static void kvm_mmu_start_lpage_recovery(struct once *once)
 {
-	if (nx_hugepage_mitigation_hard_disabled)
-		return 0;
+	struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
+	struct kvm *kvm = container_of(ka, struct kvm, arch);

 	kvm->arch.nx_huge_page_last = get_jiffies_64();
 	kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
 		kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
 		kvm, "kvm-nx-lpage-recovery");

+	if (kvm->arch.nx_huge_page_recovery_thread)
+		vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+	if (nx_hugepage_mitigation_hard_disabled)
+		return 0;
+
+	call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
 	if (!kvm->arch.nx_huge_page_recovery_thread)
 		return -ENOMEM;

-	vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
 	return 0;
 }

arch/x86/kvm/x86.c
@@ -11471,6 +11471,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	struct kvm_run *kvm_run = vcpu->run;
 	int r;

+	r = kvm_mmu_post_init_vm(vcpu->kvm);
+	if (r)
+		return r;
+
 	vcpu_load(vcpu);
 	kvm_sigset_activate(vcpu);
 	kvm_run->flags = 0;
@@ -12748,7 +12752,8 @@ out:

 int kvm_arch_post_init_vm(struct kvm *kvm)
 {
-	return kvm_mmu_post_init_vm(kvm);
+	once_init(&kvm->arch.nx_once);
+	return 0;
 }

 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
include/linux/call_once.h (new file, 45 lines)
@@ -0,0 +1,45 @@
+#ifndef _LINUX_CALL_ONCE_H
+#define _LINUX_CALL_ONCE_H
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+#define ONCE_NOT_STARTED 0
+#define ONCE_RUNNING     1
+#define ONCE_COMPLETED   2
+
+struct once {
+	atomic_t state;
+	struct mutex lock;
+};
+
+static inline void __once_init(struct once *once, const char *name,
+			       struct lock_class_key *key)
+{
+	atomic_set(&once->state, ONCE_NOT_STARTED);
+	__mutex_init(&once->lock, name, key);
+}
+
+#define once_init(once)						\
+do {								\
+	static struct lock_class_key __key;			\
+	__once_init((once), #once, &__key);			\
+} while (0)
+
+static inline void call_once(struct once *once, void (*cb)(struct once *))
+{
+	/* Pairs with atomic_set_release() below. */
+	if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+		return;
+
+	guard(mutex)(&once->lock);
+	WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
+	if (atomic_read(&once->state) != ONCE_NOT_STARTED)
+		return;
+
+	atomic_set(&once->state, ONCE_RUNNING);
+	cb(once);
+	atomic_set_release(&once->state, ONCE_COMPLETED);
+}
+
+#endif /* _LINUX_CALL_ONCE_H */
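
For readers unfamiliar with the pattern: the reason call_once() hands the
struct once pointer to its callback (unlike pthread_once(), whose init
routine takes no arguments) is that the caller can embed the once state in
a larger object and recover that object with container_of(), exactly as
kvm_mmu_start_lpage_recovery() does above. Below is a minimal userspace
analog of that pattern, for illustration only; it is not kernel code, and
the names struct vm and start_recovery are invented for the example.

/*
 * Userspace sketch of the call_once() pattern (hypothetical, not kernel
 * code).  Build with: cc -pthread example.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct once {
	atomic_int state;		/* 0 = not started, 2 = completed */
	pthread_mutex_t lock;
};

struct vm {
	struct once nx_once;		/* embedded, like kvm_arch.nx_once */
	int recovery_started;
};

/* The callback receives the once pointer, so it can find its container. */
static void start_recovery(struct once *once)
{
	struct vm *vm = container_of(once, struct vm, nx_once);

	vm->recovery_started = 1;
	printf("recovery started for vm %p\n", (void *)vm);
}

static void call_once(struct once *once, void (*cb)(struct once *))
{
	/* Fast path: pairs with the release store below. */
	if (atomic_load_explicit(&once->state, memory_order_acquire) == 2)
		return;

	pthread_mutex_lock(&once->lock);
	if (atomic_load(&once->state) == 0) {
		cb(once);
		atomic_store_explicit(&once->state, 2, memory_order_release);
	}
	pthread_mutex_unlock(&once->lock);
}

int main(void)
{
	static struct vm vm = { .nx_once.lock = PTHREAD_MUTEX_INITIALIZER };

	/* Called on every "vcpu run"; the callback runs only the first time. */
	call_once(&vm.nx_once, start_recovery);
	call_once(&vm.nx_once, start_recovery);
	return 0;
}

The in-kernel helper shown above expresses the same idea with
atomic_read_acquire()/atomic_set_release(), a scope-based guard(mutex),
and a WARN_ON() to catch re-entrant calls.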