1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/arch/x86/include/asm/fpu/sched.h
Linus Torvalds 24b8a23638 x86/fpu: Clean up FPU switching in the middle of task switching
It happens to work, but it's very very wrong, because our 'current'
macro is magic that is supposedly loading a stable value.

It just happens to be not quite stable enough and the compilers
re-load the value enough for this code to work.  But it's wrong.

The whole

        struct fpu *prev_fpu = &prev->fpu;

thing in __switch_to() is pretty ugly. There's no reason why we
should look at that 'prev_fpu' pointer there, or pass it down.

And it only generates worse code, in how it loads 'current' when
__switch_to() has the right task pointers.

The attached patch not only cleans this up, it actually
generates better code too:

 (a) it removes one push/pop pair at entry/exit because there's one
     less register used (no 'current')

 (b) it removes that pointless load of 'current' because it just uses
     the right argument:

	-       movq    %gs:pcpu_hot(%rip), %r12
	-       testq   $16384, (%r12)
	+       testq   $16384, (%rdi)

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20231018184227.446318-1-ubizjak@gmail.com
2023-10-20 11:24:22 +02:00

71 lines
2.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_FPU_SCHED_H
#define _ASM_X86_FPU_SCHED_H
#include <linux/sched.h>
#include <asm/cpufeature.h>
#include <asm/fpu/types.h>
#include <asm/trace/fpu.h>
extern void save_fpregs_to_fpstate(struct fpu *fpu);
extern void fpu__drop(struct fpu *fpu);
extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
unsigned long shstk_addr);
extern void fpu_flush_thread(void);
/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
* - switch_fpu_prepare() saves the old state.
* This is done within the context of the old process.
*
* - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
* will get loaded on return to userspace, or when the kernel needs it.
*
* If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
* are saved in the current thread's FPU register state.
*
* If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
* hold current()'s FPU registers. It is required to load the
* registers before returning to userland or using the content
* otherwise.
*
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
{
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
!(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
struct fpu *old_fpu = &old->thread.fpu;
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
* fpu_fpregs_owner_ctx is still @old_fpu. Store the
* current CPU number in @old_fpu, so the next return
* to user space can avoid the FPU register restore
* when is returns on the same CPU and still owns the
* context.
*/
old_fpu->last_cpu = cpu;
trace_x86_fpu_regs_deactivated(old_fpu);
}
}
/*
* Delay loading of the complete FPU state until the return to userland.
* PKRU is handled separately.
*/
static inline void switch_fpu_finish(struct task_struct *new)
{
if (cpu_feature_enabled(X86_FEATURE_FPU))
set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
}
#endif /* _ASM_X86_FPU_SCHED_H */