x86/asm: Rewrite sync_core() to use IRET-to-self
Aside from being excessively slow, CPUID is problematic: Linux runs on a
handful of CPUs that don't have CPUID.  Use IRET-to-self instead.
IRET-to-self works everywhere, so it makes testing easy.

For reference, on my laptop, IRET-to-self is ~110ns, CPUID(eax=1, ecx=0)
is ~83ns on native and very, very slow under KVM, and MOV-to-CR2 is ~42ns.

While we're at it: sync_core() serves a very specific purpose.  Document it.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Juergen Gross <jgross@suse.com>
Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Matthew Whitehead <tedheadster@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: xen-devel <Xen-devel@lists.xen.org>
Link: http://lkml.kernel.org/r/5c79f0225f68bc8c40335612bf624511abb78941.1481307769.git.luto@kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
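For context on the numbers above: they come from microbenchmarking serializing operations. Below is a minimal user-space sketch (not part of this commit; the harness and its names are illustrative) that times CPUID(eax=1, ecx=0) with RDTSC. IRET-to-self and MOV-to-CR2 run at CPL0, so those two can only be measured like this from kernel context.

	#include <stdio.h>
	#include <stdint.h>

	static inline uint64_t rdtsc(void)
	{
		uint32_t lo, hi;
		asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
		return ((uint64_t)hi << 32) | lo;
	}

	static inline void cpuid_eax1_ecx0(void)
	{
		uint32_t a = 1, b, c = 0, d;

		/* CPUID is fully serializing; clobbers eax/ebx/ecx/edx. */
		asm volatile("cpuid"
			     : "+a" (a), "=b" (b), "+c" (c), "=d" (d)
			     : : "memory");
	}

	int main(void)
	{
		enum { ITERS = 1000000 };
		uint64_t start, cycles;
		int i;

		start = rdtsc();
		for (i = 0; i < ITERS; i++)
			cpuid_eax1_ecx0();
		cycles = rdtsc() - start;

		/* Divide by your TSC frequency to get ns and compare with ~83ns. */
		printf("~%.1f cycles per CPUID(eax=1, ecx=0)\n",
		       (double)cycles / ITERS);
		return 0;
	}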
This commit is contained in:
parent 484d0e5c79
commit c198b121b1
1 changed file with 58 additions and 22 deletions
arch/x86/include/asm/processor.h
@@ -602,33 +602,69 @@ static __always_inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution and prefetching of modified code. */
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ *  a) Text was modified using one virtual address and is about to be executed
+ *     from the same physical page at a different virtual address.
+ *
+ *  b) Text was modified on a different CPU, may subsequently be
+ *     executed on this CPU, and you want to make sure the new version
+ *     gets executed.  This generally means you're calling this in an IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ */
 static inline void sync_core(void)
 {
-	int tmp;
-
-#ifdef CONFIG_X86_32
 	/*
-	 * Do a CPUID if available, otherwise do a jump.  The jump
-	 * can conveniently enough be the jump around CPUID.
+	 * There are quite a few ways to do this.  IRET-to-self is nice
+	 * because it works on every CPU, at any CPL (so it's compatible
+	 * with paravirtualization), and it never exits to a hypervisor.
+	 * The only down sides are that it's a bit slow (it seems to be
+	 * a bit more than 2x slower than the fastest options) and that
+	 * it unmasks NMIs.  The "push %cs" is needed because, in
+	 * paravirtual environments, __KERNEL_CS may not be a valid CS
+	 * value when we do IRET directly.
+	 *
+	 * In case NMI unmasking or performance ever becomes a problem,
+	 * the next best option appears to be MOV-to-CR2 and an
+	 * unconditional jump.  That sequence also works on all CPUs,
+	 * but it will fault at CPL3 (i.e. Xen PV and lguest).
+	 *
+	 * CPUID is the conventional way, but it's nasty: it doesn't
+	 * exist on some 486-like CPUs, and it usually exits to a
+	 * hypervisor.
+	 *
+	 * Like all of Linux's memory ordering operations, this is a
+	 * compiler barrier as well.
 	 */
-	asm volatile("cmpl %2,%1\n\t"
-		     "jl 1f\n\t"
-		     "cpuid\n"
-		     "1:"
-		     : "=a" (tmp)
-		     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	register void *__sp asm(_ASM_SP);
 
+#ifdef CONFIG_X86_32
+	asm volatile (
+		"pushfl\n\t"
+		"pushl %%cs\n\t"
+		"pushl $1f\n\t"
+		"iret\n\t"
+		"1:"
+		: "+r" (__sp) : : "memory");
 #else
-	/*
-	 * CPUID is a barrier to speculative execution.
-	 * Prefetched instructions are automatically
-	 * invalidated when modified.
-	 */
-	asm volatile("cpuid"
-		     : "=a" (tmp)
-		     : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
-
+	unsigned int tmp;
+
+	asm volatile (
+		"mov %%ss, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq %%rsp\n\t"
+		"addq $8, (%%rsp)\n\t"
+		"pushfq\n\t"
+		"mov %%cs, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq $1f\n\t"
+		"iretq\n\t"
+		"1:"
+		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
 #endif
 }
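The 64-bit side builds the five-word IRETQ frame by hand. As an illustration only (this struct appears nowhere in the commit), the frame those pushes construct looks like the following, lowest stack address first:

	struct iretq_frame {		/* hypothetical, for illustration */
		unsigned long rip;	/* "$1f": resume right after the iretq */
		unsigned long cs;	/* current %cs; valid even under paravirt,
					 * unlike a hardcoded __KERNEL_CS */
		unsigned long rflags;	/* saved flags; IRET still unmasks NMIs */
		unsigned long rsp;	/* %rsp from before the sequence; the
					 * "addq $8, (%%rsp)" undoes the earlier
					 * push of %ss so the saved value is the
					 * pre-push stack pointer */
		unsigned long ss;	/* current %ss */
	};

Because IRETQ pops exactly this frame, execution resumes at the local label 1: with the original stack pointer, having architecturally serialized the instruction stream along the way.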
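For case (b) in the new comment, the typical caller patches text on one CPU and then forces every CPU through sync_core() via an IPI. A sketch of that pattern, modeled on the do_sync_core()/on_each_cpu() usage in arch/x86/kernel/alternatives.c (the patch_and_resync() wrapper here is hypothetical):

	static void do_sync_core(void *info)
	{
		sync_core();
	}

	/* Hypothetical wrapper: modify kernel text, then make sure no CPU
	 * can keep executing stale prefetched bytes. */
	static void patch_and_resync(void *addr, const void *opcode, size_t len)
	{
		text_poke(addr, opcode, len);
		on_each_cpu(do_sync_core, NULL, 1);	/* wait for all CPUs */
	}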