1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/arch/x86/include/asm/qspinlock.h
Uros Bizjak 94af3a04e3 locking/qspinlock/x86: Micro-optimize virt_spin_lock()
Optimize virt_spin_lock() to use simpler and faster:

  atomic_try_cmpxchg(*ptr, &val, new)

instead of:

  atomic_cmpxchg(*ptr, val, new) == val

The x86 CMPXCHG instruction returns success in the ZF flag, so
this change saves a compare after the CMPXCHG.

Also optimize the retry loop a bit. atomic_try_cmpxchg() fails iff
the value at &lock->val is non-zero, so there is no need to load and
compare the lock value again - cpu_relax() can be called unconditionally
in this case. This allows us to generate optimized:

  1f:	ba 01 00 00 00       	mov    $0x1,%edx
  24:	8b 03                	mov    (%rbx),%eax
  26:	85 c0                	test   %eax,%eax
  28:	75 63                	jne    8d <...>
  2a:	f0 0f b1 13          	lock cmpxchg %edx,(%rbx)
  2e:	75 5d                	jne    8d <...>
...
  8d:	f3 90                	pause
  8f:	eb 93                	jmp    24 <...>

instead of:

  1f:	ba 01 00 00 00       	mov    $0x1,%edx
  24:	8b 03                	mov    (%rbx),%eax
  26:	85 c0                	test   %eax,%eax
  28:	75 13                	jne    3d <...>
  2a:	f0 0f b1 13          	lock cmpxchg %edx,(%rbx)
  2e:	85 c0                	test   %eax,%eax
  30:	75 f2                	jne    24 <...>
...
  3d:	f3 90                	pause
  3f:	eb e3                	jmp    24 <...>

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240422120054.199092-1-ubizjak@gmail.com
2024-04-24 11:46:28 +02:00

114 lines
3 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_QSPINLOCK_H
#define _ASM_X86_QSPINLOCK_H
#include <linux/jump_label.h>
#include <asm/cpufeature.h>
#include <asm-generic/qspinlock_types.h>
#include <asm/paravirt.h>
#include <asm/rmwcc.h>
/*
 * Loop bound of 512 (1 << 9).
 * NOTE(review): not referenced anywhere in this header; presumably consumed
 * by the generic qspinlock slow path as its pending-bit spin limit - confirm.
 */
#define _Q_PENDING_LOOPS (1 << 9)
#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire
/*
 * queued_fetch_set_pending_acquire - set the pending bit, report the lock word
 * @lock: qspinlock whose value word is modified
 *
 * Sets bit _Q_PENDING_OFFSET of @lock->val with a LOCK-prefixed "btsl" and
 * returns a value assembled from two separate accesses:
 *  - the pending part, taken from the "c" condition code of the btsl and
 *    scaled to _Q_PENDING_VAL;
 *  - every bit outside _Q_PENDING_MASK, taken from an atomic_read() issued
 *    after the btsl.
 *
 * Because the second access happens later, the result is not a single
 * snapshot of the lock word.
 */
static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
	u32 pending;
	u32 others;

	/*
	 * Keep GEN_BINARY_RMWcc() out of any if() condition: with asm goto
	 * and CONFIG_PROFILE_ALL_BRANCHES=y that results in a label inside a
	 * statement expression, which GCC doesn't like.
	 */
	pending = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
				   "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL;

	/* Must stay after the btsl: picks up the remaining bits of the word. */
	others = atomic_read(&lock->val) & ~_Q_PENDING_MASK;

	return pending | others;
}
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/*
 * Slow-path, unlock and hash-init entry points declared for the
 * CONFIG_PARAVIRT_SPINLOCKS build. Their definitions are not in this header.
 */
extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __pv_init_lock_hash(void);
extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
/* NOTE(review): presumably set by a "nopvspin" boot option - confirm where it is defined. */
extern bool nopvspin;
/*
 * Self-referential define marking queued_spin_unlock as provided by this
 * header. Presumably asm-generic/qspinlock.h, included at the bottom of this
 * file, checks for it before supplying a default - confirm there.
 */
#define queued_spin_unlock queued_spin_unlock
/**
 * native_queued_spin_unlock - release a queued spinlock (native variant)
 * @lock: Pointer to queued spinlock structure
 *
 * Writes 0 to @lock->locked, the least-significant byte of the lock word,
 * using smp_store_release().
 */
static inline void native_queued_spin_unlock(struct qspinlock *lock)
{
	smp_store_release(&lock->locked, 0);
}
/*
 * queued_spin_lock_slowpath - slow-path entry for the paravirt build
 * @lock: Pointer to queued spinlock structure
 * @val:  lock value passed in by the caller
 *
 * Forwards both arguments unchanged to pv_queued_spin_lock_slowpath().
 */
static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
	pv_queued_spin_lock_slowpath(lock, val);
}
/*
 * queued_spin_unlock - release a queued spinlock through the paravirt hook
 * @lock: Pointer to queued spinlock structure
 *
 * Calls kcsan_release() first, then hands @lock to pv_queued_spin_unlock().
 */
static inline void queued_spin_unlock(struct qspinlock *lock)
{
	/* The KCSAN annotation has to precede the actual unlock. */
	kcsan_release();

	pv_queued_spin_unlock(lock);
}
#define vcpu_is_preempted vcpu_is_preempted
/*
 * vcpu_is_preempted - ask the paravirt layer whether a vCPU is preempted
 * @cpu: CPU number handed through to pv_vcpu_is_preempted()
 *
 * Returns whatever pv_vcpu_is_preempted() reports for @cpu.
 */
static inline bool vcpu_is_preempted(long cpu)
{
	const bool preempted = pv_vcpu_is_preempted(cpu);

	return preempted;
}
#endif
#ifdef CONFIG_PARAVIRT
/*
 * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack.
 *
 * Native (and PV wanting native due to vCPU pinning) should disable this key.
 * It is done in this backwards fashion to only have a single direction change,
 * which removes ordering between native_pv_spin_init() and HV setup.
 *
 * virt_spin_lock() below tests this key with static_branch_likely() and
 * returns false, without touching the lock, when the key is disabled.
 */
DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
/*
 * virt_spin_lock - test-and-set shortcut taken ahead of
 * queued_spin_lock_slowpath() so that virt can hijack it.
 * @lock: qspinlock to acquire
 *
 * Returns:
 *   true  - the lock has been acquired here, all done;
 *   false - virt_spin_lock_key is disabled, queued_spin_lock_slowpath()
 *           will do its thing.
 */
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
	if (!static_branch_likely(&virt_spin_lock_key))
		return false;

	/*
	 * On hypervisors without PARAVIRT_SPINLOCKS support we fall
	 * back to a Test-and-Set spinlock, because fair locks have
	 * horrible lock 'holder' preemption issues.
	 */
	for (;;) {
		int observed = atomic_read(&lock->val);

		/*
		 * Attempt the cmpxchg only when the word was seen as zero;
		 * success installs _Q_LOCKED_VAL and ends the loop.
		 */
		if (!observed &&
		    atomic_try_cmpxchg(&lock->val, &observed, _Q_LOCKED_VAL))
			return true;

		/* Lock word non-zero or cmpxchg lost: pause, then reload. */
		cpu_relax();
	}
}
#endif /* CONFIG_PARAVIRT */
#include <asm-generic/qspinlock.h>
#endif /* _ASM_X86_QSPINLOCK_H */