linux/arch/x86/include/asm/cmpxchg_64.h
Uros Bizjak d26e46f6bf locking/atomic/x86: Introduce arch_try_cmpxchg64_local()
Introduce arch_try_cmpxchg64_local() for 64-bit and 32-bit targets
to improve code using cmpxchg64_local().  On 64-bit targets, the
generated assembly improves from:

    3e28:	31 c0                	xor    %eax,%eax
    3e2a:	4d 0f b1 7d 00       	cmpxchg %r15,0x0(%r13)
    3e2f:	48 85 c0             	test   %rax,%rax
    3e32:	0f 85 9f 00 00 00    	jne    3ed7 <...>

to:

    3e28:	31 c0                	xor    %eax,%eax
    3e2a:	4d 0f b1 7d 00       	cmpxchg %r15,0x0(%r13)
    3e2f:	0f 85 9f 00 00 00    	jne    3ed4 <...>

where a TEST instruction after CMPXCHG is saved.  The improvements
for 32-bit targets are even more noticeable, because double-word
compare after CMPXCHG8B gets eliminated.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Waiman Long <longman@redhat.com>
Link: https://lore.kernel.org/r/20240414161257.49145-1-ubizjak@gmail.com
2024-04-14 22:40:54 +02:00
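Caller-side, the saving comes from the try_ form returning CMPXCHG's condition flag directly and refreshing the expected value on failure, instead of handing back the old value for the caller to compare. A minimal before/after sketch, assuming the generic cmpxchg64_local()/try_cmpxchg64_local() wrappers; the struct and field names are illustrative only:

    #include <linux/atomic.h>
    #include <linux/types.h>

    struct claimable {
            u64 owner;              /* hypothetical example field */
    };

    /* Before: compare the returned old value by hand (extra TEST/CMP). */
    static bool claim_old(struct claimable *c, u64 me)
    {
            return cmpxchg64_local(&c->owner, 0, me) == 0;
    }

    /* After: the boolean result comes straight from CMPXCHG's ZF. */
    static bool claim_new(struct claimable *c, u64 me)
    {
            u64 expected = 0;

            return try_cmpxchg64_local(&c->owner, &expected, me);
    }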


/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CMPXCHG_64_H
#define _ASM_X86_CMPXCHG_64_H

#define arch_cmpxchg64(ptr, o, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_cmpxchg((ptr), (o), (n)); \
})

#define arch_cmpxchg64_local(ptr, o, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_cmpxchg_local((ptr), (o), (n)); \
})

#define arch_try_cmpxchg64(ptr, po, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_try_cmpxchg((ptr), (po), (n)); \
})

#define arch_try_cmpxchg64_local(ptr, po, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_try_cmpxchg_local((ptr), (po), (n)); \
})

union __u128_halves {
        u128 full;
        struct {
                u64 low, high;
        };
};

#define __arch_cmpxchg128(_ptr, _old, _new, _lock) \
({ \
        union __u128_halves o = { .full = (_old), }, \
                            n = { .full = (_new), }; \
 \
        asm volatile(_lock "cmpxchg16b %[ptr]" \
                     : [ptr] "+m" (*(_ptr)), \
                       "+a" (o.low), "+d" (o.high) \
                     : "b" (n.low), "c" (n.high) \
                     : "memory"); \
 \
        o.full; \
})

static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
{
        return __arch_cmpxchg128(ptr, old, new, LOCK_PREFIX);
}
#define arch_cmpxchg128 arch_cmpxchg128

static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, u128 new)
{
        return __arch_cmpxchg128(ptr, old, new,);
}
#define arch_cmpxchg128_local arch_cmpxchg128_local

#define __arch_try_cmpxchg128(_ptr, _oldp, _new, _lock) \
({ \
        union __u128_halves o = { .full = *(_oldp), }, \
                            n = { .full = (_new), }; \
        bool ret; \
 \
        asm volatile(_lock "cmpxchg16b %[ptr]" \
                     CC_SET(e) \
                     : CC_OUT(e) (ret), \
                       [ptr] "+m" (*(_ptr)), \
                       "+a" (o.low), "+d" (o.high) \
                     : "b" (n.low), "c" (n.high) \
                     : "memory"); \
 \
        if (unlikely(!ret)) \
                *(_oldp) = o.full; \
 \
        likely(ret); \
})

static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
{
        return __arch_try_cmpxchg128(ptr, oldp, new, LOCK_PREFIX);
}
#define arch_try_cmpxchg128 arch_try_cmpxchg128

static __always_inline bool arch_try_cmpxchg128_local(volatile u128 *ptr, u128 *oldp, u128 new)
{
        return __arch_try_cmpxchg128(ptr, oldp, new,);
}
#define arch_try_cmpxchg128_local arch_try_cmpxchg128_local

#define system_has_cmpxchg128()         boot_cpu_has(X86_FEATURE_CX16)

#endif /* _ASM_X86_CMPXCHG_64_H */
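
The same try_ contract is visible in __arch_try_cmpxchg128() above: on failure the current memory contents are written back through *(_oldp), so a retry loop never re-reads the location by hand. A sketch of that pattern, assuming the generic try_cmpxchg128() wrapper on top of these arch macros; struct and function names are illustrative only:

    #include <linux/atomic.h>
    #include <linux/types.h>

    /* A 16-byte {pointer, ABA tag} pair; CMPXCHG16B requires 16-byte alignment. */
    struct tagged_ptr {
            union {
                    u128 full;
                    struct {
                            void *ptr;
                            u64 tag;
                    };
            };
    } __aligned(16);

    static bool tagged_ptr_set(struct tagged_ptr *tp, void *new_ptr)
    {
            struct tagged_ptr old, new;

            if (!system_has_cmpxchg128())   /* X86_FEATURE_CX16 */
                    return false;

            old.full = tp->full;            /* may tear; a stale value only costs one retry */
            do {
                    new.ptr = new_ptr;
                    new.tag = old.tag + 1;
                    /* On failure, try_cmpxchg128() refreshes old.full with the current value. */
            } while (!try_cmpxchg128(&tp->full, &old.full, new.full));

            return true;
    }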