Optimize cmpxchg with assembly acquire/release fence instructions
instead of the previous generic-barrier-based implementation. Avoid
issuing a fence when cmpxchg's first load != old.
Comments by Rutland:
8e86f0b409
("arm64: atomics: fix use of acquire + release for
full barrier semantics")
Comments by Boqun:
FWIW, you probably need to make sure that a barrier instruction inside
an lr/sc loop is a good thing. IIUC, the execution time of a barrier
instruction is determined by the status of store buffers and invalidate
queues (and probably other stuffs), so it may increase the execution
time of the lr/sc loop, and make it unlikely to succeed. But this really
depends on how the arch executes these instructions.
Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
136 lines
3.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef __ASM_CSKY_CMPXCHG_H
|
|
#define __ASM_CSKY_CMPXCHG_H
|
|
|
|
#ifdef CONFIG_SMP
|
|
#include <asm/barrier.h>
|
|
|
|
extern void __bad_xchg(void);
|
|
|
|
/*
 * __xchg_relaxed() - atomically exchange *ptr with @new; no ordering.
 *
 * Implements the exchange with a C-SKY ll/sc loop: ldex.w loads the
 * current value, stex.w conditionally stores @new, and the loop retries
 * (bez %1, 1b) until the store-exclusive succeeds.  No fence is emitted,
 * so this provides relaxed (unordered) semantics only.
 *
 * Evaluates to the value previously held in *ptr.  Only size == 4 is
 * supported; any other size calls the undefined __bad_xchg(), producing
 * a link-time error.
 */
#define __xchg_relaxed(new, ptr, size)				\
({								\
	__typeof__(ptr) __ptr = (ptr);				\
	__typeof__(new) __new = (new);				\
	__typeof__(*(ptr)) __ret;				\
	unsigned long tmp;					\
	switch (size) {						\
	case 4:							\
		asm volatile (					\
		"1: ldex.w %0, (%3) \n"				\
		" mov %1, %2 \n"				\
		" stex.w %1, (%3) \n"				\
		" bez %1, 1b \n"	/* retry until stex.w succeeds */ \
			: "=&r" (__ret), "=&r" (tmp)		\
			: "r" (__new), "r"(__ptr)		\
			:);					\
		break;						\
	default:						\
		__bad_xchg();					\
	}							\
	__ret;							\
})
|
|
|
|
/* Relaxed xchg entry point: size is derived from the pointee type. */
#define arch_xchg_relaxed(ptr, x) \
		(__xchg_relaxed((x), (ptr), sizeof(*(ptr))))
|
|
|
|
/*
 * __cmpxchg_relaxed() - atomic compare-and-exchange; no ordering.
 *
 * ll/sc loop: ldex.w loads *ptr; cmpne/bt branch straight to 2f when the
 * loaded value differs from @old (no store, no retry); otherwise stex.w
 * attempts to store @new and the loop retries until the store-exclusive
 * succeeds.  No fence is emitted on any path.
 *
 * Evaluates to the value loaded from *ptr (equal to @old iff the
 * exchange happened).  Only size == 4 is supported; other sizes produce
 * a link-time error via __bad_xchg().
 */
#define __cmpxchg_relaxed(ptr, old, new, size)			\
({								\
	__typeof__(ptr) __ptr = (ptr);				\
	__typeof__(new) __new = (new);				\
	__typeof__(new) __tmp;					\
	__typeof__(old) __old = (old);				\
	__typeof__(*(ptr)) __ret;				\
	switch (size) {						\
	case 4:							\
		asm volatile (					\
		"1: ldex.w %0, (%3) \n"				\
		" cmpne %0, %4 \n"	/* compare loaded value with old */ \
		" bt 2f \n"		/* mismatch: bail out, no store */ \
		" mov %1, %2 \n"				\
		" stex.w %1, (%3) \n"				\
		" bez %1, 1b \n"	/* retry until stex.w succeeds */ \
		"2: \n"						\
			: "=&r" (__ret), "=&r" (__tmp)		\
			: "r" (__new), "r"(__ptr), "r"(__old)	\
			:);					\
		break;						\
	default:						\
		__bad_xchg();					\
	}							\
	__ret;							\
})
|
|
|
|
/* Relaxed cmpxchg entry point: size is derived from the pointee type. */
#define arch_cmpxchg_relaxed(ptr, o, n) \
	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
|
|
|
|
/*
 * __cmpxchg_acquire() - atomic compare-and-exchange with acquire
 * ordering on success.
 *
 * Same ll/sc loop as __cmpxchg_relaxed(), but an ACQUIRE_FENCE (from
 * <asm/barrier.h>) is placed after the successful store-exclusive and
 * *before* label 2.  A failed comparison branches directly to 2f and
 * therefore skips the fence entirely — this is the "prevent a fence when
 * the first load != old" optimization described in the commit message.
 * The fence also sits outside the 1b retry loop, so a looping lr/sc
 * sequence never re-executes it (cf. Boqun's note about barriers inside
 * ll/sc loops hurting forward progress).
 *
 * Evaluates to the value loaded from *ptr.  Only size == 4 is
 * supported; other sizes produce a link-time error via __bad_xchg().
 */
#define __cmpxchg_acquire(ptr, old, new, size)			\
({								\
	__typeof__(ptr) __ptr = (ptr);				\
	__typeof__(new) __new = (new);				\
	__typeof__(new) __tmp;					\
	__typeof__(old) __old = (old);				\
	__typeof__(*(ptr)) __ret;				\
	switch (size) {						\
	case 4:							\
		asm volatile (					\
		"1: ldex.w %0, (%3) \n"				\
		" cmpne %0, %4 \n"	/* compare loaded value with old */ \
		" bt 2f \n"		/* mismatch: skip store AND fence */ \
		" mov %1, %2 \n"				\
		" stex.w %1, (%3) \n"				\
		" bez %1, 1b \n"	/* retry until stex.w succeeds */ \
		ACQUIRE_FENCE		/* success path only */	\
		"2: \n"						\
			: "=&r" (__ret), "=&r" (__tmp)		\
			: "r" (__new), "r"(__ptr), "r"(__old)	\
			:);					\
		break;						\
	default:						\
		__bad_xchg();					\
	}							\
	__ret;							\
})
|
|
|
|
/* Acquire-ordered cmpxchg entry point. */
#define arch_cmpxchg_acquire(ptr, o, n) \
	(__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))
|
|
|
|
/*
 * __cmpxchg() - atomic compare-and-exchange with full-barrier
 * semantics on success.
 *
 * RELEASE_FENCE is issued unconditionally before the ll/sc loop;
 * FULL_FENCE follows a successful store-exclusive, before label 2, so a
 * failed comparison skips the trailing fence.  A full fence (rather
 * than acquire) is required after the store because acquire-before plus
 * release-after around an ll/sc loop does not compose into a full
 * barrier — see the referenced arm64 commit 8e86f0b409 ("arm64:
 * atomics: fix use of acquire + release for full barrier semantics").
 * Both fences live outside the 1b retry loop, keeping the lr/sc window
 * free of barrier instructions.
 *
 * ACQUIRE_FENCE/RELEASE_FENCE/FULL_FENCE are expected to come from
 * <asm/barrier.h> (included above) — confirm when touching that header.
 *
 * Evaluates to the value loaded from *ptr.  Only size == 4 is
 * supported; other sizes produce a link-time error via __bad_xchg().
 */
#define __cmpxchg(ptr, old, new, size)				\
({								\
	__typeof__(ptr) __ptr = (ptr);				\
	__typeof__(new) __new = (new);				\
	__typeof__(new) __tmp;					\
	__typeof__(old) __old = (old);				\
	__typeof__(*(ptr)) __ret;				\
	switch (size) {						\
	case 4:							\
		asm volatile (					\
		RELEASE_FENCE		/* orders prior accesses before the loop */ \
		"1: ldex.w %0, (%3) \n"				\
		" cmpne %0, %4 \n"	/* compare loaded value with old */ \
		" bt 2f \n"		/* mismatch: skip store and FULL_FENCE */ \
		" mov %1, %2 \n"				\
		" stex.w %1, (%3) \n"				\
		" bez %1, 1b \n"	/* retry until stex.w succeeds */ \
		FULL_FENCE		/* success path: complete the full barrier */ \
		"2: \n"						\
			: "=&r" (__ret), "=&r" (__tmp)		\
			: "r" (__new), "r"(__ptr), "r"(__old)	\
			:);					\
		break;						\
	default:						\
		__bad_xchg();					\
	}							\
	__ret;							\
})
|
|
|
|
/* Fully-ordered cmpxchg entry point. */
#define arch_cmpxchg(ptr, o, n) \
	(__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
|
|
|
|
/*
 * CPU-local cmpxchg needs no cross-CPU ordering, so it maps to the
 * relaxed (fence-free) variant.
 */
#define arch_cmpxchg_local(ptr, o, n) \
	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
|
|
#else
|
|
#include <asm-generic/cmpxchg.h>
|
|
#endif
|
|
|
|
#endif /* __ASM_CSKY_CMPXCHG_H */
|