linux/arch/csky/include/asm/cmpxchg.h
Guo Ren 186f69b64c csky: atomic: Optimize cmpxchg with acquire & release
Optimize cmpxchg with ASM acquire/release fence instructions instead
of the previous generic-based implementation, and avoid issuing a
fence when cmpxchg's first load != old.

Comments by Rutland:

8e86f0b409 ("arm64: atomics: fix use of acquire + release for
full barrier semantics")

Comments by Boqun:

FWIW, you probably need to make sure that putting a barrier instruction
inside an lr/sc loop is a good thing. IIUC, the execution time of a
barrier instruction is determined by the status of store buffers and
invalidate queues (and probably other stuff), so it may increase the
execution time of the lr/sc loop and make it less likely to succeed.
But this really depends on how the arch executes these instructions.

Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
2022-04-25 13:51:42 +08:00
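
To make the acquire/release semantics concrete, here is a minimal usage
sketch (hypothetical, not part of the patch): a toy test-and-set lock
built on the generic atomic wrappers that the arch_cmpxchg_* macros
below back. toy_lock/toy_unlock are made-up names.

#include <linux/atomic.h>

/* Hypothetical example: a toy test-and-set lock showing where the
 * cmpxchg orderings matter. */
static void toy_lock(atomic_t *l)
{
	/* Acquire: no later access can be reordered before a
	 * successful CAS. */
	while (atomic_cmpxchg_acquire(l, 0, 1) != 0)
		cpu_relax();
}

static void toy_unlock(atomic_t *l)
{
	/* Release: all earlier accesses complete before the store. */
	atomic_set_release(l, 0);
}

When the acquire CAS fails here, the csky implementation below skips
its fence entirely; that is the "prevent a fence when cmpxchg's first
load != old" optimization the message describes.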


/* SPDX-License-Identifier: GPL-2.0 */

#ifndef __ASM_CSKY_CMPXCHG_H
#define __ASM_CSKY_CMPXCHG_H

#ifdef CONFIG_SMP
#include <asm/barrier.h>

extern void __bad_xchg(void);

/* Atomically exchange *ptr with new; no ordering guarantees. */
#define __xchg_relaxed(new, ptr, size) \
({ \
	__typeof__(ptr) __ptr = (ptr); \
	__typeof__(new) __new = (new); \
	__typeof__(*(ptr)) __ret; \
	unsigned long tmp; \
	switch (size) { \
	case 4: \
		asm volatile ( \
		"1:	ldex.w		%0, (%3) \n" \
		"	mov		%1, %2   \n" \
		"	stex.w		%1, (%3) \n" \
		"	bez		%1, 1b   \n" \
			: "=&r" (__ret), "=&r" (tmp) \
			: "r" (__new), "r"(__ptr) \
			:); \
		break; \
	default: \
		__bad_xchg(); \
	} \
	__ret; \
})

#define arch_xchg_relaxed(ptr, x) \
	(__xchg_relaxed((x), (ptr), sizeof(*(ptr))))

/* Compare-and-exchange on *ptr; fully relaxed, no fence on any path. */
#define __cmpxchg_relaxed(ptr, old, new, size) \
({ \
	__typeof__(ptr) __ptr = (ptr); \
	__typeof__(new) __new = (new); \
	__typeof__(new) __tmp; \
	__typeof__(old) __old = (old); \
	__typeof__(*(ptr)) __ret; \
	switch (size) { \
	case 4: \
		asm volatile ( \
		"1:	ldex.w		%0, (%3) \n" \
		"	cmpne		%0, %4   \n" \
		"	bt		2f       \n" \
		"	mov		%1, %2   \n" \
		"	stex.w		%1, (%3) \n" \
		"	bez		%1, 1b   \n" \
		"2:				 \n" \
			: "=&r" (__ret), "=&r" (__tmp) \
			: "r" (__new), "r"(__ptr), "r"(__old) \
			:); \
		break; \
	default: \
		__bad_xchg(); \
	} \
	__ret; \
})

#define arch_cmpxchg_relaxed(ptr, o, n) \
	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))

/*
 * Acquire variant: ACQUIRE_FENCE sits after the successful
 * store-exclusive, and a failing compare branches straight to 2:,
 * so no fence is paid when the first load != old.
 */
#define __cmpxchg_acquire(ptr, old, new, size) \
({ \
	__typeof__(ptr) __ptr = (ptr); \
	__typeof__(new) __new = (new); \
	__typeof__(new) __tmp; \
	__typeof__(old) __old = (old); \
	__typeof__(*(ptr)) __ret; \
	switch (size) { \
	case 4: \
		asm volatile ( \
		"1:	ldex.w		%0, (%3) \n" \
		"	cmpne		%0, %4   \n" \
		"	bt		2f       \n" \
		"	mov		%1, %2   \n" \
		"	stex.w		%1, (%3) \n" \
		"	bez		%1, 1b   \n" \
		ACQUIRE_FENCE \
		"2:				 \n" \
			: "=&r" (__ret), "=&r" (__tmp) \
			: "r" (__new), "r"(__ptr), "r"(__old) \
			:); \
		break; \
	default: \
		__bad_xchg(); \
	} \
	__ret; \
})

#define arch_cmpxchg_acquire(ptr, o, n) \
	(__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))

/*
 * Fully ordered variant: RELEASE_FENCE is issued unconditionally
 * before the LR/SC loop; FULL_FENCE runs only on the success path,
 * since a failing compare branches to 2: and skips it.
 */
#define __cmpxchg(ptr, old, new, size) \
({ \
	__typeof__(ptr) __ptr = (ptr); \
	__typeof__(new) __new = (new); \
	__typeof__(new) __tmp; \
	__typeof__(old) __old = (old); \
	__typeof__(*(ptr)) __ret; \
	switch (size) { \
	case 4: \
		asm volatile ( \
		RELEASE_FENCE \
		"1:	ldex.w		%0, (%3) \n" \
		"	cmpne		%0, %4   \n" \
		"	bt		2f       \n" \
		"	mov		%1, %2   \n" \
		"	stex.w		%1, (%3) \n" \
		"	bez		%1, 1b   \n" \
		FULL_FENCE \
		"2:				 \n" \
			: "=&r" (__ret), "=&r" (__tmp) \
			: "r" (__new), "r"(__ptr), "r"(__old) \
			:); \
		break; \
	default: \
		__bad_xchg(); \
	} \
	__ret; \
})

#define arch_cmpxchg(ptr, o, n) \
	(__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))

#define arch_cmpxchg_local(ptr, o, n) \
	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))

#else
#include <asm-generic/cmpxchg.h>
#endif

#endif /* __ASM_CSKY_CMPXCHG_H */
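
Read as plain C, the fully ordered __cmpxchg above behaves roughly like
the sketch below. The helpers are hypothetical stand-ins for the
hardware pieces: release_fence()/full_fence() for RELEASE_FENCE and
FULL_FENCE, load_exclusive()/store_exclusive() for ldex.w/stex.w.

#include <linux/types.h>

/* Hypothetical helpers standing in for the asm pieces. */
static void release_fence(void);		/* RELEASE_FENCE */
static void full_fence(void);			/* FULL_FENCE */
static u32 load_exclusive(u32 *p);		/* ldex.w */
static bool store_exclusive(u32 *p, u32 v);	/* stex.w, true if it stuck */

static u32 cmpxchg_sketch(u32 *p, u32 old, u32 new)
{
	u32 ret;

	release_fence();	/* unconditional, before the LR/SC loop */
	do {
		ret = load_exclusive(p);	/* 1: ldex.w */
		if (ret != old)
			return ret;	/* cmpne + bt 2f: trailing fence skipped */
	} while (!store_exclusive(p, new));	/* stex.w; bez 1b retries */
	full_fence();		/* only a successful exchange pays this */
	return ret;
}

Note that a failing compare still pays the leading release fence here
but skips the trailing full fence, while the acquire variant, which has
no leading fence, pays no barrier at all on failure.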