The current ARC fetch/return atomics provide fully ordered semantics
only, with two full barriers around the operation.

Instead, implement them as relaxed variants without any barriers and
rely on generic code to generate the fully-ordered, acquire and release
variants by adding the appropriate full barriers. This elides the extra
barriers for acquire/release/relaxed calls.

bloat-o-meter for hsdk defconfig shows codegen improvements, although
the numbers below are inflated due to unrelated inlining heuristic
changes:

bloat-o-meter vmlinux-643babe34fd7-non-relaxed vmlinux-45aa05cb44d7-relaxed
add/remove: 2/5 grow/shrink: 42/1222 up/down: 4158/-14312 (-10154)
Function                                     old     new   delta
..
sys_renameat                                 462     476     +14
ip_mc_inc_group                              424     436     +12
do_read_cache_page                          1882    1894     +12
..
refcount_dec_and_mutex_lock                  254     250      -4
refcount_dec_and_lock_irqsave                258     254      -4
refcount_dec_and_lock                        254     250      -4
..
tcp_v6_route_req                             246     238      -8
tcp_v4_destroy_sock                          286     278      -8
tcp_twsk_unique                              352     344      -8

Link: https://lore.kernel.org/r/20180830144344.GW24142@hirez.programming.kicks-ass.net
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@kernel.org>
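For context, the generic atomic layer derives the ordered forms from an
arch-provided _relaxed primitive roughly as follows. This is a simplified
sketch of the wrapper pattern in include/linux/atomic.h, not verbatim
kernel source:

	#define __atomic_op_acquire(op, args...)			\
	({								\
		typeof(op##_relaxed(args)) __ret = op##_relaxed(args);	\
		__atomic_acquire_fence();  /* barrier after the op */	\
		__ret;							\
	})

	#define __atomic_op_release(op, args...)			\
	({								\
		__atomic_release_fence(); /* barrier before the op */	\
		op##_relaxed(args);					\
	})

	#define __atomic_op_fence(op, args...)				\
	({								\
		typeof(op##_relaxed(args)) __ret;			\
		__atomic_pre_full_fence();  /* smp_mb() on ARC */	\
		__ret = op##_relaxed(args);				\
		__atomic_post_full_fence(); /* smp_mb() on ARC */	\
		__ret;							\
	})

So arch_atomic_add_return() ends up as arch_atomic_add_return_relaxed()
bracketed by two full barriers, while a _relaxed caller pays for neither.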
/* SPDX-License-Identifier: GPL-2.0-only */

#ifndef _ASM_ARC_ATOMIC_LLSC_H
#define _ASM_ARC_ATOMIC_LLSC_H

#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))

#define ATOMIC_OP(op, c_op, asm_op)					\
static inline void arch_atomic_##op(int i, atomic_t *v)		\
{									\
	unsigned int val;						\
									\
	__asm__ __volatile__(						\
	"1:	llock   %[val], [%[ctr]]		\n"		\
	"	" #asm_op " %[val], %[val], %[i]	\n"		\
	"	scond   %[val], [%[ctr]]		\n"		\
	"	bnz     1b				\n"		\
	: [val] "=&r" (val) /* Early clobber to prevent reg reuse */	\
	: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
	  [i] "ir" (i)							\
	: "cc");							\
}

#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
{									\
	unsigned int val;						\
									\
	__asm__ __volatile__(						\
	"1:	llock   %[val], [%[ctr]]		\n"		\
	"	" #asm_op " %[val], %[val], %[i]	\n"		\
	"	scond   %[val], [%[ctr]]		\n"		\
	"	bnz     1b				\n"		\
	: [val] "=&r" (val)						\
	: [ctr] "r" (&v->counter),					\
	  [i] "ir" (i)							\
	: "cc");							\
									\
	return val;							\
}
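
/*
 * Note: no barriers in the _relaxed ops above. The fully ordered,
 * acquire and release variants of the *_return and fetch_* APIs are
 * generated by the generic atomic code, which brackets these relaxed
 * primitives with the required full barriers.
 */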

#define arch_atomic_add_return_relaxed		arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed		arch_atomic_sub_return_relaxed

#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
{									\
	unsigned int val, orig;						\
									\
	__asm__ __volatile__(						\
	"1:	llock   %[orig], [%[ctr]]		\n"		\
	"	" #asm_op " %[val], %[orig], %[i]	\n"		\
	"	scond   %[val], [%[ctr]]		\n"		\
	"	bnz     1b				\n"		\
	: [val] "=&r" (val),						\
	  [orig] "=&r" (orig)						\
	: [ctr] "r" (&v->counter),					\
	  [i] "ir" (i)							\
	: "cc");							\
									\
	return orig;							\
}
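
/*
 * fetch_##op returns the counter's value from *before* the update
 * (%[orig]); ##op##_return above returns the value *after* it.
 */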

#define arch_atomic_fetch_add_relaxed		arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed		arch_atomic_fetch_sub_relaxed

#define arch_atomic_fetch_and_relaxed		arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_andnot_relaxed	arch_atomic_fetch_andnot_relaxed
#define arch_atomic_fetch_or_relaxed		arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed		arch_atomic_fetch_xor_relaxed

#define ATOMIC_OPS(op, c_op, asm_op)					\
	ATOMIC_OP(op, c_op, asm_op)					\
	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
	ATOMIC_FETCH_OP(op, c_op, asm_op)

ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
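
/*
 * Each ATOMIC_OPS() expansion above generates three relaxed primitives,
 * e.g. for "add":
 *   arch_atomic_add()                  - void, no ordering
 *   arch_atomic_add_return_relaxed()   - returns the new value
 *   arch_atomic_fetch_add_relaxed()    - returns the old value
 */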

#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op)					\
	ATOMIC_OP(op, c_op, asm_op)					\
	ATOMIC_FETCH_OP(op, c_op, asm_op)

ATOMIC_OPS(and, &=, and)
ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
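
/*
 * The bitwise ops get no *_return variants: the kernel atomic API only
 * provides plain and fetch_* forms for and/andnot/or/xor. andnot maps
 * onto ARC's "bic" (AND with inverted operand) instruction, and the
 * define below tells generic code a native andnot exists, so no
 * and+not fallback is generated.
 */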

#define arch_atomic_andnot		arch_atomic_andnot

#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP

#endif
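
For illustration, here is how callers see the relaxed/ordered split
(hypothetical usage; the atomic_*() wrappers resolve to the
arch_atomic_*() ops defined above):

	atomic_t cnt = ATOMIC_INIT(0);

	atomic_add(1, &cnt);                       /* plain op, no barriers */
	int n = atomic_add_return(1, &cnt);        /* relaxed op + full barriers from generic code */
	int o = atomic_fetch_add_relaxed(1, &cnt); /* old value, no barriers */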