AArch64: Cleanup ifuncs

Clean up ifuncs.  Remove uses of libc_hidden_builtin_def, use ENTRY rather than
ENTRY_ALIGN, remove unnecessary defines and conditional compilation.  Rename
strlen_mte to strlen_generic.  Remove rtld-memset.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Wilco Dijkstra 2023-10-24 13:51:07 +01:00
parent 9db31d7456
commit 9fd3409842
18 changed files with 41 additions and 125 deletions
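
A note on the ENTRY_ALIGN (X, 6) to ENTRY (X) replacements that make up much of this
diff: on AArch64, glibc defines ENTRY in terms of ENTRY_ALIGN with the same default
alignment, so these changes are mechanical and do not alter the generated code. A
sketch of the relationship, assuming the shape of the macros in
sysdeps/aarch64/sysdep.h (abridged and illustrative, not the exact glibc definitions):

/* Abridged model of sysdeps/aarch64/sysdep.h (illustrative only).  */
#define ENTRY_ALIGN(name, align)	\
  .globl name;				\
  .type name, %function;		\
  .p2align align;			\
  name:					\
  cfi_startproc;			\
  BTI_C;

/* ENTRY already requests 2^6 = 64-byte alignment, so ENTRY_ALIGN (MEMSET, 6)
   and ENTRY (MEMSET) assemble identically.  */
#define ENTRY(name) ENTRY_ALIGN (name, 6)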

sysdeps/aarch64/memset.S

@@ -29,7 +29,7 @@
  *
  */
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (MEMSET)
 	PTR_ARG (0)
 	SIZE_ARG (2)

sysdeps/aarch64/multiarch/Makefile

@@ -17,6 +17,6 @@ sysdep_routines += \
   memset_kunpeng \
   memset_mops \
   strlen_asimd \
-  strlen_mte \
+  strlen_generic \
 # sysdep_routines
 endif

sysdeps/aarch64/multiarch/ifunc-impl-list.c

@@ -70,7 +70,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strlen,
 	      IFUNC_IMPL_ADD (array, i, strlen, !mte, __strlen_asimd)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_mte))
+	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic))
 
   return 0;
 }

sysdeps/aarch64/multiarch/memchr_nosimd.S

@@ -26,10 +26,6 @@
  * Use base integer registers.
  */
-#ifndef MEMCHR
-# define MEMCHR __memchr_nosimd
-#endif
 /* Arguments and results. */
 #define srcin x0
 #define chrin x1
@@ -62,7 +58,7 @@
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
-ENTRY_ALIGN (MEMCHR, 6)
+ENTRY (__memchr_nosimd)
 	PTR_ARG (0)
 	SIZE_ARG (2)
@@ -219,5 +215,4 @@ L(none_chr):
 	mov result, 0
 	ret
-END (MEMCHR)
-libc_hidden_builtin_def (MEMCHR)
+END (__memchr_nosimd)

sysdeps/aarch64/multiarch/memcpy_a64fx.S

@@ -39,9 +39,6 @@
 #define vlen8 x8
 #if HAVE_AARCH64_SVE_ASM
-# if IS_IN (libc)
-# define MEMCPY __memcpy_a64fx
-# define MEMMOVE __memmove_a64fx
 	.arch armv8.2-a+sve
@@ -97,7 +94,7 @@
 #undef BTI_C
 #define BTI_C
-ENTRY (MEMCPY)
+ENTRY (__memcpy_a64fx)
 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -234,11 +231,10 @@ L(last_bytes):
 	st1b z3.b, p0, [dstend, -1, mul vl]
 	ret
-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
+END (__memcpy_a64fx)
 
-ENTRY_ALIGN (MEMMOVE, 4)
+ENTRY_ALIGN (__memmove_a64fx, 4)
 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -307,7 +303,5 @@ L(full_overlap):
 	mov dst, dstin
 	b L(last_bytes)
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-# endif /* IS_IN (libc) */
+END (__memmove_a64fx)
 #endif /* HAVE_AARCH64_SVE_ASM */

sysdeps/aarch64/multiarch/memcpy_falkor.S

@@ -71,7 +71,7 @@
    The non-temporal stores help optimize cache utilization. */
 #if IS_IN (libc)
-ENTRY_ALIGN (__memcpy_falkor, 6)
+ENTRY (__memcpy_falkor)
 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -198,7 +198,6 @@ L(loop64):
 	ret
 END (__memcpy_falkor)
-libc_hidden_builtin_def (__memcpy_falkor)
 
 /* RATIONALE:
@@ -216,7 +215,7 @@ libc_hidden_builtin_def (__memcpy_falkor)
    For small and medium cases memcpy is used. */
-ENTRY_ALIGN (__memmove_falkor, 6)
+ENTRY (__memmove_falkor)
 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -311,5 +310,4 @@ L(move_long):
 3:	ret
 END (__memmove_falkor)
-libc_hidden_builtin_def (__memmove_falkor)
 #endif

sysdeps/aarch64/multiarch/memcpy_sve.S

@@ -141,7 +141,6 @@ L(copy64_from_end):
 	ret
 END (__memcpy_sve)
-libc_hidden_builtin_def (__memcpy_sve)
 
 ENTRY (__memmove_sve)
@@ -208,5 +207,4 @@ L(return):
 	ret
 END (__memmove_sve)
-libc_hidden_builtin_def (__memmove_sve)
 #endif

sysdeps/aarch64/multiarch/memcpy_thunderx.S

@@ -65,21 +65,7 @@
    Overlapping large forward memmoves use a loop that copies backwards.
 */
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-#if IS_IN (libc)
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-ENTRY_ALIGN (MEMMOVE, 6)
+ENTRY (__memmove_thunderx)
 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -91,9 +77,9 @@ ENTRY_ALIGN (MEMMOVE, 6)
 	b.lo L(move_long)
 	/* Common case falls through into memcpy. */
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-ENTRY (MEMCPY)
+END (__memmove_thunderx)
+ENTRY (__memcpy_thunderx)
 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -316,7 +302,4 @@ L(move_long):
 	stp C_l, C_h, [dstin]
 3:	ret
-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
-#endif
+END (__memcpy_thunderx)

sysdeps/aarch64/multiarch/memcpy_thunderx2.S

@@ -75,27 +75,12 @@
 #define I_v v16
 #define J_v v17
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-#if IS_IN (libc)
-#undef MEMCPY
-#define MEMCPY __memcpy_thunderx2
-#undef MEMMOVE
-#define MEMMOVE __memmove_thunderx2
 /* Overlapping large forward memmoves use a loop that copies backwards.
    Otherwise memcpy is used. Small moves branch to memcopy16 directly.
    The longer memcpy cases fall through to the memcpy head.
 */
-ENTRY_ALIGN (MEMMOVE, 6)
+ENTRY (__memmove_thunderx2)
 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -109,8 +94,7 @@ ENTRY_ALIGN (MEMMOVE, 6)
 	ccmp tmp1, count, 2, hi
 	b.lo L(move_long)
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
+END (__memmove_thunderx2)
 
 /* Copies are split into 3 main cases: small copies of up to 16 bytes,
@@ -124,8 +108,7 @@ libc_hidden_builtin_def (MEMMOVE)
 #define MEMCPY_PREFETCH_LDR 640
 	.p2align 4
-ENTRY (MEMCPY)
+ENTRY (__memcpy_thunderx2)
 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -449,7 +432,7 @@ L(move_long):
 3:	ret
-END (MEMCPY)
+END (__memcpy_thunderx2)
 	.section .rodata
 	.p2align 4
@@ -472,6 +455,3 @@ L(ext_table):
 	.word L(ext_size_13) -.
 	.word L(ext_size_14) -.
 	.word L(ext_size_15) -.
-libc_hidden_builtin_def (MEMCPY)
-#endif

sysdeps/aarch64/multiarch/memset_a64fx.S

@@ -33,8 +33,6 @@
 #define vector_length x9
 #if HAVE_AARCH64_SVE_ASM
-# if IS_IN (libc)
-# define MEMSET __memset_a64fx
 	.arch armv8.2-a+sve
@@ -49,7 +47,7 @@
 #undef BTI_C
 #define BTI_C
-ENTRY (MEMSET)
+ENTRY (__memset_a64fx)
 	PTR_ARG (0)
 	SIZE_ARG (2)
@@ -166,8 +164,6 @@ L(L2):
 	add count, count, CACHE_LINE_SIZE
 	b L(last)
-END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
+END (__memset_a64fx)
-#endif /* IS_IN (libc) */
 #endif /* HAVE_AARCH64_SVE_ASM */

sysdeps/aarch64/multiarch/memset_base64.S

@@ -34,7 +34,7 @@
  *
  */
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (MEMSET)
 	PTR_ARG (0)
 	SIZE_ARG (2)
@@ -183,4 +183,3 @@ L(zva_64):
 #endif
 END (MEMSET)
-libc_hidden_builtin_def (MEMSET)

sysdeps/aarch64/multiarch/memset_emag.S

@@ -19,8 +19,7 @@
 #include <sysdep.h>
-#if IS_IN (libc)
-# define MEMSET __memset_emag
+#define MEMSET __memset_emag
 /*
  * Using DC ZVA to zero memory does not produce better performance if
@@ -30,7 +29,6 @@
  * workloads.
  */
-# define DC_ZVA_THRESHOLD 0
+#define DC_ZVA_THRESHOLD 0
-# include "./memset_base64.S"
-#endif
+#include "./memset_base64.S"

sysdeps/aarch64/multiarch/memset_generic.S

@@ -21,9 +21,15 @@
 #if IS_IN (libc)
 # define MEMSET __memset_generic
 /* Do not hide the generic version of memset, we use it internally. */
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(name)
+/* Add a hidden definition for use within libc.so. */
+# ifdef SHARED
+	.globl __GI_memset; __GI_memset = __memset_generic
+# endif
-# include <sysdeps/aarch64/memset.S>
 #endif
+#include <../memset.S>
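
For context on the __GI_memset alias added above: inside libc.so, glibc's
libc_hidden_proto machinery makes internal references to memset resolve to the
hidden symbol __GI_memset, so binding that symbol directly to __memset_generic
keeps libc-internal calls off the PLT and away from the ifunc resolver. A
simplified model of the redirection, illustrative rather than glibc's exact
macro expansion:

/* Simplified model: inside libc, a declaration along these lines makes
   every internal memset call reference __GI_memset, which the wrapper
   above aliases to __memset_generic.  Not the literal libc_hidden_proto
   expansion.  */
#include <stddef.h>
extern void *memset (void *s, int c, size_t n) __asm__ ("__GI_memset");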

sysdeps/aarch64/multiarch/memset_kunpeng.S

@@ -20,16 +20,13 @@
 #include <sysdep.h>
 #include <sysdeps/aarch64/memset-reg.h>
-#if IS_IN (libc)
-# define MEMSET __memset_kunpeng
 /* Assumptions:
  *
  * ARMv8-a, AArch64, unaligned accesses
  *
  */
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (__memset_kunpeng)
 	PTR_ARG (0)
 	SIZE_ARG (2)
@@ -108,6 +105,4 @@ L(set_long):
 	stp q0, q0, [dstend, -32]
 	ret
-END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
-#endif
+END (__memset_kunpeng)

sysdeps/aarch64/multiarch/rtld-memset.S (deleted)

@@ -1,25 +0,0 @@
-/* Memset for aarch64, for the dynamic linker.
-   Copyright (C) 2017-2023 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if IS_IN (rtld)
-# define MEMSET memset
-# include <sysdeps/aarch64/memset.S>
-#endif

sysdeps/aarch64/multiarch/strlen.c

@@ -28,10 +28,10 @@
 extern __typeof (__redirect_strlen) __strlen;
-extern __typeof (__redirect_strlen) __strlen_mte attribute_hidden;
+extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden;
 extern __typeof (__redirect_strlen) __strlen_asimd attribute_hidden;
 
-libc_ifunc (__strlen, (mte ? __strlen_mte : __strlen_asimd));
+libc_ifunc (__strlen, (mte ? __strlen_generic : __strlen_asimd));
 
 # undef strlen
 strong_alias (__strlen, strlen);
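
The libc_ifunc line above is glibc's wrapper around GNU indirect functions: a
resolver runs when the dynamic linker processes relocations and returns the
implementation the symbol should bind to, here preferring the MTE-safe generic
version when memory tagging is enabled. A self-contained sketch of the same
selection in plain GNU C; the my_* names and the getauxval check are
illustrative stand-ins (glibc's resolver consults its precomputed cpu-features
data rather than calling getauxval):

#include <stddef.h>
#include <sys/auxv.h>

#ifndef HWCAP2_MTE
# define HWCAP2_MTE (1UL << 18)	/* Linux arm64 HWCAP2 bit for MTE.  */
#endif

typedef size_t strlen_fn (const char *);

/* Stand-ins for __strlen_generic / __strlen_asimd.  */
static size_t
my_strlen_generic (const char *s)
{
  size_t n = 0;
  while (s[n] != '\0')
    n++;
  return n;
}

static size_t
my_strlen_asimd (const char *s)
{
  return my_strlen_generic (s);	/* Pretend this is the SIMD version.  */
}

/* The resolver runs once, at relocation time; whatever it returns is
   what the my_strlen symbol binds to for the rest of the process.  */
static strlen_fn *
my_strlen_resolver (void)
{
  return (getauxval (AT_HWCAP2) & HWCAP2_MTE)
	 ? my_strlen_generic : my_strlen_asimd;
}

size_t my_strlen (const char *s) __attribute__ ((ifunc ("my_strlen_resolver")));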

sysdeps/aarch64/multiarch/strlen_asimd.S

@@ -203,4 +203,3 @@ L(page_cross):
 	ret
 END (__strlen_asimd)
-libc_hidden_builtin_def (__strlen_asimd)

sysdeps/aarch64/multiarch/strlen_generic.S (renamed from strlen_mte.S)

@@ -17,14 +17,14 @@
    <https://www.gnu.org/licenses/>. */
 /* The actual strlen code is in ../strlen.S. If we are building libc this file
-   defines __strlen_mte. Otherwise the include of ../strlen.S will define
-   the normal __strlen entry points. */
+   defines __strlen_generic. Otherwise the include of ../strlen.S will define
+   the normal __strlen entry points. */
 #include <sysdep.h>
 #if IS_IN (libc)
-# define STRLEN __strlen_mte
+# define STRLEN __strlen_generic
 /* Do not hide the generic version of strlen, we use it internally. */
 # undef libc_hidden_builtin_def
@@ -32,7 +32,7 @@
 # ifdef SHARED
 /* It doesn't make sense to send libc-internal strlen calls through a PLT. */
-	.globl __GI_strlen; __GI_strlen = __strlen_mte
+	.globl __GI_strlen; __GI_strlen = __strlen_generic
 # endif
 #endif