Mirror of git://sourceware.org/git/glibc.git
AArch64: Cleanup ifuncs
Cleanup ifuncs.  Remove uses of libc_hidden_builtin_def, use ENTRY rather
than ENTRY_ALIGN, remove unnecessary defines and conditional compilation.
Rename strlen_mte to strlen_generic.  Remove rtld-memset.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
parent 9db31d7456
commit 9fd3409842
18 changed files with 41 additions and 125 deletions
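The whole patch is about GNU indirect functions (ifuncs): glibc exports a single symbol such as strlen, and a resolver run by the dynamic linker at relocation time picks the CPU-specific implementation once, so every later call goes straight to it. Below is a minimal standalone sketch of that mechanism, not taken from the patch — my_strlen, use_generic and both implementations are hypothetical stand-ins, and it assumes GCC/binutils on an ELF target:

/* Standalone sketch of the GNU ifunc mechanism -- not glibc code.  */
#include <stddef.h>

static size_t strlen_asimd (const char *s);    /* fast SIMD variant */
static size_t strlen_generic (const char *s);  /* safe fallback */

static int use_generic (void) { return 0; }    /* stand-in for an MTE check */

/* Resolver: called by the dynamic linker at relocation time; its return
   value becomes the target of every later call to my_strlen.  */
static size_t (*resolve_strlen (void)) (const char *)
{
  return use_generic () ? strlen_generic : strlen_asimd;
}

size_t my_strlen (const char *s) __attribute__ ((ifunc ("resolve_strlen")));

static size_t strlen_generic (const char *s)
{
  size_t n = 0;
  while (s[n] != '\0')
    n++;
  return n;
}

static size_t strlen_asimd (const char *s)
{
  return strlen_generic (s);   /* real code would use SIMD loads */
}

In the diffs below, each assembly implementation simply carries its own __-prefixed name (ENTRY (__memset_a64fx) and so on), which is what lets the MEMCPY/MEMSET renaming macros and their conditional compilation go.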
sysdeps/aarch64/memset.S
@@ -29,7 +29,7 @@
  *
  */

-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (MEMSET)

 	PTR_ARG (0)
 	SIZE_ARG (2)
sysdeps/aarch64/multiarch/Makefile
@@ -17,6 +17,6 @@ sysdep_routines += \
   memset_kunpeng \
   memset_mops \
   strlen_asimd \
-  strlen_mte \
+  strlen_generic \
 # sysdep_routines
 endif
sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -70,7 +70,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,

   IFUNC_IMPL (i, name, strlen,
 	      IFUNC_IMPL_ADD (array, i, strlen, !mte, __strlen_asimd)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_mte))
+	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic))

   return 0;
 }
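__libc_ifunc_impl_list registers every implementation of a function, including ones the current CPU cannot use, so glibc's test and benchmark harnesses can enumerate them; the usable flag (here !mte for __strlen_asimd) gates what actually runs. A hedged sketch of how such a list might be consumed — run_all is hypothetical, and the struct layout paraphrases ifunc-impl-list.h:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct libc_ifunc_impl
{
  const char *name;     /* e.g. "__strlen_generic" */
  void (*fn) (void);    /* the implementation, cast as needed */
  bool usable;          /* false when the CPU lacks a feature (e.g. no MTE) */
};

/* Hypothetical harness: exercise only the variants this CPU supports.  */
static void
run_all (const struct libc_ifunc_impl *impls, size_t n)
{
  for (size_t i = 0; i < n; i++)
    if (impls[i].usable)
      printf ("testing %s\n", impls[i].name);
}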
sysdeps/aarch64/multiarch/memchr_nosimd.S
@@ -26,10 +26,6 @@
  * Use base integer registers.
  */

-#ifndef MEMCHR
-# define MEMCHR __memchr_nosimd
-#endif
-
 /* Arguments and results.  */
 #define srcin	x0
 #define chrin	x1
@@ -62,7 +58,7 @@
 #define REP8_7f	0x7f7f7f7f7f7f7f7f


-ENTRY_ALIGN (MEMCHR, 6)
+ENTRY (__memchr_nosimd)

 	PTR_ARG (0)
 	SIZE_ARG (2)
@@ -219,5 +215,4 @@ L(none_chr):
 	mov	result, 0
 	ret

-END (MEMCHR)
-libc_hidden_builtin_def (MEMCHR)
+END (__memchr_nosimd)
sysdeps/aarch64/multiarch/memcpy_a64fx.S
@@ -39,9 +39,6 @@
 #define vlen8	x8

 #if HAVE_AARCH64_SVE_ASM
-# if IS_IN (libc)
-# define MEMCPY __memcpy_a64fx
-# define MEMMOVE __memmove_a64fx

 	.arch armv8.2-a+sve

@@ -97,7 +94,7 @@
 #undef BTI_C
 #define BTI_C

-ENTRY (MEMCPY)
+ENTRY (__memcpy_a64fx)

 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -234,11 +231,10 @@ L(last_bytes):
 	st1b	z3.b, p0, [dstend, -1, mul vl]
 	ret

-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
+END (__memcpy_a64fx)


-ENTRY_ALIGN (MEMMOVE, 4)
+ENTRY_ALIGN (__memmove_a64fx, 4)

 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -307,7 +303,5 @@ L(full_overlap):
 	mov	dst, dstin
 	b	L(last_bytes)

-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-# endif /* IS_IN (libc) */
+END (__memmove_a64fx)
 #endif /* HAVE_AARCH64_SVE_ASM */
sysdeps/aarch64/multiarch/memcpy_falkor.S
@@ -71,7 +71,7 @@
    The non-temporal stores help optimize cache utilization.  */

 #if IS_IN (libc)
-ENTRY_ALIGN (__memcpy_falkor, 6)
+ENTRY (__memcpy_falkor)

 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -198,7 +198,6 @@ L(loop64):
 	ret

 END (__memcpy_falkor)
-libc_hidden_builtin_def (__memcpy_falkor)


 /* RATIONALE:
@@ -216,7 +215,7 @@ libc_hidden_builtin_def (__memcpy_falkor)

    For small and medium cases memcpy is used.  */

-ENTRY_ALIGN (__memmove_falkor, 6)
+ENTRY (__memmove_falkor)

 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -311,5 +310,4 @@ L(move_long):
 3:	ret

 END (__memmove_falkor)
-libc_hidden_builtin_def (__memmove_falkor)
 #endif
sysdeps/aarch64/multiarch/memcpy_sve.S
@@ -141,7 +141,6 @@ L(copy64_from_end):
 	ret

 END (__memcpy_sve)
-libc_hidden_builtin_def (__memcpy_sve)


 ENTRY (__memmove_sve)
@@ -208,5 +207,4 @@ L(return):
 	ret

 END (__memmove_sve)
-libc_hidden_builtin_def (__memmove_sve)
 #endif
sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -65,21 +65,7 @@
    Overlapping large forward memmoves use a loop that copies backwards.
 */

-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-
-#if IS_IN (libc)
-
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-
-ENTRY_ALIGN (MEMMOVE, 6)
+ENTRY (__memmove_thunderx)

 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -91,9 +77,9 @@ ENTRY_ALIGN (MEMMOVE, 6)
 	b.lo	L(move_long)

 	/* Common case falls through into memcpy.  */
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-ENTRY (MEMCPY)
+END (__memmove_thunderx)
+
+ENTRY (__memcpy_thunderx)

 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -316,7 +302,4 @@ L(move_long):
 	stp	C_l, C_h, [dstin]
 3:	ret

-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
-
-#endif
+END (__memcpy_thunderx)
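The comment kept at the top of this file states the memmove strategy: copy forward (sharing the memcpy code) unless the destination overlaps the source from above, in which case a backward loop is needed. A toy C model of that dispatch — toy_memmove and the helpers are hypothetical names, and the overlap test mirrors the unsigned subtract-and-compare the assembly performs before branching to L(move_long):

#include <stddef.h>
#include <stdint.h>

static void copy_fwd (unsigned char *d, const unsigned char *s, size_t n)
{
  for (size_t i = 0; i < n; i++)
    d[i] = s[i];
}

static void copy_bwd (unsigned char *d, const unsigned char *s, size_t n)
{
  while (n-- > 0)
    d[n] = s[n];
}

void *toy_memmove (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  /* dst lands inside [src, src + n) exactly when the unsigned difference
     dst - src is below n; only then must the copy run backwards.  */
  if ((uintptr_t) d - (uintptr_t) s < n)
    copy_bwd (d, s, n);
  else
    copy_fwd (d, s, n);
  return dst;
}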
sysdeps/aarch64/multiarch/memcpy_thunderx2.S
@@ -75,27 +75,12 @@
 #define I_v	v16
 #define J_v	v17

-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-
-#if IS_IN (libc)
-
-#undef MEMCPY
-#define MEMCPY __memcpy_thunderx2
-#undef MEMMOVE
-#define MEMMOVE __memmove_thunderx2
-
-
 /* Overlapping large forward memmoves use a loop that copies backwards.
    Otherwise memcpy is used. Small moves branch to memcopy16 directly.
    The longer memcpy cases fall through to the memcpy head.
 */

-ENTRY_ALIGN (MEMMOVE, 6)
+ENTRY (__memmove_thunderx2)

 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -109,8 +94,7 @@ ENTRY_ALIGN (MEMMOVE, 6)
 	ccmp	tmp1, count, 2, hi
 	b.lo	L(move_long)

-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
+END (__memmove_thunderx2)


 /* Copies are split into 3 main cases: small copies of up to 16 bytes,
@@ -124,8 +108,7 @@ libc_hidden_builtin_def (MEMMOVE)

 #define MEMCPY_PREFETCH_LDR 640

 	.p2align 4
-ENTRY (MEMCPY)
+ENTRY (__memcpy_thunderx2)

 	PTR_ARG (0)
 	PTR_ARG (1)
@@ -449,7 +432,7 @@ L(move_long):
 3:	ret


-END (MEMCPY)
+END (__memcpy_thunderx2)
 	.section .rodata
 	.p2align 4

@@ -472,6 +455,3 @@ L(ext_table):
 	.word	L(ext_size_13) -.
 	.word	L(ext_size_14) -.
 	.word	L(ext_size_15) -.
-
-libc_hidden_builtin_def (MEMCPY)
-#endif
sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -33,8 +33,6 @@
 #define vector_length	x9

 #if HAVE_AARCH64_SVE_ASM
-# if IS_IN (libc)
-# define MEMSET __memset_a64fx

 	.arch armv8.2-a+sve

@@ -49,7 +47,7 @@
 #undef BTI_C
 #define BTI_C

-ENTRY (MEMSET)
+ENTRY (__memset_a64fx)
 	PTR_ARG (0)
 	SIZE_ARG (2)

@@ -166,8 +164,6 @@ L(L2):
 	add	count, count, CACHE_LINE_SIZE
 	b	L(last)

-END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
+END (__memset_a64fx)

-#endif /* IS_IN (libc) */
 #endif /* HAVE_AARCH64_SVE_ASM */
sysdeps/aarch64/multiarch/memset_base64.S
@@ -34,7 +34,7 @@
  *
  */

-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (MEMSET)

 	PTR_ARG (0)
 	SIZE_ARG (2)
@@ -183,4 +183,3 @@ L(zva_64):
 #endif

 END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
sysdeps/aarch64/multiarch/memset_emag.S
@@ -19,8 +19,7 @@

 #include <sysdep.h>

-#if IS_IN (libc)
-# define MEMSET __memset_emag
+#define MEMSET __memset_emag

 /*
  * Using DC ZVA to zero memory does not produce better performance if
@@ -30,7 +29,6 @@
  * workloads.
  */

-# define DC_ZVA_THRESHOLD 0
+#define DC_ZVA_THRESHOLD 0

-# include "./memset_base64.S"
-#endif
+#include "./memset_base64.S"
sysdeps/aarch64/multiarch/memset_generic.S
@@ -21,9 +21,15 @@

 #if IS_IN (libc)
 # define MEMSET __memset_generic
+
 /* Do not hide the generic version of memset, we use it internally.  */
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(name)
+
+/* Add a hidden definition for use within libc.so.  */
+# ifdef SHARED
+	.globl __GI_memset; __GI_memset = __memset_generic
+# endif
 # include <sysdeps/aarch64/memset.S>
 #endif
-
-#include <../memset.S>
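The added __GI_memset line gives libc-internal callers a hidden alias that binds directly to __memset_generic at static link time, bypassing both the ifunc resolver and the PLT. The same effect expressed in C, as a standalone illustration only — my_memset and internal_memset are hypothetical names, and it assumes GCC on an ELF target:

#include <stddef.h>
#include <string.h>

void *my_memset (void *p, int c, size_t n)
{
  return memset (p, c, n);
}

/* C-level analogue of ".globl __GI_memset; __GI_memset = __memset_generic":
   a second name for the same code, with hidden visibility so it is
   invisible outside the shared object and never goes through the PLT.  */
extern __typeof (my_memset) internal_memset
  __attribute__ ((alias ("my_memset"), visibility ("hidden")));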
sysdeps/aarch64/multiarch/memset_kunpeng.S
@@ -20,16 +20,13 @@
 #include <sysdep.h>
 #include <sysdeps/aarch64/memset-reg.h>

-#if IS_IN (libc)
-# define MEMSET __memset_kunpeng
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, unaligned accesses
  *
  */

-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (__memset_kunpeng)

 	PTR_ARG (0)
 	SIZE_ARG (2)
@@ -108,6 +105,4 @@ L(set_long):
 	stp	q0, q0, [dstend, -32]
 	ret

-END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
-#endif
+END (__memset_kunpeng)
sysdeps/aarch64/multiarch/rtld-memset.S (deleted)
@@ -1,25 +0,0 @@
-/* Memset for aarch64, for the dynamic linker.
-   Copyright (C) 2017-2023 Free Software Foundation, Inc.
-
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if IS_IN (rtld)
-# define MEMSET memset
-# include <sysdeps/aarch64/memset.S>
-#endif
sysdeps/aarch64/multiarch/strlen.c
@@ -28,10 +28,10 @@

 extern __typeof (__redirect_strlen) __strlen;

-extern __typeof (__redirect_strlen) __strlen_mte attribute_hidden;
+extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden;
 extern __typeof (__redirect_strlen) __strlen_asimd attribute_hidden;

-libc_ifunc (__strlen, (mte ? __strlen_mte : __strlen_asimd));
+libc_ifunc (__strlen, (mte ? __strlen_generic : __strlen_asimd));

 # undef strlen
 strong_alias (__strlen, strlen);
sysdeps/aarch64/multiarch/strlen_asimd.S
@@ -203,4 +203,3 @@ L(page_cross):
 	ret

 END (__strlen_asimd)
-libc_hidden_builtin_def (__strlen_asimd)
sysdeps/aarch64/multiarch/{strlen_mte.S → strlen_generic.S}
@@ -17,14 +17,14 @@
    <https://www.gnu.org/licenses/>.  */

 /* The actual strlen code is in ../strlen.S.  If we are building libc this file
-   defines __strlen_mte.  Otherwise the include of ../strlen.S will define
-   the normal __strlen entry points.  */
+   defines __strlen_generic.  Otherwise the include of ../strlen.S will define
+   the normal __strlen entry points.  */

 #include <sysdep.h>

 #if IS_IN (libc)

-# define STRLEN __strlen_mte
+# define STRLEN __strlen_generic

 /* Do not hide the generic version of strlen, we use it internally.  */
 # undef libc_hidden_builtin_def
@@ -32,7 +32,7 @@

 # ifdef SHARED
 /* It doesn't make sense to send libc-internal strlen calls through a PLT.  */
-	.globl __GI_strlen; __GI_strlen = __strlen_mte
+	.globl __GI_strlen; __GI_strlen = __strlen_generic
 # endif
 #endif
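The comment at the top of this wrapper describes the rename-and-include pattern: the actual code lives once in ../strlen.S, and each multiarch wrapper sets the entry-point name before including it. A hedged C sketch of the same idea, with hypothetical names (in glibc the "body" is the included assembly file, not inline as here):

#include <stddef.h>

/* Wrapper side: pick the entry-point name, then pull in the shared body.
   A second wrapper could define STRLEN differently and reuse it.  */
#define STRLEN my_strlen_generic

/* Body side (in glibc this is the included ../strlen.S): it defines
   whatever STRLEN currently expands to.  */
size_t STRLEN (const char *s)
{
  size_t n = 0;
  while (s[n] != '\0')
    n++;
  return n;
}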