mirror of
git://sourceware.org/git/glibc.git
synced 2025-03-06 20:58:33 +01:00
LoongArch: Add ifunc support for strrchr{aligned, lsx, lasx}
According to glibc strrchr microbenchmark test results, this implementation could reduce the runtime time as following: Name Percent of rutime reduced strrchr-lasx 10%-50% strrchr-lsx 0%-50% strrchr-aligned 5%-50% Generic strrchr is implemented by function strlen + memrchr, the lasx version will compare with generic strrchr implemented by strlen-lasx + memrchr-lasx, the lsx version will compare with generic strrchr implemented by strlen-lsx + memrchr-lsx, the aligned version will compare with generic strrchr implemented by strlen-aligned + memrchr-generic.
This commit is contained in:
parent
06251002d4
commit
24279aecf3
7 changed files with 578 additions and 0 deletions
|
@ -9,6 +9,9 @@ sysdep_routines += \
|
||||||
strchr-aligned \
|
strchr-aligned \
|
||||||
strchr-lsx \
|
strchr-lsx \
|
||||||
strchr-lasx \
|
strchr-lasx \
|
||||||
|
strrchr-aligned \
|
||||||
|
strrchr-lsx \
|
||||||
|
strrchr-lasx \
|
||||||
strchrnul-aligned \
|
strchrnul-aligned \
|
||||||
strchrnul-lsx \
|
strchrnul-lsx \
|
||||||
strchrnul-lasx \
|
strchrnul-lasx \
|
||||||
|
|
|
@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||||
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
|
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
IFUNC_IMPL (i, name, strrchr,
|
||||||
|
#if !defined __loongarch_soft_float
|
||||||
|
IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx)
|
||||||
|
IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx)
|
||||||
|
#endif
|
||||||
|
IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned)
|
||||||
|
)
|
||||||
|
|
||||||
IFUNC_IMPL (i, name, memcpy,
|
IFUNC_IMPL (i, name, memcpy,
|
||||||
#if !defined __loongarch_soft_float
|
#if !defined __loongarch_soft_float
|
||||||
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
|
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
|
||||||
|
|
41
sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
Normal file
41
sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
/* Common definition for strrchr ifunc selections.
|
||||||
|
All versions must be listed in ifunc-impl-list.c.
|
||||||
|
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <ldsodefs.h>
|
||||||
|
#include <ifunc-init.h>
|
||||||
|
|
||||||
|
#if !defined __loongarch_soft_float
|
||||||
|
extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
|
||||||
|
extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
|
||||||
|
|
||||||
|
static inline void *
|
||||||
|
IFUNC_SELECTOR (void)
|
||||||
|
{
|
||||||
|
#if !defined __loongarch_soft_float
|
||||||
|
if (SUPPORT_LASX)
|
||||||
|
return OPTIMIZE (lasx);
|
||||||
|
else if (SUPPORT_LSX)
|
||||||
|
return OPTIMIZE (lsx);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
return OPTIMIZE (aligned);
|
||||||
|
}
|
170
sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
Normal file
170
sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
Normal file
|
@ -0,0 +1,170 @@
|
||||||
|
/* Optimized strrchr implementation using basic LoongArch instructions.
|
||||||
|
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library. If not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <sys/regdef.h>
|
||||||
|
#include <sys/asm.h>
|
||||||
|
|
||||||
|
#if IS_IN (libc)
|
||||||
|
# define STRRCHR __strrchr_aligned
|
||||||
|
#else
|
||||||
|
# define STRRCHR strrchr
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LEAF(STRRCHR, 6)
|
||||||
|
slli.d t0, a0, 3
|
||||||
|
bstrins.d a0, zero, 2, 0
|
||||||
|
lu12i.w a2, 0x01010
|
||||||
|
ld.d t2, a0, 0
|
||||||
|
|
||||||
|
andi a1, a1, 0xff
|
||||||
|
ori a2, a2, 0x101
|
||||||
|
li.d t3, -1
|
||||||
|
bstrins.d a2, a2, 63, 32
|
||||||
|
|
||||||
|
sll.d t5, t3, t0
|
||||||
|
slli.d a3, a2, 7
|
||||||
|
orn t4, t2, t5
|
||||||
|
mul.d a1, a1, a2
|
||||||
|
|
||||||
|
sub.d t0, t4, a2
|
||||||
|
andn t1, a3, t4
|
||||||
|
and t1, t0, t1
|
||||||
|
beqz t1, L(find_tail)
|
||||||
|
|
||||||
|
|
||||||
|
ctz.d t0, t1
|
||||||
|
orn t0, zero, t0
|
||||||
|
xor t2, t4, a1
|
||||||
|
srl.d t0, t3, t0
|
||||||
|
|
||||||
|
orn t2, t2, t0
|
||||||
|
orn t2, t2, t5
|
||||||
|
revb.d t2, t2
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
|
||||||
|
andn t0, a3, t2
|
||||||
|
and t1, t0, t1
|
||||||
|
ctz.d t0, t1
|
||||||
|
srli.d t0, t0, 3
|
||||||
|
|
||||||
|
addi.d a0, a0, 7
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
maskeqz a0, a0, t1
|
||||||
|
jr ra
|
||||||
|
|
||||||
|
|
||||||
|
L(find_tail):
|
||||||
|
addi.d a4, a0, 8
|
||||||
|
addi.d a0, a0, 8
|
||||||
|
L(loop_ascii):
|
||||||
|
ld.d t2, a0, 0
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
|
||||||
|
and t0, t1, a3
|
||||||
|
bnez t0, L(more_check)
|
||||||
|
ld.d t2, a0, 8
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
|
||||||
|
and t0, t1, a3
|
||||||
|
addi.d a0, a0, 16
|
||||||
|
beqz t0, L(loop_ascii)
|
||||||
|
addi.d a0, a0, -8
|
||||||
|
|
||||||
|
L(more_check):
|
||||||
|
andn t0, a3, t2
|
||||||
|
and t1, t1, t0
|
||||||
|
bnez t1, L(tail)
|
||||||
|
addi.d a0, a0, 8
|
||||||
|
|
||||||
|
|
||||||
|
L(loop_nonascii):
|
||||||
|
ld.d t2, a0, 0
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
andn t0, a3, t2
|
||||||
|
and t1, t0, t1
|
||||||
|
|
||||||
|
bnez t1, L(tail)
|
||||||
|
ld.d t2, a0, 8
|
||||||
|
addi.d a0, a0, 16
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
|
||||||
|
andn t0, a3, t2
|
||||||
|
and t1, t0, t1
|
||||||
|
beqz t1, L(loop_nonascii)
|
||||||
|
addi.d a0, a0, -8
|
||||||
|
|
||||||
|
L(tail):
|
||||||
|
ctz.d t0, t1
|
||||||
|
orn t0, zero, t0
|
||||||
|
xor t2, t2, a1
|
||||||
|
srl.d t0, t3, t0
|
||||||
|
|
||||||
|
|
||||||
|
orn t2, t2, t0
|
||||||
|
revb.d t2, t2
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
andn t0, a3, t2
|
||||||
|
|
||||||
|
and t1, t0, t1
|
||||||
|
bnez t1, L(count_pos)
|
||||||
|
L(find_loop):
|
||||||
|
beq a0, a4, L(find_end)
|
||||||
|
ld.d t2, a0, -8
|
||||||
|
|
||||||
|
addi.d a0, a0, -8
|
||||||
|
xor t2, t2, a1
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
andn t0, a3, t2
|
||||||
|
|
||||||
|
and t1, t0, t1
|
||||||
|
beqz t1, L(find_loop)
|
||||||
|
revb.d t2, t2
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
|
||||||
|
|
||||||
|
andn t0, a3, t2
|
||||||
|
and t1, t0, t1
|
||||||
|
L(count_pos):
|
||||||
|
ctz.d t0, t1
|
||||||
|
addi.d a0, a0, 7
|
||||||
|
|
||||||
|
srli.d t0, t0, 3
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
jr ra
|
||||||
|
nop
|
||||||
|
|
||||||
|
L(find_end):
|
||||||
|
xor t2, t4, a1
|
||||||
|
orn t2, t2, t5
|
||||||
|
revb.d t2, t2
|
||||||
|
sub.d t1, t2, a2
|
||||||
|
|
||||||
|
|
||||||
|
andn t0, a3, t2
|
||||||
|
and t1, t0, t1
|
||||||
|
ctz.d t0, t1
|
||||||
|
srli.d t0, t0, 3
|
||||||
|
|
||||||
|
addi.d a0, a4, -1
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
maskeqz a0, a0, t1
|
||||||
|
jr ra
|
||||||
|
END(STRRCHR)
|
||||||
|
|
||||||
|
libc_hidden_builtin_def(STRRCHR)
|
176
sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
Normal file
176
sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
Normal file
|
@ -0,0 +1,176 @@
|
||||||
|
/* Optimized strrchr implementation using LoongArch LASX instructions.
|
||||||
|
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library. If not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <sys/regdef.h>
|
||||||
|
#include <sys/asm.h>
|
||||||
|
|
||||||
|
#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||||
|
|
||||||
|
#define STRRCHR __strrchr_lasx
|
||||||
|
|
||||||
|
LEAF(STRRCHR, 6)
|
||||||
|
move a2, a0
|
||||||
|
bstrins.d a0, zero, 5, 0
|
||||||
|
xvld xr0, a0, 0
|
||||||
|
xvld xr1, a0, 32
|
||||||
|
|
||||||
|
li.d t2, -1
|
||||||
|
xvreplgr2vr.b xr4, a1
|
||||||
|
xvmsknz.b xr2, xr0
|
||||||
|
xvmsknz.b xr3, xr1
|
||||||
|
|
||||||
|
xvpickve.w xr5, xr2, 4
|
||||||
|
xvpickve.w xr6, xr3, 4
|
||||||
|
vilvl.h vr2, vr5, vr2
|
||||||
|
vilvl.h vr3, vr6, vr3
|
||||||
|
|
||||||
|
vilvl.w vr2, vr3, vr2
|
||||||
|
movfr2gr.d t0, fa2
|
||||||
|
sra.d t0, t0, a2
|
||||||
|
beq t0, t2, L(find_tail)
|
||||||
|
|
||||||
|
|
||||||
|
xvseq.b xr2, xr0, xr4
|
||||||
|
xvseq.b xr3, xr1, xr4
|
||||||
|
xvmsknz.b xr2, xr2
|
||||||
|
xvmsknz.b xr3, xr3
|
||||||
|
|
||||||
|
xvpickve.w xr4, xr2, 4
|
||||||
|
xvpickve.w xr5, xr3, 4
|
||||||
|
vilvl.h vr2, vr4, vr2
|
||||||
|
vilvl.h vr3, vr5, vr3
|
||||||
|
|
||||||
|
vilvl.w vr1, vr3, vr2
|
||||||
|
slli.d t3, t2, 1
|
||||||
|
movfr2gr.d t1, fa1
|
||||||
|
cto.d t0, t0
|
||||||
|
|
||||||
|
srl.d t1, t1, a2
|
||||||
|
sll.d t3, t3, t0
|
||||||
|
addi.d a0, a2, 63
|
||||||
|
andn t1, t1, t3
|
||||||
|
|
||||||
|
|
||||||
|
clz.d t0, t1
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
maskeqz a0, a0, t1
|
||||||
|
jr ra
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
L(find_tail):
|
||||||
|
addi.d a3, a0, 64
|
||||||
|
L(loop):
|
||||||
|
xvld xr2, a0, 64
|
||||||
|
xvld xr3, a0, 96
|
||||||
|
addi.d a0, a0, 64
|
||||||
|
|
||||||
|
xvmin.bu xr5, xr2, xr3
|
||||||
|
xvsetanyeqz.b fcc0, xr5
|
||||||
|
bceqz fcc0, L(loop)
|
||||||
|
xvmsknz.b xr5, xr2
|
||||||
|
|
||||||
|
|
||||||
|
xvmsknz.b xr6, xr3
|
||||||
|
xvpickve.w xr7, xr5, 4
|
||||||
|
xvpickve.w xr8, xr6, 4
|
||||||
|
vilvl.h vr5, vr7, vr5
|
||||||
|
|
||||||
|
vilvl.h vr6, vr8, vr6
|
||||||
|
xvseq.b xr2, xr2, xr4
|
||||||
|
xvseq.b xr3, xr3, xr4
|
||||||
|
xvmsknz.b xr2, xr2
|
||||||
|
|
||||||
|
xvmsknz.b xr3, xr3
|
||||||
|
xvpickve.w xr7, xr2, 4
|
||||||
|
xvpickve.w xr8, xr3, 4
|
||||||
|
vilvl.h vr2, vr7, vr2
|
||||||
|
|
||||||
|
vilvl.h vr3, vr8, vr3
|
||||||
|
vilvl.w vr5, vr6, vr5
|
||||||
|
vilvl.w vr2, vr3, vr2
|
||||||
|
movfr2gr.d t0, fa5
|
||||||
|
|
||||||
|
|
||||||
|
movfr2gr.d t1, fa2
|
||||||
|
slli.d t3, t2, 1
|
||||||
|
cto.d t0, t0
|
||||||
|
sll.d t3, t3, t0
|
||||||
|
|
||||||
|
andn t1, t1, t3
|
||||||
|
beqz t1, L(find_loop)
|
||||||
|
clz.d t0, t1
|
||||||
|
addi.d a0, a0, 63
|
||||||
|
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
jr ra
|
||||||
|
L(find_loop):
|
||||||
|
beq a0, a3, L(find_end)
|
||||||
|
xvld xr2, a0, -64
|
||||||
|
|
||||||
|
xvld xr3, a0, -32
|
||||||
|
addi.d a0, a0, -64
|
||||||
|
xvseq.b xr2, xr2, xr4
|
||||||
|
xvseq.b xr3, xr3, xr4
|
||||||
|
|
||||||
|
|
||||||
|
xvmax.bu xr5, xr2, xr3
|
||||||
|
xvseteqz.v fcc0, xr5
|
||||||
|
bcnez fcc0, L(find_loop)
|
||||||
|
xvmsknz.b xr0, xr2
|
||||||
|
|
||||||
|
xvmsknz.b xr1, xr3
|
||||||
|
xvpickve.w xr2, xr0, 4
|
||||||
|
xvpickve.w xr3, xr1, 4
|
||||||
|
vilvl.h vr0, vr2, vr0
|
||||||
|
|
||||||
|
vilvl.h vr1, vr3, vr1
|
||||||
|
vilvl.w vr0, vr1, vr0
|
||||||
|
movfr2gr.d t0, fa0
|
||||||
|
addi.d a0, a0, 63
|
||||||
|
|
||||||
|
clz.d t0, t0
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
jr ra
|
||||||
|
nop
|
||||||
|
|
||||||
|
|
||||||
|
L(find_end):
|
||||||
|
xvseq.b xr2, xr0, xr4
|
||||||
|
xvseq.b xr3, xr1, xr4
|
||||||
|
xvmsknz.b xr2, xr2
|
||||||
|
xvmsknz.b xr3, xr3
|
||||||
|
|
||||||
|
xvpickve.w xr4, xr2, 4
|
||||||
|
xvpickve.w xr5, xr3, 4
|
||||||
|
vilvl.h vr2, vr4, vr2
|
||||||
|
vilvl.h vr3, vr5, vr3
|
||||||
|
|
||||||
|
vilvl.w vr1, vr3, vr2
|
||||||
|
movfr2gr.d t1, fa1
|
||||||
|
addi.d a0, a2, 63
|
||||||
|
srl.d t1, t1, a2
|
||||||
|
|
||||||
|
clz.d t0, t1
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
maskeqz a0, a0, t1
|
||||||
|
jr ra
|
||||||
|
END(STRRCHR)
|
||||||
|
|
||||||
|
libc_hidden_builtin_def(STRRCHR)
|
||||||
|
#endif
|
144
sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
Normal file
144
sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
/* Optimized strrchr implementation using LoongArch LSX instructions.
|
||||||
|
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library. If not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <sys/regdef.h>
|
||||||
|
#include <sys/asm.h>
|
||||||
|
|
||||||
|
#if IS_IN (libc) && !defined __loongarch_soft_float
|
||||||
|
|
||||||
|
#define STRRCHR __strrchr_lsx
|
||||||
|
|
||||||
|
LEAF(STRRCHR, 6)
|
||||||
|
move a2, a0
|
||||||
|
bstrins.d a0, zero, 4, 0
|
||||||
|
vld vr0, a0, 0
|
||||||
|
vld vr1, a0, 16
|
||||||
|
|
||||||
|
li.d t2, -1
|
||||||
|
vreplgr2vr.b vr4, a1
|
||||||
|
vmsknz.b vr2, vr0
|
||||||
|
vmsknz.b vr3, vr1
|
||||||
|
|
||||||
|
vilvl.h vr2, vr3, vr2
|
||||||
|
movfr2gr.s t0, fa2
|
||||||
|
sra.w t0, t0, a2
|
||||||
|
beq t0, t2, L(find_tail)
|
||||||
|
|
||||||
|
vseq.b vr2, vr0, vr4
|
||||||
|
vseq.b vr3, vr1, vr4
|
||||||
|
vmsknz.b vr2, vr2
|
||||||
|
vmsknz.b vr3, vr3
|
||||||
|
|
||||||
|
|
||||||
|
vilvl.h vr1, vr3, vr2
|
||||||
|
slli.d t3, t2, 1
|
||||||
|
movfr2gr.s t1, fa1
|
||||||
|
cto.w t0, t0
|
||||||
|
|
||||||
|
srl.w t1, t1, a2
|
||||||
|
sll.d t3, t3, t0
|
||||||
|
addi.d a0, a2, 31
|
||||||
|
andn t1, t1, t3
|
||||||
|
|
||||||
|
clz.w t0, t1
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
maskeqz a0, a0, t1
|
||||||
|
jr ra
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
L(find_tail):
|
||||||
|
addi.d a3, a0, 32
|
||||||
|
L(loop):
|
||||||
|
vld vr2, a0, 32
|
||||||
|
vld vr3, a0, 48
|
||||||
|
addi.d a0, a0, 32
|
||||||
|
|
||||||
|
vmin.bu vr5, vr2, vr3
|
||||||
|
vsetanyeqz.b fcc0, vr5
|
||||||
|
bceqz fcc0, L(loop)
|
||||||
|
vmsknz.b vr5, vr2
|
||||||
|
|
||||||
|
vmsknz.b vr6, vr3
|
||||||
|
vilvl.h vr5, vr6, vr5
|
||||||
|
vseq.b vr2, vr2, vr4
|
||||||
|
vseq.b vr3, vr3, vr4
|
||||||
|
|
||||||
|
vmsknz.b vr2, vr2
|
||||||
|
vmsknz.b vr3, vr3
|
||||||
|
vilvl.h vr2, vr3, vr2
|
||||||
|
movfr2gr.s t0, fa5
|
||||||
|
|
||||||
|
|
||||||
|
movfr2gr.s t1, fa2
|
||||||
|
slli.d t3, t2, 1
|
||||||
|
cto.w t0, t0
|
||||||
|
sll.d t3, t3, t0
|
||||||
|
|
||||||
|
andn t1, t1, t3
|
||||||
|
beqz t1, L(find_loop)
|
||||||
|
clz.w t0, t1
|
||||||
|
addi.d a0, a0, 31
|
||||||
|
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
jr ra
|
||||||
|
L(find_loop):
|
||||||
|
beq a0, a3, L(find_end)
|
||||||
|
vld vr2, a0, -32
|
||||||
|
|
||||||
|
vld vr3, a0, -16
|
||||||
|
addi.d a0, a0, -32
|
||||||
|
vseq.b vr2, vr2, vr4
|
||||||
|
vseq.b vr3, vr3, vr4
|
||||||
|
|
||||||
|
|
||||||
|
vmax.bu vr5, vr2, vr3
|
||||||
|
vseteqz.v fcc0, vr5
|
||||||
|
bcnez fcc0, L(find_loop)
|
||||||
|
vmsknz.b vr0, vr2
|
||||||
|
|
||||||
|
vmsknz.b vr1, vr3
|
||||||
|
vilvl.h vr0, vr1, vr0
|
||||||
|
movfr2gr.s t0, fa0
|
||||||
|
addi.d a0, a0, 31
|
||||||
|
|
||||||
|
clz.w t0, t0
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
jr ra
|
||||||
|
nop
|
||||||
|
|
||||||
|
L(find_end):
|
||||||
|
vseq.b vr2, vr0, vr4
|
||||||
|
vseq.b vr3, vr1, vr4
|
||||||
|
vmsknz.b vr2, vr2
|
||||||
|
vmsknz.b vr3, vr3
|
||||||
|
|
||||||
|
|
||||||
|
vilvl.h vr1, vr3, vr2
|
||||||
|
movfr2gr.s t1, fa1
|
||||||
|
addi.d a0, a2, 31
|
||||||
|
srl.w t1, t1, a2
|
||||||
|
|
||||||
|
clz.w t0, t1
|
||||||
|
sub.d a0, a0, t0
|
||||||
|
maskeqz a0, a0, t1
|
||||||
|
jr ra
|
||||||
|
END(STRRCHR)
|
||||||
|
|
||||||
|
libc_hidden_builtin_def(STRRCHR)
|
||||||
|
#endif
|
36
sysdeps/loongarch/lp64/multiarch/strrchr.c
Normal file
36
sysdeps/loongarch/lp64/multiarch/strrchr.c
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
/* Multiple versions of strrchr.
|
||||||
|
All versions must be listed in ifunc-impl-list.c.
|
||||||
|
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in libc. */
|
||||||
|
#if IS_IN (libc)
|
||||||
|
# define strrchr __redirect_strrchr
|
||||||
|
# include <string.h>
|
||||||
|
# undef strrchr
|
||||||
|
|
||||||
|
# define SYMBOL_NAME strrchr
|
||||||
|
# include "ifunc-strrchr.h"
|
||||||
|
|
||||||
|
libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ());
|
||||||
|
weak_alias (strrchr, rindex)
|
||||||
|
# ifdef SHARED
|
||||||
|
__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr)
|
||||||
|
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#endif
|
Loading…
Add table
Reference in a new issue