LoongArch: Add ifunc support for strcpy, stpcpy{aligned, unaligned, lsx, lasx}

According to glibc strcpy and stpcpy microbenchmark test results(changed
to use generic_strcpy and generic_stpcpy instead of strlen + memcpy),
comparing with the generic version, this implementation could reduce the
runtime as following:

Name              Percent of rutime reduced
strcpy-aligned    8%-45%
strcpy-unaligned  8%-48%, comparing with the aligned version, unaligned
                  version takes less instructions to copy the tail of data
		  which length is less than 8. it also has better performance
		  in case src and dest cannot be both aligned with 8bytes
strcpy-lsx        20%-80%
strcpy-lasx       15%-86%
stpcpy-aligned    6%-43%
stpcpy-unaligned  8%-48%
stpcpy-lsx        10%-80%
stpcpy-lasx       10%-87%
This commit is contained in:
dengjianbo 2023-09-13 15:34:59 +08:00 committed by caiyinyu
parent c6c73e136a
commit 06251002d4
12 changed files with 963 additions and 0 deletions

View file

@ -16,6 +16,14 @@ sysdep_routines += \
strcmp-lsx \
strncmp-aligned \
strncmp-lsx \
strcpy-aligned \
strcpy-unaligned \
strcpy-lsx \
strcpy-lasx \
stpcpy-aligned \
stpcpy-unaligned \
stpcpy-lsx \
stpcpy-lasx \
memcpy-aligned \
memcpy-unaligned \
memmove-unaligned \

View file

@ -76,6 +76,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned)
)
IFUNC_IMPL (i, name, strcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LASX, __strcpy_lasx)
IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx)
#endif
IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned)
)
IFUNC_IMPL (i, name, stpcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx)
IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx)
#endif
IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_UAL, __stpcpy_unaligned)
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
)
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)

View file

@ -0,0 +1,27 @@
/* stpcpy-aligned implementation is in strcpy-aligned.S.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
# define STPCPY __stpcpy_aligned
#else
# define STPCPY __stpcpy
#endif
#define USE_AS_STPCPY
#define STRCPY STPCPY
#include "strcpy-aligned.S"

View file

@ -0,0 +1,22 @@
/* stpcpy-lasx implementation is in strcpy-lasx.S.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#define STPCPY __stpcpy_lasx
#define USE_AS_STPCPY
#define STRCPY STPCPY
#include "strcpy-lasx.S"

View file

@ -0,0 +1,22 @@
/* stpcpy-lsx implementation is in strcpy-lsx.S.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#define STPCPY __stpcpy_lsx
#define USE_AS_STPCPY
#define STRCPY STPCPY
#include "strcpy-lsx.S"

View file

@ -0,0 +1,22 @@
/* stpcpy-unaligned implementation is in strcpy-unaligned.S.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#define STPCPY __stpcpy_unaligned
#define USE_AS_STPCPY
#define STRCPY STPCPY
#include "strcpy-unaligned.S"

View file

@ -0,0 +1,42 @@
/* Multiple versions of stpcpy.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2017-2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
# define stpcpy __redirect_stpcpy
# define __stpcpy __redirect___stpcpy
# define NO_MEMPCPY_STPCPY_REDIRECT
# define __NO_STRING_INLINES
# include <string.h>
# undef stpcpy
# undef __stpcpy
# define SYMBOL_NAME stpcpy
# include "ifunc-lasx.h"
libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ());
weak_alias (__stpcpy, stpcpy)
# ifdef SHARED
__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy);
__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy);
# endif
#endif

View file

@ -0,0 +1,202 @@
/* Optimized strcpy stpcpy aligned implementation using basic LoongArch
instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
#if IS_IN (libc)
# ifndef STRCPY
# define STRCPY __strcpy_aligned
# endif
#else
# ifndef STRCPY
# define STRCPY strcpy
# endif
#endif
LEAF(STRCPY, 6)
andi a3, a0, 0x7
move a2, a0
beqz a3, L(dest_align)
sub.d a5, a1, a3
addi.d a5, a5, 8
L(make_dest_align):
ld.b t0, a1, 0
addi.d a1, a1, 1
st.b t0, a2, 0
addi.d a2, a2, 1
beqz t0, L(al_out)
bne a1, a5, L(make_dest_align)
L(dest_align):
andi a4, a1, 7
bstrins.d a1, zero, 2, 0
lu12i.w t5, 0x1010
ld.d t0, a1, 0
ori t5, t5, 0x101
bstrins.d t5, t5, 63, 32
slli.d t6, t5, 0x7
bnez a4, L(unalign)
sub.d t1, t0, t5
andn t2, t6, t0
and t3, t1, t2
bnez t3, L(al_end)
L(al_loop):
st.d t0, a2, 0
ld.d t0, a1, 8
addi.d a1, a1, 8
addi.d a2, a2, 8
sub.d t1, t0, t5
andn t2, t6, t0
and t3, t1, t2
beqz t3, L(al_loop)
L(al_end):
ctz.d t1, t3
srli.d t1, t1, 3
addi.d t1, t1, 1
andi a3, t1, 8
andi a4, t1, 4
andi a5, t1, 2
andi a6, t1, 1
L(al_end_8):
beqz a3, L(al_end_4)
st.d t0, a2, 0
#ifdef USE_AS_STPCPY
addi.d a0, a2, 7
#endif
jr ra
L(al_end_4):
beqz a4, L(al_end_2)
st.w t0, a2, 0
addi.d a2, a2, 4
srli.d t0, t0, 32
L(al_end_2):
beqz a5, L(al_end_1)
st.h t0, a2, 0
addi.d a2, a2, 2
srli.d t0, t0, 16
L(al_end_1):
beqz a6, L(al_out)
st.b t0, a2, 0
addi.d a2, a2, 1
L(al_out):
#ifdef USE_AS_STPCPY
addi.d a0, a2, -1
#endif
jr ra
.align 4
L(unalign):
slli.d a5, a4, 3
li.d t1, -1
sub.d a6, zero, a5
srl.d a7, t0, a5
sll.d t7, t1, a6
or t0, a7, t7
sub.d t1, t0, t5
andn t2, t6, t0
and t3, t1, t2
bnez t3, L(un_end)
ld.d t4, a1, 8
sub.d t1, t4, t5
andn t2, t6, t4
sll.d t0, t4, a6
and t3, t1, t2
or t0, t0, a7
bnez t3, L(un_end_with_remaining)
L(un_loop):
srl.d a7, t4, a5
ld.d t4, a1, 16
addi.d a1, a1, 8
st.d t0, a2, 0
addi.d a2, a2, 8
sub.d t1, t4, t5
andn t2, t6, t4
sll.d t0, t4, a6
and t3, t1, t2
or t0, t0, a7
beqz t3, L(un_loop)
L(un_end_with_remaining):
ctz.d t1, t3
srli.d t1, t1, 3
addi.d t1, t1, 1
sub.d t1, t1, a4
blt t1, zero, L(un_end_less_8)
st.d t0, a2, 0
addi.d a2, a2, 8
beqz t1, L(un_out)
srl.d t0, t4, a5
b L(un_end_less_8)
L(un_end):
ctz.d t1, t3
srli.d t1, t1, 3
addi.d t1, t1, 1
L(un_end_less_8):
andi a4, t1, 4
andi a5, t1, 2
andi a6, t1, 1
L(un_end_4):
beqz a4, L(un_end_2)
st.w t0, a2, 0
addi.d a2, a2, 4
srli.d t0, t0, 32
L(un_end_2):
beqz a5, L(un_end_1)
st.h t0, a2, 0
addi.d a2, a2, 2
srli.d t0, t0, 16
L(un_end_1):
beqz a6, L(un_out)
st.b t0, a2, 0
addi.d a2, a2, 1
L(un_out):
#ifdef USE_AS_STPCPY
addi.d a0, a2, -1
#endif
jr ra
END(STRCPY)
libc_hidden_builtin_def (STRCPY)

View file

@ -0,0 +1,215 @@
/* Optimized strcpy stpcpy implementation using LoongArch LASX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
#if IS_IN (libc) && !defined __loongarch_soft_float
# ifndef STRCPY
# define STRCPY __strcpy_lasx
# endif
# ifdef USE_AS_STPCPY
# define dstend a0
# else
# define dstend a4
# endif
LEAF(STRCPY, 6)
ori t8, zero, 0xfe0
andi t0, a1, 0xfff
li.d t7, -1
move a2, a0
bltu t8, t0, L(page_cross_start)
L(start_entry):
xvld xr0, a1, 0
li.d t0, 32
andi t1, a2, 0x1f
xvsetanyeqz.b fcc0, xr0
sub.d t0, t0, t1
bcnez fcc0, L(end)
add.d a1, a1, t0
xvst xr0, a2, 0
andi a3, a1, 0x1f
add.d a2, a2, t0
bnez a3, L(unaligned)
xvld xr0, a1, 0
xvsetanyeqz.b fcc0, xr0
bcnez fcc0, L(al_end)
L(al_loop):
xvst xr0, a2, 0
xvld xr0, a1, 32
addi.d a2, a2, 32
addi.d a1, a1, 32
xvsetanyeqz.b fcc0, xr0
bceqz fcc0, L(al_loop)
L(al_end):
xvmsknz.b xr0, xr0
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
movfr2gr.s t0, fa0
cto.w t0, t0
add.d a1, a1, t0
xvld xr0, a1, -31
add.d dstend, a2, t0
xvst xr0, dstend, -31
jr ra
nop
L(page_cross_start):
move a4, a1
bstrins.d a4, zero, 4, 0
xvld xr0, a4, 0
xvmsknz.b xr0, xr0
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
movfr2gr.s t0, fa0
sra.w t0, t0, a1
beq t0, t7, L(start_entry)
b L(tail)
L(unaligned):
andi t0, a1, 0xfff
bltu t8, t0, L(un_page_cross)
L(un_start_entry):
xvld xr0, a1, 0
xvsetanyeqz.b fcc0, xr0
bcnez fcc0, L(un_end)
addi.d a1, a1, 32
L(un_loop):
xvst xr0, a2, 0
andi t0, a1, 0xfff
addi.d a2, a2, 32
bltu t8, t0, L(page_cross_loop)
L(un_loop_entry):
xvld xr0, a1, 0
addi.d a1, a1, 32
xvsetanyeqz.b fcc0, xr0
bceqz fcc0, L(un_loop)
addi.d a1, a1, -32
L(un_end):
xvmsknz.b xr0, xr0
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
movfr2gr.s t0, fa0
L(un_tail):
cto.w t0, t0
add.d a1, a1, t0
xvld xr0, a1, -31
add.d dstend, a2, t0
xvst xr0, dstend, -31
jr ra
L(un_page_cross):
sub.d a4, a1, a3
xvld xr0, a4, 0
xvmsknz.b xr0, xr0
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
movfr2gr.s t0, fa0
sra.w t0, t0, a1
beq t0, t7, L(un_start_entry)
b L(un_tail)
L(page_cross_loop):
sub.d a4, a1, a3
xvld xr0, a4, 0
xvmsknz.b xr0, xr0
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
movfr2gr.s t0, fa0
sra.w t0, t0, a1
beq t0, t7, L(un_loop_entry)
b L(un_tail)
L(end):
xvmsknz.b xr0, xr0
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
movfr2gr.s t0, fa0
L(tail):
cto.w t0, t0
add.d dstend, a2, t0
add.d a5, a1, t0
L(less_32):
srli.d t1, t0, 4
beqz t1, L(less_16)
vld vr0, a1, 0
vld vr1, a5, -15
vst vr0, a2, 0
vst vr1, dstend, -15
jr ra
L(less_16):
srli.d t1, t0, 3
beqz t1, L(less_8)
ld.d t2, a1, 0
ld.d t3, a5, -7
st.d t2, a2, 0
st.d t3, dstend, -7
jr ra
L(less_8):
li.d t1, 3
bltu t0, t1, L(less_3)
ld.w t2, a1, 0
ld.w t3, a5, -3
st.w t2, a2, 0
st.w t3, dstend, -3
jr ra
L(less_3):
beqz t0, L(zero_byte)
ld.h t2, a1, 0
st.h t2, a2, 0
L(zero_byte):
st.b zero, dstend, 0
jr ra
END(STRCPY)
libc_hidden_builtin_def (STRCPY)
#endif

View file

@ -0,0 +1,212 @@
/* Optimized strcpy stpcpy implementation using LoongArch LSX instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
#if IS_IN (libc) && !defined __loongarch_soft_float
# ifndef STRCPY
# define STRCPY __strcpy_lsx
# endif
LEAF(STRCPY, 6)
pcalau12i t0, %pc_hi20(L(INDEX))
andi a4, a1, 0xf
vld vr1, t0, %pc_lo12(L(INDEX))
move a2, a0
beqz a4, L(load_start)
xor t0, a1, a4
vld vr0, t0, 0
vreplgr2vr.b vr2, a4
vadd.b vr2, vr2, vr1
vshuf.b vr0, vr2, vr0, vr2
vsetanyeqz.b fcc0, vr0
bcnez fcc0, L(end)
L(load_start):
vld vr0, a1, 0
li.d t1, 16
andi a3, a2, 0xf
vsetanyeqz.b fcc0, vr0
sub.d t0, t1, a3
bcnez fcc0, L(end)
add.d a1, a1, t0
vst vr0, a2, 0
andi a3, a1, 0xf
add.d a2, a2, t0
bnez a3, L(unaligned)
vld vr0, a1, 0
vsetanyeqz.b fcc0, vr0
bcnez fcc0, L(al_end)
L(al_loop):
vst vr0, a2, 0
vld vr0, a1, 16
addi.d a2, a2, 16
addi.d a1, a1, 16
vsetanyeqz.b fcc0, vr0
bceqz fcc0, L(al_loop)
L(al_end):
vmsknz.b vr1, vr0
movfr2gr.s t0, fa1
cto.w t0, t0
add.d a1, a1, t0
vld vr0, a1, -15
# ifdef USE_AS_STPCPY
add.d a0, a2, t0
vst vr0, a0, -15
# else
add.d a2, a2, t0
vst vr0, a2, -15
# endif
jr ra
L(end):
vmsknz.b vr1, vr0
movfr2gr.s t0, fa1
cto.w t0, t0
addi.d t0, t0, 1
L(end_16):
andi t1, t0, 16
beqz t1, L(end_8)
vst vr0, a2, 0
# ifdef USE_AS_STPCPY
addi.d a0, a2, 15
# endif
jr ra
L(end_8):
andi t2, t0, 8
andi t3, t0, 4
andi t4, t0, 2
andi t5, t0, 1
beqz t2, L(end_4)
vstelm.d vr0, a2, 0, 0
addi.d a2, a2, 8
vbsrl.v vr0, vr0, 8
L(end_4):
beqz t3, L(end_2)
vstelm.w vr0, a2, 0, 0
addi.d a2, a2, 4
vbsrl.v vr0, vr0, 4
L(end_2):
beqz t4, L(end_1)
vstelm.h vr0, a2, 0, 0
addi.d a2, a2, 2
vbsrl.v vr0, vr0, 2
L(end_1):
beqz t5, L(out)
vstelm.b vr0, a2, 0, 0
addi.d a2, a2, 1
L(out):
# ifdef USE_AS_STPCPY
addi.d a0, a2, -1
# endif
jr ra
.align 4
L(unaligned):
bstrins.d a1, zero, 3, 0
vld vr2, a1, 0
vreplgr2vr.b vr3, a3
vslt.b vr4, vr1, vr3
vor.v vr0, vr2, vr4
vsetanyeqz.b fcc0, vr0
bcnez fcc0, L(un_first_end)
vld vr0, a1, 16
vadd.b vr3, vr3, vr1
vshuf.b vr4, vr0, vr2, vr3
vsetanyeqz.b fcc0, vr0
bcnez fcc0, L(un_end)
vor.v vr2, vr0, vr0
addi.d a1, a1, 16
L(un_loop):
vld vr0, a1, 16
vst vr4, a2, 0
addi.d a2, a2, 16
vshuf.b vr4, vr0, vr2, vr3
vsetanyeqz.b fcc0, vr0
bcnez fcc0, L(un_end)
vld vr2, a1, 32
vst vr4, a2, 0
addi.d a1, a1, 32
addi.d a2, a2, 16
vshuf.b vr4, vr2, vr0, vr3
vsetanyeqz.b fcc0, vr2
bceqz fcc0, L(un_loop)
vor.v vr0, vr2, vr2
addi.d a1, a1, -16
L(un_end):
vsetanyeqz.b fcc0, vr4
bcnez fcc0, 1f
vst vr4, a2, 0
1:
vmsknz.b vr1, vr0
movfr2gr.s t0, fa1
cto.w t0, t0
add.d a1, a1, t0
vld vr0, a1, 1
add.d a2, a2, t0
sub.d a2, a2, a3
vst vr0, a2, 1
# ifdef USE_AS_STPCPY
addi.d a0, a2, 16
# endif
jr ra
L(un_first_end):
addi.d a2, a2, -16
addi.d a1, a1, -16
b 1b
END(STRCPY)
.section .rodata.cst16,"M",@progbits,16
.align 4
L(INDEX):
.dword 0x0706050403020100
.dword 0x0f0e0d0c0b0a0908
libc_hidden_builtin_def (STRCPY)
#endif

View file

@ -0,0 +1,138 @@
/* Optimized strcpy unaligned implementation using basic LoongArch
instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
#if IS_IN (libc)
# ifndef STRCPY
# define STRCPY __strcpy_unaligned
# endif
# ifdef USE_AS_STPCPY
# define dstend a0
# else
# define dstend a4
# endif
LEAF(STRCPY, 6)
lu12i.w t5, 0x01010
li.w t0, 0xff8
ori t5, t5, 0x101
andi t1, a1, 0xfff
bstrins.d t5, t5, 63, 32
move a2, a0
slli.d t6, t5, 7
bltu t0, t1, L(page_cross)
L(start_entry):
ld.d t0, a1, 0
li.d t3, 8
andi a3, a1, 0x7
sub.d t1, t0, t5
andn t2, t6, t0
sub.d t3, t3, a3
and t1, t1, t2
bnez t1, L(end)
add.d a1, a1, t3
st.d t0, a2, 0
add.d a2, a2, t3
ld.d t0, a1, 0
sub.d t1, t0, t5
andn t2, t6, t0
and t1, t1, t2
bnez t1, L(long_end)
L(loop):
st.d t0, a2, 0
ld.d t0, a1, 8
addi.d a2, a2, 8
addi.d a1, a1, 8
sub.d t1, t0, t5
andn t2, t6, t0
and t1, t1, t2
beqz t1, L(loop)
L(long_end):
ctz.d t1, t1
srli.d t1, t1, 3
add.d a1, a1, t1
ld.d t0, a1, -7
add.d dstend, a2, t1
st.d t0, dstend, -7
jr ra
nop
L(end):
ctz.d t1, t1
srli.d t1, t1, 3
add.d a3, a1, t1
add.d dstend, a2, t1
L(less_8):
li.d t0, 3
bltu t1, t0, L(less_3)
ld.w t1, a1, 0
ld.w t2, a3, -3
st.w t1, a2, 0
st.w t2, dstend, -3
jr ra
L(less_3):
beqz t1, L(zero_bytes)
ld.h t1, a1, 0
st.h t1, a2, 0
L(zero_bytes):
st.b zero, dstend, 0
jr ra
L(page_cross):
move a4, a1
bstrins.d a4, zero, 2, 0
ld.d t0, a4, 0
li.d t3, -1
slli.d t4, a1, 3
srl.d t3, t3, t4
srl.d t0, t0, t4
orn t0, t0, t3
sub.d t1, t0, t5
andn t2, t6, t0
and t1, t1, t2
beqz t1, L(start_entry)
b L(end)
END(STRCPY)
libc_hidden_builtin_def (STRCPY)
#endif

View file

@ -0,0 +1,35 @@
/* Multiple versions of strcpy.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
# define strcpy __redirect_strcpy
# include <string.h>
# undef strcpy
# define SYMBOL_NAME strcpy
# include "ifunc-lasx.h"
libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ());
# ifdef SHARED
__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcpy);
# endif
#endif