diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile index 360a6718c0..39550bea9b 100644 --- a/sysdeps/loongarch/lp64/multiarch/Makefile +++ b/sysdeps/loongarch/lp64/multiarch/Makefile @@ -16,6 +16,14 @@ sysdep_routines += \ strcmp-lsx \ strncmp-aligned \ strncmp-lsx \ + strcpy-aligned \ + strcpy-unaligned \ + strcpy-lsx \ + strcpy-lasx \ + stpcpy-aligned \ + stpcpy-unaligned \ + stpcpy-lsx \ + stpcpy-lasx \ memcpy-aligned \ memcpy-unaligned \ memmove-unaligned \ diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c index e397d58c9d..39a14f1d6e 100644 --- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c @@ -76,6 +76,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) ) + IFUNC_IMPL (i, name, strcpy, +#if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LASX, __strcpy_lasx) + IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx) +#endif + IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned) + IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned) + ) + + IFUNC_IMPL (i, name, stpcpy, +#if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx) + IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx) +#endif + IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_UAL, __stpcpy_unaligned) + IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) + ) + IFUNC_IMPL (i, name, memcpy, #if !defined __loongarch_soft_float IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S new file mode 100644 index 0000000000..1f763db685 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S @@ -0,0 +1,27 @@ +/* stpcpy-aligned implementation is in strcpy-aligned.S. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#if IS_IN (libc) +# define STPCPY __stpcpy_aligned +#else +# define STPCPY __stpcpy +#endif + +#define USE_AS_STPCPY +#define STRCPY STPCPY +#include "strcpy-aligned.S" diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S new file mode 100644 index 0000000000..13d6c9539b --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S @@ -0,0 +1,22 @@ +/* stpcpy-lasx implementation is in strcpy-lasx.S. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#define STPCPY __stpcpy_lasx +#define USE_AS_STPCPY +#define STRCPY STPCPY +#include "strcpy-lasx.S" diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S new file mode 100644 index 0000000000..e0f17ab589 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S @@ -0,0 +1,22 @@ +/* stpcpy-lsx implementation is in strcpy-lsx.S. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#define STPCPY __stpcpy_lsx +#define USE_AS_STPCPY +#define STRCPY STPCPY +#include "strcpy-lsx.S" diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S new file mode 100644 index 0000000000..cc2f971214 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S @@ -0,0 +1,22 @@ +/* stpcpy-unaligned implementation is in strcpy-unaligned.S. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#define STPCPY __stpcpy_unaligned +#define USE_AS_STPCPY +#define STRCPY STPCPY +#include "strcpy-unaligned.S" diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy.c b/sysdeps/loongarch/lp64/multiarch/stpcpy.c new file mode 100644 index 0000000000..d4860d7a47 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/stpcpy.c @@ -0,0 +1,42 @@ +/* Multiple versions of stpcpy. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +# define stpcpy __redirect_stpcpy +# define __stpcpy __redirect___stpcpy +# define NO_MEMPCPY_STPCPY_REDIRECT +# define __NO_STRING_INLINES +# include +# undef stpcpy +# undef __stpcpy + +# define SYMBOL_NAME stpcpy +# include "ifunc-lasx.h" + +libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ()); + +weak_alias (__stpcpy, stpcpy) +# ifdef SHARED +__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); +__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); +# endif +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S new file mode 100644 index 0000000000..4ed539fdb4 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S @@ -0,0 +1,202 @@ +/* Optimized strcpy stpcpy aligned implementation using basic LoongArch + instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) +# ifndef STRCPY +# define STRCPY __strcpy_aligned +# endif +#else +# ifndef STRCPY +# define STRCPY strcpy +# endif +#endif + +LEAF(STRCPY, 6) + andi a3, a0, 0x7 + move a2, a0 + beqz a3, L(dest_align) + sub.d a5, a1, a3 + addi.d a5, a5, 8 + +L(make_dest_align): + ld.b t0, a1, 0 + addi.d a1, a1, 1 + st.b t0, a2, 0 + addi.d a2, a2, 1 + beqz t0, L(al_out) + + bne a1, a5, L(make_dest_align) + +L(dest_align): + andi a4, a1, 7 + bstrins.d a1, zero, 2, 0 + + lu12i.w t5, 0x1010 + ld.d t0, a1, 0 + ori t5, t5, 0x101 + bstrins.d t5, t5, 63, 32 + + slli.d t6, t5, 0x7 + bnez a4, L(unalign) + sub.d t1, t0, t5 + andn t2, t6, t0 + + and t3, t1, t2 + bnez t3, L(al_end) + +L(al_loop): + st.d t0, a2, 0 + ld.d t0, a1, 8 + + addi.d a1, a1, 8 + addi.d a2, a2, 8 + sub.d t1, t0, t5 + andn t2, t6, t0 + + and t3, t1, t2 + beqz t3, L(al_loop) + +L(al_end): + ctz.d t1, t3 + srli.d t1, t1, 3 + addi.d t1, t1, 1 + + andi a3, t1, 8 + andi a4, t1, 4 + andi a5, t1, 2 + andi a6, t1, 1 + +L(al_end_8): + beqz a3, L(al_end_4) + st.d t0, a2, 0 +#ifdef USE_AS_STPCPY + addi.d a0, a2, 7 +#endif + jr ra +L(al_end_4): + beqz a4, L(al_end_2) + st.w t0, a2, 0 + addi.d a2, a2, 4 + srli.d t0, t0, 32 +L(al_end_2): + beqz a5, L(al_end_1) + st.h t0, a2, 0 + addi.d a2, a2, 2 + srli.d t0, t0, 16 +L(al_end_1): + beqz a6, L(al_out) + st.b t0, a2, 0 + addi.d a2, a2, 1 +L(al_out): +#ifdef USE_AS_STPCPY + addi.d a0, a2, -1 +#endif + jr ra + + .align 4 +L(unalign): + slli.d a5, a4, 3 + li.d t1, -1 + sub.d a6, zero, a5 + + srl.d a7, t0, a5 + sll.d t7, t1, a6 + + or t0, a7, t7 + sub.d t1, t0, t5 + andn t2, t6, t0 + and t3, t1, t2 + + bnez t3, L(un_end) + + ld.d t4, a1, 8 + + sub.d t1, t4, t5 + andn t2, t6, t4 + sll.d t0, t4, a6 + and t3, t1, t2 + + or t0, t0, a7 + bnez t3, L(un_end_with_remaining) + +L(un_loop): + srl.d a7, t4, a5 + + ld.d t4, a1, 16 + addi.d a1, a1, 8 + + st.d t0, a2, 0 + addi.d a2, a2, 8 + + sub.d t1, t4, t5 + andn t2, t6, t4 + sll.d t0, t4, a6 + and t3, t1, t2 + + or t0, t0, a7 + beqz t3, L(un_loop) + +L(un_end_with_remaining): + ctz.d t1, t3 + srli.d t1, t1, 3 + addi.d t1, t1, 1 + sub.d t1, t1, a4 + + blt t1, zero, L(un_end_less_8) + st.d t0, a2, 0 + addi.d a2, a2, 8 + beqz t1, L(un_out) + srl.d t0, t4, a5 + b L(un_end_less_8) + +L(un_end): + ctz.d t1, t3 + srli.d t1, t1, 3 + addi.d t1, t1, 1 + +L(un_end_less_8): + andi a4, t1, 4 + andi a5, t1, 2 + andi a6, t1, 1 +L(un_end_4): + beqz a4, L(un_end_2) + st.w t0, a2, 0 + addi.d a2, a2, 4 + srli.d t0, t0, 32 +L(un_end_2): + beqz a5, L(un_end_1) + st.h t0, a2, 0 + addi.d a2, a2, 2 + srli.d t0, t0, 16 +L(un_end_1): + beqz a6, L(un_out) + st.b t0, a2, 0 + addi.d a2, a2, 1 +L(un_out): +#ifdef USE_AS_STPCPY + addi.d a0, a2, -1 +#endif + jr ra +END(STRCPY) + +libc_hidden_builtin_def (STRCPY) diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S new file mode 100644 index 0000000000..c282561253 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S @@ -0,0 +1,215 @@ +/* Optimized strcpy stpcpy implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) && !defined __loongarch_soft_float + +# ifndef STRCPY +# define STRCPY __strcpy_lasx +# endif + +# ifdef USE_AS_STPCPY +# define dstend a0 +# else +# define dstend a4 +# endif + +LEAF(STRCPY, 6) + ori t8, zero, 0xfe0 + andi t0, a1, 0xfff + li.d t7, -1 + move a2, a0 + + bltu t8, t0, L(page_cross_start) +L(start_entry): + xvld xr0, a1, 0 + li.d t0, 32 + andi t1, a2, 0x1f + + xvsetanyeqz.b fcc0, xr0 + sub.d t0, t0, t1 + bcnez fcc0, L(end) + add.d a1, a1, t0 + + xvst xr0, a2, 0 + andi a3, a1, 0x1f + add.d a2, a2, t0 + bnez a3, L(unaligned) + + + xvld xr0, a1, 0 + xvsetanyeqz.b fcc0, xr0 + bcnez fcc0, L(al_end) +L(al_loop): + xvst xr0, a2, 0 + + xvld xr0, a1, 32 + addi.d a2, a2, 32 + addi.d a1, a1, 32 + xvsetanyeqz.b fcc0, xr0 + + bceqz fcc0, L(al_loop) +L(al_end): + xvmsknz.b xr0, xr0 + xvpickve.w xr1, xr0, 4 + vilvl.h vr0, vr1, vr0 + + movfr2gr.s t0, fa0 + cto.w t0, t0 + add.d a1, a1, t0 + xvld xr0, a1, -31 + + + add.d dstend, a2, t0 + xvst xr0, dstend, -31 + jr ra + nop + +L(page_cross_start): + move a4, a1 + bstrins.d a4, zero, 4, 0 + xvld xr0, a4, 0 + xvmsknz.b xr0, xr0 + + xvpickve.w xr1, xr0, 4 + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 + sra.w t0, t0, a1 + + beq t0, t7, L(start_entry) + b L(tail) +L(unaligned): + andi t0, a1, 0xfff + bltu t8, t0, L(un_page_cross) + + +L(un_start_entry): + xvld xr0, a1, 0 + xvsetanyeqz.b fcc0, xr0 + bcnez fcc0, L(un_end) + addi.d a1, a1, 32 + +L(un_loop): + xvst xr0, a2, 0 + andi t0, a1, 0xfff + addi.d a2, a2, 32 + bltu t8, t0, L(page_cross_loop) + +L(un_loop_entry): + xvld xr0, a1, 0 + addi.d a1, a1, 32 + xvsetanyeqz.b fcc0, xr0 + bceqz fcc0, L(un_loop) + + addi.d a1, a1, -32 +L(un_end): + xvmsknz.b xr0, xr0 + xvpickve.w xr1, xr0, 4 + vilvl.h vr0, vr1, vr0 + + + movfr2gr.s t0, fa0 +L(un_tail): + cto.w t0, t0 + add.d a1, a1, t0 + xvld xr0, a1, -31 + + add.d dstend, a2, t0 + xvst xr0, dstend, -31 + jr ra +L(un_page_cross): + sub.d a4, a1, a3 + + xvld xr0, a4, 0 + xvmsknz.b xr0, xr0 + xvpickve.w xr1, xr0, 4 + vilvl.h vr0, vr1, vr0 + + movfr2gr.s t0, fa0 + sra.w t0, t0, a1 + beq t0, t7, L(un_start_entry) + b L(un_tail) + + +L(page_cross_loop): + sub.d a4, a1, a3 + xvld xr0, a4, 0 + xvmsknz.b xr0, xr0 + xvpickve.w xr1, xr0, 4 + + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 + sra.w t0, t0, a1 + beq t0, t7, L(un_loop_entry) + + b L(un_tail) +L(end): + xvmsknz.b xr0, xr0 + xvpickve.w xr1, xr0, 4 + vilvl.h vr0, vr1, vr0 + + movfr2gr.s t0, fa0 +L(tail): + cto.w t0, t0 + add.d dstend, a2, t0 + add.d a5, a1, t0 + +L(less_32): + srli.d t1, t0, 4 + beqz t1, L(less_16) + vld vr0, a1, 0 + vld vr1, a5, -15 + + vst vr0, a2, 0 + vst vr1, dstend, -15 + jr ra +L(less_16): + srli.d t1, t0, 3 + + beqz t1, L(less_8) + ld.d t2, a1, 0 + ld.d t3, a5, -7 + st.d t2, a2, 0 + + st.d t3, dstend, -7 + jr ra +L(less_8): + li.d t1, 3 + bltu t0, t1, L(less_3) + + ld.w t2, a1, 0 + ld.w t3, a5, -3 + st.w t2, a2, 0 + st.w t3, dstend, -3 + + jr ra +L(less_3): + beqz t0, L(zero_byte) + ld.h t2, a1, 0 + + st.h t2, a2, 0 +L(zero_byte): + st.b zero, dstend, 0 + jr ra +END(STRCPY) + +libc_hidden_builtin_def (STRCPY) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S new file mode 100644 index 0000000000..fc2498f7db --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S @@ -0,0 +1,212 @@ +/* Optimized strcpy stpcpy implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) && !defined __loongarch_soft_float + +# ifndef STRCPY +# define STRCPY __strcpy_lsx +# endif + +LEAF(STRCPY, 6) + pcalau12i t0, %pc_hi20(L(INDEX)) + andi a4, a1, 0xf + vld vr1, t0, %pc_lo12(L(INDEX)) + move a2, a0 + + beqz a4, L(load_start) + xor t0, a1, a4 + vld vr0, t0, 0 + vreplgr2vr.b vr2, a4 + + vadd.b vr2, vr2, vr1 + vshuf.b vr0, vr2, vr0, vr2 + vsetanyeqz.b fcc0, vr0 + bcnez fcc0, L(end) + +L(load_start): + vld vr0, a1, 0 + li.d t1, 16 + andi a3, a2, 0xf + vsetanyeqz.b fcc0, vr0 + + + sub.d t0, t1, a3 + bcnez fcc0, L(end) + add.d a1, a1, t0 + vst vr0, a2, 0 + + andi a3, a1, 0xf + add.d a2, a2, t0 + bnez a3, L(unaligned) + vld vr0, a1, 0 + + vsetanyeqz.b fcc0, vr0 + bcnez fcc0, L(al_end) +L(al_loop): + vst vr0, a2, 0 + vld vr0, a1, 16 + + addi.d a2, a2, 16 + addi.d a1, a1, 16 + vsetanyeqz.b fcc0, vr0 + bceqz fcc0, L(al_loop) + + +L(al_end): + vmsknz.b vr1, vr0 + movfr2gr.s t0, fa1 + cto.w t0, t0 + add.d a1, a1, t0 + + vld vr0, a1, -15 +# ifdef USE_AS_STPCPY + add.d a0, a2, t0 + vst vr0, a0, -15 +# else + add.d a2, a2, t0 + vst vr0, a2, -15 +# endif + jr ra + +L(end): + vmsknz.b vr1, vr0 + movfr2gr.s t0, fa1 + cto.w t0, t0 + addi.d t0, t0, 1 + +L(end_16): + andi t1, t0, 16 + beqz t1, L(end_8) + vst vr0, a2, 0 +# ifdef USE_AS_STPCPY + addi.d a0, a2, 15 +# endif + jr ra + +L(end_8): + andi t2, t0, 8 + andi t3, t0, 4 + andi t4, t0, 2 + andi t5, t0, 1 + + beqz t2, L(end_4) + vstelm.d vr0, a2, 0, 0 + addi.d a2, a2, 8 + vbsrl.v vr0, vr0, 8 + +L(end_4): + beqz t3, L(end_2) + vstelm.w vr0, a2, 0, 0 + addi.d a2, a2, 4 + vbsrl.v vr0, vr0, 4 + +L(end_2): + beqz t4, L(end_1) + vstelm.h vr0, a2, 0, 0 + addi.d a2, a2, 2 + vbsrl.v vr0, vr0, 2 + + +L(end_1): + beqz t5, L(out) + vstelm.b vr0, a2, 0, 0 + addi.d a2, a2, 1 +L(out): +# ifdef USE_AS_STPCPY + addi.d a0, a2, -1 +# endif + jr ra + + .align 4 +L(unaligned): + bstrins.d a1, zero, 3, 0 + vld vr2, a1, 0 + vreplgr2vr.b vr3, a3 + vslt.b vr4, vr1, vr3 + + vor.v vr0, vr2, vr4 + vsetanyeqz.b fcc0, vr0 + bcnez fcc0, L(un_first_end) + vld vr0, a1, 16 + + vadd.b vr3, vr3, vr1 + vshuf.b vr4, vr0, vr2, vr3 + vsetanyeqz.b fcc0, vr0 + bcnez fcc0, L(un_end) + + + vor.v vr2, vr0, vr0 + addi.d a1, a1, 16 +L(un_loop): + vld vr0, a1, 16 + vst vr4, a2, 0 + + addi.d a2, a2, 16 + vshuf.b vr4, vr0, vr2, vr3 + vsetanyeqz.b fcc0, vr0 + bcnez fcc0, L(un_end) + + vld vr2, a1, 32 + vst vr4, a2, 0 + addi.d a1, a1, 32 + addi.d a2, a2, 16 + + vshuf.b vr4, vr2, vr0, vr3 + vsetanyeqz.b fcc0, vr2 + bceqz fcc0, L(un_loop) + vor.v vr0, vr2, vr2 + + + addi.d a1, a1, -16 +L(un_end): + vsetanyeqz.b fcc0, vr4 + bcnez fcc0, 1f + vst vr4, a2, 0 + +1: + vmsknz.b vr1, vr0 + movfr2gr.s t0, fa1 + cto.w t0, t0 + add.d a1, a1, t0 + + vld vr0, a1, 1 + add.d a2, a2, t0 + sub.d a2, a2, a3 + vst vr0, a2, 1 +# ifdef USE_AS_STPCPY + addi.d a0, a2, 16 +# endif + jr ra +L(un_first_end): + addi.d a2, a2, -16 + addi.d a1, a1, -16 + b 1b +END(STRCPY) + + .section .rodata.cst16,"M",@progbits,16 + .align 4 +L(INDEX): + .dword 0x0706050403020100 + .dword 0x0f0e0d0c0b0a0908 + +libc_hidden_builtin_def (STRCPY) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S new file mode 100644 index 0000000000..9e31883bb1 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S @@ -0,0 +1,138 @@ +/* Optimized strcpy unaligned implementation using basic LoongArch + instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) + +# ifndef STRCPY +# define STRCPY __strcpy_unaligned +# endif + +# ifdef USE_AS_STPCPY +# define dstend a0 +# else +# define dstend a4 +# endif + +LEAF(STRCPY, 6) + lu12i.w t5, 0x01010 + li.w t0, 0xff8 + ori t5, t5, 0x101 + andi t1, a1, 0xfff + + bstrins.d t5, t5, 63, 32 + move a2, a0 + slli.d t6, t5, 7 + bltu t0, t1, L(page_cross) + +L(start_entry): + ld.d t0, a1, 0 + li.d t3, 8 + andi a3, a1, 0x7 + sub.d t1, t0, t5 + + andn t2, t6, t0 + sub.d t3, t3, a3 + and t1, t1, t2 + bnez t1, L(end) + + + add.d a1, a1, t3 + st.d t0, a2, 0 + add.d a2, a2, t3 + ld.d t0, a1, 0 + + sub.d t1, t0, t5 + andn t2, t6, t0 + and t1, t1, t2 + bnez t1, L(long_end) + +L(loop): + st.d t0, a2, 0 + ld.d t0, a1, 8 + addi.d a2, a2, 8 + addi.d a1, a1, 8 + + sub.d t1, t0, t5 + andn t2, t6, t0 + and t1, t1, t2 + beqz t1, L(loop) + + +L(long_end): + ctz.d t1, t1 + srli.d t1, t1, 3 + add.d a1, a1, t1 + ld.d t0, a1, -7 + + add.d dstend, a2, t1 + st.d t0, dstend, -7 + jr ra + nop + +L(end): + ctz.d t1, t1 + srli.d t1, t1, 3 + add.d a3, a1, t1 + add.d dstend, a2, t1 + +L(less_8): + li.d t0, 3 + bltu t1, t0, L(less_3) + ld.w t1, a1, 0 + ld.w t2, a3, -3 + + + st.w t1, a2, 0 + st.w t2, dstend, -3 + jr ra +L(less_3): + beqz t1, L(zero_bytes) + + ld.h t1, a1, 0 + st.h t1, a2, 0 +L(zero_bytes): + st.b zero, dstend, 0 + jr ra + +L(page_cross): + move a4, a1 + bstrins.d a4, zero, 2, 0 + ld.d t0, a4, 0 + li.d t3, -1 + + slli.d t4, a1, 3 + srl.d t3, t3, t4 + srl.d t0, t0, t4 + orn t0, t0, t3 + + + sub.d t1, t0, t5 + andn t2, t6, t0 + and t1, t1, t2 + beqz t1, L(start_entry) + + b L(end) +END(STRCPY) + +libc_hidden_builtin_def (STRCPY) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy.c b/sysdeps/loongarch/lp64/multiarch/strcpy.c new file mode 100644 index 0000000000..46afd068f9 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/strcpy.c @@ -0,0 +1,35 @@ +/* Multiple versions of strcpy. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +# define strcpy __redirect_strcpy +# include +# undef strcpy + +# define SYMBOL_NAME strcpy +# include "ifunc-lasx.h" + +libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ()); + +# ifdef SHARED +__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcpy); +# endif +#endif