Loongarch: Add ifunc support for strchr{aligned, lsx, lasx} and strchrnul{aligned, lsx, lasx}

These implementations improve the time to run strchr{nul}
microbenchmark in glibc as below:
strchr-lasx       reduces the runtime about 50%-83%
strchr-lsx        reduces the runtime about 30%-67%
strchr-aligned    reduces the runtime about 10%-20%
strchrnul-lasx    reduces the runtime about 50%-83%
strchrnul-lsx     reduces the runtime about 36%-65%
strchrnul-aligned reduces the runtime about 6%-10%
This commit is contained in:
dengjianbo 2023-08-15 09:08:11 +08:00 committed by caiyinyu
parent 652b9fdb77
commit ba67bc8e0a
12 changed files with 581 additions and 0 deletions

View file

@ -3,5 +3,11 @@ sysdep_routines += \
strlen-aligned \
strlen-lsx \
strlen-lasx \
strchr-aligned \
strchr-lsx \
strchr-lasx \
strchrnul-aligned \
strchrnul-lsx \
strchrnul-lasx \
# sysdep_routines
endif

View file

@ -37,5 +37,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#endif
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
)
IFUNC_IMPL (i, name, strchr,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx)
#endif
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned)
)
IFUNC_IMPL (i, name, strchrnul,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx)
IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx)
#endif
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned)
)
return i;
}

View file

@ -0,0 +1,41 @@
/* Common definition for strchr ifunc selections.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <ldsodefs.h>
#include <ifunc-init.h>
#if !defined __loongarch_soft_float
extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
#endif
extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
#if !defined __loongarch_soft_float
if (SUPPORT_LASX)
return OPTIMIZE (lasx);
else if (SUPPORT_LSX)
return OPTIMIZE (lsx);
else
#endif
return OPTIMIZE (aligned);
}

View file

@ -0,0 +1,41 @@
/* Common definition for strchrnul ifunc selections.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <ldsodefs.h>
#include <ifunc-init.h>
#if !defined __loongarch_soft_float
extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
#endif
extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
#if !defined __loongarch_soft_float
if (SUPPORT_LASX)
return OPTIMIZE (lasx);
else if (SUPPORT_LSX)
return OPTIMIZE (lsx);
else
#endif
return OPTIMIZE (aligned);
}

View file

@ -0,0 +1,99 @@
/* Optimized strchr implementation using basic Loongarch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
#if IS_IN (libc)
# define STRCHR_NAME __strchr_aligned
#else
# define STRCHR_NAME strchr
#endif
LEAF(STRCHR_NAME, 6)
slli.d t1, a0, 3
bstrins.d a0, zero, 2, 0
lu12i.w a2, 0x01010
ld.d t2, a0, 0
ori a2, a2, 0x101
andi a1, a1, 0xff
bstrins.d a2, a2, 63, 32
li.w t0, -1
mul.d a1, a1, a2
sll.d t0, t0, t1
slli.d a3, a2, 7
orn t2, t2, t0
sll.d t3, a1, t1
xor t4, t2, t3
sub.d a4, t2, a2
sub.d a5, t4, a2
andn a4, a4, t2
andn a5, a5, t4
or t0, a4, a5
and t0, t0, a3
bnez t0, L(end)
addi.d a0, a0, 8
L(loop):
ld.d t4, a0, 0
xor t2, t4, a1
sub.d a4, t4, a2
sub.d a5, t2, a2
andn a4, a4, t4
andn a5, a5, t2
or t0, a4, a5
and t0, t0, a3
bnez t0, L(end)
ld.d t4, a0, 8
addi.d a0, a0, 16
xor t2, t4, a1
sub.d a4, t4, a2
sub.d a5, t2, a2
andn a4, a4, t4
andn a5, a5, t2
or t0, a4, a5
and t0, t0, a3
beqz t0, L(loop)
addi.d a0, a0, -8
L(end):
and t0, a5, a3
and t1, a4, a3
ctz.d t0, t0
ctz.d t1, t1
srli.w t2, t0, 3
sltu t3, t1, t0
add.d a0, a0, t2
masknez a0, a0, t3
jr ra
END(STRCHR_NAME)

View file

@ -0,0 +1,91 @@
/* Optimized strchr implementation using loongarch LASX SIMD instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
#if IS_IN (libc) && !defined __loongarch_soft_float
#ifndef AS_STRCHRNUL
# define STRCHR __strchr_lasx
#endif
LEAF(STRCHR, 6)
andi t1, a0, 0x1f
bstrins.d a0, zero, 4, 0
xvld xr0, a0, 0
li.d t2, -1
xvreplgr2vr.b xr1, a1
sll.d t1, t2, t1
xvxor.v xr2, xr0, xr1
xvmin.bu xr0, xr0, xr2
xvmsknz.b xr0, xr0
xvpickve.w xr3, xr0, 4
vilvl.h vr0, vr3, vr0
movfr2gr.s t0, fa0
orn t0, t0, t1
bne t0, t2, L(end)
addi.d a0, a0, 32
nop
L(loop):
xvld xr0, a0, 0
xvxor.v xr2, xr0, xr1
xvmin.bu xr0, xr0, xr2
xvsetanyeqz.b fcc0, xr0
bcnez fcc0, L(loop_end)
xvld xr0, a0, 32
addi.d a0, a0, 64
xvxor.v xr2, xr0, xr1
xvmin.bu xr0, xr0, xr2
xvsetanyeqz.b fcc0, xr0
bceqz fcc0, L(loop)
addi.d a0, a0, -32
L(loop_end):
xvmsknz.b xr0, xr0
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
movfr2gr.s t0, fa0
L(end):
cto.w t0, t0
add.d a0, a0, t0
#ifndef AS_STRCHRNUL
vreplgr2vr.b vr0, t0
xvpermi.q xr3, xr2, 1
vshuf.b vr0, vr3, vr2, vr0
vpickve2gr.bu t0, vr0, 0
masknez a0, a0, t0
#endif
jr ra
END(STRCHR)
libc_hidden_builtin_def(STRCHR)
#endif

View file

@ -0,0 +1,73 @@
/* Optimized strlen implementation using loongarch LSX SIMD instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
#if IS_IN (libc) && !defined __loongarch_soft_float
#ifndef AS_STRCHRNUL
# define STRCHR __strchr_lsx
#endif
LEAF(STRCHR, 6)
andi t1, a0, 0xf
bstrins.d a0, zero, 3, 0
vld vr0, a0, 0
li.d t2, -1
vreplgr2vr.b vr1, a1
sll.d t3, t2, t1
vxor.v vr2, vr0, vr1
vmin.bu vr0, vr0, vr2
vmsknz.b vr0, vr0
movfr2gr.s t0, fa0
ext.w.h t0, t0
orn t0, t0, t3
beq t0, t2, L(loop)
L(found):
cto.w t0, t0
add.d a0, a0, t0
#ifndef AS_STRCHRNUL
vreplve.b vr2, vr2, t0
vpickve2gr.bu t1, vr2, 0
masknez a0, a0, t1
#endif
jr ra
L(loop):
vld vr0, a0, 16
addi.d a0, a0, 16
vxor.v vr2, vr0, vr1
vmin.bu vr0, vr0, vr2
vsetanyeqz.b fcc0, vr0
bceqz fcc0, L(loop)
vmsknz.b vr0, vr0
movfr2gr.s t0, fa0
b L(found)
END(STRCHR)
libc_hidden_builtin_def (STRCHR)
#endif

View file

@ -0,0 +1,36 @@
/* Multiple versions of strchr.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
# define strchr __redirect_strchr
# include <string.h>
# undef strchr
# define SYMBOL_NAME strchr
# include "ifunc-strchr.h"
libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
weak_alias(strchr, index)
# ifdef SHARED
__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr);
# endif
#endif

View file

@ -0,0 +1,95 @@
/* Optimized strchrnul implementation using basic Loongarch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
#if IS_IN (libc)
# define STRCHRNUL_NAME __strchrnul_aligned
#else
# define STRCHRNUL_NAME __strchrnul
#endif
LEAF(STRCHRNUL_NAME, 6)
slli.d t1, a0, 3
bstrins.d a0, zero, 2, 0
lu12i.w a2, 0x01010
ld.d t2, a0, 0
ori a2, a2, 0x101
andi a1, a1, 0xff
bstrins.d a2, a2, 63, 32
li.w t0, -1
mul.d a1, a1, a2
sll.d t0, t0, t1
slli.d a3, a2, 7
orn t2, t2, t0
sll.d t3, a1, t1
xor t4, t2, t3
sub.d a4, t2, a2
sub.d a5, t4, a2
andn a4, a4, t2
andn a5, a5, t4
or t0, a4, a5
and t0, t0, a3
bnez t0, L(end)
addi.d a0, a0, 8
L(loop):
ld.d t4, a0, 0
xor t2, t4, a1
sub.d a4, t4, a2
sub.d a5, t2, a2
andn a4, a4, t4
andn a5, a5, t2
or t0, a4, a5
and t0, t0, a3
bnez t0, L(end)
ld.d t4, a0, 8
addi.d a0, a0, 16
xor t2, t4, a1
sub.d a4, t4, a2
sub.d a5, t2, a2
andn a4, a4, t4
andn a5, a5, t2
or t0, a4, a5
and t0, t0, a3
beqz t0, L(loop)
addi.d a0, a0, -8
L(end):
ctz.d t0, t0
srli.w t0, t0, 3
add.d a0, a0, t0
jr ra
END(STRCHRNUL_NAME)
libc_hidden_builtin_def (STRCHRNUL_NAME)

View file

@ -0,0 +1,22 @@
/* Optimized strchrnul implementation using loongarch LASX SIMD instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#define STRCHR __strchrnul_lasx
#define AS_STRCHRNUL
#include "strchr-lasx.S"

View file

@ -0,0 +1,22 @@
/* Optimized strchrnul implementation using loongarch LSX SIMD instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#define STRCHR __strchrnul_lsx
#define AS_STRCHRNUL
#include "strchr-lsx.S"

View file

@ -0,0 +1,39 @@
/* Multiple versions of strchrnul.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
# define strchrnul __redirect_strchrnul
# define __strchrnul __redirect___strchrnul
# include <string.h>
# undef __strchrnul
# undef strchrnul
# define SYMBOL_NAME strchrnul
# include "ifunc-strchrnul.h"
libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
IFUNC_SELECTOR ());
weak_alias (__strchrnul, strchrnul)
# ifdef SHARED
__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul)
__attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul);
# endif
#endif