aarch64/fpu: Add vector variants of atanh

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
This commit is contained in:
Joe Ramsay 2024-04-03 12:13:53 +01:00 committed by Szabolcs Nagy
parent 81406ea3c5
commit 8b67920528
14 changed files with 288 additions and 0 deletions

View file

@ -3,6 +3,7 @@ libmvec-supported-funcs = acos \
asin \
asinh \
atan \
atanh \
atan2 \
cos \
cosh \

View file

@ -89,6 +89,11 @@ libmvec {
_ZGVnN4v_asinhf;
_ZGVsMxv_asinh;
_ZGVsMxv_asinhf;
_ZGVnN2v_atanh;
_ZGVnN2v_atanhf;
_ZGVnN4v_atanhf;
_ZGVsMxv_atanh;
_ZGVsMxv_atanhf;
_ZGVnN2v_cosh;
_ZGVnN2v_coshf;
_ZGVnN4v_coshf;

View file

@ -22,6 +22,7 @@ libmvec_hidden_proto (V_NAME_F1(acosh));
libmvec_hidden_proto (V_NAME_F1(asin));
libmvec_hidden_proto (V_NAME_F1(asinh));
libmvec_hidden_proto (V_NAME_F1(atan));
libmvec_hidden_proto (V_NAME_F1(atanh));
libmvec_hidden_proto (V_NAME_F1(cos));
libmvec_hidden_proto (V_NAME_F1(cosh));
libmvec_hidden_proto (V_NAME_F1(erf));

View file

@ -0,0 +1,64 @@
/* Double-precision vector (Advanced SIMD) atanh function
Copyright (C) 2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define WANT_V_LOG1P_K0_SHORTCUT 0
#include "v_log1p_inline.h"
/* Constants used by the AdvSIMD double-precision atanh routine.  */
static const struct data
{
  struct v_log1p_data log1p_consts;
  uint64x2_t one, half;
} data = {
  .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
  /* IEEE-754 bit pattern of 1.0; |x| bit patterns at or above this are
     treated as special.  */
  .one = V2 (0x3ff0000000000000),
  /* IEEE-754 bit pattern of 0.5; ORed with the sign bit of x.  */
  .half = V2 (0x3fe0000000000000),
};
/* Out-of-line fallback taken when at least one lane is flagged in SPECIAL.
   Forwards X, the fast-path result Y and the lane mask to v_call_f64 together
   with the scalar atanh; presumably the flagged lanes are recomputed with the
   scalar routine and the others keep Y (v_call_f64 is defined outside this
   file -- confirm).  */
static float64x2_t VPCS_ATTR NOINLINE
special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
{
  float64x2_t patched = v_call_f64 (atanh, x, y, special);
  return patched;
}
/* Approximation for vector double-precision atanh(x) using modified log1p:
     atanh(x) = sign(x) * 0.5 * log1p (2|x| / (1 - |x|)).
   Lanes whose magnitude bit pattern is >= that of 1.0 (|x| >= 1, infinities
   and NaNs) are passed to special_case.
   The greatest observed error is 3.31 ULP:
   _ZGVnN2v_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
				      want 0x1.ffd8ff31b501cp-6.  */
VPCS_ATTR
float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
{
  const struct data *d = ptr_barrier (&data);

  /* Split x into magnitude and sign bit.  */
  float64x2_t abs_x = vabsq_f64 (x);
  uint64x2_t abs_bits = vreinterpretq_u64_f64 (abs_x);
  uint64x2_t sign_bit = veorq_u64 (vreinterpretq_u64_f64 (x), abs_bits);

  uint64x2_t special = vcgeq_u64 (abs_bits, d->one);

  /* 0.5 carrying the sign of x.  */
  float64x2_t signed_half
      = vreinterpretq_f64_u64 (vorrq_u64 (sign_bit, d->half));

#if WANT_SIMD_EXCEPT
  /* NOTE(review): presumably replaces the special lanes with 0 so that the
     fast path raises no spurious exceptions for them -- confirm against
     v_zerofy_f64.  */
  abs_x = v_zerofy_f64 (abs_x, special);
#endif

  /* 2|x| / (1 - |x|), then log1p of it, then scale by +/-0.5.  */
  float64x2_t numer = vaddq_f64 (abs_x, abs_x);
  float64x2_t denom = vsubq_f64 (v_f64 (1), abs_x);
  float64x2_t ratio = vdivq_f64 (numer, denom);
  float64x2_t result
      = vmulq_f64 (log1p_inline (ratio, &d->log1p_consts), signed_half);

  if (__glibc_unlikely (v_any_u64 (special)))
    return special_case (x, result, special);
  return result;
}

View file

@ -0,0 +1,59 @@
/* Double-precision vector (SVE) atanh function
Copyright (C) 2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define WANT_SV_LOG1P_K0_SHORTCUT 0
#include "sv_log1p_inline.h"
/* IEEE-754 bit pattern of 1.0.  Not referenced by any live code in this
   file.  */
#define One (0x3ff0000000000000)
/* IEEE-754 bit pattern of 0.5; ORed with the sign bit of x to build
   +/-0.5.  */
#define Half (0x3fe0000000000000)
/* Out-of-line fallback taken when at least one active lane is flagged in
   SPECIAL.  Forwards X, the fast-path result Y and the predicate to
   sv_call_f64 together with the scalar atanh; presumably the flagged lanes
   are recomputed with the scalar routine and the others keep Y (sv_call_f64
   is defined outside this file -- confirm).  */
static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
{
  svfloat64_t patched = sv_call_f64 (atanh, x, y, special);
  return patched;
}
/* SVE approximation for double-precision atanh, based on log1p:
     atanh(x) = sign(x) * 0.5 * log1p (2|x| / (1 - |x|)).
   The greatest observed error is 2.81 ULP:
   _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
				      want 0x1.ffd8ff31b501cp-6.  */
svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
{
  /* Split x into magnitude and sign bit, and build 0.5 with the sign of
     x.  */
  svfloat64_t abs_x = svabs_x (pg, x);
  svuint64_t abs_bits = svreinterpret_u64 (abs_x);
  svuint64_t sign_bit = sveor_x (pg, svreinterpret_u64 (x), abs_bits);
  svfloat64_t signed_half = svreinterpret_f64 (svorr_x (pg, sign_bit, Half));

  /* Lanes with |x| >= 1 are special (absolute floating-point compare).  */
  svbool_t special = svacge (pg, x, 1.0);

  /* (1+x)/(1-x) = 1 + 2x/(1-x), hence
     ln((1+x)/(1-x)) = ln(1 + 2x/(1-x)) = log1p (ratio).  */
  svfloat64_t numer = svadd_x (pg, abs_x, abs_x);
  svfloat64_t denom = svsub_x (pg, sv_f64 (1), abs_x);
  svfloat64_t ratio = svdiv_x (pg, numer, denom);
  svfloat64_t result = svmul_x (pg, signed_half, sv_log1p_inline (ratio, pg));

  if (__glibc_unlikely (svptest_any (pg, special)))
    return special_case (x, result, special);
  return result;
}

View file

@ -0,0 +1,79 @@
/* Single-precision vector (Advanced SIMD) atanh function
Copyright (C) 2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
#include "v_log1pf_inline.h"
/* Constants used by the AdvSIMD single-precision atanh routine.  */
static const struct data
{
  struct v_log1pf_data log1pf_consts;
  uint32x4_t one;
#if WANT_SIMD_EXCEPT
  uint32x4_t tiny_bound;
#endif
} data = {
  .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
  /* IEEE-754 bit pattern of 1.0f; |x| bit patterns at or above this are
     treated as special.  */
  .one = V4 (0x3f800000),
#if WANT_SIMD_EXCEPT
  /* Bit pattern of 0x1p-12, below which atanhf(x) rounds to x.  */
  .tiny_bound = V4 (0x39800000),
#endif
};
/* Mask with every bit set except the sign bit of a single-precision
   value.  */
#define AbsMask v_u32 (0x7fffffff)
/* Bit pattern of 0.5f.  Not referenced by the code in this file, which builds
   the signed half from v_f32 (0.5) instead.  */
#define Half v_u32 (0x3f000000)
/* Out-of-line fallback taken when at least one lane is flagged in SPECIAL.
   Forwards X, the fast-path result Y and the lane mask to v_call_f32 together
   with the scalar atanhf; presumably the flagged lanes are recomputed with
   the scalar routine and the others keep Y (v_call_f32 is defined outside
   this file -- confirm).  */
static float32x4_t NOINLINE VPCS_ATTR
special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
{
  float32x4_t patched = v_call_f32 (atanhf, x, y, special);
  return patched;
}
/* Approximation for vector single-precision atanh(x) using modified log1p:
     atanh(x) = sign(x) * 0.5 * log1p (2|x| / (1 - |x|)).
   The maximum error is 3.08 ULP:
   _ZGVnN4v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
			       want 0x1.ffcb82p-5.  */
VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
{
  const struct data *d = ptr_barrier (&data);

  float32x4_t abs_x = vabsq_f32 (x);
  uint32x4_t abs_bits = vreinterpretq_u32_f32 (abs_x);

  /* Magnitude bits from 0.5, sign bit from x.  */
  float32x4_t signed_half = vbslq_f32 (AbsMask, v_f32 (0.5), x);

  /* Lanes whose magnitude bit pattern is >= that of 1.0f are special.  */
  uint32x4_t special = vcgeq_u32 (abs_bits, d->one);

#if WANT_SIMD_EXCEPT
  /* Lanes below the tiny bound are special as well.  */
  special = vorrq_u32 (special, vcltq_u32 (abs_bits, d->tiny_bound));

  /* Side-step special cases by setting those lanes to 0, which will trigger
     no exceptions.  These will be fixed up later.  */
  if (__glibc_unlikely (v_any_u32 (special)))
    abs_x = v_zerofy_f32 (abs_x, special);
#endif

  /* 2|x| / (1 - |x|), then log1p of it, then scale by +/-0.5.  */
  float32x4_t numer = vaddq_f32 (abs_x, abs_x);
  float32x4_t denom = vsubq_f32 (v_f32 (1), abs_x);
  float32x4_t ratio = vdivq_f32 (numer, denom);
  float32x4_t result
      = vmulq_f32 (signed_half, log1pf_inline (ratio, d->log1pf_consts));

  if (__glibc_unlikely (v_any_u32 (special)))
    return special_case (x, result, special);
  return result;
}
/* Hidden definition for the 4-lane routine above, followed by its half-width
   alias.  NOTE(review): HALF_WIDTH_ALIAS_F1 is defined outside this file;
   presumably it provides the 2-lane _ZGVnN2v_atanhf entry point -- confirm.  */
libmvec_hidden_def (V_NAME_F1 (atanh))
HALF_WIDTH_ALIAS_F1 (atanh)

View file

@ -0,0 +1,54 @@
/* Single-precision vector (SVE) atanh function
Copyright (C) 2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "sv_log1pf_inline.h"
/* IEEE-754 bit pattern of 1.0f; magnitudes whose bit pattern compares >= this
   are treated as special.  */
#define One (0x3f800000)
/* IEEE-754 bit pattern of 0.5f; ORed with the sign bit of x to build
   +/-0.5.  */
#define Half (0x3f000000)
/* Out-of-line fallback taken when at least one active lane is flagged in
   SPECIAL.  Forwards X, the fast-path result Y and the predicate to
   sv_call_f32 together with the scalar atanhf; presumably the flagged lanes
   are recomputed with the scalar routine and the others keep Y (sv_call_f32
   is defined outside this file -- confirm).  */
static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
{
  svfloat32_t patched = sv_call_f32 (atanhf, x, y, special);
  return patched;
}
/* Approximation for vector single-precision atanh(x) using modified log1p:
     atanh(x) = sign(x) * 0.5 * log1p (2|x| / (1 - |x|)).
   The maximum error is 2.28 ULP:
   _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5
			       want 0x1.ffbbb6p-5.  */
svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
{
  /* Split x into magnitude and sign bit, and build 0.5 with the sign of
     x.  */
  svfloat32_t abs_x = svabs_x (pg, x);
  svuint32_t abs_bits = svreinterpret_u32 (abs_x);
  svuint32_t sign_bit = sveor_x (pg, svreinterpret_u32 (x), abs_bits);
  svfloat32_t signed_half = svreinterpret_f32 (svorr_x (pg, sign_bit, Half));

  /* Lanes whose magnitude bit pattern is >= that of 1.0f are special.  */
  svbool_t special = svcmpge (pg, abs_bits, One);

  /* (1+x)/(1-x) = 1 + 2x/(1-x), hence
     ln((1+x)/(1-x)) = ln(1 + 2x/(1-x)) = log1p (ratio).  */
  svfloat32_t numer = svadd_x (pg, abs_x, abs_x);
  svfloat32_t denom = svsub_x (pg, sv_f32 (1), abs_x);
  svfloat32_t ratio = svdiv_x (pg, numer, denom);
  svfloat32_t result
      = svmul_x (pg, signed_half, sv_log1pf_inline (ratio, pg));

  if (__glibc_unlikely (svptest_any (pg, special)))
    return special_case (x, result, special);
  return result;
}

View file

@ -49,6 +49,10 @@
# define __DECL_SIMD_atan __DECL_SIMD_aarch64
# undef __DECL_SIMD_atanf
# define __DECL_SIMD_atanf __DECL_SIMD_aarch64
# undef __DECL_SIMD_atanh
# define __DECL_SIMD_atanh __DECL_SIMD_aarch64
# undef __DECL_SIMD_atanhf
# define __DECL_SIMD_atanhf __DECL_SIMD_aarch64
# undef __DECL_SIMD_atan2
# define __DECL_SIMD_atan2 __DECL_SIMD_aarch64
# undef __DECL_SIMD_atan2f
@ -137,6 +141,7 @@ __vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
@ -157,6 +162,7 @@ __vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
@ -182,6 +188,7 @@ __sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t);
@ -202,6 +209,7 @@ __sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t);

View file

@ -28,6 +28,7 @@ VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh)
VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh)
VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh)
VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)

View file

@ -47,6 +47,7 @@ SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh)
SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh)
SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh)
SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)

View file

@ -28,6 +28,7 @@ VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf)
VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf)
VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf)
VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)

View file

@ -47,6 +47,7 @@ SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf)
SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf)
SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf)
SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)

View file

@ -173,11 +173,19 @@ double: 2
float: 2
ldouble: 4
Function: "atanh_advsimd":
double: 1
float: 1
Function: "atanh_downward":
double: 3
float: 3
ldouble: 4
Function: "atanh_sve":
double: 2
float: 1
Function: "atanh_towardzero":
double: 2
float: 2

View file

@ -77,18 +77,23 @@ GLIBC_2.40 _ZGVnN2v_acosh F
GLIBC_2.40 _ZGVnN2v_acoshf F
GLIBC_2.40 _ZGVnN2v_asinh F
GLIBC_2.40 _ZGVnN2v_asinhf F
GLIBC_2.40 _ZGVnN2v_atanh F
GLIBC_2.40 _ZGVnN2v_atanhf F
GLIBC_2.40 _ZGVnN2v_cosh F
GLIBC_2.40 _ZGVnN2v_coshf F
GLIBC_2.40 _ZGVnN2v_erf F
GLIBC_2.40 _ZGVnN2v_erff F
GLIBC_2.40 _ZGVnN4v_acoshf F
GLIBC_2.40 _ZGVnN4v_asinhf F
GLIBC_2.40 _ZGVnN4v_atanhf F
GLIBC_2.40 _ZGVnN4v_coshf F
GLIBC_2.40 _ZGVnN4v_erff F
GLIBC_2.40 _ZGVsMxv_acosh F
GLIBC_2.40 _ZGVsMxv_acoshf F
GLIBC_2.40 _ZGVsMxv_asinh F
GLIBC_2.40 _ZGVsMxv_asinhf F
GLIBC_2.40 _ZGVsMxv_atanh F
GLIBC_2.40 _ZGVsMxv_atanhf F
GLIBC_2.40 _ZGVsMxv_cosh F
GLIBC_2.40 _ZGVsMxv_coshf F
GLIBC_2.40 _ZGVsMxv_erf F