diff --git a/sysdeps/aarch64/fpu/acoshf_sve.c b/sysdeps/aarch64/fpu/acoshf_sve.c
index d63a3fea23..74adac7efd 100644
--- a/sysdeps/aarch64/fpu/acoshf_sve.c
+++ b/sysdeps/aarch64/fpu/acoshf_sve.c
@@ -17,23 +17,26 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include "sv_math.h"
+#include "sv_log1pf_inline.h"
+
 #define One 0x3f800000
 #define Thres 0x20000000 /* asuint(0x1p64) - One.  */
 
-#include "sv_log1pf_inline.h"
-
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special)
 {
+  svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f);
+  svfloat32_t y = sv_log1pf_inline (tmp, svptrue_b32 ());
   return sv_call_f32 (acoshf, x, y, special);
 }
 
 /* Single-precision SVE acosh(x) routine. Implements the same algorithm as
    vector acoshf and log1p.
 
-   Maximum error is 2.78 ULPs:
-   SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4
-				     want 0x1.f45b3cp-4.  */
+   Maximum error is 2.47 ULPs:
+   SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4
+				     want 0x1.e435a2p-4.  */
 svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
 {
   svuint32_t ix = svreinterpret_u32 (x);
@@ -41,9 +44,9 @@ svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
 
   svfloat32_t xm1 = svsub_x (pg, x, 1.0f);
   svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f));
-  svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg);
+  svfloat32_t tmp = svadd_x (pg, xm1, svsqrt_x (pg, u));
 
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, y, special);
-  return y;
+    return special_case (xm1, tmp, special);
+  return sv_log1pf_inline (tmp, pg);
 }
diff --git a/sysdeps/aarch64/fpu/asinhf_sve.c b/sysdeps/aarch64/fpu/asinhf_sve.c
index e817624dda..f07b8a2ae5 100644
--- a/sysdeps/aarch64/fpu/asinhf_sve.c
+++ b/sysdeps/aarch64/fpu/asinhf_sve.c
@@ -20,20 +20,23 @@
 #include "sv_math.h"
 #include "sv_log1pf_inline.h"
 
-#define BigBound (0x5f800000)  /* asuint(0x1p64).  */
+#define BigBound 0x5f800000 /* asuint(0x1p64).  */
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special)
 {
+  svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign));
+  y = svreinterpret_f32 (
+      svorr_x (svptrue_b32 (), sign, svreinterpret_u32 (y)));
   return sv_call_f32 (asinhf, x, y, special);
 }
 
 /* Single-precision SVE asinh(x) routine. Implements the same algorithm as
    vector asinhf and log1p.
 
-   Maximum error is 2.48 ULPs:
-   SV_NAME_F1 (asinh) (0x1.008864p-3) got 0x1.ffbbbcp-4
-				     want 0x1.ffbbb8p-4.  */
+   Maximum error is 1.92 ULPs:
+   SV_NAME_F1 (asinh) (-0x1.0922ecp-1) got -0x1.fd0bccp-2
+				      want -0x1.fd0bc8p-2.  */
 svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
 {
   svfloat32_t ax = svabs_x (pg, x);
@@ -49,8 +52,6 @@ svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
       = sv_log1pf_inline (svadd_x (pg, ax, svdiv_x (pg, ax2, d)), pg);
 
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (
-	x, svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))),
-	special);
+    return special_case (iax, sign, y, special);
   return svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y)));
 }
diff --git a/sysdeps/aarch64/fpu/atanhf_sve.c b/sysdeps/aarch64/fpu/atanhf_sve.c
index aebd0247a7..98e9950bba 100644
--- a/sysdeps/aarch64/fpu/atanhf_sve.c
+++ b/sysdeps/aarch64/fpu/atanhf_sve.c
@@ -17,21 +17,25 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include "sv_math.h"
 #include "sv_log1pf_inline.h"
 
 #define One (0x3f800000)
 #define Half (0x3f000000)
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign,
+	      svfloat32_t y, svbool_t special)
 {
+  svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign));
+  y = svmul_x (svptrue_b32 (), halfsign, y);
   return sv_call_f32 (atanhf, x, y, special);
 }
 
 /* Approximation for vector single-precision atanh(x) using modified log1p.
-   The maximum error is 2.28 ULP:
-   _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5
-				 want 0x1.ffbbb6p-5.  */
+   The maximum error is 1.99 ULP:
+   _ZGVsMxv_atanhf(0x1.f1583p-5) got 0x1.f1f4fap-5
+				want 0x1.f1f4f6p-5.  */
 svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
 {
   svfloat32_t ax = svabs_x (pg, x);
@@ -48,7 +52,7 @@ svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
   y = sv_log1pf_inline (y, pg);
 
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, svmul_x (pg, halfsign, y), special);
+    return special_case (iax, sign, halfsign, y, special);
 
   return svmul_x (pg, halfsign, y);
 }
diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c
index ec2329ba96..937115f6fe 100644
--- a/sysdeps/aarch64/fpu/log1pf_sve.c
+++ b/sysdeps/aarch64/fpu/log1pf_sve.c
@@ -18,30 +18,13 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
-
-static const struct data
-{
-  float poly[8];
-  float ln2, exp_bias;
-  uint32_t four, three_quarters;
-} data = {.poly = {/* Do not store first term of polynomial, which is -0.5, as
-                      this can be fmov-ed directly instead of including it in
-                      the main load-and-mla polynomial schedule.  */
-		   0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
-		   -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f,
-		   0x1.abcb6p-4f, -0x1.6f0d5ep-5f},
-	  .ln2 = 0x1.62e43p-1f,
-	  .exp_bias = 0x1p-23f,
-	  .four = 0x40800000,
-	  .three_quarters = 0x3f400000};
-
-#define SignExponentMask 0xff800000
+#include "sv_log1pf_inline.h"
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svfloat32_t x, svbool_t special)
 {
-  return sv_call_f32 (log1pf, x, y, special);
+  return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()),
+		      special);
 }
 
 /* Vector log1pf approximation using polynomial on reduced interval. Worst-case
@@ -50,53 +33,14 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
 				 want 0x1.9f323ep-2.  */
 svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
 {
-  const struct data *d = ptr_barrier (&data);
   /* x < -1, Inf/Nan.  */
   svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000);
   special = svorn_z (pg, special, svcmpge (pg, x, -1));
 
-  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
-			   is in [-0.25, 0.5]):
-     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
-
-     We approximate log1p(m) with a polynomial, then scale by
-     k*log(2). Instead of doing this directly, we use an intermediate
-     scale factor s = 4*k*log(2) to ensure the scale is representable
-     as a normalised fp32 number.  */
-  svfloat32_t m = svadd_x (pg, x, 1);
-
-  /* Choose k to scale x to the range [-1/4, 1/2].  */
-  svint32_t k
-      = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters),
-		 sv_s32 (SignExponentMask));
-
-  /* Scale x by exponent manipulation.  */
-  svfloat32_t m_scale = svreinterpret_f32 (
-      svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k)));
-
-  /* Scale up to ensure that the scale factor is representable as normalised
-     fp32 number, and scale m down accordingly.  */
-  svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four));
-  m_scale = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1), s, 0.25));
-
-  /* Evaluate polynomial on reduced interval.  */
-  svfloat32_t ms2 = svmul_x (pg, m_scale, m_scale),
-	      ms4 = svmul_x (pg, ms2, ms2);
-  svfloat32_t p = sv_estrin_7_f32_x (pg, m_scale, ms2, ms4, d->poly);
-  p = svmad_x (pg, m_scale, p, -0.5);
-  p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p));
-
-  /* The scale factor to be applied back at the end - by multiplying float(k)
-     by 2^-23 we get the unbiased exponent of k.  */
-  svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->exp_bias);
-
-  /* Apply the scaling back.  */
-  svfloat32_t y = svmla_x (pg, p, scale_back, d->ln2);
-
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, y, special);
+    return special_case (x, special);
 
-  return y;
+  return sv_log1pf_inline (x, pg);
 }
 
 strong_alias (SV_NAME_F1 (log1p), SV_NAME_F1 (logp1))
diff --git a/sysdeps/aarch64/fpu/sv_log1pf_inline.h b/sysdeps/aarch64/fpu/sv_log1pf_inline.h
index b20877495a..59cbf6c410 100644
--- a/sysdeps/aarch64/fpu/sv_log1pf_inline.h
+++ b/sysdeps/aarch64/fpu/sv_log1pf_inline.h
@@ -22,55 +22,76 @@
 
 #include "sv_math.h"
 #include "vecmath_config.h"
-#include "poly_sve_f32.h"
+
+#define SignExponentMask 0xff800000
 
 static const struct sv_log1pf_data
 {
-  float32_t poly[9];
-  float32_t ln2;
-  float32_t scale_back;
+  float c0, c2, c4, c6;
+  float c1, c3, c5, c7;
+  float ln2, exp_bias, quarter;
+  uint32_t four, three_quarters;
 } sv_log1pf_data = {
-  /* Polynomial generated using FPMinimax in [-0.25, 0.5].  */
-  .poly = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
-	    -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
-	    -0x1.6f0d5ep-5f },
-  .scale_back = 0x1.0p-23f,
-  .ln2 = 0x1.62e43p-1f,
+  /* Do not store first term of polynomial, which is -0.5, as
+     this can be fmov-ed directly instead of including it in
+     the main load-and-mla polynomial schedule.  */
+  .c0 = 0x1.5555aap-2f,		.c1 = -0x1.000038p-2f, .c2 = 0x1.99675cp-3f,
+  .c3 = -0x1.54ef78p-3f,	.c4 = 0x1.28a1f4p-3f,  .c5 = -0x1.0da91p-3f,
+  .c6 = 0x1.abcb6p-4f,		.c7 = -0x1.6f0d5ep-5f, .ln2 = 0x1.62e43p-1f,
+  .exp_bias = 0x1p-23f,		.quarter = 0x1p-2f,    .four = 0x40800000,
+  .three_quarters = 0x3f400000,
 };
 
-static inline svfloat32_t
-eval_poly (svfloat32_t m, const float32_t *c, svbool_t pg)
-{
-  svfloat32_t p_12 = svmla_x (pg, sv_f32 (c[0]), m, sv_f32 (c[1]));
-  svfloat32_t m2 = svmul_x (pg, m, m);
-  svfloat32_t q = svmla_x (pg, m, m2, p_12);
-  svfloat32_t p = sv_pw_horner_6_f32_x (pg, m, m2, c + 2);
-  p = svmul_x (pg, m2, p);
-
-  return svmla_x (pg, q, m2, p);
-}
-
 static inline svfloat32_t
 sv_log1pf_inline (svfloat32_t x, svbool_t pg)
 {
   const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data);
 
-  svfloat32_t m = svadd_x (pg, x, 1.0f);
+  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+			 is in [-0.25, 0.5]):
+   log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
 
-  svint32_t ks = svsub_x (pg, svreinterpret_s32 (m),
-			  svreinterpret_s32 (svdup_f32 (0.75f)));
-  ks = svand_x (pg, ks, 0xff800000);
-  svuint32_t k = svreinterpret_u32 (ks);
-  svfloat32_t s = svreinterpret_f32 (
-      svsub_x (pg, svreinterpret_u32 (svdup_f32 (4.0f)), k));
+   We approximate log1p(m) with a polynomial, then add k*log(2).
+   Instead of scaling by 2^-k directly, we use an intermediate
+   scale factor s = 4 * 2^-k to ensure the scale is representable
+   as a normalised fp32 number.  */
+  svfloat32_t m = svadd_x (pg, x, 1);
 
-  svfloat32_t m_scale
-      = svreinterpret_f32 (svsub_x (pg, svreinterpret_u32 (x), k));
-  m_scale
-      = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1.0f), sv_f32 (0.25f), s));
-  svfloat32_t p = eval_poly (m_scale, d->poly, pg);
-  svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->scale_back);
-  return svmla_x (pg, p, scale_back, d->ln2);
+  /* Choose k to scale x to the range [-1/4, 1/2].  */
+  svint32_t k
+      = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters),
+		 sv_s32 (SignExponentMask));
+
+  /* Scale x by exponent manipulation.  */
+  svfloat32_t m_scale = svreinterpret_f32 (
+      svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k)));
+
+  /* Scale up to ensure that the scale factor is representable as normalised
+     fp32 number, and scale m down accordingly.  */
+  svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four));
+  svfloat32_t fconst = svld1rq_f32 (svptrue_b32 (), &d->ln2);
+  m_scale = svadd_x (pg, m_scale, svmla_lane_f32 (sv_f32 (-1), s, fconst, 2));
+
+  /* Evaluate polynomial on reduced interval.  */
+  svfloat32_t ms2 = svmul_x (svptrue_b32 (), m_scale, m_scale);
+
+  svfloat32_t c1357 = svld1rq_f32 (svptrue_b32 (), &d->c1);
+  svfloat32_t p01 = svmla_lane_f32 (sv_f32 (d->c0), m_scale, c1357, 0);
+  svfloat32_t p23 = svmla_lane_f32 (sv_f32 (d->c2), m_scale, c1357, 1);
+  svfloat32_t p45 = svmla_lane_f32 (sv_f32 (d->c4), m_scale, c1357, 2);
+  svfloat32_t p67 = svmla_lane_f32 (sv_f32 (d->c6), m_scale, c1357, 3);
+
+  svfloat32_t p = svmla_x (pg, p45, p67, ms2);
+  p = svmla_x (pg, p23, p, ms2);
+  p = svmla_x (pg, p01, p, ms2);
+
+  p = svmad_x (pg, m_scale, p, -0.5);
+  p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p));
+
+  /* The scale factor to be applied back at the end - k holds the exponent
+   in the fp32 exponent field, so float(k) * 2^-23 recovers its value.  */
+  svfloat32_t scale_back = svmul_lane_f32 (svcvt_f32_x (pg, k), fconst, 1);
+  return svmla_lane_f32 (p, scale_back, fconst, 0);
 }
 
 #endif