glibc/sysdeps/powerpc/fpu/math_private.h
Adhemerval Zanella 8f170dc819 math: Use tanpif from CORE-MATH
The CORE-MATH implementation is correctly rounded (for any rounding mode)
and shows better performance than the generic tanpif.

The code was adapted to glibc style and to use the definition of
math_config.h (to handle errno, overflow, and underflow).

Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (Neoverse-N1,
gcc 13.3.1), and powerpc (POWER10, gcc 13.2.1):

latency                      master        patched   improvement
x86_64                      85.1683        47.7990        43.88%
x86_64v2                    76.8219        41.4679        46.02%
x86_64v3                    73.7775        37.7734        48.80%
aarch64 (Neoverse)          35.4514        18.0742        49.02%
power8                      22.7604        10.1054        55.60%
power10                     22.1358         9.9553        55.03%

reciprocal-throughput        master        patched   improvement
x86_64                      41.0174        19.4718        52.53%
x86_64v2                    34.8565        11.3761        67.36%
x86_64v3                    34.0325         9.6989        71.50%
aarch64 (Neoverse)          25.4349         9.2017        63.82%
power8                      13.8626         3.8486        72.24%
power10                     11.7933         3.6420        69.12%

Reviewed-by: DJ Delorie <dj@redhat.com>
2025-02-12 16:31:57 -03:00

68 lines
2.1 KiB
C

/* Private inline math functions for powerpc.
Copyright (C) 2006-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef _PPC_MATH_PRIVATE_H_
#define _PPC_MATH_PRIVATE_H_
#include <sysdep.h>
#include <ldsodefs.h>
#include <dl-procinfo.h>
#include_next <math_private.h>
#ifdef _ARCH_PWR9
/* Decide whether the compiler offers __builtin_vsx_scalar_extract_exp.
   GCC 8 or newer is taken to have it unconditionally; any other compiler
   is queried through __has_builtin when that operator exists, and is
   otherwise assumed not to have it.  */
#if __GNUC_PREREQ (8, 0)
# define _GL_HAS_BUILTIN_ILOGB 1
#else
# ifdef __has_builtin
#  define _GL_HAS_BUILTIN_ILOGB \
  __has_builtin (__builtin_vsx_scalar_extract_exp)
# else
#  define _GL_HAS_BUILTIN_ILOGB 0
# endif
#endif
/* Map the __builtin_ilogb* and __builtin_test_dc_ilogb* names onto the VSX
   scalar builtins.  The float names simply forward to the double macros.  */
#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
#define __builtin_ilogbf __builtin_ilogb
/* Double precision: data-class test of X against the class mask Y.  */
#define __builtin_test_dc_ilogb(x, y) \
  __builtin_vsx_scalar_test_data_class_dp (x, y)
/* Double precision: extracted exponent field minus 0x3ff (the binary64
   exponent bias).  The expansion is wrapped in parentheses so it remains a
   single operand inside a larger expression, e.g. `-__builtin_ilogb (x)'
   or `n - __builtin_ilogb (x)'.  */
#define __builtin_ilogb(x) (__builtin_vsx_scalar_extract_exp (x) - 0x3ff)
/* Quad precision (_Float128): data-class test of X against the mask Y.  */
#define __builtin_test_dc_ilogbf128(x, y) \
  __builtin_vsx_scalar_test_data_class_qp (x, y)
/* Quad precision: extracted exponent field minus 0x3fff (the binary128
   exponent bias); parenthesized for the same reason as above.  */
#define __builtin_ilogbf128(x) (__builtin_vsx_scalar_extract_expq (x) - 0x3fff)
#if __HAVE_DISTINCT_FLOAT128
/* Return the square root of __X as a _Float128, computed by a single
   xssqrtqp instruction.  This definition is only compiled when
   _ARCH_PWR9 is defined and __HAVE_DISTINCT_FLOAT128 is nonzero (see the
   enclosing preprocessor conditions).  */
extern __always_inline _Float128
__ieee754_sqrtf128 (_Float128 __x)
{
_Float128 __z;
/* %0 is the output operand (__z) and %1 the input operand (__x); both use
   the "v" register constraint.  NOTE(review): "v" is presumably the
   Altivec/VSX vector register class that xssqrtqp requires -- confirm
   against the GCC PowerPC constraint documentation.  */
asm ("xssqrtqp %0,%1" : "=v" (__z) : "v" (__x));
return __z;
}
#endif
#else /* !_ARCH_PWR9 */
#define _GL_HAS_BUILTIN_ILOGB 0
#endif
#if defined _ARCH_PWR6
/* ISA 2.03 provides frin/round() and cntlzw/ctznll().  Set both
   ROUNDEVEN*_INTRINSICS selectors to 0 on these targets.  */
# define ROUNDEVENF_INTRINSICS 0
# define ROUNDEVEN_INTRINSICS 0
#endif
#endif /* _PPC_MATH_PRIVATE_H_ */