mirror of
git://sourceware.org/git/glibc.git
synced 2025-03-06 20:58:33 +01:00
The CORE-MATH implementation is correctly rounded (for any rounding mode) and shows better performance than the generic tanpif. The code was adapted to glibc style and to use the definitions of math_config.h (to handle errno, overflow, and underflow). Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (Neoverse-N1, gcc 13.3.1), and powerpc (POWER10, gcc 13.2.1): latency master patched improvement x86_64 85.1683 47.7990 43.88% x86_64v2 76.8219 41.4679 46.02% x86_64v3 73.7775 37.7734 48.80% aarch64 (Neoverse) 35.4514 18.0742 49.02% power8 22.7604 10.1054 55.60% power10 22.1358 9.9553 55.03% reciprocal-throughput master patched improvement x86_64 41.0174 19.4718 52.53% x86_64v2 34.8565 11.3761 67.36% x86_64v3 34.0325 9.6989 71.50% aarch64 (Neoverse) 25.4349 9.2017 63.82% power8 13.8626 3.8486 72.24% power10 11.7933 3.6420 69.12% Reviewed-by: DJ Delorie <dj@redhat.com>
68 lines
2.1 KiB
C
68 lines
2.1 KiB
C
/* Private inline math functions for powerpc.
   Copyright (C) 2006-2025 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
|
|
|
|
#ifndef _PPC_MATH_PRIVATE_H_
|
|
#define _PPC_MATH_PRIVATE_H_
|
|
|
|
#include <sysdep.h>
|
|
#include <ldsodefs.h>
|
|
#include <dl-procinfo.h>
|
|
|
|
#include_next <math_private.h>
|
|
|
|
#ifdef _ARCH_PWR9
|
|
|
|
/* Decide whether the compiler offers the VSX exponent-extraction builtin
   that the __builtin_ilogb* macros in this file expand to.  GCC 8 and
   later is taken to have it; otherwise the compiler is asked through
   __has_builtin when that operator exists, and the builtin is treated as
   missing when it does not.  */
#if __GNUC_PREREQ (8, 0)
# define _GL_HAS_BUILTIN_ILOGB 1
#elif defined __has_builtin
# define _GL_HAS_BUILTIN_ILOGB __has_builtin (__builtin_vsx_scalar_extract_exp)
#else
# define _GL_HAS_BUILTIN_ILOGB 0
#endif
|
|
|
|
/* The 'f'-suffixed names are plain aliases of the unsuffixed macros, so
   both spellings expand to the same builtin calls.  */
#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
#define __builtin_ilogbf __builtin_ilogb
|
|
|
|
/* Forward X and Y unchanged to the VSX double-precision
   test-data-class builtin.  */
#define __builtin_test_dc_ilogb(x, y) \
        __builtin_vsx_scalar_test_data_class_dp(x, y)
/* Value produced by the VSX extract-exponent builtin for X, minus 0x3ff
   (the binary64 exponent bias).  The expansion is parenthesized so it
   remains a single operand when used inside a larger expression such as
   -__builtin_ilogb (x) or 2 * __builtin_ilogb (x).  */
#define __builtin_ilogb(x) (__builtin_vsx_scalar_extract_exp(x) - 0x3ff)
|
|
|
|
/* Forward X and Y unchanged to the VSX quad-precision
   test-data-class builtin.  */
#define __builtin_test_dc_ilogbf128(x, y) \
        __builtin_vsx_scalar_test_data_class_qp(x, y)
/* Value produced by the VSX quad-precision extract-exponent builtin for
   X, minus 0x3fff (the binary128 exponent bias).  The expansion is
   parenthesized so it remains a single operand when used inside a larger
   expression.  */
#define __builtin_ilogbf128(x) (__builtin_vsx_scalar_extract_expq(x) - 0x3fff)
|
|
|
|
#if __HAVE_DISTINCT_FLOAT128
/* Inline _Float128 square root: issue a single xssqrtqp instruction with
   __X as the source operand and return the value it writes.  Both
   operands use the "v" register constraint.  Only compiled when
   __HAVE_DISTINCT_FLOAT128 is nonzero (and, from the enclosing
   conditional, when _ARCH_PWR9 is defined).  */
extern __always_inline _Float128
__ieee754_sqrtf128 (_Float128 __x)
{
  _Float128 __z;
  asm ("xssqrtqp %0,%1" : "=v" (__z) : "v" (__x));
  return __z;
}
#endif
|
|
#else /* !_ARCH_PWR9 */
|
|
/* _ARCH_PWR9 is not defined: none of the __builtin_ilogb* macros above
   are set up, so report the ilogb builtin as unavailable.  */
#define _GL_HAS_BUILTIN_ILOGB 0
|
|
#endif
|
|
|
|
#ifdef _ARCH_PWR6
/* ISA 2.03 provides frin/round() and cntlzw/ctznll().  */
/* NOTE(review): both roundeven flags are forced to 0 whenever _ARCH_PWR6
   is defined; presumably this keeps generic code from using a roundeven
   intrinsic on these CPUs -- confirm against the consumers of these
   macros.  */
# define ROUNDEVEN_INTRINSICS 0
# define ROUNDEVENF_INTRINSICS 0
#endif
|
|
|
|
#endif /* _PPC_MATH_PRIVATE_H_ */
|