mirror of
git://sourceware.org/git/glibc.git
synced 2025-03-06 20:58:33 +01:00
This uses a new algorithm similar to the one already proposed earlier [1]. With x = mx * 2^ex and y = my * 2^ey (mx, my, ex, ey being integers), the simplest implementation is: mx * 2^ex == 2 * mx * 2^(ex - 1) while (ex > ey) { mx *= 2; --ex; mx %= my; } With mx/my being the mantissas of single-precision floating-point numbers, on each step the argument reduction can be improved by 8 bits (which is the bit width of uint32_t minus MANTISSA_WIDTH and the sign bit): while (ex > ey) { mx <<= 8; ex -= 8; mx %= my; } The implementation uses builtin clz and ctz, along with shifts to convert hx/hy back to floats. Different from the original patch, this patch assumes the modulo/divide operation is slow, so it uses multiplication by inverse values. I see the following performance improvements using fmod benchtests (results only show the 'mean' result): Architecture | Input | master | patch -----------------|-----------------|----------|-------- x86_64 (Ryzen 9) | subnormals | 17.2549 | 12.0318 x86_64 (Ryzen 9) | normal | 85.4096 | 49.9641 x86_64 (Ryzen 9) | close-exponents | 19.1072 | 15.8224 aarch64 (N1) | subnormal | 10.2182 | 6.81778 aarch64 (N1) | normal | 60.0616 | 20.3667 aarch64 (N1) | close-exponents | 11.5256 | 8.39685 I also see similar improvements on arm-linux-gnueabihf when running on the N1 aarch64 chips, where it uses a lot of soft-fp implementation (for modulo and multiplication): Architecture | Input | master | patch -----------------|-----------------|----------|-------- armhf (N1) | subnormal | 11.6662 | 10.8955 armhf (N1) | normal | 69.2759 | 34.1524 armhf (N1) | close-exponents | 13.6472 | 18.2131 Instead of using the math_private.h definitions, I used math_config.h, which is used by newer math implementations. Co-authored-by: kirill <kirill.okhotnikov@gmail.com> [1] https://sourceware.org/pipermail/libc-alpha/2020-November/119794.html Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
215 lines
5.1 KiB
C
215 lines
5.1 KiB
C
/* Configuration for math routines.
|
|
Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef _MATH_CONFIG_H
|
|
#define _MATH_CONFIG_H
|
|
|
|
#include <math.h>
|
|
#include <math_private.h>
|
|
#include <nan-high-order-bit.h>
|
|
#include <stdint.h>
|
|
|
|
/* Build-time switches.  Each one may be defined before this point to
   override the default given here; the defaults only apply when the
   macro is still undefined.  */
#ifndef WANT_ROUNDING
/* Correct special case results in non-nearest rounding modes.  */
# define WANT_ROUNDING 1
#endif
#ifndef WANT_ERRNO
/* Set errno according to ISO C with (math_errhandling & MATH_ERRNO) != 0.  */
# define WANT_ERRNO 1
#endif
#ifndef WANT_ERRNO_UFLOW
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).
   Enabled by default only when both switches above are enabled.  */
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
#endif
|
|
|
|
#ifndef TOINT_INTRINSICS
/* When set, the roundtoint and converttoint functions are provided with
   the semantics documented below.  Defaults to 0 (not provided) unless
   it was already defined before this point.  */
# define TOINT_INTRINSICS 0
#endif
|
|
|
|
#if TOINT_INTRINSICS
/* Only declarations are given here; they are visible solely when
   TOINT_INTRINSICS is nonzero.  */

/* Round x to nearest int in all rounding modes, ties have to be rounded
   consistently with converttoint so the results match.  If the result
   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
static inline double_t
roundtoint (double_t x);

/* Convert x to nearest int in all rounding modes, ties have to be rounded
   consistently with roundtoint.  If the result is not representable in an
   int32_t then the semantics is unspecified.  */
static inline int32_t
converttoint (double_t x);
#endif
|
|
|
|
/* Return the bit pattern of the float F as a 32-bit unsigned integer.
   The value is reinterpreted through a union, not converted
   numerically.  */
static inline uint32_t
asuint (float f)
{
  union
  {
    float f;
    uint32_t i;
  } u = { .f = f };
  return u.i;
}
|
|
|
|
/* Return the float whose bit pattern is the 32-bit unsigned integer I.
   The value is reinterpreted through a union, not converted
   numerically; this is the inverse of asuint.  */
static inline float
asfloat (uint32_t i)
{
  union
  {
    uint32_t i;
    float f;
  } u = { .i = i };
  return u.f;
}
|
|
|
|
/* Return the bit pattern of the double F as a 64-bit unsigned integer.
   The value is reinterpreted through a union, not converted
   numerically.  */
static inline uint64_t
asuint64 (double f)
{
  union
  {
    double f;
    uint64_t i;
  } u = { .f = f };
  return u.i;
}
|
|
|
|
/* Return the double whose bit pattern is the 64-bit unsigned integer I.
   The value is reinterpreted through a union, not converted
   numerically; this is the inverse of asuint64.  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t i;
    double f;
  } u = { .i = i };
  return u.f;
}
|
|
|
|
static inline int
|
|
issignalingf_inline (float x)
|
|
{
|
|
uint32_t ix = asuint (x);
|
|
if (HIGH_ORDER_BIT_IS_SET_FOR_SNAN)
|
|
return (ix & 0x7fc00000) == 0x7fc00000;
|
|
return 2 * (ix ^ 0x00400000) > 2 * 0x7fc00000UL;
|
|
}
|
|
|
|
/* Layout of a 32-bit float bit pattern as returned by asuint:
   1 sign bit, EXPONENT_WIDTH exponent bits, MANTISSA_WIDTH mantissa
   bits.  */
#define BIT_WIDTH       32
#define MANTISSA_WIDTH  23
#define EXPONENT_WIDTH  8
/* Low 23 bits: the stored mantissa.  */
#define MANTISSA_MASK   0x007fffff
/* Bits 23..30: the exponent field.  */
#define EXPONENT_MASK   0x7f800000
/* Everything except the sign bit.  */
#define EXP_MANT_MASK   0x7fffffff
/* Most significant mantissa bit (bit 22).  */
#define QUIET_NAN_MASK  0x00400000
/* Bit 31: the sign.  */
#define SIGN_MASK       0x80000000
|
|
|
|
static inline bool
|
|
is_nan (uint32_t x)
|
|
{
|
|
return (x & EXP_MANT_MASK) > EXPONENT_MASK;
|
|
}
|
|
|
|
static inline uint32_t
|
|
get_mantissa (uint32_t x)
|
|
{
|
|
return x & MANTISSA_MASK;
|
|
}
|
|
|
|
/* Convert integer number X, unbiased exponent EP, and sign S to double:
|
|
|
|
result = X * 2^(EP+1 - exponent_bias)
|
|
|
|
NB: zero is not supported. */
|
|
static inline double
|
|
make_float (uint32_t x, int ep, uint32_t s)
|
|
{
|
|
int lz = __builtin_clz (x) - EXPONENT_WIDTH;
|
|
x <<= lz;
|
|
ep -= lz;
|
|
|
|
if (__glibc_unlikely (ep < 0 || x == 0))
|
|
{
|
|
x >>= -ep;
|
|
ep = 0;
|
|
}
|
|
return asfloat (s + x + (ep << MANTISSA_WIDTH));
|
|
}
|
|
|
|
#define NOINLINE __attribute__ ((noinline))
|
|
|
|
attribute_hidden float __math_oflowf (uint32_t);
|
|
attribute_hidden float __math_uflowf (uint32_t);
|
|
attribute_hidden float __math_may_uflowf (uint32_t);
|
|
attribute_hidden float __math_divzerof (uint32_t);
|
|
attribute_hidden float __math_invalidf (float);
|
|
|
|
/* Shared between expf, exp2f, exp10f, and powf. */
|
|
#define EXP2F_TABLE_BITS 5
|
|
#define EXP2F_POLY_ORDER 3
|
|
extern const struct exp2f_data
|
|
{
|
|
uint64_t tab[1 << EXP2F_TABLE_BITS];
|
|
double shift_scaled;
|
|
double poly[EXP2F_POLY_ORDER];
|
|
double shift;
|
|
double invln2_scaled;
|
|
double poly_scaled[EXP2F_POLY_ORDER];
|
|
} __exp2f_data attribute_hidden;
|
|
|
|
#define LOGF_TABLE_BITS 4
|
|
#define LOGF_POLY_ORDER 4
|
|
extern const struct logf_data
|
|
{
|
|
struct
|
|
{
|
|
double invc, logc;
|
|
} tab[1 << LOGF_TABLE_BITS];
|
|
double ln2;
|
|
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
|
|
} __logf_data attribute_hidden;
|
|
|
|
#define LOG2F_TABLE_BITS 4
|
|
#define LOG2F_POLY_ORDER 4
|
|
extern const struct log2f_data
|
|
{
|
|
struct
|
|
{
|
|
double invc, logc;
|
|
} tab[1 << LOG2F_TABLE_BITS];
|
|
double poly[LOG2F_POLY_ORDER];
|
|
} __log2f_data attribute_hidden;
|
|
|
|
#define POWF_LOG2_TABLE_BITS 4
|
|
#define POWF_LOG2_POLY_ORDER 5
|
|
#if TOINT_INTRINSICS
|
|
# define POWF_SCALE_BITS EXP2F_TABLE_BITS
|
|
#else
|
|
# define POWF_SCALE_BITS 0
|
|
#endif
|
|
#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
|
|
extern const struct powf_log2_data
|
|
{
|
|
struct
|
|
{
|
|
double invc, logc;
|
|
} tab[1 << POWF_LOG2_TABLE_BITS];
|
|
double poly[POWF_LOG2_POLY_ORDER];
|
|
} __powf_log2_data attribute_hidden;
|
|
|
|
#endif
|