mirror of git://sourceware.org/git/glibc.git
math: Use tgamma from CORE-MATH
The current implementation precision shows the following accuracy, on
one range ([-20,20]) with 10e9 uniform randomly generated numbers for
each range (first column is the accuracy in ULP, with '0' being
correctly rounded, second is the number of samples with the
corresponding precision):
* Range [-20,20]
* FE_TONEAREST
0: 4504877808 45.05%
1: 4402224940 44.02%
2: 947652295 9.48%
3: 131076831 1.31%
4: 13222216 0.13%
5: 910045 0.01%
6: 35253 0.00%
7: 606 0.00%
8: 6 0.00%
* FE_UPWARD
0: 3477307921 34.77%
1: 4838637866 48.39%
2: 1413942684 14.14%
3: 240762564 2.41%
4: 27113094 0.27%
5: 2130934 0.02%
6: 102599 0.00%
7: 2324 0.00%
8: 14 0.00%
* FE_DOWNWARD
0: 3923545410 39.24%
1: 4745067290 47.45%
2: 1137899814 11.38%
3: 171596912 1.72%
4: 20013805 0.20%
5: 1773899 0.02%
6: 99911 0.00%
7: 2928 0.00%
8: 31 0.00%
* FE_TOWARDZERO
0: 3697160741 36.97%
1: 4731951491 47.32%
2: 1303092738 13.03%
3: 231969191 2.32%
4: 32344517 0.32%
5: 3283092 0.03%
6: 193010 0.00%
7: 5175 0.00%
8: 45 0.00%
The CORE-MATH implementation is correctly rounded for any rounding mode.
The code was adapted to glibc style and to use the definition of
math_config.h (to handle errno, overflow, and underflow).
Benchtest on x64_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (Neoverse-N1,
gcc 13.3.1), and powerpc (POWER10, gcc 13.2.1) shows:
reciprocal-throughput master patched improvement
x86_64 237.7960 175.4090 26.24%
x86_64v2 232.9320 163.4460 29.83%
x86_64v3 193.0680 89.7721 53.50%
aarch64 113.6340 56.7350 50.07%
power10 92.0617 26.6137 71.09%
Latency master patched improvement
x86_64 266.7190 208.0130 22.01%
x86_64v2 263.6070 200.0280 24.12%
x86_64v3 214.0260 146.5180 31.54%
aarch64 114.4760 58.5235 48.88%
power10 84.3718 35.7473 57.63%
Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
powerpc64le-linux-gnu.
Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
parent
d67d2f4688
commit
1cae0550e8
|
|
@ -243,6 +243,8 @@ core-math:
|
|||
sysdeps/ieee754/dbl-64/e_acosh.c
|
||||
# src/binary64/atanh/atanh.c, revision 4da7f241
|
||||
sysdeps/ieee754/dbl-64/e_atanh.c
|
||||
# src/binary64/tgamma/tgamma.c, revision 0f185e23
|
||||
sysdeps/ieee754/dbl-64/e_gamma_r.c
|
||||
# src/binary64/lgamma/lgamma.c, revision 0413bb7e
|
||||
sysdeps/ieee754/dbl-64/e_lgamma_r.c
|
||||
# src/binary64/asinh/asinh.c, revision fde815f8
|
||||
|
|
|
|||
|
|
@ -204,7 +204,6 @@ libm-calls = \
|
|||
e_remainderF \
|
||||
e_sinhF \
|
||||
e_sqrtF \
|
||||
gamma_productF \
|
||||
k_tanF \
|
||||
s_asinhF \
|
||||
s_atanF \
|
||||
|
|
@ -341,6 +340,7 @@ test-types-basic = \
|
|||
type-ldouble-suffix := l
|
||||
type-ldouble-routines := \
|
||||
e_rem_pio2l \
|
||||
gamma_productl \
|
||||
k_cosl \
|
||||
k_sincosl \
|
||||
k_sinl \
|
||||
|
|
@ -386,6 +386,7 @@ type-float-routines := \
|
|||
type-float128-suffix := f128
|
||||
type-float128-routines := \
|
||||
e_rem_pio2f128 \
|
||||
gamma_productf128 \
|
||||
k_cosf128 \
|
||||
k_sincosf128 \
|
||||
k_sinf128 \
|
||||
|
|
|
|||
|
|
@ -6,15 +6,10 @@ asm-CPPFLAGS += -DGAS_SYNTAX
|
|||
long-double-fcts = yes
|
||||
|
||||
ifeq ($(subdir),math)
|
||||
# These functions change the rounding mode internally and need to
|
||||
# update both the SSE2 rounding mode and the 387 rounding mode. See
|
||||
# the handling of MATH_SET_BOTH_ROUNDING_MODES in
|
||||
# sysdeps/i386/fpu/fenv_private.h.
|
||||
CFLAGS-e_gamma_r.c += -DMATH_SET_BOTH_ROUNDING_MODES
|
||||
|
||||
# The CORE-MATH implementation assumes FLT_EVAL_METHOD == 0 to provide
|
||||
# correctly rounded results.
|
||||
CFLAGS-e_lgamma_r.c += -fexcess-precision=standard
|
||||
CFLAGS-e_gamma_r.c += -fexcess-precision=standard
|
||||
endif
|
||||
|
||||
ifeq ($(subdir),gmon)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,46 +0,0 @@
|
|||
/* Compute a product of X, X+1, ..., with an error estimate.
|
||||
Copyright (C) 2013-2025 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
#include <fenv_private.h>
|
||||
#include <mul_split.h>
|
||||
|
||||
/* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N
|
||||
- 1, in the form R * (1 + *EPS) where the return value R is an
|
||||
approximation to the product and *EPS is set to indicate the
|
||||
approximate error in the return value. X is such that all the
|
||||
values X + 1, ..., X + N - 1 are exactly representable, and X_EPS /
|
||||
X is small enough that factors quadratic in it can be
|
||||
neglected. */
|
||||
|
||||
double
|
||||
__gamma_product (double x, double x_eps, int n, double *eps)
|
||||
{
|
||||
SET_RESTORE_ROUND (FE_TONEAREST);
|
||||
double ret = x;
|
||||
*eps = x_eps / x;
|
||||
for (int i = 1; i < n; i++)
|
||||
{
|
||||
*eps += x_eps / (x + i);
|
||||
double lo;
|
||||
mul_split (&ret, &lo, ret, x + i);
|
||||
*eps += lo / ret;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -1 +0,0 @@
|
|||
/* Not needed. */
|
||||
|
|
@ -46,3 +46,15 @@ double: 0
|
|||
|
||||
Function: "lgamma_upward":
|
||||
double: 0
|
||||
|
||||
Function: "tgamma":
|
||||
double: 0
|
||||
|
||||
Function: "tgamma_downward":
|
||||
double: 0
|
||||
|
||||
Function: "tgamma_towardzero":
|
||||
double: 0
|
||||
|
||||
Function: "tgamma_upward":
|
||||
double: 0
|
||||
|
|
|
|||
|
|
@ -180,6 +180,9 @@ attribute_hidden double __math_oflow (uint32_t);
|
|||
attribute_hidden double __math_uflow (uint32_t);
|
||||
/* The result underflows to 0 in some directed rounding mode only. */
|
||||
attribute_hidden double __math_may_uflow (uint32_t);
|
||||
/* The result underflows, raise the exception, set errno, and returns the
|
||||
value. */
|
||||
attribute_hidden double __math_uflow_value (double);
|
||||
/* Division by zero. */
|
||||
attribute_hidden double __math_divzero (uint32_t);
|
||||
|
||||
|
|
@ -203,6 +206,8 @@ attribute_hidden double __math_check_uflow (double);
|
|||
attribute_hidden double __math_check_uflow_lt (double, double);
|
||||
/* Check if the |X| if less than Y. */
|
||||
attribute_hidden double __math_check_uflow_zero_lt (double, double, double);
|
||||
/* Return pole error. */
|
||||
attribute_hidden double __math_erange (double);
|
||||
|
||||
/* Check if the result overflowed to infinity. */
|
||||
static inline double
|
||||
|
|
|
|||
|
|
@ -77,6 +77,13 @@ __math_may_uflow (uint32_t sign)
|
|||
{
|
||||
return xflow (sign, 0x1.8p-538);
|
||||
}
|
||||
|
||||
attribute_hidden double
|
||||
__math_uflow_value (double x)
|
||||
{
|
||||
math_force_eval (0x1p-767 * 0x1p-767);
|
||||
return with_errno (x, ERANGE);
|
||||
}
|
||||
#endif
|
||||
|
||||
attribute_hidden double
|
||||
|
|
@ -146,3 +153,9 @@ __math_check_oflow (double y)
|
|||
{
|
||||
return isinf (y) ? with_errno (y, ERANGE) : y;
|
||||
}
|
||||
|
||||
attribute_hidden double
|
||||
__math_erange (double y)
|
||||
{
|
||||
return with_errno (y, ERANGE);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,44 +0,0 @@
|
|||
/* Compute a product of X, X+1, ..., with an error estimate.
|
||||
Copyright (C) 2013-2025 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
#include <math-narrow-eval.h>
|
||||
#include <math_private.h>
|
||||
#include <float.h>
|
||||
|
||||
/* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N
|
||||
- 1, in the form R * (1 + *EPS) where the return value R is an
|
||||
approximation to the product and *EPS is set to indicate the
|
||||
approximate error in the return value. X is such that all the
|
||||
values X + 1, ..., X + N - 1 are exactly representable, and X_EPS /
|
||||
X is small enough that factors quadratic in it can be
|
||||
neglected. */
|
||||
|
||||
double
|
||||
__gamma_product (double x, double x_eps, int n, double *eps)
|
||||
{
|
||||
long double x_full = (long double) x + (long double) x_eps;
|
||||
long double ret = x_full;
|
||||
for (int i = 1; i < n; i++)
|
||||
ret *= x_full + i;
|
||||
|
||||
double fret = math_narrow_eval ((double) ret);
|
||||
*eps = (ret - fret) / fret;
|
||||
|
||||
return fret;
|
||||
}
|
||||
Loading…
Reference in New Issue