math: Use tgamma from CORE-MATH

The current implementation precision shows the following accuracy, on
one range ([-20,20]) with 10e9 uniform randomly generated numbers for
each range (first column is the accuracy in ULP, with '0' being
correctly rounded, second is the number of samples with the
corresponding precision):

* Range [-20,20]
 * FE_TONEAREST
     0:       4504877808  45.05%
     1:       4402224940  44.02%
     2:        947652295   9.48%
     3:        131076831   1.31%
     4:         13222216   0.13%
     5:           910045   0.01%
     6:            35253   0.00%
     7:              606   0.00%
     8:                6   0.00%
 * FE_UPWARD
     0:       3477307921  34.77%
     1:       4838637866  48.39%
     2:       1413942684  14.14%
     3:        240762564   2.41%
     4:         27113094   0.27%
     5:          2130934   0.02%
     6:           102599   0.00%
     7:             2324   0.00%
     8:               14   0.00%
 * FE_DOWNWARD
     0:       3923545410  39.24%
     1:       4745067290  47.45%
     2:       1137899814  11.38%
     3:        171596912   1.72%
     4:         20013805   0.20%
     5:          1773899   0.02%
     6:            99911   0.00%
     7:             2928   0.00%
     8:               31   0.00%
 * FE_TOWARDZERO
     0:       3697160741  36.97%
     1:       4731951491  47.32%
     2:       1303092738  13.03%
     3:        231969191   2.32%
     4:         32344517   0.32%
     5:          3283092   0.03%
     6:           193010   0.00%
     7:             5175   0.00%
     8:               45   0.00%

The CORE-MATH implementation is correctly rounded for any rounding mode.
The code was adapted to glibc style and to use the definition of
math_config.h (to handle errno, overflow, and underflow).

Benchtest on x64_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (Neoverse-N1,
gcc 13.3.1), and powerpc (POWER10, gcc 13.2.1) shows:

reciprocal-throughput        master        patched   improvement
x86_64                     237.7960       175.4090        26.24%
x86_64v2                   232.9320       163.4460        29.83%
x86_64v3                   193.0680        89.7721        53.50%
aarch64                    113.6340        56.7350        50.07%
power10                     92.0617        26.6137        71.09%

Latency                      master        patched   improvement
x86_64                     266.7190       208.0130        22.01%
x86_64v2                   263.6070       200.0280        24.12%
x86_64v3                   214.0260       146.5180        31.54%
aarch64                    114.4760        58.5235        48.88%
power10                     84.3718        35.7473        57.63%

Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
powerpc64le-linux-gnu.

Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
Adhemerval Zanella 2025-10-10 15:15:28 -03:00
parent d67d2f4688
commit 1cae0550e8
10 changed files with 1404 additions and 290 deletions

View File

@ -243,6 +243,8 @@ core-math:
sysdeps/ieee754/dbl-64/e_acosh.c
# src/binary64/atanh/atanh.c, revision 4da7f241
sysdeps/ieee754/dbl-64/e_atanh.c
# src/binary64/tgamma/tgamma.c, revision 0f185e23
sysdeps/ieee754/dbl-64/e_gamma_r.c
# src/binary64/lgamma/lgamma.c, revision 0413bb7e
sysdeps/ieee754/dbl-64/e_lgamma_r.c
# src/binary64/asinh/asinh.c, revision fde815f8

View File

@ -204,7 +204,6 @@ libm-calls = \
e_remainderF \
e_sinhF \
e_sqrtF \
gamma_productF \
k_tanF \
s_asinhF \
s_atanF \
@ -341,6 +340,7 @@ test-types-basic = \
type-ldouble-suffix := l
type-ldouble-routines := \
e_rem_pio2l \
gamma_productl \
k_cosl \
k_sincosl \
k_sinl \
@ -386,6 +386,7 @@ type-float-routines := \
type-float128-suffix := f128
type-float128-routines := \
e_rem_pio2f128 \
gamma_productf128 \
k_cosf128 \
k_sincosf128 \
k_sinf128 \

View File

@ -6,15 +6,10 @@ asm-CPPFLAGS += -DGAS_SYNTAX
long-double-fcts = yes
ifeq ($(subdir),math)
# These functions change the rounding mode internally and need to
# update both the SSE2 rounding mode and the 387 rounding mode. See
# the handling of MATH_SET_BOTH_ROUNDING_MODES in
# sysdeps/i386/fpu/fenv_private.h.
CFLAGS-e_gamma_r.c += -DMATH_SET_BOTH_ROUNDING_MODES
# The CORE-MATH implementation assumes FLT_EVAL_METHOD == 0 to provide
# correctly rounded results.
CFLAGS-e_lgamma_r.c += -fexcess-precision=standard
CFLAGS-e_gamma_r.c += -fexcess-precision=standard
endif
ifeq ($(subdir),gmon)

File diff suppressed because it is too large Load Diff

View File

@ -1,46 +0,0 @@
/* Compute a product of X, X+1, ..., with an error estimate.
Copyright (C) 2013-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h>
#include <math_private.h>
#include <fenv_private.h>
#include <mul_split.h>
/* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N
- 1, in the form R * (1 + *EPS) where the return value R is an
approximation to the product and *EPS is set to indicate the
approximate error in the return value. X is such that all the
values X + 1, ..., X + N - 1 are exactly representable, and X_EPS /
X is small enough that factors quadratic in it can be
neglected. */
double
__gamma_product (double x, double x_eps, int n, double *eps)
{
SET_RESTORE_ROUND (FE_TONEAREST);
double ret = x;
*eps = x_eps / x;
for (int i = 1; i < n; i++)
{
*eps += x_eps / (x + i);
double lo;
mul_split (&ret, &lo, ret, x + i);
*eps += lo / ret;
}
return ret;
}

View File

@ -1 +0,0 @@
/* Not needed. */

View File

@ -46,3 +46,15 @@ double: 0
Function: "lgamma_upward":
double: 0
Function: "tgamma":
double: 0
Function: "tgamma_downward":
double: 0
Function: "tgamma_towardzero":
double: 0
Function: "tgamma_upward":
double: 0

View File

@ -180,6 +180,9 @@ attribute_hidden double __math_oflow (uint32_t);
attribute_hidden double __math_uflow (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
attribute_hidden double __math_may_uflow (uint32_t);
/* The result underflows, raise the exception, set errno, and returns the
value. */
attribute_hidden double __math_uflow_value (double);
/* Division by zero. */
attribute_hidden double __math_divzero (uint32_t);
@ -203,6 +206,8 @@ attribute_hidden double __math_check_uflow (double);
attribute_hidden double __math_check_uflow_lt (double, double);
/* Check if the |X| if less than Y. */
attribute_hidden double __math_check_uflow_zero_lt (double, double, double);
/* Return pole error. */
attribute_hidden double __math_erange (double);
/* Check if the result overflowed to infinity. */
static inline double

View File

@ -77,6 +77,13 @@ __math_may_uflow (uint32_t sign)
{
return xflow (sign, 0x1.8p-538);
}
attribute_hidden double
__math_uflow_value (double x)
{
math_force_eval (0x1p-767 * 0x1p-767);
return with_errno (x, ERANGE);
}
#endif
attribute_hidden double
@ -146,3 +153,9 @@ __math_check_oflow (double y)
{
return isinf (y) ? with_errno (y, ERANGE) : y;
}
attribute_hidden double
__math_erange (double y)
{
return with_errno (y, ERANGE);
}

View File

@ -1,44 +0,0 @@
/* Compute a product of X, X+1, ..., with an error estimate.
Copyright (C) 2013-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h>
#include <math-narrow-eval.h>
#include <math_private.h>
#include <float.h>
/* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N
- 1, in the form R * (1 + *EPS) where the return value R is an
approximation to the product and *EPS is set to indicate the
approximate error in the return value. X is such that all the
values X + 1, ..., X + N - 1 are exactly representable, and X_EPS /
X is small enough that factors quadratic in it can be
neglected. */
double
__gamma_product (double x, double x_eps, int n, double *eps)
{
long double x_full = (long double) x + (long double) x_eps;
long double ret = x_full;
for (int i = 1; i < n; i++)
ret *= x_full + i;
double fret = math_narrow_eval ((double) ret);
*eps = (ret - fret) / fret;
return fret;
}