mirror of git://sourceware.org/git/glibc.git
math: Optimize dbl-64 remainder implementation
The commit 34b9f8bc17 provides an optimized fmod implementation; use
the same strategy used for remainderf and implement the double variant
on top of fmod.
I see the following performance improvements using remainder benchtests
(using reciprocal-throughput metric):
Architecture | Input | master | patch | Improvement
-----------------|-----------------|----------|----------|------------
x86_64 | subnormals | 76.1345 | 21.5334 | 71.72%
x86_64 | normal | 553.2670 | 426.5670 | 22.90%
x86_64 | close-exponent | 30.5111 | 22.6893 | 25.64%
aarch64 | subnormals | 26.0734 | 8.4876 | 67.45%
aarch64 | normal | 205.2590 | 200.082 | 2.52%
aarch64 | close-exponent | 13.8481 | 13.6663 | 1.31%
The aarch64 machine used was a Neoverse-N1 with gcc 15.1.1, while the x86_64
machine was an AMD Ryzen 9 5900X with gcc 15.2.1.
This implementation also fixes the math/test-double-remainder issues
on alpha.
Tested on aarch64-linux-gnu and x86_64-linux-gnu.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
This commit is contained in:
parent
849a274531
commit
f0facb2d27
|
|
@ -1,153 +1,73 @@
|
|||
/*
|
||||
* IBM Accurate Mathematical Library
|
||||
* written by International Business Machines Corp.
|
||||
* Copyright (C) 2001-2025 Free Software Foundation, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation; either version 2.1 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/**************************************************************************/
|
||||
/* MODULE_NAME urem.c */
|
||||
/* */
|
||||
/* FUNCTION: uremainder */
|
||||
/* */
|
||||
/* An ultimate remainder routine. Given two IEEE double machine numbers x */
|
||||
/* ,y it computes the correctly rounded (to nearest) value of remainder */
|
||||
/* of dividing x by y. */
|
||||
/* Assumption: Machine arithmetic operations are performed in */
|
||||
/* round to nearest mode of IEEE 754 standard. */
|
||||
/* */
|
||||
/* ************************************************************************/
|
||||
/* Remainder function, double version.
|
||||
Copyright (C) 2008-2025 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "endian.h"
|
||||
#include "mydefs.h"
|
||||
#include "urem.h"
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
#include <fenv_private.h>
|
||||
#include <libm-alias-finite.h>
|
||||
#include "math_config.h"
|
||||
|
||||
/**************************************************************************/
|
||||
/* An ultimate remainder routine. Given two IEEE double machine numbers x */
|
||||
/* ,y it computes the correctly rounded (to nearest) value of remainder */
|
||||
/**************************************************************************/
|
||||
double
|
||||
__ieee754_remainder (double x, double y)
|
||||
{
|
||||
double z, d, xx;
|
||||
int4 kx, ky, n, nn, n1, m1, l;
|
||||
mynumber u, t, w = { { 0, 0 } }, v = { { 0, 0 } }, ww = { { 0, 0 } }, r;
|
||||
u.x = x;
|
||||
t.x = y;
|
||||
kx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign for x*/
|
||||
t.i[HIGH_HALF] &= 0x7fffffff; /*no sign for y */
|
||||
ky = t.i[HIGH_HALF];
|
||||
/*------ |x| < 2^1023 and 2^-970 < |y| < 2^1024 ------------------*/
|
||||
if (kx < 0x7fe00000 && ky < 0x7ff00000 && ky >= 0x03500000)
|
||||
uint64_t hx = asuint64 (x);
|
||||
uint64_t hy = asuint64 (y);
|
||||
uint64_t sx = hx >> 63;
|
||||
|
||||
hx &= ~SIGN_MASK;
|
||||
hy &= ~SIGN_MASK;
|
||||
|
||||
/* |y| < DBL_MAX / 2 ? */
|
||||
y = fabs (y);
|
||||
if (__glibc_likely (hy < UINT64_C (0x7fe0000000000000)))
|
||||
{
|
||||
SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
|
||||
if (kx + 0x00100000 < ky)
|
||||
return x;
|
||||
if ((kx - 0x01500000) < ky)
|
||||
/* |x| not finite, |y| equal 0 is handled by fmod. */
|
||||
if (__glibc_unlikely (hx >= EXPONENT_MASK))
|
||||
return (x * y) / (x * y);
|
||||
|
||||
x = fabs (__ieee754_fmod (x, y + y));
|
||||
if (x + x > y)
|
||||
{
|
||||
z = x / t.x;
|
||||
v.i[HIGH_HALF] = t.i[HIGH_HALF];
|
||||
d = (z + big.x) - big.x;
|
||||
xx = (x - d * v.x) - d * (t.x - v.x);
|
||||
if (d - z != 0.5 && d - z != -0.5)
|
||||
return (xx != 0) ? xx : ((x > 0) ? ZERO.x : nZERO.x);
|
||||
else
|
||||
{
|
||||
if (fabs (xx) > 0.5 * t.x)
|
||||
return (z > d) ? xx - t.x : xx + t.x;
|
||||
else
|
||||
return xx;
|
||||
}
|
||||
} /* (kx<(ky+0x01500000)) */
|
||||
else
|
||||
{
|
||||
r.x = 1.0 / t.x;
|
||||
n = t.i[HIGH_HALF];
|
||||
nn = (n & 0x7ff00000) + 0x01400000;
|
||||
w.i[HIGH_HALF] = n;
|
||||
ww.x = t.x - w.x;
|
||||
l = (kx - nn) & 0xfff00000;
|
||||
n1 = ww.i[HIGH_HALF];
|
||||
m1 = r.i[HIGH_HALF];
|
||||
while (l > 0)
|
||||
{
|
||||
r.i[HIGH_HALF] = m1 - l;
|
||||
z = u.x * r.x;
|
||||
w.i[HIGH_HALF] = n + l;
|
||||
ww.i[HIGH_HALF] = (n1) ? n1 + l : n1;
|
||||
d = (z + big.x) - big.x;
|
||||
u.x = (u.x - d * w.x) - d * ww.x;
|
||||
l = (u.i[HIGH_HALF] & 0x7ff00000) - nn;
|
||||
}
|
||||
r.i[HIGH_HALF] = m1;
|
||||
w.i[HIGH_HALF] = n;
|
||||
ww.i[HIGH_HALF] = n1;
|
||||
z = u.x * r.x;
|
||||
d = (z + big.x) - big.x;
|
||||
u.x = (u.x - d * w.x) - d * ww.x;
|
||||
if (fabs (u.x) < 0.5 * t.x)
|
||||
return (u.x != 0) ? u.x : ((x > 0) ? ZERO.x : nZERO.x);
|
||||
else
|
||||
if (fabs (u.x) > 0.5 * t.x)
|
||||
return (d > z) ? u.x + t.x : u.x - t.x;
|
||||
else
|
||||
{
|
||||
z = u.x / t.x; d = (z + big.x) - big.x;
|
||||
return ((u.x - d * w.x) - d * ww.x);
|
||||
}
|
||||
}
|
||||
} /* (kx<0x7fe00000&&ky<0x7ff00000&&ky>=0x03500000) */
|
||||
else
|
||||
{
|
||||
if (kx < 0x7fe00000 && ky < 0x7ff00000 && (ky > 0 || t.i[LOW_HALF] != 0))
|
||||
{
|
||||
y = fabs (y) * t128.x;
|
||||
z = __ieee754_remainder (x, y) * t128.x;
|
||||
z = __ieee754_remainder (z, y) * tm128.x;
|
||||
return z;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((kx & 0x7ff00000) == 0x7fe00000 && ky < 0x7ff00000 &&
|
||||
(ky > 0 || t.i[LOW_HALF] != 0))
|
||||
{
|
||||
y = fabs (y);
|
||||
z = 2.0 * __ieee754_remainder (0.5 * x, y);
|
||||
d = fabs (z);
|
||||
if (d <= fabs (d - y))
|
||||
return z;
|
||||
else if (d == y)
|
||||
return 0.0 * x;
|
||||
else
|
||||
return (z > 0) ? z - y : z + y;
|
||||
}
|
||||
else /* if x is too big */
|
||||
{
|
||||
if (ky == 0 && t.i[LOW_HALF] == 0) /* y = 0 */
|
||||
return (x * y) / (x * y);
|
||||
else if (kx >= 0x7ff00000 /* x not finite */
|
||||
|| (ky > 0x7ff00000 /* y is NaN */
|
||||
|| (ky == 0x7ff00000 && t.i[LOW_HALF] != 0)))
|
||||
return (x * y) / (x * y);
|
||||
else
|
||||
return x;
|
||||
}
|
||||
x -= y;
|
||||
if (x + x >= y)
|
||||
x -= y;
|
||||
/* Make sure x is not -0. This can occur only when x = y
|
||||
and rounding direction is towards negative infinity. */
|
||||
else if (x == 0.0)
|
||||
x = 0.0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* |x| not finite or |y| is NaN or 0 */
|
||||
if ((hx >= EXPONENT_MASK || (hy - 1) >= EXPONENT_MASK))
|
||||
return (x * y) / (x * y);
|
||||
|
||||
x = fabs (x);
|
||||
double y_half = y * 0.5;
|
||||
if (x > y_half)
|
||||
{
|
||||
x -= y;
|
||||
if (x >= y_half)
|
||||
x -= y;
|
||||
else if (x == 0.0)
|
||||
x = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
return sx ? -x : x;
|
||||
}
|
||||
libm_alias_finite (__ieee754_remainder, __remainder)
|
||||
|
|
|
|||
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* IBM Accurate Mathematical Library
|
||||
* Copyright (C) 2001-2025 Free Software Foundation, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation; either version 2.1 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/************************************************************************/
|
||||
/* MODULE_NAME: urem.h */
|
||||
/* */
|
||||
/* */
|
||||
/* common data and variables definition for BIG or LITTLE ENDIAN */
|
||||
/************************************************************************/
|
||||
|
||||
#ifndef UREM_H
|
||||
#define UREM_H
|
||||
|
||||
#ifdef BIG_ENDI
|
||||
static const mynumber big = {{0x43380000, 0}}, /* 6755399441055744 */
|
||||
t128 = {{0x47f00000, 0}}, /* 2^ 128 */
|
||||
tm128 = {{0x37f00000, 0}}, /* 2^-128 */
|
||||
ZERO = {{0, 0}}, /* 0.0 */
|
||||
nZERO = {{0x80000000, 0}}; /* -0.0 */
|
||||
#else
|
||||
#ifdef LITTLE_ENDI
|
||||
static const mynumber big = {{0, 0x43380000}}, /* 6755399441055744 */
|
||||
t128 = {{0, 0x47f00000}}, /* 2^ 128 */
|
||||
tm128 = {{0, 0x37f00000}}, /* 2^-128 */
|
||||
ZERO = {{0, 0}}, /* 0.0 */
|
||||
nZERO = {{0, 0x80000000}}; /* -0.0 */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Loading…
Reference in New Issue