math: Optimize dbl-64 remainder implementation

The commit 34b9f8bc17 provides an optimized fmod implementation; use
the same strategy used for remainderf and implement the double variant
on top of fmod.

I see the following performance improvements using remainder benchtests
(using reciprocal-throughput metric):

Architecture     | Input           |   master |   patch  | Improvement
-----------------|-----------------|----------|-----------------------
x86_64           | subnormals      |  76.1345 |  21.5334 |     71.72%
x86_64           | normal          | 553.2670 | 426.5670 |     22.90%
x86_64           | close-exponent  |  30.5111 |  22.6893 |     25.64%
aarch64          | subnormals      |  26.0734 |   8.4876 |     67.45%
aarch64          | normal          | 205.2590 |  200.082 |      2.52%
aarch64          | close-exponent  |  13.8481 |  13.6663 |      1.31%

The aarch64 machine used was a Neoverse-N1 with gcc 15.1.1, while the
x86_64 machine was an AMD Ryzen 9 5900X with gcc 15.2.1.

This implementation also fixes the math/test-double-remainder issues
on alpha.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.

Reviewed-by: Wilco Dijkstra  <Wilco.Dijkstra@arm.com>
This commit is contained in:
Adhemerval Zanella 2025-10-02 08:55:46 -03:00
parent 849a274531
commit f0facb2d27
2 changed files with 60 additions and 185 deletions

View File

@ -1,153 +1,73 @@
/*
* IBM Accurate Mathematical Library
* written by International Business Machines Corp.
* Copyright (C) 2001-2025 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
/**************************************************************************/
/* MODULE_NAME urem.c */
/* */
/* FUNCTION: uremainder */
/* */
/* An ultimate remainder routine. Given two IEEE double machine numbers x */
/* ,y it computes the correctly rounded (to nearest) value of remainder */
/* of dividing x by y. */
/* Assumption: Machine arithmetic operations are performed in */
/* round to nearest mode of IEEE 754 standard. */
/* */
/* ************************************************************************/
/* Remainder function, double version.
Copyright (C) 2008-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "endian.h"
#include "mydefs.h"
#include "urem.h"
#include <math.h>
#include <math_private.h>
#include <fenv_private.h>
#include <libm-alias-finite.h>
#include "math_config.h"
/**************************************************************************/
/* An ultimate remainder routine. Given two IEEE double machine numbers x */
/* ,y it computes the correctly rounded (to nearest) value of remainder */
/**************************************************************************/
/* NOTE(review): this listing is a rendered diff hunk (-1,153 +1,73) shown
   without +/- markers, so lines of the removed mynumber-based
   implementation are interleaved with lines of the added implementation
   built on __ieee754_fmod.  The comments added below annotate only the
   fmod-based lines (those using hx, hy and sx).  */
double
__ieee754_remainder (double x, double y)
{
double z, d, xx;
int4 kx, ky, n, nn, n1, m1, l;
mynumber u, t, w = { { 0, 0 } }, v = { { 0, 0 } }, ww = { { 0, 0 } }, r;
u.x = x;
t.x = y;
kx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign for x*/
t.i[HIGH_HALF] &= 0x7fffffff; /*no sign for y */
ky = t.i[HIGH_HALF];
/*------ |x| < 2^1023 and 2^-970 < |y| < 2^1024 ------------------*/
if (kx < 0x7fe00000 && ky < 0x7ff00000 && ky >= 0x03500000)
/* Work on the 64-bit patterns of the arguments: sx keeps bit 63 of x,
   while hx and hy have SIGN_MASK cleared.  asuint64 and SIGN_MASK are not
   defined in this listing; presumably they come from math_config.h and
   give the IEEE binary64 encoding and its sign bit -- TODO confirm.  */
uint64_t hx = asuint64 (x);
uint64_t hy = asuint64 (y);
uint64_t sx = hx >> 63;
hx &= ~SIGN_MASK;
hy &= ~SIGN_MASK;
/* |y| < DBL_MAX / 2 ? */
y = fabs (y);
/* Assuming the binary64 encoding, 0x7fe0000000000000 is 2^1023, so on
   this path the y + y passed to fmod below stays finite.  */
if (__glibc_likely (hy < UINT64_C (0x7fe0000000000000)))
{
SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
if (kx + 0x00100000 < ky)
return x;
if ((kx - 0x01500000) < ky)
/* |x| not finite, |y| equal 0 is handled by fmod. */
if (__glibc_unlikely (hx >= EXPONENT_MASK))
return (x * y) / (x * y);
/* Reduce x modulo 2 * |y| and drop the sign; the sign of the original x
   is reapplied from sx at the end.  */
x = fabs (__ieee754_fmod (x, y + y));
/* Tests whether x exceeds half of y by doubling x instead of halving y.  */
if (x + x > y)
{
z = x / t.x;
v.i[HIGH_HALF] = t.i[HIGH_HALF];
d = (z + big.x) - big.x;
xx = (x - d * v.x) - d * (t.x - v.x);
if (d - z != 0.5 && d - z != -0.5)
return (xx != 0) ? xx : ((x > 0) ? ZERO.x : nZERO.x);
else
{
if (fabs (xx) > 0.5 * t.x)
return (z > d) ? xx - t.x : xx + t.x;
else
return xx;
}
} /* (kx<(ky+0x01500000)) */
else
{
r.x = 1.0 / t.x;
n = t.i[HIGH_HALF];
nn = (n & 0x7ff00000) + 0x01400000;
w.i[HIGH_HALF] = n;
ww.x = t.x - w.x;
l = (kx - nn) & 0xfff00000;
n1 = ww.i[HIGH_HALF];
m1 = r.i[HIGH_HALF];
while (l > 0)
{
r.i[HIGH_HALF] = m1 - l;
z = u.x * r.x;
w.i[HIGH_HALF] = n + l;
ww.i[HIGH_HALF] = (n1) ? n1 + l : n1;
d = (z + big.x) - big.x;
u.x = (u.x - d * w.x) - d * ww.x;
l = (u.i[HIGH_HALF] & 0x7ff00000) - nn;
}
r.i[HIGH_HALF] = m1;
w.i[HIGH_HALF] = n;
ww.i[HIGH_HALF] = n1;
z = u.x * r.x;
d = (z + big.x) - big.x;
u.x = (u.x - d * w.x) - d * ww.x;
if (fabs (u.x) < 0.5 * t.x)
return (u.x != 0) ? u.x : ((x > 0) ? ZERO.x : nZERO.x);
else
if (fabs (u.x) > 0.5 * t.x)
return (d > z) ? u.x + t.x : u.x - t.x;
else
{
z = u.x / t.x; d = (z + big.x) - big.x;
return ((u.x - d * w.x) - d * ww.x);
}
}
} /* (kx<0x7fe00000&&ky<0x7ff00000&&ky>=0x03500000) */
else
{
if (kx < 0x7fe00000 && ky < 0x7ff00000 && (ky > 0 || t.i[LOW_HALF] != 0))
{
y = fabs (y) * t128.x;
z = __ieee754_remainder (x, y) * t128.x;
z = __ieee754_remainder (z, y) * tm128.x;
return z;
}
else
{
if ((kx & 0x7ff00000) == 0x7fe00000 && ky < 0x7ff00000 &&
(ky > 0 || t.i[LOW_HALF] != 0))
{
y = fabs (y);
z = 2.0 * __ieee754_remainder (0.5 * x, y);
d = fabs (z);
if (d <= fabs (d - y))
return z;
else if (d == y)
return 0.0 * x;
else
return (z > 0) ? z - y : z + y;
}
else /* if x is too big */
{
if (ky == 0 && t.i[LOW_HALF] == 0) /* y = 0 */
return (x * y) / (x * y);
else if (kx >= 0x7ff00000 /* x not finite */
|| (ky > 0x7ff00000 /* y is NaN */
|| (ky == 0x7ff00000 && t.i[LOW_HALF] != 0)))
return (x * y) / (x * y);
else
return x;
}
/* Subtract y once; if twice the difference is still at least y, subtract
   y a second time.  */
x -= y;
if (x + x >= y)
x -= y;
/* Make sure x is not -0. This can occur only when x = y
and rounding direction is towards negative infinity. */
else if (x == 0.0)
x = 0.0;
}
}
else
{
/* |x| not finite or |y| is NaN or 0 */
if ((hx >= EXPONENT_MASK || (hy - 1) >= EXPONENT_MASK))
return (x * y) / (x * y);
/* This branch is taken when hy >= 0x7fe0000000000000.  No fmod call is
   made here; the comparisons use y * 0.5 rather than x + x, presumably so
   that nothing is doubled when the operands are this large.  */
x = fabs (x);
double y_half = y * 0.5;
if (x > y_half)
{
x -= y;
if (x >= y_half)
x -= y;
else if (x == 0.0)
x = 0.0;
}
}
/* Reapply the sign bit taken from the original x.  */
return sx ? -x : x;
}
/* NOTE(review): libm_alias_finite is not defined in this listing; presumably
   it is provided by <libm-alias-finite.h> -- confirm what alias it creates.  */
libm_alias_finite (__ieee754_remainder, __remainder)

View File

@ -1,45 +0,0 @@
/*
* IBM Accurate Mathematical Library
* Copyright (C) 2001-2025 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
/************************************************************************/
/* MODULE_NAME: urem.h */
/* */
/* */
/* common data and variables definition for BIG or LITTLE ENDIAN */
/************************************************************************/
/* Constant tables for urem.c, written as pairs of 32-bit words of a
   mynumber (type not defined in this header).  The two variants below hold
   the same word values in opposite order: the non-zero word comes first
   under BIG_ENDI and second under LITTLE_ENDI.  If neither macro is
   defined, no constants are defined at all.  */
#ifndef UREM_H
#define UREM_H
#ifdef BIG_ENDI
/* Read as the high word of an IEEE binary64 value, 0x43380000 is
   1.5 * 2^52; urem.c uses it as (z + big.x) - big.x, presumably to round z
   to an integer.  */
static const mynumber big = {{0x43380000, 0}}, /* 6755399441055744 */
t128 = {{0x47f00000, 0}}, /* 2^ 128 */
tm128 = {{0x37f00000, 0}}, /* 2^-128 */
ZERO = {{0, 0}}, /* 0.0 */
nZERO = {{0x80000000, 0}}; /* -0.0 */
#else
#ifdef LITTLE_ENDI
/* Same constants as above with the two words swapped.  */
static const mynumber big = {{0, 0x43380000}}, /* 6755399441055744 */
t128 = {{0, 0x47f00000}}, /* 2^ 128 */
tm128 = {{0, 0x37f00000}}, /* 2^-128 */
ZERO = {{0, 0}}, /* 0.0 */
nZERO = {{0, 0x80000000}}; /* -0.0 */
#endif
#endif
#endif