math: Optimize flt-32 remainder implementation

With same micro-optimization done for the double variant:

  * Combine the |y| zero check.
  * Rework the check to adjust result and call fmod.
  * Remove one check after fmod.
  * Remove float-int-float roundtrip on return.

Also use math_config.h macros and indent the code.  The resulting
strategy is different in many places that I think requires a
different Copyright.

I see the following performance improvements using remainder benchtests
(using reciprocal-throughput metric):

Architecture     | Input           |   master |   patch  | Improvemnt
-----------------|-----------------|----------|-----------------------
x86_64           | subnormals      |  20.4176 |  19.6144 |      3.93%
x86_64           | normal          |  54.0939 |  52.2343 |      3.44%
x86_64           | close-exponent  |  23.9120 |  22.3768 |      6.42%
aarch64          | subnormals      |   9.2423 |   8.3825 |      9.30%
aarch64          | normal          |  30.5393 |   29.244 |      4.24%
aarch64          | close-exponent  |  15.5405 |  13.9256 |     10.39%

The aarch64 used as Neoverse-N1, gcc 15.1.1; while the x86_64 was
a AMD Ryzen 9 5900X, gcc 15.2.1.

Checked on x86_64-linux-gnu and aarch64-linux-gnu.

Reviewed-by: Wilco Dijkstra  <Wilco.Dijkstra@arm.com>
This commit is contained in:
Adhemerval Zanella 2025-10-02 08:55:47 -03:00
parent f0facb2d27
commit 61ac7c6a75
1 changed files with 59 additions and 51 deletions

View File

@ -1,65 +1,73 @@
/* e_remainderf.c -- float version of e_remainder.c. /* Remainder function, float version.
*/ Copyright (C) 2008-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
/* The GNU C Library is free software; you can redistribute it and/or
* ==================================================== modify it under the terms of the GNU Lesser General Public
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this The GNU C Library is distributed in the hope that it will be useful,
* software is freely granted, provided that this notice but WITHOUT ANY WARRANTY; without even the implied warranty of
* is preserved. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* ==================================================== Lesser General Public License for more details.
*/
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h> #include <math.h>
#include <math_private.h>
#include <libm-alias-finite.h> #include <libm-alias-finite.h>
#include "math_config.h"
static const float zero = 0.0;
float float
__ieee754_remainderf(float x, float p) __ieee754_remainderf(float x, float p)
{ {
int32_t hx,hp; uint32_t hx = asuint (x);
uint32_t sx; uint32_t hp = asuint (p);
float p_half; uint32_t sx = hx >> 31;
GET_FLOAT_WORD(hx,x); hp &= ~SIGN_MASK;
GET_FLOAT_WORD(hp,p); hx &= ~SIGN_MASK;
sx = hx&0x80000000;
hp &= 0x7fffffff;
hx &= 0x7fffffff;
/* purge off exception values */ /* |y| < DBL/MAX / 2 ? */
if(hp==0) return (x*p)/(x*p); /* p = 0 */ p = fabsf (p);
if((hx>=0x7f800000)|| /* x not finite */ if (__glibc_likely (hp < 0x7f000000))
((hp>0x7f800000))) /* p is NaN */ {
return (x*p)/(x*p); /* |x| not finite, |y| equal 0 is handled by fmod. */
if (__glibc_unlikely (hx >= EXPONENT_MASK))
return (x * p) / (x * p);
x = fabs (__ieee754_fmodf (x, p + p)); /* now x < 2p */
if (hp<=0x7effffff) x = __ieee754_fmodf(x,p+p); /* now x < 2p */ if (x + x > p)
if ((hx-hp)==0) return zero*x; {
x = fabsf(x); x -= p;
p = fabsf(p); if (x + x >= p)
if (hp<0x01000000) { x -= p;
if(x+x>p) {
x-=p;
if(x+x>=p) x -= p;
}
} else {
p_half = (float)0.5*p;
if(x>p_half) {
x-=p;
if(x>=p_half) x -= p;
}
}
GET_FLOAT_WORD(hx,x);
/* Make sure x is not -0. This can occur only when x = p /* Make sure x is not -0. This can occur only when x = p
and rounding direction is towards negative infinity. */ and rounding direction is towards negative infinity. */
if (hx==0x80000000) hx = 0; else if (x == 0.0)
SET_FLOAT_WORD(x,hx^sx); x = 0.0;
return x; }
}
else
{
/* |x| not finite or |y| is NaN or 0 */
if ((hx >= EXPONENT_MASK || (hp - 1) >= EXPONENT_MASK))
return (x * p) / (x * p);
x = fabsf (x);
float p_half = 0.5f * p;
if (x > p_half)
{
x -= p;
if (x >= p_half)
x -= p;
else if (x == 0.0)
x = 0.0;
}
}
return sx ? -x : x;
} }
libm_alias_finite (__ieee754_remainderf, __remainderf) libm_alias_finite (__ieee754_remainderf, __remainderf)