math: Remove the SVID error handling from remainderf

The optimized i386 version is faster than the generic one, and gcc
implements it through a builtin (__builtin_remainderf).  This allows
the assembly implementation to be replaced by a C version.  Its
performance on a Zen3 chip is similar to that of the SVID version.

The m68k port provides an optimized version through __m81_u(remainderf)
(mathimpl.h), which gcc does not implement through a builtin (unlike
i386).

Performance improves a bit on x86_64 and i686 (Zen3, gcc 15.2.1):

reciprocal-throughput          input   master  NO-SVID  improvement
x86_64                    subnormals  17.5349  15.6125       10.96%
x86_64                        normal  53.8134  52.5754        2.30%
x86_64                close-exponent  20.0211  18.6656        6.77%
i686                      subnormals  21.8105  20.1856        7.45%
i686                          normal  73.1945  71.2199        2.70%
i686                  close-exponent  22.2141   20.331        8.48%

Tested on x86_64-linux-gnu and i686-linux-gnu.

Reviewed-by: Wilco Dijkstra  <Wilco.Dijkstra@arm.com>
This commit is contained in:
Adhemerval Zanella 2025-10-31 13:08:50 -03:00
parent 324c088a18
commit c4c6c79d70
32 changed files with 103 additions and 33 deletions

View File

@ -697,6 +697,7 @@ libm {
j1f;
jnf;
log10f;
remainderf;
y0f;
y1f;
ynf;

View File

@ -19,12 +19,13 @@
#include <math_private.h>
#include <math-svid-compat.h>
#include <libm-alias-float.h>
#include <shlib-compat.h>
#if LIBM_SVID_COMPAT
#if LIBM_SVID_COMPAT && SHLIB_COMPAT (libm, GLIBC_2_0, GLIBC_2_43)
/* wrapper remainderf */
float
__remainderf (float x, float y)
__remainder_compatf (float x, float y)
{
if (((__builtin_expect (y == 0.0f, 0) && ! isnan (x))
|| (__builtin_expect (isinf (x), 0) && ! isnan (y)))
@ -33,6 +34,6 @@ __remainderf (float x, float y)
return __ieee754_remainderf (x, y);
}
libm_alias_float (__remainder, remainder)
weak_alias (__remainderf, dremf)
compat_symbol (libm, __remainder_compatf, remainderf, GLIBC_2_0);
weak_alias (__remainder_compatf, dremf)
#endif

View File

@ -1,18 +0,0 @@
/*
* Public domain.
*/
#include <machine/asm.h>
#include <libm-alias-finite.h>
/* float __ieee754_remainderf (float x, float y): x87 implementation
   built around fprem1, repeated until the reduction is complete.  */
ENTRY(__ieee754_remainderf)
/* Push the single-precision value at 8(%esp), then the one at 4(%esp);
   presumably y and x under the i386 stack calling convention, giving
   st(0) = x and st(1) = y.  */
flds 8(%esp)
flds 4(%esp)
/* fprem1 replaces st(0) with the IEEE partial remainder of st(0) by
   st(1) and leaves C2 set in the status word while the reduction is
   still incomplete.  */
1: fprem1
/* Copy the FPU status word to %ax and then %ah into EFLAGS, which
   puts C2 into the parity flag.  */
fstsw %ax
sahf
/* Parity set means the reduction is incomplete: iterate again.  */
jp 1b
/* Store st(0) into st(1) and pop, so the divisor is dropped and the
   result is left alone in st(0).  */
fstp %st(1)
ret
END (__ieee754_remainderf)
libm_alias_finite (__ieee754_remainderf, __remainderf)

View File

@ -0,0 +1,41 @@
/* Floating-point remainder function.
Copyright (C) 2023-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h>
#include <libm-alias-finite.h>
#include <libm-alias-float.h>
#include "sysdeps/ieee754/flt-32/math_config.h"
/* Return the remainder of X with respect to Y.

   An infinite X or a zero Y, when neither operand is a NaN, is reported
   through __math_invalidf.  Every other input is passed unchanged to the
   compiler builtin.  */
float
__remainderf (float x, float y)
{
  uint32_t ix = asuint (x);
  uint32_t iy = asuint (y);

  /* remainder (+-Inf, y) or remainder (x, 0).  */
  int bad_operands = is_inf (ix) || y == 0.0f;

  /* NaN operands are not treated as invalid here; they fall through to
     the builtin below.  */
  if (__glibc_unlikely (bad_operands && !(is_nan (iy) || is_nan (ix))))
    return __math_invalidf (x);

  return __builtin_remainderf (x, y);
}
strong_alias (__remainderf, __ieee754_remainderf)
versioned_symbol (libm, __remainderf, remainderf, GLIBC_2_43);
libm_alias_float_other (__remainder, remainder)
libm_alias_finite (__ieee754_remainderf, __remainderf)

View File

@ -18,6 +18,8 @@
#include <math.h>
#include <libm-alias-finite.h>
#include <libm-alias-float.h>
#include <math-svid-compat.h>
#include "math_config.h"
float
@ -34,12 +36,8 @@ __ieee754_remainderf(float x, float p)
p = fabsf (p);
if (__glibc_likely (hp < 0x7f000000))
{
/* |x| not finite, |y| equal 0 is handled by fmod. */
if (__glibc_unlikely (hx >= EXPONENT_MASK))
return (x * p) / (x * p);
x = fabs (__ieee754_fmodf (x, p + p)); /* now x < 2p */
if (x + x > p)
x = fabs (__fmodf (x, p + p)); /* now x < 2p */
if (isgreater (x + x, p))
{
x -= p;
if (x + x >= p)
@ -52,9 +50,9 @@ __ieee754_remainderf(float x, float p)
}
else
{
/* |x| not finite or |y| is NaN or 0 */
if ((hx >= EXPONENT_MASK || (hp - 1) >= EXPONENT_MASK))
return (x * p) / (x * p);
/* |x| not finite or |y| is NaN */
if (__glibc_unlikely (hx >= EXPONENT_MASK || hp > EXPONENT_MASK))
return __math_invalidf (x * p);
x = fabsf (x);
float p_half = 0.5f * p;
@ -64,10 +62,17 @@ __ieee754_remainderf(float x, float p)
if (x >= p_half)
x -= p;
else if (x == 0.0f)
x = 0.0;
x = 0.0f;
}
}
return sx ? -x : x;
}
libm_alias_finite (__ieee754_remainderf, __remainderf)
#if LIBM_SVID_COMPAT
versioned_symbol (libm, __ieee754_remainderf, remainderf, GLIBC_2_43);
libm_alias_float_other (__ieee754_remainder, remainder)
#else
libm_alias_float (__ieee754_remainder, remainder)
weak_alias (__ieee754_remainderf, dremf)
#endif

View File

@ -0,0 +1 @@
/* Not needed */

View File

@ -18,12 +18,26 @@
#include <math.h>
#include <libm-alias-float.h>
#include <libm-alias-finite.h>
#include "mathimpl.h"
#include "sysdeps/ieee754/flt-32/math_config.h"
/* Return the remainder of X with respect to Y.  An infinite X or a zero
   Y, when neither operand is a NaN, is reported through
   __math_invalidf; all other inputs are forwarded to
   __m81_u(__ieee754_remainderf).  */
float
__ieee754_remainderf (float x, float y)
__remainderf (float x, float y)
{
uint32_t hx = asuint (x);
uint32_t hy = asuint (y);
/* remainder(+-Inf,y) or remainder(x,0), excluding NaN operands.  */
if (__glibc_unlikely ((is_inf (hx) || y == 0.0f)
&& !is_nan (hy)
&& !is_nan (hx)))
return __math_invalidf (x);
/* Everything else goes to the routine named by the __m81_u macro
   (presumably the m68k-specific one declared via mathimpl.h).  */
return __m81_u(__ieee754_remainderf)(x, y);
}
strong_alias (__remainderf, __ieee754_remainderf)
versioned_symbol (libm, __remainderf, remainderf, GLIBC_2_43);
libm_alias_float_other (__remainder, remainder)
libm_alias_finite (__ieee754_remainderf, __remainderf)

View File

@ -1328,6 +1328,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1294,6 +1294,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1453,6 +1453,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1335,6 +1335,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -992,6 +992,7 @@ GLIBC_2.43 fmodf F
GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1294,6 +1294,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1106,6 +1106,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1105,6 +1105,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1099,6 +1099,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1483,6 +1483,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1397,6 +1397,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1397,6 +1397,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -959,6 +959,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1404,6 +1404,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1294,6 +1294,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1327,6 +1327,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F

View File

@ -1327,6 +1327,7 @@ GLIBC_2.43 j0f F
GLIBC_2.43 j1f F
GLIBC_2.43 jnf F
GLIBC_2.43 log10f F
GLIBC_2.43 remainderf F
GLIBC_2.43 y0f F
GLIBC_2.43 y1f F
GLIBC_2.43 ynf F