From 61ac7c6a75a467aa7632146589c36dcf609fd735 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Thu, 2 Oct 2025 08:55:47 -0300 Subject: [PATCH] math: Optimize flt-32 remainder implementation With same micro-optimization done for the double variant: * Combine the |y| zero check. * Rework the check to adjust result and call fmod. * Remove one check after fmod. * Remove float-int-float roundtrip on return. Also use math_config.h macros and indent the code. The resulting strategy is different in many places that I think requires a different Copyright. I see the following performance improvements using remainder benchtests (using reciprocal-throughput metric): Architecture | Input | master | patch | Improvemnt -----------------|-----------------|----------|----------------------- x86_64 | subnormals | 20.4176 | 19.6144 | 3.93% x86_64 | normal | 54.0939 | 52.2343 | 3.44% x86_64 | close-exponent | 23.9120 | 22.3768 | 6.42% aarch64 | subnormals | 9.2423 | 8.3825 | 9.30% aarch64 | normal | 30.5393 | 29.244 | 4.24% aarch64 | close-exponent | 15.5405 | 13.9256 | 10.39% The aarch64 used as Neoverse-N1, gcc 15.1.1; while the x86_64 was a AMD Ryzen 9 5900X, gcc 15.2.1. Checked on x86_64-linux-gnu and aarch64-linux-gnu. Reviewed-by: Wilco Dijkstra --- sysdeps/ieee754/flt-32/e_remainderf.c | 110 ++++++++++++++------------ 1 file changed, 59 insertions(+), 51 deletions(-) diff --git a/sysdeps/ieee754/flt-32/e_remainderf.c b/sysdeps/ieee754/flt-32/e_remainderf.c index bb066b7162..d0f069fbd2 100644 --- a/sysdeps/ieee754/flt-32/e_remainderf.c +++ b/sysdeps/ieee754/flt-32/e_remainderf.c @@ -1,65 +1,73 @@ -/* e_remainderf.c -- float version of e_remainder.c. - */ +/* Remainder function, float version. + Copyright (C) 2008-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ #include -#include #include - -static const float zero = 0.0; - +#include "math_config.h" float __ieee754_remainderf(float x, float p) { - int32_t hx,hp; - uint32_t sx; - float p_half; + uint32_t hx = asuint (x); + uint32_t hp = asuint (p); + uint32_t sx = hx >> 31; - GET_FLOAT_WORD(hx,x); - GET_FLOAT_WORD(hp,p); - sx = hx&0x80000000; - hp &= 0x7fffffff; - hx &= 0x7fffffff; + hp &= ~SIGN_MASK; + hx &= ~SIGN_MASK; - /* purge off exception values */ - if(hp==0) return (x*p)/(x*p); /* p = 0 */ - if((hx>=0x7f800000)|| /* x not finite */ - ((hp>0x7f800000))) /* p is NaN */ - return (x*p)/(x*p); + /* |y| < DBL/MAX / 2 ? */ + p = fabsf (p); + if (__glibc_likely (hp < 0x7f000000)) + { + /* |x| not finite, |y| equal 0 is handled by fmod. */ + if (__glibc_unlikely (hx >= EXPONENT_MASK)) + return (x * p) / (x * p); - - if (hp<=0x7effffff) x = __ieee754_fmodf(x,p+p); /* now x < 2p */ - if ((hx-hp)==0) return zero*x; - x = fabsf(x); - p = fabsf(p); - if (hp<0x01000000) { - if(x+x>p) { - x-=p; - if(x+x>=p) x -= p; - } - } else { - p_half = (float)0.5*p; - if(x>p_half) { - x-=p; - if(x>=p_half) x -= p; - } + x = fabs (__ieee754_fmodf (x, p + p)); /* now x < 2p */ + if (x + x > p) + { + x -= p; + if (x + x >= p) + x -= p; + /* Make sure x is not -0. This can occur only when x = p + and rounding direction is towards negative infinity. */ + else if (x == 0.0) + x = 0.0; } - GET_FLOAT_WORD(hx,x); - /* Make sure x is not -0. This can occur only when x = p - and rounding direction is towards negative infinity. */ - if (hx==0x80000000) hx = 0; - SET_FLOAT_WORD(x,hx^sx); - return x; + } + else + { + /* |x| not finite or |y| is NaN or 0 */ + if ((hx >= EXPONENT_MASK || (hp - 1) >= EXPONENT_MASK)) + return (x * p) / (x * p); + + x = fabsf (x); + float p_half = 0.5f * p; + if (x > p_half) + { + x -= p; + if (x >= p_half) + x -= p; + else if (x == 0.0) + x = 0.0; + } + } + + return sx ? -x : x; } libm_alias_finite (__ieee754_remainderf, __remainderf)