mirror of git://sourceware.org/git/glibc.git
math: Optimize flt-32 remainder implementation
With same micro-optimization done for the double variant: * Combine the |y| zero check. * Rework the check to adjust result and call fmod. * Remove one check after fmod. * Remove float-int-float roundtrip on return. Also use math_config.h macros and indent the code. The resulting strategy is different in many places that I think requires a different Copyright. I see the following performance improvements using remainder benchtests (using reciprocal-throughput metric): Architecture | Input | master | patch | Improvemnt -----------------|-----------------|----------|----------------------- x86_64 | subnormals | 20.4176 | 19.6144 | 3.93% x86_64 | normal | 54.0939 | 52.2343 | 3.44% x86_64 | close-exponent | 23.9120 | 22.3768 | 6.42% aarch64 | subnormals | 9.2423 | 8.3825 | 9.30% aarch64 | normal | 30.5393 | 29.244 | 4.24% aarch64 | close-exponent | 15.5405 | 13.9256 | 10.39% The aarch64 used as Neoverse-N1, gcc 15.1.1; while the x86_64 was a AMD Ryzen 9 5900X, gcc 15.2.1. Checked on x86_64-linux-gnu and aarch64-linux-gnu. Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
This commit is contained in:
parent
f0facb2d27
commit
61ac7c6a75
|
|
@ -1,65 +1,73 @@
|
|||
/* e_remainderf.c -- float version of e_remainder.c.
|
||||
*/
|
||||
/* Remainder function, float version.
|
||||
Copyright (C) 2008-2025 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
#include <libm-alias-finite.h>
|
||||
|
||||
static const float zero = 0.0;
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
float
|
||||
__ieee754_remainderf(float x, float p)
|
||||
{
|
||||
int32_t hx,hp;
|
||||
uint32_t sx;
|
||||
float p_half;
|
||||
uint32_t hx = asuint (x);
|
||||
uint32_t hp = asuint (p);
|
||||
uint32_t sx = hx >> 31;
|
||||
|
||||
GET_FLOAT_WORD(hx,x);
|
||||
GET_FLOAT_WORD(hp,p);
|
||||
sx = hx&0x80000000;
|
||||
hp &= 0x7fffffff;
|
||||
hx &= 0x7fffffff;
|
||||
hp &= ~SIGN_MASK;
|
||||
hx &= ~SIGN_MASK;
|
||||
|
||||
/* purge off exception values */
|
||||
if(hp==0) return (x*p)/(x*p); /* p = 0 */
|
||||
if((hx>=0x7f800000)|| /* x not finite */
|
||||
((hp>0x7f800000))) /* p is NaN */
|
||||
return (x*p)/(x*p);
|
||||
/* |y| < DBL/MAX / 2 ? */
|
||||
p = fabsf (p);
|
||||
if (__glibc_likely (hp < 0x7f000000))
|
||||
{
|
||||
/* |x| not finite, |y| equal 0 is handled by fmod. */
|
||||
if (__glibc_unlikely (hx >= EXPONENT_MASK))
|
||||
return (x * p) / (x * p);
|
||||
|
||||
|
||||
if (hp<=0x7effffff) x = __ieee754_fmodf(x,p+p); /* now x < 2p */
|
||||
if ((hx-hp)==0) return zero*x;
|
||||
x = fabsf(x);
|
||||
p = fabsf(p);
|
||||
if (hp<0x01000000) {
|
||||
if(x+x>p) {
|
||||
x-=p;
|
||||
if(x+x>=p) x -= p;
|
||||
}
|
||||
} else {
|
||||
p_half = (float)0.5*p;
|
||||
if(x>p_half) {
|
||||
x-=p;
|
||||
if(x>=p_half) x -= p;
|
||||
}
|
||||
x = fabs (__ieee754_fmodf (x, p + p)); /* now x < 2p */
|
||||
if (x + x > p)
|
||||
{
|
||||
x -= p;
|
||||
if (x + x >= p)
|
||||
x -= p;
|
||||
/* Make sure x is not -0. This can occur only when x = p
|
||||
and rounding direction is towards negative infinity. */
|
||||
else if (x == 0.0)
|
||||
x = 0.0;
|
||||
}
|
||||
GET_FLOAT_WORD(hx,x);
|
||||
/* Make sure x is not -0. This can occur only when x = p
|
||||
and rounding direction is towards negative infinity. */
|
||||
if (hx==0x80000000) hx = 0;
|
||||
SET_FLOAT_WORD(x,hx^sx);
|
||||
return x;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* |x| not finite or |y| is NaN or 0 */
|
||||
if ((hx >= EXPONENT_MASK || (hp - 1) >= EXPONENT_MASK))
|
||||
return (x * p) / (x * p);
|
||||
|
||||
x = fabsf (x);
|
||||
float p_half = 0.5f * p;
|
||||
if (x > p_half)
|
||||
{
|
||||
x -= p;
|
||||
if (x >= p_half)
|
||||
x -= p;
|
||||
else if (x == 0.0)
|
||||
x = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
return sx ? -x : x;
|
||||
}
|
||||
libm_alias_finite (__ieee754_remainderf, __remainderf)
|
||||
|
|
|
|||
Loading…
Reference in New Issue