mirror of git://sourceware.org/git/glibc.git
powerpc: hypot refactor and optimization
The powerpc hypot is slightly optimized by:
- Since commit 8df4e219e4, both isnan and isinf are always inlined and thus
the check TEST_INF_NAN does not make sense anymore. The generic
check used for POWER7 should be faster on all powerpc configurations.
- The redundant check 'y > two60factor && (x / y) > two60' is removed.
Both changes make the ifunc specialization for power7 unnecessary, and thus
it is removed. Finally, the code is also cleaned up a bit by inlining
the floating-point constants.
The performance changes using the hypot benchtests are:
- POWER9 without patch:
"hypot": {
"overflow": {
"duration": 4.98585e+09,
"iterations": 4.84932e+08,
"max": 46.551,
"min": 10.229,
"mean": 10.2815
},
"higher_two500": {
"duration": 5.00192e+09,
"iterations": 4.24843e+08,
"max": 33.319,
"min": 11.606,
"mean": 11.7736
},
"subnormal": {
"duration": 5.0075e+09,
"iterations": 4.06792e+08,
"max": 22.178,
"min": 12.15,
"mean": 12.3097
},
"less_two500": {
"duration": 5.00685e+09,
"iterations": 4.08772e+08,
"max": 22.784,
"min": 12.052,
"mean": 12.2485
},
"default": {
"duration": 5.06002e+09,
"iterations": 4.09894e+08,
"max": 20.648,
"min": 11.874,
"mean": 12.3447
}
}
- POWER9 with patch:
"hypot": {
"overflow": {
"duration": 4.91848e+09,
"iterations": 7.28039e+08,
"max": 47.958,
"min": 6.436,
"mean": 6.75579
},
"higher_two500": {
"duration": 4.9359e+09,
"iterations": 6.63376e+08,
"max": 20.783,
"min": 7.321,
"mean": 7.44057
},
"subnormal": {
"duration": 4.9479e+09,
"iterations": 6.19772e+08,
"max": 18.856,
"min": 7.817,
"mean": 7.98341
},
"less_two500": {
"duration": 4.94275e+09,
"iterations": 6.3889e+08,
"max": 17.452,
"min": 7.597,
"mean": 7.73647
},
"default": {
"duration": 5.03645e+09,
"iterations": 5.70718e+08,
"max": 18.904,
"min": 8.55,
"mean": 8.82476
}
}
- POWER7 without patch
"hypot": {
"overflow": {
"duration": 4.86637e+09,
"iterations": 6.43196e+08,
"max": 53.958,
"min": 7.328,
"mean": 7.56592
},
"higher_two500": {
"duration": 4.99842e+09,
"iterations": 3.11012e+08,
"max": 78.227,
"min": 15.696,
"mean": 16.0715
},
"subnormal": {
"duration": 4.99841e+09,
"iterations": 3.08935e+08,
"max": 51.392,
"min": 15.983,
"mean": 16.1795
},
"less_two500": {
"duration": 5.00108e+09,
"iterations": 2.99464e+08,
"max": 73.247,
"min": 16.416,
"mean": 16.7001
},
"default": {
"duration": 5.04645e+09,
"iterations": 3.52608e+08,
"max": 70.073,
"min": 13.38,
"mean": 14.3118
}
}
- POWER7 with patch
"hypot": {
"overflow": {
"duration": 4.80785e+09,
"iterations": 8.00001e+08,
"max": 66.262,
"min": 5.888,
"mean": 6.00981
},
"higher_two500": {
"duration": 4.9859e+09,
"iterations": 3.39449e+08,
"max": 5148.44,
"min": 14.539,
"mean": 14.6882
},
"subnormal": {
"duration": 4.9905e+09,
"iterations": 3.28874e+08,
"max": 64.905,
"min": 14.971,
"mean": 15.1745
},
"less_two500": {
"duration": 4.99494e+09,
"iterations": 3.19755e+08,
"max": 103.696,
"min": 14.972,
"mean": 15.6211
},
"default": {
"duration": 5.03951e+09,
"iterations": 4.02502e+08,
"max": 61.008,
"min": 12.368,
"mean": 12.5205
}
}
Checked on powerpc-linux-gnu (built without --with-cpu, with
--with-cpu=power4 and with --with-cpu=power5+ and --disable-multi-arch),
powerpc64-linux-gnu (built without --with-cpu and with --with-cpu=power5+
and --disable-multi-arch).
* sysdeps/powerpc/fpu/e_hypot.c (two60, two500, two600, two1022,
twoM500, twoM600, two60factor, pdnum): Remove.
(TEST_INF_NAN, GET_TW0_HIGH_WORD): Remove macros.
(__ieee754_hypot): Replace static variables with inline definition,
remove unused branches.
* sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
(libm-sysdep_routines): Remove e_hypot-* objects.
(CFLAGS-e_hypot-power7.c, CFLAGS-e_hypotf-power7.c): Remove rule.
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c: Remove
file.
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c: Likewise.
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c: Likewise.
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c: Likewise.
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c: Likewise.
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c: Likewise.
Reviewed-by: Gabriel F. T. Gomes <gabrielftg@linux.ibm.com>
This commit is contained in:
parent
f215dbbdf1
commit
69461d9896
16
ChangeLog
16
ChangeLog
|
|
@ -1,5 +1,21 @@
|
|||
2019-07-08 Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
|
||||
* sysdeps/powerpc/fpu/e_hypot.c (two60, two500, two600, two1022,
|
||||
twoM500, twoM600, two60factor, pdnum): Remove.
|
||||
(TEST_INF_NAN, GET_TW0_HIGH_WORD): Remove macros.
|
||||
(__ieee754_hypot): Replace static variables with inline definition,
|
||||
remove unused branches.
|
||||
* sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
|
||||
(libm-sysdep_routines): Remove e_hypot-* objects.
|
||||
(CFLAGS-e_hypot-power7.c, CFLAGS-e_hypotf-power7.c): Remove rule.
|
||||
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c: Remove
|
||||
file.
|
||||
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c: Likewise.
|
||||
|
||||
* benchtests/Makefile (bench-math): Add hypot.
|
||||
* benchtests/hypot-inputs: New file.
|
||||
|
||||
|
|
|
|||
|
|
@ -22,15 +22,6 @@
|
|||
#include <math-underflow.h>
|
||||
#include <stdint.h>
|
||||
|
||||
static const double two60 = 1.152921504606847e+18;
|
||||
static const double two500 = 3.2733906078961419e+150;
|
||||
static const double two600 = 4.149515568880993e+180;
|
||||
static const double two1022 = 4.49423283715579e+307;
|
||||
static const double twoM500 = 3.054936363499605e-151;
|
||||
static const double twoM600 = 2.4099198651028841e-181;
|
||||
static const double two60factor = 1.5592502418239997e+290;
|
||||
static const double pdnum = 2.225073858507201e-308;
|
||||
|
||||
/* __ieee754_hypot(x,y)
|
||||
*
|
||||
* This is an FP-only version without any FP->INT conversion.
|
||||
|
|
@ -39,53 +30,18 @@ static const double pdnum = 2.225073858507201e-308;
|
|||
* is needed.
|
||||
*/
|
||||
|
||||
#ifdef _ARCH_PWR7
|
||||
/* POWER7 isinf and isnan optimization are fast. */
|
||||
# define TEST_INF_NAN(x, y) \
|
||||
if ((isinf(x) || isinf(y)) \
|
||||
&& !issignaling (x) && !issignaling (y)) \
|
||||
return INFINITY; \
|
||||
if (isnan(x) || isnan(y)) \
|
||||
return x + y;
|
||||
# else
|
||||
/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are
|
||||
* costly (especially for POWER6). */
|
||||
# define GET_TW0_HIGH_WORD(d1,d2,i1,i2) \
|
||||
do { \
|
||||
ieee_double_shape_type gh_u1; \
|
||||
ieee_double_shape_type gh_u2; \
|
||||
gh_u1.value = (d1); \
|
||||
gh_u2.value = (d2); \
|
||||
(i1) = gh_u1.parts.msw & 0x7fffffff; \
|
||||
(i2) = gh_u2.parts.msw & 0x7fffffff; \
|
||||
} while (0)
|
||||
|
||||
# define TEST_INF_NAN(x, y) \
|
||||
do { \
|
||||
uint32_t hx, hy; \
|
||||
GET_TW0_HIGH_WORD(x, y, hx, hy); \
|
||||
if (hy > hx) { \
|
||||
uint32_t ht = hx; hx = hy; hy = ht; \
|
||||
} \
|
||||
if (hx >= 0x7ff00000) { \
|
||||
if ((hx == 0x7ff00000 || hy == 0x7ff00000) \
|
||||
&& !issignaling (x) && !issignaling (y)) \
|
||||
return INFINITY; \
|
||||
return x + y; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
double
|
||||
__ieee754_hypot (double x, double y)
|
||||
{
|
||||
if ((isinf (x) || isinf (y))
|
||||
&& !issignaling (x) && !issignaling (y))
|
||||
return INFINITY;
|
||||
if (isnan (x) || isnan (y))
|
||||
return x + y;
|
||||
|
||||
x = fabs (x);
|
||||
y = fabs (y);
|
||||
|
||||
TEST_INF_NAN (x, y);
|
||||
|
||||
if (y > x)
|
||||
{
|
||||
double t = x;
|
||||
|
|
@ -94,40 +50,34 @@ __ieee754_hypot (double x, double y)
|
|||
}
|
||||
if (y == 0.0)
|
||||
return x;
|
||||
|
||||
/* if y is higher enough, y * 2^60 might overflow. The tests if
|
||||
y >= 1.7976931348623157e+308/2^60 (two60factor) and uses the
|
||||
appropriate check to avoid the overflow exception generation. */
|
||||
if (y > two60factor)
|
||||
if (y <= 0x1.fffffffffffffp+963 && x > (y * 0x1p+60))
|
||||
return x + y;
|
||||
|
||||
if (x > 0x1p+500)
|
||||
{
|
||||
if ((x / y) > two60)
|
||||
return x + y;
|
||||
x *= 0x1p-600;
|
||||
y *= 0x1p-600;
|
||||
return sqrt (x * x + y * y) / 0x1p-600;
|
||||
}
|
||||
else
|
||||
if (y < 0x1p-500)
|
||||
{
|
||||
if (x > (y * two60))
|
||||
return x + y;
|
||||
}
|
||||
if (x > two500)
|
||||
{
|
||||
x *= twoM600;
|
||||
y *= twoM600;
|
||||
return sqrt (x * x + y * y) / twoM600;
|
||||
}
|
||||
if (y < twoM500)
|
||||
{
|
||||
if (y <= pdnum)
|
||||
if (y <= 0x0.fffffffffffffp-1022)
|
||||
{
|
||||
x *= two1022;
|
||||
y *= two1022;
|
||||
double ret = sqrt (x * x + y * y) / two1022;
|
||||
x *= 0x1p+1022;
|
||||
y *= 0x1p+1022;
|
||||
double ret = sqrt (x * x + y * y) / 0x1p+1022;
|
||||
math_check_force_underflow_nonneg (ret);
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
{
|
||||
x *= two600;
|
||||
y *= two600;
|
||||
return sqrt (x * x + y * y) / two600;
|
||||
x *= 0x1p+600;
|
||||
y *= 0x1p+600;
|
||||
return sqrt (x * x + y * y) / 0x1p+600;
|
||||
}
|
||||
}
|
||||
return sqrt (x * x + y * y);
|
||||
|
|
|
|||
|
|
@ -8,8 +8,7 @@ sysdep_calls := s_modf-power5+ s_modf-ppc64 \
|
|||
sysdep_routines += $(sysdep_calls)
|
||||
libm-sysdep_routines += s_logb-power7 s_logbf-power7 \
|
||||
s_logbl-power7 s_logb-ppc64 s_logbf-ppc64 \
|
||||
s_logbl-ppc64 e_hypot-ppc64 \
|
||||
e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \
|
||||
s_logbl-ppc64 \
|
||||
$(sysdep_calls:s_%=m_%)
|
||||
|
||||
CFLAGS-s_logbf-power7.c = -mcpu=power7
|
||||
|
|
@ -17,8 +16,6 @@ CFLAGS-s_logbl-power7.c = -mcpu=power7
|
|||
CFLAGS-s_logb-power7.c = -mcpu=power7
|
||||
CFLAGS-s_modf-power5+.c = -mcpu=power5+
|
||||
CFLAGS-s_modff-power5+.c = -mcpu=power5+
|
||||
CFLAGS-e_hypot-power7.c = -mcpu=power7
|
||||
CFLAGS-e_hypotf-power7.c = -mcpu=power7
|
||||
|
||||
# These files quiet sNaNs in a way that is optimized away without
|
||||
# -fsignaling-nans.
|
||||
|
|
|
|||
|
|
@ -1,19 +0,0 @@
|
|||
/* __ieee_hypot() POWER7 version.
|
||||
Copyright (C) 2013-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c>
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
/* __ieee_hypot() PowerPC64 version.
|
||||
Copyright (C) 2013-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#undef strong_alias
|
||||
#define strong_alias(a, b)
|
||||
|
||||
#define __ieee754_hypot __ieee754_hypot_ppc64
|
||||
|
||||
#include <sysdeps/powerpc/fpu/e_hypot.c>
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
/* Multiple versions of ieee754_hypot.
|
||||
Copyright (C) 2013-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
#include <math_ldbl_opt.h>
|
||||
#include <shlib-compat.h>
|
||||
#include "init-arch.h"
|
||||
|
||||
extern __typeof (__ieee754_hypot) __ieee754_hypot_ppc64 attribute_hidden;
|
||||
extern __typeof (__ieee754_hypot) __ieee754_hypot_power7 attribute_hidden;
|
||||
|
||||
libc_ifunc (__ieee754_hypot,
|
||||
(hwcap & PPC_FEATURE_ARCH_2_06)
|
||||
? __ieee754_hypot_power7
|
||||
: __ieee754_hypot_ppc64);
|
||||
|
||||
strong_alias (__ieee754_hypot, __hypot_finite)
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
/* __ieee_hypotf() POWER7 version.
|
||||
Copyright (C) 2013-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c>
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
/* __ieee_hypot() PowerPC64 version.
|
||||
Copyright (C) 2013-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#undef strong_alias
|
||||
#define strong_alias(a, b)
|
||||
|
||||
#define __ieee754_hypotf __ieee754_hypotf_ppc64
|
||||
|
||||
#include <sysdeps/powerpc/fpu/e_hypotf.c>
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
/* Multiple versions of ieee754_hypot.
|
||||
Copyright (C) 2013-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
#include <math_ldbl_opt.h>
|
||||
#include <shlib-compat.h>
|
||||
#include "init-arch.h"
|
||||
|
||||
extern __typeof (__ieee754_hypotf) __ieee754_hypotf_ppc64 attribute_hidden;
|
||||
extern __typeof (__ieee754_hypotf) __ieee754_hypotf_power7 attribute_hidden;
|
||||
|
||||
libc_ifunc (__ieee754_hypotf,
|
||||
(hwcap & PPC_FEATURE_ARCH_2_06)
|
||||
? __ieee754_hypotf_power7
|
||||
: __ieee754_hypotf_ppc64);
|
||||
|
||||
strong_alias (__ieee754_hypotf, __hypotf_finite)
|
||||
Loading…
Reference in New Issue