mirror of git://sourceware.org/git/glibc.git
aarch64: Implement AdvSIMD and SVE rsqrt(f) routines
Vector variants of the new C23 rsqrt routines for both AdvSIMD and SVE, as well as in both single and double precision. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
This commit is contained in:
parent
09d85861f1
commit
e2b00d59eb
|
|
@ -242,6 +242,17 @@
|
|||
#define __DECL_SIMD_atan2f64x
|
||||
#define __DECL_SIMD_atan2f128x
|
||||
|
||||
#define __DECL_SIMD_rsqrt
|
||||
#define __DECL_SIMD_rsqrtf
|
||||
#define __DECL_SIMD_rsqrtl
|
||||
#define __DECL_SIMD_rsqrtf16
|
||||
#define __DECL_SIMD_rsqrtf32
|
||||
#define __DECL_SIMD_rsqrtf64
|
||||
#define __DECL_SIMD_rsqrtf128
|
||||
#define __DECL_SIMD_rsqrtf32x
|
||||
#define __DECL_SIMD_rsqrtf64x
|
||||
#define __DECL_SIMD_rsqrtf128x
|
||||
|
||||
#define __DECL_SIMD_log10
|
||||
#define __DECL_SIMD_log10f
|
||||
#define __DECL_SIMD_log10l
|
||||
|
|
|
|||
|
|
@ -203,7 +203,7 @@ __MATHCALL (powr,, (_Mdouble_ __x, _Mdouble_ __y));
|
|||
__MATHCALL (rootn,, (_Mdouble_ __x, long long int __y));
|
||||
|
||||
/* Return the reciprocal of the square root of X. */
|
||||
__MATHCALL (rsqrt,, (_Mdouble_ __x));
|
||||
__MATHCALL_VEC (rsqrt,, (_Mdouble_ __x));
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ libmvec-supported-funcs = acos \
|
|||
log2 \
|
||||
log2p1 \
|
||||
pow \
|
||||
rsqrt \
|
||||
sin \
|
||||
sinh \
|
||||
sinpi \
|
||||
|
|
|
|||
|
|
@ -200,5 +200,10 @@ libmvec {
|
|||
_ZGVnN4v_log10p1f;
|
||||
_ZGVsMxv_log10p1;
|
||||
_ZGVsMxv_log10p1f;
|
||||
_ZGVnN2v_rsqrt;
|
||||
_ZGVnN2v_rsqrtf;
|
||||
_ZGVnN4v_rsqrtf;
|
||||
_ZGVsMxv_rsqrt;
|
||||
_ZGVsMxv_rsqrtf;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ libmvec_hidden_proto (V_NAME_F1(log2p1));
|
|||
libmvec_hidden_proto (V_NAME_F1(logp1));
|
||||
libmvec_hidden_proto (V_NAME_F1(log));
|
||||
libmvec_hidden_proto (V_NAME_F2(pow));
|
||||
libmvec_hidden_proto (V_NAME_F1(rsqrt));
|
||||
libmvec_hidden_proto (V_NAME_F1(sin));
|
||||
libmvec_hidden_proto (V_NAME_F1(sinh));
|
||||
libmvec_hidden_proto (V_NAME_F1(sinpi));
|
||||
|
|
|
|||
|
|
@ -157,6 +157,10 @@
|
|||
# define __DECL_SIMD_pow __DECL_SIMD_aarch64
|
||||
# undef __DECL_SIMD_powf
|
||||
# define __DECL_SIMD_powf __DECL_SIMD_aarch64
|
||||
# undef __DECL_SIMD_rsqrt
|
||||
# define __DECL_SIMD_rsqrt __DECL_SIMD_aarch64
|
||||
# undef __DECL_SIMD_rsqrtf
|
||||
# define __DECL_SIMD_rsqrtf __DECL_SIMD_aarch64
|
||||
# undef __DECL_SIMD_sin
|
||||
# define __DECL_SIMD_sin __DECL_SIMD_aarch64
|
||||
# undef __DECL_SIMD_sinf
|
||||
|
|
@ -239,6 +243,7 @@ __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
|
|||
__vpcs __f32x4_t _ZGVnN4v_log2p1f (__f32x4_t);
|
||||
__vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t);
|
||||
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
|
||||
__vpcs __f32x4_t _ZGVnN4v_rsqrtf (__f32x4_t);
|
||||
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
|
||||
__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
|
||||
__vpcs __f32x4_t _ZGVnN4v_sinpif (__f32x4_t);
|
||||
|
|
@ -278,6 +283,7 @@ __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
|
|||
__vpcs __f64x2_t _ZGVnN2v_log2p1 (__f64x2_t);
|
||||
__vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t);
|
||||
__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
|
||||
__vpcs __f64x2_t _ZGVnN2v_rsqrt (__f64x2_t);
|
||||
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
|
||||
__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
|
||||
__vpcs __f64x2_t _ZGVnN2v_sinpi (__f64x2_t);
|
||||
|
|
@ -322,6 +328,7 @@ __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t);
|
|||
__sv_f32_t _ZGVsMxv_log2p1f (__sv_f32_t, __sv_bool_t);
|
||||
__sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t);
|
||||
__sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t);
|
||||
__sv_f32_t _ZGVsMxv_rsqrtf (__sv_f32_t, __sv_bool_t);
|
||||
__sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
|
||||
__sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t);
|
||||
__sv_f32_t _ZGVsMxv_sinpif (__sv_f32_t, __sv_bool_t);
|
||||
|
|
@ -361,6 +368,7 @@ __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t);
|
|||
__sv_f64_t _ZGVsMxv_log2p1 (__sv_f64_t, __sv_bool_t);
|
||||
__sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t);
|
||||
__sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t);
|
||||
__sv_f64_t _ZGVsMxv_rsqrt (__sv_f64_t, __sv_bool_t);
|
||||
__sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
|
||||
__sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t);
|
||||
__sv_f64_t _ZGVsMxv_sinpi (__sv_f64_t, __sv_bool_t);
|
||||
|
|
|
|||
|
|
@ -80,6 +80,8 @@
|
|||
!GCC$ builtin (logp1f) attributes simd (notinbranch)
|
||||
!GCC$ builtin (pow) attributes simd (notinbranch)
|
||||
!GCC$ builtin (powf) attributes simd (notinbranch)
|
||||
!GCC$ builtin (rsqrt) attributes simd (notinbranch)
|
||||
!GCC$ builtin (rsqrtf) attributes simd (notinbranch)
|
||||
!GCC$ builtin (sin) attributes simd (notinbranch)
|
||||
!GCC$ builtin (sinf) attributes simd (notinbranch)
|
||||
!GCC$ builtin (sinh) attributes simd (notinbranch)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
/* Double-precision vector (Advanced SIMD) rsqrt function
|
||||
|
||||
Copyright (C) 2025 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "v_math.h"
|
||||
|
||||
static const struct data
|
||||
{
|
||||
float64x2_t special_bound;
|
||||
float64x2_t scale_up, scale_down;
|
||||
} data = {
|
||||
/* When x < 0x1p-1021, estimate becomes infinity.
|
||||
x is scaled up by 0x1p54, so the estimate does not reach infinity.
|
||||
Then the result is multiplied by 0x1p27.
|
||||
The difference between the lowest power possible (-1074) and the special
|
||||
bound (-1021) is 54, so 2^54 is used as the scaling value. */
|
||||
.special_bound = V2 (0x1p-1021),
|
||||
.scale_up = V2 (0x1p54),
|
||||
.scale_down = V2 (0x1p27),
|
||||
};
|
||||
|
||||
static inline float64x2_t VPCS_ATTR
|
||||
inline_rsqrt (float64x2_t x)
|
||||
{
|
||||
/* Do estimate instruction. */
|
||||
float64x2_t estimate = vrsqrteq_f64 (x);
|
||||
|
||||
/* Do first step instruction. */
|
||||
float64x2_t estimate_squared = vmulq_f64 (estimate, estimate);
|
||||
float64x2_t step = vrsqrtsq_f64 (x, estimate_squared);
|
||||
estimate = vmulq_f64 (estimate, step);
|
||||
|
||||
/* Do second step instruction. */
|
||||
estimate_squared = vmulq_f64 (estimate, estimate);
|
||||
step = vrsqrtsq_f64 (x, estimate_squared);
|
||||
estimate = vmulq_f64 (estimate, step);
|
||||
|
||||
/* Do third step instruction.
|
||||
This is required to achieve < 3.0 ULP. */
|
||||
estimate_squared = vmulq_f64 (estimate, estimate);
|
||||
step = vrsqrtsq_f64 (x, estimate_squared);
|
||||
estimate = vmulq_f64 (estimate, step);
|
||||
return estimate;
|
||||
}
|
||||
|
||||
static float64x2_t NOINLINE
|
||||
special_case (float64x2_t x, uint64x2_t special, const struct data *d)
|
||||
{
|
||||
x = vbslq_f64 (special, vmulq_f64 (x, d->scale_up), x);
|
||||
float64x2_t estimate = inline_rsqrt (x);
|
||||
return vbslq_f64 (special, vmulq_f64 (estimate, d->scale_down), estimate);
|
||||
}
|
||||
|
||||
/* Double-precision implementation of vector rsqrt(x).
|
||||
Maximum observed error: 1.45 + 0.5
|
||||
_ZGVnN2v_rsqrt(0x1.d13fb41254643p+1023) got 0x1.0c8dee1b29dfap-512
|
||||
want 0x1.0c8dee1b29df8p-512. */
|
||||
float64x2_t VPCS_ATTR V_NAME_D1 (rsqrt) (float64x2_t x)
|
||||
{
|
||||
const struct data *d = ptr_barrier (&data);
|
||||
|
||||
/* Special case: x < special_bound. */
|
||||
uint64x2_t special = vcgtq_f64 (d->special_bound, x);
|
||||
if (__glibc_unlikely (v_any_u64 (special)))
|
||||
{
|
||||
return special_case (x, special, d);
|
||||
}
|
||||
return inline_rsqrt (x);
|
||||
}
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
/* Double-precision vector (SVE) rsqrt function
|
||||
|
||||
Copyright (C) 2025 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "sv_math.h"
|
||||
|
||||
static const struct data
|
||||
{
|
||||
float64_t special_bound;
|
||||
int64_t scale_up, scale_down;
|
||||
} data = {
|
||||
/* When x < 0x1p-1021, estimate becomes infinity.
|
||||
x is scaled up by 0x1p54, so the estimate does not reach infinity.
|
||||
Then the result is multiplied by 0x1p27.
|
||||
The difference between the lowest power possible (-1074) and the special
|
||||
bound (-1021) is 54, so 2^54 is used as the scaling value. */
|
||||
.special_bound = 0x1p-1021,
|
||||
.scale_up = 54,
|
||||
.scale_down = 27,
|
||||
};
|
||||
|
||||
static inline svfloat64_t
|
||||
inline_rsqrt (svfloat64_t x)
|
||||
{
|
||||
/* Do estimate instruction. */
|
||||
svfloat64_t estimate = svrsqrte_f64 (x);
|
||||
|
||||
/* Do first step instruction. */
|
||||
svfloat64_t estimate_squared = svmul_x (svptrue_b64 (), estimate, estimate);
|
||||
svfloat64_t step = svrsqrts_f64 (x, estimate_squared);
|
||||
estimate = svmul_x (svptrue_b64 (), estimate, step);
|
||||
|
||||
/* Do second step instruction. */
|
||||
estimate_squared = svmul_x (svptrue_b64 (), estimate, estimate);
|
||||
step = svrsqrts_f64 (x, estimate_squared);
|
||||
estimate = svmul_x (svptrue_b64 (), estimate, step);
|
||||
|
||||
/* Do third step instruction.
|
||||
This is required to achieve < 3.0 ULP. */
|
||||
estimate_squared = svmul_x (svptrue_b64 (), estimate, estimate);
|
||||
step = svrsqrts_f64 (x, estimate_squared);
|
||||
estimate = svmul_x (svptrue_b64 (), estimate, step);
|
||||
return estimate;
|
||||
}
|
||||
|
||||
static svfloat64_t NOINLINE
|
||||
special_case (svfloat64_t x, svbool_t special, const struct data *d)
|
||||
{
|
||||
x = svscale_f64_m (special, x, sv_s64 (d->scale_up));
|
||||
svfloat64_t estimate = inline_rsqrt (x);
|
||||
return svscale_f64_m (special, estimate, sv_s64 (d->scale_down));
|
||||
}
|
||||
|
||||
/* Double-precision SVE implementation of rsqrt(x).
|
||||
Maximum observed error: 1.45 + 0.5
|
||||
_ZGVnN2v_rsqrt(0x1.d13fb41254643p+1023) got 0x1.0c8dee1b29dfap-512
|
||||
want 0x1.0c8dee1b29df8p-512. */
|
||||
svfloat64_t SV_NAME_D1 (rsqrt) (svfloat64_t x, svbool_t pg)
|
||||
{
|
||||
const struct data *d = ptr_barrier (&data);
|
||||
|
||||
svbool_t special = svcmplt_n_f64 (pg, x, d->special_bound);
|
||||
if (__glibc_unlikely (svptest_any (pg, special)))
|
||||
{
|
||||
return special_case (x, special, d);
|
||||
}
|
||||
return inline_rsqrt (x);
|
||||
}
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
/* Single-precision vector (Advanced SIMD) rsqrt function
|
||||
|
||||
Copyright (C) 2025 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "v_math.h"
|
||||
|
||||
static const struct data
|
||||
{
|
||||
float32x4_t special_bound;
|
||||
float32x4_t scale_up, scale_down;
|
||||
} data = {
|
||||
/* When x < 0x1p-128, estimate becomes infinity.
|
||||
x is scaled up by 0x1p22f, so estimate does not reach infinity.
|
||||
Then the result is multiplied by 0x1p11f.
|
||||
The difference between the lowest power possible (-149) and the special
|
||||
bound (-128) is 21. 22 is used here so that a power of 2 can be used for
|
||||
scaling in both directions. */
|
||||
.special_bound = V4 (0x1p-128f),
|
||||
.scale_up = V4 (0x1p22f),
|
||||
.scale_down = V4 (0x1p11f),
|
||||
};
|
||||
|
||||
static inline float32x4_t VPCS_ATTR
|
||||
inline_rsqrt (float32x4_t x)
|
||||
{
|
||||
/* Do estimate instruction. */
|
||||
float32x4_t estimate = vrsqrteq_f32 (x);
|
||||
|
||||
/* Do first step instruction. */
|
||||
float32x4_t estimate_squared = vmulq_f32 (estimate, estimate);
|
||||
float32x4_t step = vrsqrtsq_f32 (x, estimate_squared);
|
||||
estimate = vmulq_f32 (estimate, step);
|
||||
|
||||
/* Do second step instruction.
|
||||
This is required to achieve < 3.0 ULP. */
|
||||
estimate_squared = vmulq_f32 (estimate, estimate);
|
||||
step = vrsqrtsq_f32 (x, estimate_squared);
|
||||
estimate = vmulq_f32 (estimate, step);
|
||||
return estimate;
|
||||
}
|
||||
|
||||
static float32x4_t NOINLINE
|
||||
special_case (float32x4_t x, uint32x4_t special, const struct data *d)
|
||||
{
|
||||
x = vbslq_f32 (special, vmulq_f32 (x, d->scale_up), x);
|
||||
float32x4_t estimate = inline_rsqrt (x);
|
||||
return vbslq_f32 (special, vmulq_f32 (estimate, d->scale_down), estimate);
|
||||
}
|
||||
|
||||
/* Single-precision implementation of vector rqsrtf(x).
|
||||
Maximum observed error: 1.47 + 0.5
|
||||
_ZGVnN4v_rsqrtf (0x1.f610dep+127) got 0x1.02852cp-64
|
||||
want 0x1.02853p-64. */
|
||||
float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (rsqrt) (float32x4_t x)
|
||||
{
|
||||
const struct data *d = ptr_barrier (&data);
|
||||
|
||||
/* Special case: x < special_bound. */
|
||||
uint32x4_t special = vcgtq_f32 (d->special_bound, x);
|
||||
if (__glibc_unlikely (v_any_u32 (special)))
|
||||
{
|
||||
return special_case (x, special, d);
|
||||
}
|
||||
return inline_rsqrt (x);
|
||||
}
|
||||
libmvec_hidden_def (V_NAME_F1 (rsqrt))
|
||||
HALF_WIDTH_ALIAS_F1 (rsqrt)
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
/* Single-precision vector (SVE) rsqrt function
|
||||
|
||||
Copyright (C) 2025 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "sv_math.h"
|
||||
|
||||
static const struct data
|
||||
{
|
||||
float32_t special_bound;
|
||||
int32_t scale_up, scale_down;
|
||||
} data = {
|
||||
/* When x < 0x1p-128, estimate becomes infinity.
|
||||
x is scaled up by 0x1p22f, so estimate does not reach infinity.
|
||||
Then the result is multiplied by 0x1p11f.
|
||||
The difference between the lowest power possible (-149) and the special
|
||||
bound (-128) is 21. 22 is used here so that a power of 2 can be used for
|
||||
scaling in both directions. */
|
||||
.special_bound = 0x1p-128f,
|
||||
.scale_up = 22,
|
||||
.scale_down = 11,
|
||||
};
|
||||
|
||||
static inline svfloat32_t
|
||||
inline_rsqrt (svfloat32_t x)
|
||||
{
|
||||
/* Do estimate instruction. */
|
||||
svfloat32_t estimate = svrsqrte_f32 (x);
|
||||
|
||||
/* Do first step instruction. */
|
||||
svfloat32_t estimate_squared = svmul_x (svptrue_b32 (), estimate, estimate);
|
||||
svfloat32_t step = svrsqrts_f32 (x, estimate_squared);
|
||||
estimate = svmul_x (svptrue_b32 (), estimate, step);
|
||||
|
||||
/* Do second step instruction.
|
||||
This is required to achieve < 3.0 ULP. */
|
||||
estimate_squared = svmul_x (svptrue_b32 (), estimate, estimate);
|
||||
step = svrsqrts_f32 (x, estimate_squared);
|
||||
estimate = svmul_x (svptrue_b32 (), estimate, step);
|
||||
return estimate;
|
||||
}
|
||||
|
||||
static svfloat32_t NOINLINE
|
||||
special_case (svfloat32_t x, svbool_t special, const struct data *d)
|
||||
{
|
||||
x = svscale_f32_m (special, x, sv_s32 (d->scale_up));
|
||||
svfloat32_t estimate = inline_rsqrt (x);
|
||||
return svscale_f32_m (special, estimate, sv_s32 (d->scale_down));
|
||||
}
|
||||
|
||||
/* Single-precision SVE implementation of rsqrtf(x).
|
||||
Maximum observed error: 1.47 + 0.5
|
||||
_ZGVsMxv_rsqrtf (0x1.f610dep+127) got 0x1.02852cp-64
|
||||
want 0x1.02853p-64. */
|
||||
svfloat32_t SV_NAME_F1 (rsqrt) (svfloat32_t x, svbool_t pg)
|
||||
{
|
||||
const struct data *d = ptr_barrier (&data);
|
||||
|
||||
svbool_t special = svcmplt_n_f32 (pg, x, 0x1p-128f);
|
||||
if (__glibc_unlikely (svptest_any (pg, special)))
|
||||
{
|
||||
return special_case (x, special, d);
|
||||
}
|
||||
return inline_rsqrt (x);
|
||||
}
|
||||
|
|
@ -54,6 +54,7 @@ VPCS_VECTOR_WRAPPER (log1p_advsimd, _ZGVnN2v_log1p)
|
|||
VPCS_VECTOR_WRAPPER (log2_advsimd, _ZGVnN2v_log2)
|
||||
VPCS_VECTOR_WRAPPER (log2p1_advsimd, _ZGVnN2v_log2p1)
|
||||
VPCS_VECTOR_WRAPPER_ff (pow_advsimd, _ZGVnN2vv_pow)
|
||||
VPCS_VECTOR_WRAPPER (rsqrt_advsimd, _ZGVnN2v_rsqrt)
|
||||
VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin)
|
||||
VPCS_VECTOR_WRAPPER (sinh_advsimd, _ZGVnN2v_sinh)
|
||||
VPCS_VECTOR_WRAPPER (sinpi_advsimd, _ZGVnN2v_sinpi)
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ SVE_VECTOR_WRAPPER (log1p_sve, _ZGVsMxv_log1p)
|
|||
SVE_VECTOR_WRAPPER (log2_sve, _ZGVsMxv_log2)
|
||||
SVE_VECTOR_WRAPPER (log2p1_sve, _ZGVsMxv_log2p1)
|
||||
SVE_VECTOR_WRAPPER_ff (pow_sve, _ZGVsMxvv_pow)
|
||||
SVE_VECTOR_WRAPPER (rsqrt_sve, _ZGVsMxv_rsqrt)
|
||||
SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin)
|
||||
SVE_VECTOR_WRAPPER (sinh_sve, _ZGVsMxv_sinh)
|
||||
SVE_VECTOR_WRAPPER (sinpi_sve, _ZGVsMxv_sinpi)
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ VPCS_VECTOR_WRAPPER (log1pf_advsimd, _ZGVnN4v_log1pf)
|
|||
VPCS_VECTOR_WRAPPER (log2f_advsimd, _ZGVnN4v_log2f)
|
||||
VPCS_VECTOR_WRAPPER (log2p1f_advsimd, _ZGVnN4v_log2p1f)
|
||||
VPCS_VECTOR_WRAPPER_ff (powf_advsimd, _ZGVnN4vv_powf)
|
||||
VPCS_VECTOR_WRAPPER (rsqrtf_advsimd, _ZGVnN4v_rsqrtf)
|
||||
VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf)
|
||||
VPCS_VECTOR_WRAPPER (sinhf_advsimd, _ZGVnN4v_sinhf)
|
||||
VPCS_VECTOR_WRAPPER (sinpif_advsimd, _ZGVnN4v_sinpif)
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ SVE_VECTOR_WRAPPER (log1pf_sve, _ZGVsMxv_log1pf)
|
|||
SVE_VECTOR_WRAPPER (log2f_sve, _ZGVsMxv_log2f)
|
||||
SVE_VECTOR_WRAPPER (log2p1f_sve, _ZGVsMxv_log2p1f)
|
||||
SVE_VECTOR_WRAPPER_ff (powf_sve, _ZGVsMxvv_powf)
|
||||
SVE_VECTOR_WRAPPER (rsqrtf_sve, _ZGVsMxv_rsqrtf)
|
||||
SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf)
|
||||
SVE_VECTOR_WRAPPER (sinhf_sve, _ZGVsMxv_sinhf)
|
||||
SVE_VECTOR_WRAPPER (sinpif_sve, _ZGVsMxv_sinpif)
|
||||
|
|
|
|||
|
|
@ -176,10 +176,13 @@ GLIBC_2.43 _ZGVnN2v_log10p1 F
|
|||
GLIBC_2.43 _ZGVnN2v_log10p1f F
|
||||
GLIBC_2.43 _ZGVnN2v_log2p1 F
|
||||
GLIBC_2.43 _ZGVnN2v_log2p1f F
|
||||
GLIBC_2.43 _ZGVnN2v_rsqrt F
|
||||
GLIBC_2.43 _ZGVnN2v_rsqrtf F
|
||||
GLIBC_2.43 _ZGVnN4v_exp10m1f F
|
||||
GLIBC_2.43 _ZGVnN4v_exp2m1f F
|
||||
GLIBC_2.43 _ZGVnN4v_log10p1f F
|
||||
GLIBC_2.43 _ZGVnN4v_log2p1f F
|
||||
GLIBC_2.43 _ZGVnN4v_rsqrtf F
|
||||
GLIBC_2.43 _ZGVsMxv_exp10m1 F
|
||||
GLIBC_2.43 _ZGVsMxv_exp10m1f F
|
||||
GLIBC_2.43 _ZGVsMxv_exp2m1 F
|
||||
|
|
@ -188,3 +191,5 @@ GLIBC_2.43 _ZGVsMxv_log10p1 F
|
|||
GLIBC_2.43 _ZGVsMxv_log10p1f F
|
||||
GLIBC_2.43 _ZGVsMxv_log2p1 F
|
||||
GLIBC_2.43 _ZGVsMxv_log2p1f F
|
||||
GLIBC_2.43 _ZGVsMxv_rsqrt F
|
||||
GLIBC_2.43 _ZGVsMxv_rsqrtf F
|
||||
|
|
|
|||
Loading…
Reference in New Issue