From e2b00d59eba07a97c10d540031fedc83df8b7f76 Mon Sep 17 00:00:00 2001 From: James Chesterman Date: Fri, 5 Dec 2025 10:37:45 +0000 Subject: [PATCH] aarch64: Implement AdvSIMD and SVE rsqrt(f) routines Vector variants of the new C23 rsqrt routines for both AdvSIMD and SVE, as well as in both single and double precision. Reviewed-by: Adhemerval Zanella --- bits/libm-simd-decl-stubs.h | 11 +++ math/bits/mathcalls.h | 2 +- sysdeps/aarch64/fpu/Makefile | 1 + sysdeps/aarch64/fpu/Versions | 5 ++ sysdeps/aarch64/fpu/advsimd_f32_protos.h | 1 + sysdeps/aarch64/fpu/bits/math-vector.h | 8 ++ .../fpu/finclude/math-vector-fortran.h | 2 + sysdeps/aarch64/fpu/rsqrt_advsimd.c | 84 +++++++++++++++++++ sysdeps/aarch64/fpu/rsqrt_sve.c | 83 ++++++++++++++++++ sysdeps/aarch64/fpu/rsqrtf_advsimd.c | 82 ++++++++++++++++++ sysdeps/aarch64/fpu/rsqrtf_sve.c | 79 +++++++++++++++++ .../fpu/test-double-advsimd-wrappers.c | 1 + .../aarch64/fpu/test-double-sve-wrappers.c | 1 + .../aarch64/fpu/test-float-advsimd-wrappers.c | 1 + sysdeps/aarch64/fpu/test-float-sve-wrappers.c | 1 + .../unix/sysv/linux/aarch64/libmvec.abilist | 5 ++ 16 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 sysdeps/aarch64/fpu/rsqrt_advsimd.c create mode 100644 sysdeps/aarch64/fpu/rsqrt_sve.c create mode 100644 sysdeps/aarch64/fpu/rsqrtf_advsimd.c create mode 100644 sysdeps/aarch64/fpu/rsqrtf_sve.c diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index 21e72917f7..e12936d7f7 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -242,6 +242,17 @@ #define __DECL_SIMD_atan2f64x #define __DECL_SIMD_atan2f128x +#define __DECL_SIMD_rsqrt +#define __DECL_SIMD_rsqrtf +#define __DECL_SIMD_rsqrtl +#define __DECL_SIMD_rsqrtf16 +#define __DECL_SIMD_rsqrtf32 +#define __DECL_SIMD_rsqrtf64 +#define __DECL_SIMD_rsqrtf128 +#define __DECL_SIMD_rsqrtf32x +#define __DECL_SIMD_rsqrtf64x +#define __DECL_SIMD_rsqrtf128x + #define __DECL_SIMD_log10 #define __DECL_SIMD_log10f #define 
__DECL_SIMD_log10l diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 37149d8364..592a80fcb9 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -203,7 +203,7 @@ __MATHCALL (powr,, (_Mdouble_ __x, _Mdouble_ __y)); __MATHCALL (rootn,, (_Mdouble_ __x, long long int __y)); /* Return the reciprocal of the square root of X. */ -__MATHCALL (rsqrt,, (_Mdouble_ __x)); +__MATHCALL_VEC (rsqrt,, (_Mdouble_ __x)); #endif diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile index df6cbe6b53..998fc08d43 100644 --- a/sysdeps/aarch64/fpu/Makefile +++ b/sysdeps/aarch64/fpu/Makefile @@ -29,6 +29,7 @@ libmvec-supported-funcs = acos \ log2 \ log2p1 \ pow \ + rsqrt \ sin \ sinh \ sinpi \ diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions index c03de40fe1..d68510a20e 100644 --- a/sysdeps/aarch64/fpu/Versions +++ b/sysdeps/aarch64/fpu/Versions @@ -200,5 +200,10 @@ libmvec { _ZGVnN4v_log10p1f; _ZGVsMxv_log10p1; _ZGVsMxv_log10p1f; + _ZGVnN2v_rsqrt; + _ZGVnN2v_rsqrtf; + _ZGVnN4v_rsqrtf; + _ZGVsMxv_rsqrt; + _ZGVsMxv_rsqrtf; } } diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h index 77a902c11a..abdb1ff114 100644 --- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h +++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h @@ -47,6 +47,7 @@ libmvec_hidden_proto (V_NAME_F1(log2p1)); libmvec_hidden_proto (V_NAME_F1(logp1)); libmvec_hidden_proto (V_NAME_F1(log)); libmvec_hidden_proto (V_NAME_F2(pow)); +libmvec_hidden_proto (V_NAME_F1(rsqrt)); libmvec_hidden_proto (V_NAME_F1(sin)); libmvec_hidden_proto (V_NAME_F1(sinh)); libmvec_hidden_proto (V_NAME_F1(sinpi)); diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h index 56b47f58ef..7406552f49 100644 --- a/sysdeps/aarch64/fpu/bits/math-vector.h +++ b/sysdeps/aarch64/fpu/bits/math-vector.h @@ -157,6 +157,10 @@ # define __DECL_SIMD_pow __DECL_SIMD_aarch64 # undef __DECL_SIMD_powf # define __DECL_SIMD_powf 
__DECL_SIMD_aarch64 +# undef __DECL_SIMD_rsqrt +# define __DECL_SIMD_rsqrt __DECL_SIMD_aarch64 +# undef __DECL_SIMD_rsqrtf +# define __DECL_SIMD_rsqrtf __DECL_SIMD_aarch64 # undef __DECL_SIMD_sin # define __DECL_SIMD_sin __DECL_SIMD_aarch64 # undef __DECL_SIMD_sinf @@ -239,6 +243,7 @@ __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log2p1f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_rsqrtf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinpif (__f32x4_t); @@ -278,6 +283,7 @@ __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log2p1 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_rsqrt (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sinpi (__f64x2_t); @@ -322,6 +328,7 @@ __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log2p1f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_rsqrtf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinpif (__sv_f32_t, __sv_bool_t); @@ -361,6 +368,7 @@ __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log2p1 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_rsqrt (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sinpi (__sv_f64_t, __sv_bool_t); diff --git 
a/sysdeps/aarch64/fpu/finclude/math-vector-fortran.h b/sysdeps/aarch64/fpu/finclude/math-vector-fortran.h index 0109687b5b..55e3469a2c 100644 --- a/sysdeps/aarch64/fpu/finclude/math-vector-fortran.h +++ b/sysdeps/aarch64/fpu/finclude/math-vector-fortran.h @@ -80,6 +80,8 @@ !GCC$ builtin (logp1f) attributes simd (notinbranch) !GCC$ builtin (pow) attributes simd (notinbranch) !GCC$ builtin (powf) attributes simd (notinbranch) +!GCC$ builtin (rsqrt) attributes simd (notinbranch) +!GCC$ builtin (rsqrtf) attributes simd (notinbranch) !GCC$ builtin (sin) attributes simd (notinbranch) !GCC$ builtin (sinf) attributes simd (notinbranch) !GCC$ builtin (sinh) attributes simd (notinbranch) diff --git a/sysdeps/aarch64/fpu/rsqrt_advsimd.c b/sysdeps/aarch64/fpu/rsqrt_advsimd.c new file mode 100644 index 0000000000..f1504c6e5a --- /dev/null +++ b/sysdeps/aarch64/fpu/rsqrt_advsimd.c @@ -0,0 +1,84 @@ +/* Double-precision vector (Advanced SIMD) rsqrt function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +static const struct data +{ + float64x2_t special_bound; + float64x2_t scale_up, scale_down; +} data = { + /* When x < 0x1p-1021, estimate becomes infinity. + x is scaled up by 0x1p54, so the estimate does not reach infinity. + Then the result is multiplied by 0x1p27.
+ The difference between the lowest power possible (-1074) and the special + bound (-1021) is 53; 2^54 is used so the rescale (2^27) is a power of 2. */ + .special_bound = V2 (0x1p-1021), + .scale_up = V2 (0x1p54), + .scale_down = V2 (0x1p27), +}; + +static inline float64x2_t VPCS_ATTR +inline_rsqrt (float64x2_t x) +{ + /* Do estimate instruction. */ + float64x2_t estimate = vrsqrteq_f64 (x); + + /* Do first step instruction. */ + float64x2_t estimate_squared = vmulq_f64 (estimate, estimate); + float64x2_t step = vrsqrtsq_f64 (x, estimate_squared); + estimate = vmulq_f64 (estimate, step); + + /* Do second step instruction. */ + estimate_squared = vmulq_f64 (estimate, estimate); + step = vrsqrtsq_f64 (x, estimate_squared); + estimate = vmulq_f64 (estimate, step); + + /* Do third step instruction. + This is required to achieve < 3.0 ULP. */ + estimate_squared = vmulq_f64 (estimate, estimate); + step = vrsqrtsq_f64 (x, estimate_squared); + estimate = vmulq_f64 (estimate, step); + return estimate; +} + +static float64x2_t NOINLINE +special_case (float64x2_t x, uint64x2_t special, const struct data *d) +{ + x = vbslq_f64 (special, vmulq_f64 (x, d->scale_up), x); + float64x2_t estimate = inline_rsqrt (x); + return vbslq_f64 (special, vmulq_f64 (estimate, d->scale_down), estimate); +} + +/* Double-precision implementation of vector rsqrt(x). + Maximum observed error: 1.45 + 0.5 + _ZGVnN2v_rsqrt(0x1.d13fb41254643p+1023) got 0x1.0c8dee1b29dfap-512 + want 0x1.0c8dee1b29df8p-512. */ +float64x2_t VPCS_ATTR V_NAME_D1 (rsqrt) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + /* Special case: x < special_bound.
*/ + uint64x2_t special = vcgtq_f64 (d->special_bound, x); + if (__glibc_unlikely (v_any_u64 (special))) + { + return special_case (x, special, d); + } + return inline_rsqrt (x); +} diff --git a/sysdeps/aarch64/fpu/rsqrt_sve.c b/sysdeps/aarch64/fpu/rsqrt_sve.c new file mode 100644 index 0000000000..6eda24f3db --- /dev/null +++ b/sysdeps/aarch64/fpu/rsqrt_sve.c @@ -0,0 +1,83 @@ +/* Double-precision vector (SVE) rsqrt function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" + +static const struct data +{ + float64_t special_bound; + int64_t scale_up, scale_down; +} data = { + /* When x < 0x1p-1021, estimate becomes infinity. + x is scaled up by 0x1p54, so the estimate does not reach infinity. + Then the result is multiplied by 0x1p27. + The difference between the lowest power possible (-1074) and the special + bound (-1021) is 53; 2^54 is used so the rescale (2^27) is a power of 2. */ + .special_bound = 0x1p-1021, + .scale_up = 54, + .scale_down = 27, +}; + +static inline svfloat64_t +inline_rsqrt (svfloat64_t x) +{ + /* Do estimate instruction. */ + svfloat64_t estimate = svrsqrte_f64 (x); + + /* Do first step instruction.
*/ + svfloat64_t estimate_squared = svmul_x (svptrue_b64 (), estimate, estimate); + svfloat64_t step = svrsqrts_f64 (x, estimate_squared); + estimate = svmul_x (svptrue_b64 (), estimate, step); + + /* Do second step instruction. */ + estimate_squared = svmul_x (svptrue_b64 (), estimate, estimate); + step = svrsqrts_f64 (x, estimate_squared); + estimate = svmul_x (svptrue_b64 (), estimate, step); + + /* Do third step instruction. + This is required to achieve < 3.0 ULP. */ + estimate_squared = svmul_x (svptrue_b64 (), estimate, estimate); + step = svrsqrts_f64 (x, estimate_squared); + estimate = svmul_x (svptrue_b64 (), estimate, step); + return estimate; +} + +static svfloat64_t NOINLINE +special_case (svfloat64_t x, svbool_t special, const struct data *d) +{ + x = svscale_f64_m (special, x, sv_s64 (d->scale_up)); + svfloat64_t estimate = inline_rsqrt (x); + return svscale_f64_m (special, estimate, sv_s64 (d->scale_down)); +} + +/* Double-precision SVE implementation of rsqrt(x). + Maximum observed error: 1.45 + 0.5 + _ZGVsMxv_rsqrt(0x1.d13fb41254643p+1023) got 0x1.0c8dee1b29dfap-512 + want 0x1.0c8dee1b29df8p-512. */ +svfloat64_t SV_NAME_D1 (rsqrt) (svfloat64_t x, svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svbool_t special = svcmplt_n_f64 (pg, x, d->special_bound); + if (__glibc_unlikely (svptest_any (pg, special))) + { + return special_case (x, special, d); + } + return inline_rsqrt (x); +} diff --git a/sysdeps/aarch64/fpu/rsqrtf_advsimd.c b/sysdeps/aarch64/fpu/rsqrtf_advsimd.c new file mode 100644 index 0000000000..2bf5a0a607 --- /dev/null +++ b/sysdeps/aarch64/fpu/rsqrtf_advsimd.c @@ -0,0 +1,82 @@ +/* Single-precision vector (Advanced SIMD) rsqrt function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +static const struct data +{ + float32x4_t special_bound; + float32x4_t scale_up, scale_down; +} data = { + /* When x < 0x1p-128, estimate becomes infinity. + x is scaled up by 0x1p22f, so estimate does not reach infinity. + Then the result is multiplied by 0x1p11f. + The difference between the lowest power possible (-149) and the special + bound (-128) is 21. 22 is used here so that a power of 2 can be used for + scaling in both directions. */ + .special_bound = V4 (0x1p-128f), + .scale_up = V4 (0x1p22f), + .scale_down = V4 (0x1p11f), +}; + +static inline float32x4_t VPCS_ATTR +inline_rsqrt (float32x4_t x) +{ + /* Do estimate instruction. */ + float32x4_t estimate = vrsqrteq_f32 (x); + + /* Do first step instruction. */ + float32x4_t estimate_squared = vmulq_f32 (estimate, estimate); + float32x4_t step = vrsqrtsq_f32 (x, estimate_squared); + estimate = vmulq_f32 (estimate, step); + + /* Do second step instruction. + This is required to achieve < 3.0 ULP.
*/ + estimate_squared = vmulq_f32 (estimate, estimate); + step = vrsqrtsq_f32 (x, estimate_squared); + estimate = vmulq_f32 (estimate, step); + return estimate; +} + +static float32x4_t NOINLINE +special_case (float32x4_t x, uint32x4_t special, const struct data *d) +{ + x = vbslq_f32 (special, vmulq_f32 (x, d->scale_up), x); + float32x4_t estimate = inline_rsqrt (x); + return vbslq_f32 (special, vmulq_f32 (estimate, d->scale_down), estimate); +} + +/* Single-precision implementation of vector rsqrtf(x). + Maximum observed error: 1.47 + 0.5 + _ZGVnN4v_rsqrtf (0x1.f610dep+127) got 0x1.02852cp-64 + want 0x1.02853p-64. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (rsqrt) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + + /* Special case: x < special_bound. */ + uint32x4_t special = vcgtq_f32 (d->special_bound, x); + if (__glibc_unlikely (v_any_u32 (special))) + { + return special_case (x, special, d); + } + return inline_rsqrt (x); +} +libmvec_hidden_def (V_NAME_F1 (rsqrt)) +HALF_WIDTH_ALIAS_F1 (rsqrt) diff --git a/sysdeps/aarch64/fpu/rsqrtf_sve.c b/sysdeps/aarch64/fpu/rsqrtf_sve.c new file mode 100644 index 0000000000..b752694e26 --- /dev/null +++ b/sysdeps/aarch64/fpu/rsqrtf_sve.c @@ -0,0 +1,79 @@ +/* Single-precision vector (SVE) rsqrt function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" + +static const struct data +{ + float32_t special_bound; + int32_t scale_up, scale_down; +} data = { + /* When x < 0x1p-128, estimate becomes infinity. + x is scaled up by 0x1p22f, so estimate does not reach infinity. + Then the result is multiplied by 0x1p11f. + The difference between the lowest power possible (-149) and the special + bound (-128) is 21. 22 is used here so that a power of 2 can be used for + scaling in both directions. */ + .special_bound = 0x1p-128f, + .scale_up = 22, + .scale_down = 11, +}; + +static inline svfloat32_t +inline_rsqrt (svfloat32_t x) +{ + /* Do estimate instruction. */ + svfloat32_t estimate = svrsqrte_f32 (x); + + /* Do first step instruction. */ + svfloat32_t estimate_squared = svmul_x (svptrue_b32 (), estimate, estimate); + svfloat32_t step = svrsqrts_f32 (x, estimate_squared); + estimate = svmul_x (svptrue_b32 (), estimate, step); + + /* Do second step instruction. + This is required to achieve < 3.0 ULP. */ + estimate_squared = svmul_x (svptrue_b32 (), estimate, estimate); + step = svrsqrts_f32 (x, estimate_squared); + estimate = svmul_x (svptrue_b32 (), estimate, step); + return estimate; +} + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + x = svscale_f32_m (special, x, sv_s32 (d->scale_up)); + svfloat32_t estimate = inline_rsqrt (x); + return svscale_f32_m (special, estimate, sv_s32 (d->scale_down)); +} + +/* Single-precision SVE implementation of rsqrtf(x). + Maximum observed error: 1.47 + 0.5 + _ZGVsMxv_rsqrtf (0x1.f610dep+127) got 0x1.02852cp-64 + want 0x1.02853p-64.
*/ +svfloat32_t SV_NAME_F1 (rsqrt) (svfloat32_t x, svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svbool_t special = svcmplt_n_f32 (pg, x, d->special_bound); + if (__glibc_unlikely (svptest_any (pg, special))) + { + return special_case (x, special, d); + } + return inline_rsqrt (x); +} diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c index 8138f2f2a7..42d076b9a9 100644 --- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c +++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c @@ -54,6 +54,7 @@ VPCS_VECTOR_WRAPPER (log1p_advsimd, _ZGVnN2v_log1p) VPCS_VECTOR_WRAPPER (log2_advsimd, _ZGVnN2v_log2) VPCS_VECTOR_WRAPPER (log2p1_advsimd, _ZGVnN2v_log2p1) VPCS_VECTOR_WRAPPER_ff (pow_advsimd, _ZGVnN2vv_pow) +VPCS_VECTOR_WRAPPER (rsqrt_advsimd, _ZGVnN2v_rsqrt) VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin) VPCS_VECTOR_WRAPPER (sinh_advsimd, _ZGVnN2v_sinh) VPCS_VECTOR_WRAPPER (sinpi_advsimd, _ZGVnN2v_sinpi) diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c index 034098c1ec..543816558b 100644 --- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c +++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c @@ -73,6 +73,7 @@ SVE_VECTOR_WRAPPER (log1p_sve, _ZGVsMxv_log1p) SVE_VECTOR_WRAPPER (log2_sve, _ZGVsMxv_log2) SVE_VECTOR_WRAPPER (log2p1_sve, _ZGVsMxv_log2p1) SVE_VECTOR_WRAPPER_ff (pow_sve, _ZGVsMxvv_pow) +SVE_VECTOR_WRAPPER (rsqrt_sve, _ZGVsMxv_rsqrt) SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin) SVE_VECTOR_WRAPPER (sinh_sve, _ZGVsMxv_sinh) SVE_VECTOR_WRAPPER (sinpi_sve, _ZGVsMxv_sinpi) diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c index 3879b3c654..5217709796 100644 --- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c +++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c @@ -54,6 +54,7 @@ VPCS_VECTOR_WRAPPER (log1pf_advsimd, _ZGVnN4v_log1pf) VPCS_VECTOR_WRAPPER
(log2f_advsimd, _ZGVnN4v_log2f) VPCS_VECTOR_WRAPPER (log2p1f_advsimd, _ZGVnN4v_log2p1f) VPCS_VECTOR_WRAPPER_ff (powf_advsimd, _ZGVnN4vv_powf) +VPCS_VECTOR_WRAPPER (rsqrtf_advsimd, _ZGVnN4v_rsqrtf) VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf) VPCS_VECTOR_WRAPPER (sinhf_advsimd, _ZGVnN4v_sinhf) VPCS_VECTOR_WRAPPER (sinpif_advsimd, _ZGVnN4v_sinpif) diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c index 38a6e03c23..a35b2fc7b5 100644 --- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c +++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c @@ -73,6 +73,7 @@ SVE_VECTOR_WRAPPER (log1pf_sve, _ZGVsMxv_log1pf) SVE_VECTOR_WRAPPER (log2f_sve, _ZGVsMxv_log2f) SVE_VECTOR_WRAPPER (log2p1f_sve, _ZGVsMxv_log2p1f) SVE_VECTOR_WRAPPER_ff (powf_sve, _ZGVsMxvv_powf) +SVE_VECTOR_WRAPPER (rsqrtf_sve, _ZGVsMxv_rsqrtf) SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf) SVE_VECTOR_WRAPPER (sinhf_sve, _ZGVsMxv_sinhf) SVE_VECTOR_WRAPPER (sinpif_sve, _ZGVsMxv_sinpif) diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist index fb2eec79a5..6d13d53613 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -176,10 +176,13 @@ GLIBC_2.43 _ZGVnN2v_log10p1 F GLIBC_2.43 _ZGVnN2v_log10p1f F GLIBC_2.43 _ZGVnN2v_log2p1 F GLIBC_2.43 _ZGVnN2v_log2p1f F +GLIBC_2.43 _ZGVnN2v_rsqrt F +GLIBC_2.43 _ZGVnN2v_rsqrtf F GLIBC_2.43 _ZGVnN4v_exp10m1f F GLIBC_2.43 _ZGVnN4v_exp2m1f F GLIBC_2.43 _ZGVnN4v_log10p1f F GLIBC_2.43 _ZGVnN4v_log2p1f F +GLIBC_2.43 _ZGVnN4v_rsqrtf F GLIBC_2.43 _ZGVsMxv_exp10m1 F GLIBC_2.43 _ZGVsMxv_exp10m1f F GLIBC_2.43 _ZGVsMxv_exp2m1 F @@ -188,3 +191,5 @@ GLIBC_2.43 _ZGVsMxv_log10p1 F GLIBC_2.43 _ZGVsMxv_log10p1f F GLIBC_2.43 _ZGVsMxv_log2p1 F GLIBC_2.43 _ZGVsMxv_log2p1f F +GLIBC_2.43 _ZGVsMxv_rsqrt F +GLIBC_2.43 _ZGVsMxv_rsqrtf F