AArch64: fix SVE tanpi(f) [BZ #33642]

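Fix the svld1rq coefficient loads in SVE tanpi and tanpif, which were governed by the caller-supplied predicate pg; they now use an all-true predicate (svptrue_b64 / svptrue_b32) instead (BZ #33642). Negligible performance change (tested on V1).

Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>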
2025-11-18 15:03:10 +00:00 · 2025-11-18 15:03:10 +00:00 · e889160273
parent d989840693
commit e889160273
2 changed files with 7 additions and 7 deletions
--- a/sysdeps/aarch64/fpu/tanpi_sve.c
+++ b/sysdeps/aarch64/fpu/tanpi_sve.c
@ -1,6 +1,6 @@
 /* Double-precision (SVE) tanpi function

-   Copyright (C) 2024 Free Software Foundation, Inc.
+   Copyright (C) 2024-2025 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
@ -58,10 +58,10 @@ svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg)
  svfloat64_t r2 = svmul_x (pg, r, r);
  svfloat64_t r4 = svmul_x (pg, r2, r2);

-  svfloat64_t c_1_3 = svld1rq (pg, &d->c1);
-  svfloat64_t c_5_7 = svld1rq (pg, &d->c5);
-  svfloat64_t c_9_11 = svld1rq (pg, &d->c9);
-  svfloat64_t c_13_14 = svld1rq (pg, &d->c13);
+  svfloat64_t c_1_3 = svld1rq (svptrue_b64 (), &d->c1);
+  svfloat64_t c_5_7 = svld1rq (svptrue_b64 (), &d->c5);
+  svfloat64_t c_9_11 = svld1rq (svptrue_b64 (), &d->c9);
+  svfloat64_t c_13_14 = svld1rq (svptrue_b64 (), &d->c13);
  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r2, c_1_3, 0);
  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r2, c_1_3, 1);
  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), r2, c_5_7, 0);
--- a/sysdeps/aarch64/fpu/tanpif_sve.c
+++ b/sysdeps/aarch64/fpu/tanpif_sve.c
@ -1,6 +1,6 @@
 /* Single-precision (SVE) tanpi function

-   Copyright (C) 2024 Free Software Foundation, Inc.
+   Copyright (C) 2024-2025 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
@ -37,7 +37,7 @@ const static struct v_tanpif_data
 svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg)
 {
  const struct v_tanpif_data *d = ptr_barrier (&tanpif_data);
-  svfloat32_t odd_coeffs = svld1rq (pg, &d->c1);
+  svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1);
  svfloat32_t n = svrintn_x (pg, x);

  /* inf produces nan that propagates.  */