AArch64: fix SVE tanpi(f) [BZ #33642]

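Fix the svld1rq coefficient loads, which were passed the caller's
predicate pg instead of an all-true predicate (svptrue_b64 for tanpi,
svptrue_b32 for tanpif) (BZ #33642).
No measurable performance change (tested on V1).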

Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
This commit is contained in:
Pierre Blanchard 2025-11-18 15:03:10 +00:00 committed by Wilco Dijkstra
parent d989840693
commit e889160273
2 changed files with 7 additions and 7 deletions

View File

@ -1,6 +1,6 @@
/* Double-precision (SVE) tanpi function
Copyright (C) 2024 Free Software Foundation, Inc.
Copyright (C) 2024-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -58,10 +58,10 @@ svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg)
svfloat64_t r2 = svmul_x (pg, r, r);
svfloat64_t r4 = svmul_x (pg, r2, r2);
svfloat64_t c_1_3 = svld1rq (pg, &d->c1);
svfloat64_t c_5_7 = svld1rq (pg, &d->c5);
svfloat64_t c_9_11 = svld1rq (pg, &d->c9);
svfloat64_t c_13_14 = svld1rq (pg, &d->c13);
svfloat64_t c_1_3 = svld1rq (svptrue_b64 (), &d->c1);
svfloat64_t c_5_7 = svld1rq (svptrue_b64 (), &d->c5);
svfloat64_t c_9_11 = svld1rq (svptrue_b64 (), &d->c9);
svfloat64_t c_13_14 = svld1rq (svptrue_b64 (), &d->c13);
svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r2, c_1_3, 0);
svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r2, c_1_3, 1);
svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), r2, c_5_7, 0);

View File

@ -1,6 +1,6 @@
/* Single-precision (SVE) tanpi function
Copyright (C) 2024 Free Software Foundation, Inc.
Copyright (C) 2024-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -37,7 +37,7 @@ const static struct v_tanpif_data
svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg)
{
const struct v_tanpif_data *d = ptr_barrier (&tanpif_data);
svfloat32_t odd_coeffs = svld1rq (pg, &d->c1);
svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1);
svfloat32_t n = svrintn_x (pg, x);
/* inf produces nan that propagates. */