x86: Use separate variable for TLSDESC XSAVE/XSAVEC state size (bug 32810)

Previously, the initialization code reused the xsave_state_full_size
member of struct cpu_features for the TLSDESC state size.  However,
the tunable processing code assumes that this member has the
original XSAVE (non-compact) state size, so that it can use its
value if XSAVEC is disabled via tunable.

This change uses a separate variable and not a struct member because
the value is only needed in ld.so and the static libc, but not in
libc.so.  As a result, struct cpu_features layout does not change,
helping a future backport of this change.

Fixes commit 9b7091415a ("x86-64: Update _dl_tlsdesc_dynamic to
preserve AMX registers").

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
Florian Weimer 2025-03-28 09:26:59 +01:00
parent 59585ddaa2
commit 145097dff1
9 changed files with 40 additions and 8 deletions

View File

@ -21,6 +21,9 @@ tests += \
tst-cpu-features-supports-static \
tst-get-cpu-features \
tst-get-cpu-features-static \
tst-gnu2-tls2-x86-noxsave \
tst-gnu2-tls2-x86-noxsavec \
tst-gnu2-tls2-x86-noxsavexsavec \
tst-hwcap-tunables \
# tests
tests-static += \
@ -91,6 +94,22 @@ CFLAGS-tst-gnu2-tls2.c += -msse
CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy
LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy
LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy
# Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled
# via tunable.
tst-gnu2-tls2-x86-noxsave-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE
tst-gnu2-tls2-x86-noxsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
tst-gnu2-tls2-x86-noxsavexsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE,-XSAVEC
$(objpfx)tst-gnu2-tls2-x86-noxsave.out \
$(objpfx)tst-gnu2-tls2-x86-noxsavec.out \
$(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \
$(objpfx)tst-gnu2-tls2mod0.so \
$(objpfx)tst-gnu2-tls2mod1.so \
$(objpfx)tst-gnu2-tls2mod2.so
endif
ifeq ($(subdir),math)

View File

@ -84,6 +84,8 @@ extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
# include <dl-cet.h>
#endif
unsigned long int _dl_x86_features_tlsdesc_state_size;
static void
update_active (struct cpu_features *cpu_features)
{
@ -318,6 +320,7 @@ update_active (struct cpu_features *cpu_features)
= xsave_state_full_size;
cpu_features->xsave_state_full_size
= xsave_state_full_size;
_dl_x86_features_tlsdesc_state_size = xsave_state_full_size;
/* Check if XSAVEC is available. */
if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
@ -406,11 +409,9 @@ update_active (struct cpu_features *cpu_features)
= ALIGN_UP ((amx_size
+ TLSDESC_CALL_REGISTER_SAVE_AREA),
64);
/* Set xsave_state_full_size to the compact AMX
state size for XSAVEC. NB: xsave_state_full_size
is only used in _dl_tlsdesc_dynamic_xsave and
_dl_tlsdesc_dynamic_xsavec. */
cpu_features->xsave_state_full_size = amx_size;
/* Set TLSDESC state size to the compact AMX
state size for XSAVEC. */
_dl_x86_features_tlsdesc_state_size = amx_size;
#endif
cpu_features->xsave_state_size
= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,

View File

@ -164,6 +164,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
/* Update xsave_state_size to XSAVE state size. */
cpu_features->xsave_state_size
= cpu_features->xsave_state_full_size;
_dl_x86_features_tlsdesc_state_size
= cpu_features->xsave_state_full_size;
CPU_FEATURE_UNSET (cpu_features, XSAVEC);
}
}

View File

@ -89,6 +89,8 @@ _dl_diagnostics_cpu (void)
cpu_features->xsave_state_size);
print_cpu_features_value ("xsave_state_full_size",
cpu_features->xsave_state_full_size);
print_cpu_features_value ("tlsdesc_state_full_size",
_dl_x86_features_tlsdesc_state_size);
print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size);
print_cpu_features_value ("shared_cache_size",
cpu_features->shared_cache_size);

View File

@ -935,8 +935,6 @@ struct cpu_features
/* The full state size for XSAVE when XSAVEC is disabled by
GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
and the AMX state size when XSAVEC is available.
*/
unsigned int xsave_state_full_size;
/* Data cache size for use in memory and string routines, typically
@ -990,6 +988,13 @@ extern const struct cpu_features *_dl_x86_get_cpu_features (void)
#define __get_cpu_features() _dl_x86_get_cpu_features()
#if IS_IN (rtld) || IS_IN (libc)
/* XSAVE/XSAVEC state size used by TLS descriptors. Compared to
xsave_state_size from struct cpu_features, this includes additional
registers. */
extern unsigned long int _dl_x86_features_tlsdesc_state_size attribute_hidden;
#endif
#if defined (_LIBC) && !IS_IN (nonlib)
/* Unused for x86. */
# define INIT_ARCH()

View File

@ -0,0 +1 @@
#include <elf/tst-gnu2-tls2.c>

View File

@ -0,0 +1 @@
#include <elf/tst-gnu2-tls2.c>

View File

@ -0,0 +1 @@
#include <elf/tst-gnu2-tls2.c>

View File

@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
# endif
#else
/* Allocate stack space of the required size to save the state. */
sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
sub _dl_x86_features_tlsdesc_state_size(%rip), %RSP_LP
#endif
/* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
r10 and r11. */