From 427c25278d1dae62dffa07ea5cd0fc33f07190af Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Fri, 31 Oct 2025 17:00:46 -0300 Subject: [PATCH] x86: Adapt "%v" usage on clang to emit VEX encoding clang does not support the %v modifier to select the AVX encoding, nor the '%d' asm constraint, and for AVX builds it requires all 3 arguments. This patch adds a new internal header, math-inline-asm.h, that adds functions to abstract the required inline asm differences between gcc and clang. Reviewed-by: H.J. Lu --- sysdeps/i386/fpu/fclrexcpt.c | 5 +- sysdeps/i386/fpu/fedisblxcpt.c | 5 +- sysdeps/i386/fpu/feenablxcpt.c | 5 +- sysdeps/i386/fpu/fegetenv.c | 3 +- sysdeps/i386/fpu/fegetmode.c | 3 +- sysdeps/i386/fpu/feholdexcpt.c | 5 +- sysdeps/i386/fpu/fesetenv.c | 5 +- sysdeps/i386/fpu/fesetexcept.c | 8 ++-- sysdeps/i386/fpu/fesetmode.c | 6 ++- sysdeps/i386/fpu/fesetround.c | 6 +-- sysdeps/i386/fpu/feupdateenv.c | 3 +- sysdeps/i386/fpu/fgetexcptflg.c | 6 +-- sysdeps/i386/fpu/fsetexcptflg.c | 5 +- sysdeps/i386/fpu/ftestexcept.c | 5 +- sysdeps/i386/setfpucw.c | 7 +-- sysdeps/x86/fpu/fenv_private.h | 37 +++++++-------- sysdeps/x86/fpu/math-inline-asm.h | 77 +++++++++++++++++++++++++++++++ sysdeps/x86/fpu/math_private.h | 8 ++-- sysdeps/x86/fpu/sfp-machine.h | 4 +- sysdeps/x86/fpu/test-fenv-sse-2.c | 23 +++------ sysdeps/x86_64/fpu/fclrexcpt.c | 5 +- sysdeps/x86_64/fpu/fedisblxcpt.c | 5 +- sysdeps/x86_64/fpu/feenablxcpt.c | 5 +- sysdeps/x86_64/fpu/fegetenv.c | 12 +++-- sysdeps/x86_64/fpu/fegetmode.c | 3 +- sysdeps/x86_64/fpu/feholdexcpt.c | 10 ++-- sysdeps/x86_64/fpu/fesetenv.c | 9 ++-- sysdeps/x86_64/fpu/fesetexcept.c | 6 +-- sysdeps/x86_64/fpu/fesetmode.c | 6 ++- sysdeps/x86_64/fpu/fesetround.c | 7 +-- sysdeps/x86_64/fpu/feupdateenv.c | 4 +- sysdeps/x86_64/fpu/fgetexcptflg.c | 5 +- sysdeps/x86_64/fpu/fraiseexcpt.c | 18 ++------ sysdeps/x86_64/fpu/fsetexcptflg.c | 5 +- sysdeps/x86_64/fpu/ftestexcept.c | 5 +- 35 files changed, 210 insertions(+), 121 deletions(-) create mode 100644 
sysdeps/x86/fpu/math-inline-asm.h diff --git a/sysdeps/i386/fpu/fclrexcpt.c b/sysdeps/i386/fpu/fclrexcpt.c index 39bcf3de59..cd301a0819 100644 --- a/sysdeps/i386/fpu/fclrexcpt.c +++ b/sysdeps/i386/fpu/fclrexcpt.c @@ -19,6 +19,7 @@ #include #include #include +#include int __feclearexcept (int excepts) @@ -44,13 +45,13 @@ __feclearexcept (int excepts) unsigned int xnew_exc; /* Get the current MXCSR. */ - __asm__ ("%vstmxcsr %0" : "=m" (xnew_exc)); + stmxcsr_inline_asm (&xnew_exc); /* Clear the relevant bits. */ xnew_exc &= ~excepts; /* Put the new data in effect. */ - __asm__ ("%vldmxcsr %0" : : "m" (xnew_exc)); + ldmxcsr_inline_asm (&xnew_exc); } /* Success. */ diff --git a/sysdeps/i386/fpu/fedisblxcpt.c b/sysdeps/i386/fpu/fedisblxcpt.c index a2dfa8e4c9..eb232eaadc 100644 --- a/sysdeps/i386/fpu/fedisblxcpt.c +++ b/sysdeps/i386/fpu/fedisblxcpt.c @@ -19,6 +19,7 @@ #include #include #include +#include int fedisableexcept (int excepts) @@ -41,11 +42,11 @@ fedisableexcept (int excepts) unsigned int xnew_exc; /* Get the current control word. */ - __asm__ ("%vstmxcsr %0" : "=m" (xnew_exc)); + stmxcsr_inline_asm (&xnew_exc); xnew_exc |= excepts << 7; - __asm__ ("%vldmxcsr %0" : : "m" (xnew_exc)); + ldmxcsr_inline_asm (&xnew_exc); } return old_exc; diff --git a/sysdeps/i386/fpu/feenablxcpt.c b/sysdeps/i386/fpu/feenablxcpt.c index fa1d82a4b6..01d19b8fce 100644 --- a/sysdeps/i386/fpu/feenablxcpt.c +++ b/sysdeps/i386/fpu/feenablxcpt.c @@ -19,6 +19,7 @@ #include #include #include +#include int feenableexcept (int excepts) @@ -41,11 +42,11 @@ feenableexcept (int excepts) unsigned int xnew_exc; /* Get the current control word. 
*/ - __asm__ ("%vstmxcsr %0" : "=m" (xnew_exc)); + stmxcsr_inline_asm (&xnew_exc); xnew_exc &= ~(excepts << 7); - __asm__ ("%vldmxcsr %0" : : "m" (xnew_exc)); + ldmxcsr_inline_asm (&xnew_exc); } return old_exc; diff --git a/sysdeps/i386/fpu/fegetenv.c b/sysdeps/i386/fpu/fegetenv.c index 5b35577151..9cfc884f3a 100644 --- a/sysdeps/i386/fpu/fegetenv.c +++ b/sysdeps/i386/fpu/fegetenv.c @@ -19,6 +19,7 @@ #include #include #include +#include int __fegetenv (fenv_t *envp) @@ -30,7 +31,7 @@ __fegetenv (fenv_t *envp) __asm__ ("fldenv %0" : : "m" (*envp)); if (CPU_FEATURE_USABLE (SSE)) - __asm__ ("%vstmxcsr %0" : "=m" (envp->__eip)); + stmxcsr_inline_asm (&envp->__eip); /* Success. */ return 0; diff --git a/sysdeps/i386/fpu/fegetmode.c b/sysdeps/i386/fpu/fegetmode.c index 8b109072f5..1d9577855f 100644 --- a/sysdeps/i386/fpu/fegetmode.c +++ b/sysdeps/i386/fpu/fegetmode.c @@ -20,12 +20,13 @@ #include #include #include +#include int fegetmode (femode_t *modep) { _FPU_GETCW (modep->__control_word); if (CPU_FEATURE_USABLE (SSE)) - __asm__ ("%vstmxcsr %0" : "=m" (modep->__mxcsr)); + stmxcsr_inline_asm (&modep->__mxcsr); return 0; } diff --git a/sysdeps/i386/fpu/feholdexcpt.c b/sysdeps/i386/fpu/feholdexcpt.c index f6f6b70dd4..02351ce4fc 100644 --- a/sysdeps/i386/fpu/feholdexcpt.c +++ b/sysdeps/i386/fpu/feholdexcpt.c @@ -19,6 +19,7 @@ #include #include #include +#include int __feholdexcept (fenv_t *envp) @@ -33,12 +34,12 @@ __feholdexcept (fenv_t *envp) unsigned int xwork; /* Get the current control word. */ - __asm__ ("%vstmxcsr %0" : "=m" (envp->__eip)); + stmxcsr_inline_asm (&envp->__eip); /* Set all exceptions to non-stop and clear them. 
*/ xwork = (envp->__eip | 0x1f80) & ~0x3f; - __asm__ ("%vldmxcsr %0" : : "m" (xwork)); + ldmxcsr_inline_asm (&xwork); } return 0; diff --git a/sysdeps/i386/fpu/fesetenv.c b/sysdeps/i386/fpu/fesetenv.c index e6b276a0fc..4fdd0f9e99 100644 --- a/sysdeps/i386/fpu/fesetenv.c +++ b/sysdeps/i386/fpu/fesetenv.c @@ -21,6 +21,7 @@ #include #include #include +#include /* All exceptions, including the x86-specific "denormal operand" @@ -80,7 +81,7 @@ __fesetenv (const fenv_t *envp) if (CPU_FEATURE_USABLE (SSE)) { unsigned int mxcsr; - __asm__ ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); if (envp == FE_DFL_ENV) { @@ -111,7 +112,7 @@ __fesetenv (const fenv_t *envp) else mxcsr = envp->__eip; - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } /* Success. */ diff --git a/sysdeps/i386/fpu/fesetexcept.c b/sysdeps/i386/fpu/fesetexcept.c index 876bde233f..3604bcfca7 100644 --- a/sysdeps/i386/fpu/fesetexcept.c +++ b/sysdeps/i386/fpu/fesetexcept.c @@ -18,6 +18,7 @@ #include #include +#include int fesetexcept (int excepts) @@ -31,15 +32,16 @@ fesetexcept (int excepts) if (CPU_FEATURE_USABLE (SSE)) { - /* Get the control word of the SSE unit. */ unsigned int mxcsr; - __asm__ ("%vstmxcsr %0" : "=m" (mxcsr)); + + /* Get the control word of the SSE unit. */ + stmxcsr_inline_asm (&mxcsr); /* Set relevant flags. */ mxcsr |= excepts; /* Put the new data in effect. */ - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } else { diff --git a/sysdeps/i386/fpu/fesetmode.c b/sysdeps/i386/fpu/fesetmode.c index ee61ca1cec..a68fb223e1 100644 --- a/sysdeps/i386/fpu/fesetmode.c +++ b/sysdeps/i386/fpu/fesetmode.c @@ -20,6 +20,7 @@ #include #include #include +#include /* All exceptions, including the x86-specific "denormal operand" exception. 
*/ @@ -37,7 +38,8 @@ fesetmode (const femode_t *modep) if (CPU_FEATURE_USABLE (SSE)) { unsigned int mxcsr; - __asm__ ("%vstmxcsr %0" : "=m" (mxcsr)); + + stmxcsr_inline_asm (&mxcsr); /* Preserve SSE exception flags but restore other state in MXCSR. */ mxcsr &= FE_ALL_EXCEPT_X86; @@ -47,7 +49,7 @@ fesetmode (const femode_t *modep) mxcsr |= FE_ALL_EXCEPT_X86 << 7; else mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86; - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } return 0; } diff --git a/sysdeps/i386/fpu/fesetround.c b/sysdeps/i386/fpu/fesetround.c index e87d794319..90fab76f78 100644 --- a/sysdeps/i386/fpu/fesetround.c +++ b/sysdeps/i386/fpu/fesetround.c @@ -19,6 +19,7 @@ #include #include #include +#include int __fesetround (int round) @@ -38,11 +39,10 @@ __fesetround (int round) if (CPU_FEATURE_USABLE (SSE)) { unsigned int xcw; - - __asm__ ("%vstmxcsr %0" : "=m" (xcw)); + stmxcsr_inline_asm (&xcw); xcw &= ~0x6000; xcw |= round << 3; - __asm__ ("%vldmxcsr %0" : : "m" (xcw)); + ldmxcsr_inline_asm (&xcw); } return 0; diff --git a/sysdeps/i386/fpu/feupdateenv.c b/sysdeps/i386/fpu/feupdateenv.c index 9e1ad97118..9544b86520 100644 --- a/sysdeps/i386/fpu/feupdateenv.c +++ b/sysdeps/i386/fpu/feupdateenv.c @@ -19,6 +19,7 @@ #include #include #include +#include int __feupdateenv (const fenv_t *envp) @@ -31,7 +32,7 @@ __feupdateenv (const fenv_t *envp) /* If the CPU supports SSE we test the MXCSR as well. */ if (CPU_FEATURE_USABLE (SSE)) - __asm__ ("%vstmxcsr %0" : "=m" (xtemp)); + stmxcsr_inline_asm (&xtemp); temp = (temp | xtemp) & FE_ALL_EXCEPT; diff --git a/sysdeps/i386/fpu/fgetexcptflg.c b/sysdeps/i386/fpu/fgetexcptflg.c index 36dd297cdc..ef8ec76c9b 100644 --- a/sysdeps/i386/fpu/fgetexcptflg.c +++ b/sysdeps/i386/fpu/fgetexcptflg.c @@ -19,6 +19,7 @@ #include #include #include +#include int @@ -34,10 +35,9 @@ __fegetexceptflag (fexcept_t *flagp, int excepts) /* If the CPU supports SSE, we clear the MXCSR as well. 
*/ if (CPU_FEATURE_USABLE (SSE)) { - unsigned int sse_exc; - /* Get the current MXCSR. */ - __asm__ ("%vstmxcsr %0" : "=m" (sse_exc)); + unsigned int sse_exc; + stmxcsr_inline_asm (&sse_exc); *flagp |= sse_exc & excepts & FE_ALL_EXCEPT; } diff --git a/sysdeps/i386/fpu/fsetexcptflg.c b/sysdeps/i386/fpu/fsetexcptflg.c index b78d1dcd3c..e386e8032b 100644 --- a/sysdeps/i386/fpu/fsetexcptflg.c +++ b/sysdeps/i386/fpu/fsetexcptflg.c @@ -18,6 +18,7 @@ #include #include +#include int __fesetexceptflag (const fexcept_t *flagp, int excepts) @@ -50,13 +51,13 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts) __asm__ ("fldenv %0" : : "m" (temp)); /* And now similarly for SSE. */ - __asm__ ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); /* Clear or set relevant flags. */ mxcsr ^= (mxcsr ^ *flagp) & excepts; /* Put the new data in effect. */ - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } else { diff --git a/sysdeps/i386/fpu/ftestexcept.c b/sysdeps/i386/fpu/ftestexcept.c index 51abfd3917..8fb239aeea 100644 --- a/sysdeps/i386/fpu/ftestexcept.c +++ b/sysdeps/i386/fpu/ftestexcept.c @@ -19,19 +19,20 @@ #include #include #include +#include int __fetestexcept (int excepts) { short temp; - int xtemp = 0; + unsigned int xtemp = 0; /* Get current exceptions. */ __asm__ ("fnstsw %0" : "=a" (temp)); /* If the CPU supports SSE we test the MXCSR as well. */ if (CPU_FEATURE_USABLE (SSE)) - __asm__ ("%vstmxcsr %0" : "=m" (xtemp)); + stmxcsr_inline_asm (&xtemp); return (temp | xtemp) & excepts & FE_ALL_EXCEPT; } diff --git a/sysdeps/i386/setfpucw.c b/sysdeps/i386/setfpucw.c index 8438c7ed75..baeddf9af5 100644 --- a/sysdeps/i386/setfpucw.c +++ b/sysdeps/i386/setfpucw.c @@ -21,6 +21,7 @@ #include #include #include +#include void __setfpucw (fpu_control_t set) @@ -40,14 +41,14 @@ __setfpucw (fpu_control_t set) /* If the CPU supports SSE, we set the MXCSR as well. */ if (CPU_FEATURE_USABLE (SSE)) { + /* Get the current MXCSR. 
*/ unsigned int xnew_exc; - /* Get the current MXCSR. */ - __asm__ ("%vstmxcsr %0" : "=m" (xnew_exc)); + stmxcsr_inline_asm (&xnew_exc); xnew_exc &= ~((0xc00 << 3) | (FE_ALL_EXCEPT << 7)); xnew_exc |= ((set & 0xc00) << 3) | ((set & FE_ALL_EXCEPT) << 7); - __asm__ ("%vldmxcsr %0" : : "m" (xnew_exc)); + ldmxcsr_inline_asm (&xnew_exc); } } diff --git a/sysdeps/x86/fpu/fenv_private.h b/sysdeps/x86/fpu/fenv_private.h index c9b573cacd..24a1741961 100644 --- a/sysdeps/x86/fpu/fenv_private.h +++ b/sysdeps/x86/fpu/fenv_private.h @@ -4,6 +4,7 @@ #include #include #include +#include /* This file is used by both the 32- and 64-bit ports. The 64-bit port has a field in the fenv_t for the mxcsr; the 32-bit port does not. @@ -22,10 +23,10 @@ static __always_inline void libc_feholdexcept_sse (fenv_t *e) { unsigned int mxcsr; - asm ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); e->__mxcsr = mxcsr; mxcsr = (mxcsr | 0x1f80) & ~0x3f; - asm volatile ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } static __always_inline void @@ -43,9 +44,9 @@ static __always_inline void libc_fesetround_sse (int r) { unsigned int mxcsr; - asm ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); mxcsr = (mxcsr & ~0x6000) | (r << 3); - asm volatile ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } static __always_inline void @@ -61,10 +62,10 @@ static __always_inline void libc_feholdexcept_setround_sse (fenv_t *e, int r) { unsigned int mxcsr; - asm ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); e->__mxcsr = mxcsr; mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); - asm volatile ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } /* Set both rounding mode and precision. 
A convenience function for use @@ -96,7 +97,7 @@ static __always_inline int libc_fetestexcept_sse (int e) { unsigned int mxcsr; - asm volatile ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); return mxcsr & e & FE_ALL_EXCEPT; } @@ -111,7 +112,7 @@ libc_fetestexcept_387 (int ex) static __always_inline void libc_fesetenv_sse (fenv_t *e) { - asm volatile ("%vldmxcsr %0" : : "m" (e->__mxcsr)); + ldmxcsr_inline_asm (&e->__mxcsr); } static __always_inline void @@ -129,13 +130,13 @@ static __always_inline int libc_feupdateenv_test_sse (fenv_t *e, int ex) { unsigned int mxcsr, old_mxcsr, cur_ex; - asm volatile ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); cur_ex = mxcsr & FE_ALL_EXCEPT; /* Merge current exceptions with the old environment. */ old_mxcsr = e->__mxcsr; mxcsr = old_mxcsr | cur_ex; - asm volatile ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); /* Raise SIGFPE for any new exceptions since the hold. Expect that the normal environment has all exceptions masked. 
*/ @@ -181,10 +182,10 @@ static __always_inline void libc_feholdsetround_sse (fenv_t *e, int r) { unsigned int mxcsr; - asm ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); e->__mxcsr = mxcsr; mxcsr = (mxcsr & ~0x6000) | (r << 3); - asm volatile ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } static __always_inline void @@ -215,9 +216,9 @@ static __always_inline void libc_feresetround_sse (fenv_t *e) { unsigned int mxcsr; - asm ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000); - asm volatile ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); } static __always_inline void @@ -307,13 +308,13 @@ static __always_inline void libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r) { unsigned int mxcsr, new_mxcsr; - asm ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); ctx->env.__mxcsr = mxcsr; if (__glibc_unlikely (mxcsr != new_mxcsr)) { - asm volatile ("%vldmxcsr %0" : : "m" (new_mxcsr)); + ldmxcsr_inline_asm (&new_mxcsr); ctx->updated_status = true; } else @@ -404,13 +405,13 @@ libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r) { unsigned int mxcsr, new_mxcsr; - asm ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); new_mxcsr = (mxcsr & ~0x6000) | (r << 3); ctx->env.__mxcsr = mxcsr; if (__glibc_unlikely (new_mxcsr != mxcsr)) { - asm volatile ("%vldmxcsr %0" : : "m" (new_mxcsr)); + ldmxcsr_inline_asm (&new_mxcsr); ctx->updated_status = true; } else diff --git a/sysdeps/x86/fpu/math-inline-asm.h b/sysdeps/x86/fpu/math-inline-asm.h new file mode 100644 index 0000000000..d4588979c0 --- /dev/null +++ b/sysdeps/x86/fpu/math-inline-asm.h @@ -0,0 +1,77 @@ +/* Math inline asm compat layer + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _MATH_INLINE_ASM +#define _MATH_INLINE_ASM + +#include + +/* clang does not support the %v modifier to select the AVX encoding, nor the '%d' asm + constraint, and for AVX builds it requires all 3 arguments. */ +#ifdef __clang__ +# if defined __AVX__ +# define VPREFIX "v" +# define VROUND_ARG ", %0" + # else + # define VPREFIX "" + # define VROUND_ARG "" + # endif + # define VARGPREFIX "%" +#else +# define VPREFIX "%v" +# define VARGPREFIX "%d" +# define VROUND_ARG "" +#endif + +__extern_always_inline double +trunc_inline_asm (double x) +{ + asm (VPREFIX "roundsd $11, " VARGPREFIX "1, %0" VROUND_ARG : "=v" (x) + : "v" (x)); + return x; +} + +__extern_always_inline float +truncf_inline_asm (float x) +{ + asm (VPREFIX "roundss $11, " VARGPREFIX "1, %0" VROUND_ARG : "=v" (x) + : "v" (x)); + return x; +} + +static __always_inline void +stmxcsr_inline_asm (unsigned int *mxcsr) +{ + asm volatile (VPREFIX "stmxcsr %0" : "=m" (*mxcsr)); +} + +static __always_inline void +ldmxcsr_inline_asm (unsigned int *mxcsr) +{ + asm volatile (VPREFIX "ldmxcsr %0" : : "m" (*mxcsr)); +} + +static __always_inline float +divss_inline_asm (float x, float y) +{ + asm volatile (VPREFIX "divss %1, " VARGPREFIX "0" VROUND_ARG + : "+x" (x) : "x" (y)); + return x; +} + +#endif diff --git a/sysdeps/x86/fpu/math_private.h b/sysdeps/x86/fpu/math_private.h index 
bba085a578..47de90bcec 100644 --- a/sysdeps/x86/fpu/math_private.h +++ b/sysdeps/x86/fpu/math_private.h @@ -20,8 +20,10 @@ #define X86_MATH_PRIVATE_H 1 #include +#include #include_next + __extern_always_inline long double __NTH (__ieee754_atan2l (long double y, long double x)) { @@ -36,8 +38,7 @@ __trunc (double x) #if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__ return trunc (x); #else - asm ("%vroundsd $11, %d1, %0" : "=v" (x) : "v" (x)); - return x; + return trunc_inline_asm (x); #endif } @@ -47,8 +48,7 @@ __truncf (float x) #if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__ return truncf (x); #else - asm ("%vroundss $11, %d1, %0" : "=v" (x) : "v" (x)); - return x; + return truncf_inline_asm (x); #endif } diff --git a/sysdeps/x86/fpu/sfp-machine.h b/sysdeps/x86/fpu/sfp-machine.h index 002fdb54dd..e30cbdb20b 100644 --- a/sysdeps/x86/fpu/sfp-machine.h +++ b/sysdeps/x86/fpu/sfp-machine.h @@ -1,6 +1,8 @@ /* Configure soft-fp for building sqrtf128. Based on sfp-machine.h in libgcc, with soft-float and other irrelevant parts removed. */ +#include + #if HAVE_X86_LIBGCC_CMP_RETURN_ATTR /* The type of the result of a floating point comparison. This must match `__libgcc_cmp_return__' in GCC for the target. 
*/ @@ -49,7 +51,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI))); # define FP_INIT_ROUNDMODE \ do { \ - __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (_fcw)); \ + stmxcsr_inline_asm (&_fcw); \ } while (0) #else # define _FP_W_TYPE_SIZE 32 diff --git a/sysdeps/x86/fpu/test-fenv-sse-2.c b/sysdeps/x86/fpu/test-fenv-sse-2.c index d12009bb81..0ab090c0ad 100644 --- a/sysdeps/x86/fpu/test-fenv-sse-2.c +++ b/sysdeps/x86/fpu/test-fenv-sse-2.c @@ -24,33 +24,22 @@ #include #include #include - -static uint32_t -get_sse_mxcsr (void) -{ - uint32_t temp; - __asm__ __volatile__ ("%vstmxcsr %0" : "=m" (temp)); - return temp; -} - -static void -set_sse_mxcsr (uint32_t val) -{ - __asm__ __volatile__ ("%vldmxcsr %0" : : "m" (val)); -} +#include static void set_sse_mxcsr_bits (uint32_t mask, uint32_t bits) { - uint32_t mxcsr = get_sse_mxcsr (); + uint32_t mxcsr; + stmxcsr_inline_asm (&mxcsr); mxcsr = (mxcsr & ~mask) | bits; - set_sse_mxcsr (mxcsr); + ldmxcsr_inline_asm (&mxcsr); } static int test_sse_mxcsr_bits (const char *test, uint32_t mask, uint32_t bits) { - uint32_t mxcsr = get_sse_mxcsr (); + uint32_t mxcsr; + stmxcsr_inline_asm (&mxcsr); printf ("Testing %s: mxcsr = %x\n", test, mxcsr); if ((mxcsr & mask) == bits) { diff --git a/sysdeps/x86_64/fpu/fclrexcpt.c b/sysdeps/x86_64/fpu/fclrexcpt.c index 86b4228f2f..82029af7f6 100644 --- a/sysdeps/x86_64/fpu/fclrexcpt.c +++ b/sysdeps/x86_64/fpu/fclrexcpt.c @@ -17,6 +17,7 @@ . */ #include +#include int __feclearexcept (int excepts) @@ -38,13 +39,13 @@ __feclearexcept (int excepts) __asm__ ("fldenv %0" : : "m" (temp)); /* And the same procedure for SSE. */ - __asm__ ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); /* Clear the relevant bits. */ mxcsr &= ~excepts; /* And put them into effect. */ - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); /* Success. 
*/ return 0; diff --git a/sysdeps/x86_64/fpu/fedisblxcpt.c b/sysdeps/x86_64/fpu/fedisblxcpt.c index dab9ad19c2..759cf85e6f 100644 --- a/sysdeps/x86_64/fpu/fedisblxcpt.c +++ b/sysdeps/x86_64/fpu/fedisblxcpt.c @@ -17,6 +17,7 @@ . */ #include +#include int fedisableexcept (int excepts) @@ -35,11 +36,11 @@ fedisableexcept (int excepts) __asm__ ("fldcw %0" : : "m" (new_exc)); /* And now the same for the SSE MXCSR register. */ - __asm__ ("%vstmxcsr %0" : "=m" (new)); + stmxcsr_inline_asm (&new); /* The SSE exception masks are shifted by 7 bits. */ new |= excepts << 7; - __asm__ ("%vldmxcsr %0" : : "m" (new)); + ldmxcsr_inline_asm (&new); return old_exc; } diff --git a/sysdeps/x86_64/fpu/feenablxcpt.c b/sysdeps/x86_64/fpu/feenablxcpt.c index 828b2b247a..4cf04b35bf 100644 --- a/sysdeps/x86_64/fpu/feenablxcpt.c +++ b/sysdeps/x86_64/fpu/feenablxcpt.c @@ -17,6 +17,7 @@ . */ #include +#include int feenableexcept (int excepts) @@ -35,11 +36,11 @@ feenableexcept (int excepts) __asm__ ("fldcw %0" : : "m" (new_exc)); /* And now the same for the SSE MXCSR register. */ - __asm__ ("%vstmxcsr %0" : "=m" (new)); + stmxcsr_inline_asm (&new); /* The SSE exception masks are shifted by 7 bits. */ new &= ~(excepts << 7); - __asm__ ("%vldmxcsr %0" : : "m" (new)); + ldmxcsr_inline_asm (&new); return old_exc; } diff --git a/sysdeps/x86_64/fpu/fegetenv.c b/sysdeps/x86_64/fpu/fegetenv.c index eea9d6bee7..a2523e0978 100644 --- a/sysdeps/x86_64/fpu/fegetenv.c +++ b/sysdeps/x86_64/fpu/fegetenv.c @@ -17,15 +17,17 @@ . */ #include +#include int __fegetenv (fenv_t *envp) { - __asm__ ("fnstenv %0\n" - /* fnstenv changes the exception mask, so load back the - stored environment. */ - "fldenv %0\n" - "%vstmxcsr %1" : "=m" (*envp), "=m" (envp->__mxcsr)); + asm volatile ("fnstenv %0\n" + /* fnstenv changes the exception mask, so load back the + stored environment. */ + "fldenv %0" + : "=m" (*envp)); + stmxcsr_inline_asm (&envp->__mxcsr); /* Success. 
*/ return 0; diff --git a/sysdeps/x86_64/fpu/fegetmode.c b/sysdeps/x86_64/fpu/fegetmode.c index 39d124a6d8..fc75d087fd 100644 --- a/sysdeps/x86_64/fpu/fegetmode.c +++ b/sysdeps/x86_64/fpu/fegetmode.c @@ -18,11 +18,12 @@ #include #include +#include int fegetmode (femode_t *modep) { _FPU_GETCW (modep->__control_word); - __asm__ ("%vstmxcsr %0" : "=m" (modep->__mxcsr)); + stmxcsr_inline_asm (&modep->__mxcsr); return 0; } diff --git a/sysdeps/x86_64/fpu/feholdexcpt.c b/sysdeps/x86_64/fpu/feholdexcpt.c index 9a22a2ea77..74314b0fc9 100644 --- a/sysdeps/x86_64/fpu/feholdexcpt.c +++ b/sysdeps/x86_64/fpu/feholdexcpt.c @@ -17,6 +17,7 @@ . */ #include +#include int __feholdexcept (fenv_t *envp) @@ -25,14 +26,13 @@ __feholdexcept (fenv_t *envp) /* Store the environment. Recall that fnstenv has a side effect of masking all exceptions. Then clear all exceptions. */ - __asm__ ("fnstenv %0\n\t" - "%vstmxcsr %1\n\t" - "fnclex" - : "=m" (*envp), "=m" (envp->__mxcsr)); + asm volatile ("fnstenv %0" : "=m" (*envp)); + stmxcsr_inline_asm (&envp->__mxcsr); + asm volatile ("fnclex" : "=m" (*envp)); /* Set the SSE MXCSR register. */ mxcsr = (envp->__mxcsr | 0x1f80) & ~0x3f; - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); return 0; } diff --git a/sysdeps/x86_64/fpu/fesetenv.c b/sysdeps/x86_64/fpu/fesetenv.c index e4e721afff..9cf5f889ed 100644 --- a/sysdeps/x86_64/fpu/fesetenv.c +++ b/sysdeps/x86_64/fpu/fesetenv.c @@ -17,6 +17,7 @@ . */ #include +#include #include #include @@ -35,8 +36,8 @@ __fesetenv (const fenv_t *envp) values which we do not want to come from the saved environment. Therefore, we get the current environment and replace the values we want to use from the environment specified by the parameter. 
*/ - __asm__ ("fnstenv %0\n" - "%vstmxcsr %1" : "=m" (temp), "=m" (temp.__mxcsr)); + asm volatile ("fnstenv %0" : "=m" (temp)); + stmxcsr_inline_asm (&temp.__mxcsr); if (envp == FE_DFL_ENV) { @@ -103,8 +104,8 @@ __fesetenv (const fenv_t *envp) temp.__mxcsr = envp->__mxcsr; } - __asm__ ("fldenv %0\n" - "%vldmxcsr %1" : : "m" (temp), "m" (temp.__mxcsr)); + asm volatile ("fldenv %0" : : "m" (temp)); + ldmxcsr_inline_asm (&temp.__mxcsr); /* Success. */ return 0; diff --git a/sysdeps/x86_64/fpu/fesetexcept.c b/sysdeps/x86_64/fpu/fesetexcept.c index 91d5270f8e..39a9f75ddc 100644 --- a/sysdeps/x86_64/fpu/fesetexcept.c +++ b/sysdeps/x86_64/fpu/fesetexcept.c @@ -17,15 +17,15 @@ . */ #include +#include int fesetexcept (int excepts) { unsigned int mxcsr; - - __asm__ ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); mxcsr |= excepts & FE_ALL_EXCEPT; - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); return 0; } diff --git a/sysdeps/x86_64/fpu/fesetmode.c b/sysdeps/x86_64/fpu/fesetmode.c index 2b35d7e719..5959218538 100644 --- a/sysdeps/x86_64/fpu/fesetmode.c +++ b/sysdeps/x86_64/fpu/fesetmode.c @@ -17,6 +17,7 @@ . */ #include +#include #include /* All exceptions, including the x86-specific "denormal operand" @@ -28,7 +29,8 @@ fesetmode (const femode_t *modep) { fpu_control_t cw; unsigned int mxcsr; - __asm__ ("%vstmxcsr %0" : "=m" (mxcsr)); + + stmxcsr_inline_asm (&mxcsr); /* Preserve SSE exception flags but restore other state in MXCSR. */ mxcsr &= FE_ALL_EXCEPT_X86; @@ -45,6 +47,6 @@ fesetmode (const femode_t *modep) mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86; } _FPU_SETCW (cw); - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); return 0; } diff --git a/sysdeps/x86_64/fpu/fesetround.c b/sysdeps/x86_64/fpu/fesetround.c index e1ffb3b7a9..e06225213a 100644 --- a/sysdeps/x86_64/fpu/fesetround.c +++ b/sysdeps/x86_64/fpu/fesetround.c @@ -17,12 +17,13 @@ . 
*/ #include +#include int __fesetround (int round) { unsigned short int cw; - int mxcsr; + unsigned int mxcsr; if ((round & ~0xc00) != 0) /* ROUND is no valid rounding mode. */ @@ -36,10 +37,10 @@ __fesetround (int round) /* And now the MSCSR register for SSE, the precision is at different bit positions in the different units, we need to shift it 3 bits. */ - asm ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); mxcsr &= ~ 0x6000; mxcsr |= round << 3; - asm ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); return 0; } diff --git a/sysdeps/x86_64/fpu/feupdateenv.c b/sysdeps/x86_64/fpu/feupdateenv.c index 0e26b92af5..ca946dc744 100644 --- a/sysdeps/x86_64/fpu/feupdateenv.c +++ b/sysdeps/x86_64/fpu/feupdateenv.c @@ -17,6 +17,7 @@ . */ #include +#include int __feupdateenv (const fenv_t *envp) @@ -25,7 +26,8 @@ __feupdateenv (const fenv_t *envp) unsigned int xtemp; /* Save current exceptions. */ - __asm__ ("fnstsw %0\n\t%vstmxcsr %1" : "=m" (temp), "=m" (xtemp)); + asm volatile ("fnstsw %0" : "=m" (temp)); + stmxcsr_inline_asm (&xtemp); temp = (temp | xtemp) & FE_ALL_EXCEPT; /* Install new environment. */ diff --git a/sysdeps/x86_64/fpu/fgetexcptflg.c b/sysdeps/x86_64/fpu/fgetexcptflg.c index a7b500b600..b52ba851d7 100644 --- a/sysdeps/x86_64/fpu/fgetexcptflg.c +++ b/sysdeps/x86_64/fpu/fgetexcptflg.c @@ -17,6 +17,7 @@ . */ #include +#include int fegetexceptflag (fexcept_t *flagp, int excepts) @@ -25,8 +26,8 @@ fegetexceptflag (fexcept_t *flagp, int excepts) unsigned int mxscr; /* Get the current exceptions for the x87 FPU and SSE unit. */ - __asm__ ("fnstsw %0\n" - "%vstmxcsr %1" : "=m" (temp), "=m" (mxscr)); + __asm__ ("fnstsw %0" : "=m" (temp)); + stmxcsr_inline_asm (&mxscr); *flagp = (temp | mxscr) & FE_ALL_EXCEPT & excepts; diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c index da3a31f4c2..05474bec13 100644 --- a/sysdeps/x86_64/fpu/fraiseexcpt.c +++ b/sysdeps/x86_64/fpu/fraiseexcpt.c @@ -17,6 +17,7 @@ . 
*/ #include +#include #include int @@ -29,23 +30,12 @@ __feraiseexcept (int excepts) /* First: invalid exception. */ if ((FE_INVALID & excepts) != 0) - { - /* One example of an invalid operation is 0.0 / 0.0. */ - float f = 0.0; - - __asm__ __volatile__ ("%vdivss %0, %d0 " : "+x" (f)); - (void) &f; - } + /* One example of an invalid operation is 0.0 / 0.0. */ + divss_inline_asm (0.0f, 0.0f); /* Next: division by zero. */ if ((FE_DIVBYZERO & excepts) != 0) - { - float f = 1.0; - float g = 0.0; - - __asm__ __volatile__ ("%vdivss %1, %d0" : "+x" (f) : "x" (g)); - (void) &f; - } + divss_inline_asm (1.0f, 0.0f); /* Next: overflow. */ if ((FE_OVERFLOW & excepts) != 0) diff --git a/sysdeps/x86_64/fpu/fsetexcptflg.c b/sysdeps/x86_64/fpu/fsetexcptflg.c index 34ea24c061..8641557323 100644 --- a/sysdeps/x86_64/fpu/fsetexcptflg.c +++ b/sysdeps/x86_64/fpu/fsetexcptflg.c @@ -17,6 +17,7 @@ . */ #include +#include #include int @@ -44,13 +45,13 @@ fesetexceptflag (const fexcept_t *flagp, int excepts) __asm__ ("fldenv %0" : : "m" (temp)); /* And now similarly for SSE. */ - __asm__ ("%vstmxcsr %0" : "=m" (mxcsr)); + stmxcsr_inline_asm (&mxcsr); /* Clear or set relevant flags. */ mxcsr ^= (mxcsr ^ *flagp) & excepts; /* Put the new data in effect. */ - __asm__ ("%vldmxcsr %0" : : "m" (mxcsr)); + ldmxcsr_inline_asm (&mxcsr); /* Success. */ return 0; diff --git a/sysdeps/x86_64/fpu/ftestexcept.c b/sysdeps/x86_64/fpu/ftestexcept.c index 39df30fbd2..5e7054edcb 100644 --- a/sysdeps/x86_64/fpu/ftestexcept.c +++ b/sysdeps/x86_64/fpu/ftestexcept.c @@ -17,6 +17,7 @@ . */ #include +#include int __fetestexcept (int excepts) @@ -25,8 +26,8 @@ __fetestexcept (int excepts) unsigned int mxscr; /* Get current exceptions. */ - __asm__ ("fnstsw %0\n" - "%vstmxcsr %1" : "=m" (temp), "=m" (mxscr)); + asm volatile ("fnstsw %0" : "=m" (temp)); + stmxcsr_inline_asm (&mxscr); return (temp | mxscr) & excepts & FE_ALL_EXCEPT; }