x86: Adapt "%v" usage on clang to emit VEX encoding

clang does not support the "%v" modifier to select the AVX encoding, nor the
'%d' asm constraint modifier, and for AVX builds it requires all 3 arguments.

This patch adds a new internal header, math-inline-asm.h, which provides
functions that abstract the inline-asm differences required between
gcc and clang.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
Adhemerval Zanella 2025-10-31 17:00:46 -03:00
parent d25db12c2a
commit 427c25278d
35 changed files with 210 additions and 121 deletions

View File

@ -19,6 +19,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
__feclearexcept (int excepts)
@ -44,13 +45,13 @@ __feclearexcept (int excepts)
unsigned int xnew_exc;
/* Get the current MXCSR. */
__asm__ ("%vstmxcsr %0" : "=m" (xnew_exc));
stmxcsr_inline_asm (&xnew_exc);
/* Clear the relevant bits. */
xnew_exc &= ~excepts;
/* Put the new data in effect. */
__asm__ ("%vldmxcsr %0" : : "m" (xnew_exc));
ldmxcsr_inline_asm (&xnew_exc);
}
/* Success. */

View File

@ -19,6 +19,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
fedisableexcept (int excepts)
@ -41,11 +42,11 @@ fedisableexcept (int excepts)
unsigned int xnew_exc;
/* Get the current control word. */
__asm__ ("%vstmxcsr %0" : "=m" (xnew_exc));
stmxcsr_inline_asm (&xnew_exc);
xnew_exc |= excepts << 7;
__asm__ ("%vldmxcsr %0" : : "m" (xnew_exc));
ldmxcsr_inline_asm (&xnew_exc);
}
return old_exc;

View File

@ -19,6 +19,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
feenableexcept (int excepts)
@ -41,11 +42,11 @@ feenableexcept (int excepts)
unsigned int xnew_exc;
/* Get the current control word. */
__asm__ ("%vstmxcsr %0" : "=m" (xnew_exc));
stmxcsr_inline_asm (&xnew_exc);
xnew_exc &= ~(excepts << 7);
__asm__ ("%vldmxcsr %0" : : "m" (xnew_exc));
ldmxcsr_inline_asm (&xnew_exc);
}
return old_exc;

View File

@ -19,6 +19,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
__fegetenv (fenv_t *envp)
@ -30,7 +31,7 @@ __fegetenv (fenv_t *envp)
__asm__ ("fldenv %0" : : "m" (*envp));
if (CPU_FEATURE_USABLE (SSE))
__asm__ ("%vstmxcsr %0" : "=m" (envp->__eip));
stmxcsr_inline_asm (&envp->__eip);
/* Success. */
return 0;

View File

@ -20,12 +20,13 @@
#include <fpu_control.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
fegetmode (femode_t *modep)
{
_FPU_GETCW (modep->__control_word);
if (CPU_FEATURE_USABLE (SSE))
__asm__ ("%vstmxcsr %0" : "=m" (modep->__mxcsr));
stmxcsr_inline_asm (&modep->__mxcsr);
return 0;
}

View File

@ -19,6 +19,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
__feholdexcept (fenv_t *envp)
@ -33,12 +34,12 @@ __feholdexcept (fenv_t *envp)
unsigned int xwork;
/* Get the current control word. */
__asm__ ("%vstmxcsr %0" : "=m" (envp->__eip));
stmxcsr_inline_asm (&envp->__eip);
/* Set all exceptions to non-stop and clear them. */
xwork = (envp->__eip | 0x1f80) & ~0x3f;
__asm__ ("%vldmxcsr %0" : : "m" (xwork));
ldmxcsr_inline_asm (&xwork);
}
return 0;

View File

@ -21,6 +21,7 @@
#include <assert.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
/* All exceptions, including the x86-specific "denormal operand"
@ -80,7 +81,7 @@ __fesetenv (const fenv_t *envp)
if (CPU_FEATURE_USABLE (SSE))
{
unsigned int mxcsr;
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
if (envp == FE_DFL_ENV)
{
@ -111,7 +112,7 @@ __fesetenv (const fenv_t *envp)
else
mxcsr = envp->__eip;
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
/* Success. */

View File

@ -18,6 +18,7 @@
#include <fenv.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
fesetexcept (int excepts)
@ -31,15 +32,16 @@ fesetexcept (int excepts)
if (CPU_FEATURE_USABLE (SSE))
{
/* Get the control word of the SSE unit. */
unsigned int mxcsr;
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
/* Get the control word of the SSE unit. */
stmxcsr_inline_asm (&mxcsr);
/* Set relevant flags. */
mxcsr |= excepts;
/* Put the new data in effect. */
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
else
{

View File

@ -20,6 +20,7 @@
#include <fpu_control.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
/* All exceptions, including the x86-specific "denormal operand"
exception. */
@ -37,7 +38,8 @@ fesetmode (const femode_t *modep)
if (CPU_FEATURE_USABLE (SSE))
{
unsigned int mxcsr;
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
/* Preserve SSE exception flags but restore other state in
MXCSR. */
mxcsr &= FE_ALL_EXCEPT_X86;
@ -47,7 +49,7 @@ fesetmode (const femode_t *modep)
mxcsr |= FE_ALL_EXCEPT_X86 << 7;
else
mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86;
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
return 0;
}

View File

@ -19,6 +19,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
__fesetround (int round)
@ -38,11 +39,10 @@ __fesetround (int round)
if (CPU_FEATURE_USABLE (SSE))
{
unsigned int xcw;
__asm__ ("%vstmxcsr %0" : "=m" (xcw));
stmxcsr_inline_asm (&xcw);
xcw &= ~0x6000;
xcw |= round << 3;
__asm__ ("%vldmxcsr %0" : : "m" (xcw));
ldmxcsr_inline_asm (&xcw);
}
return 0;

View File

@ -19,6 +19,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
__feupdateenv (const fenv_t *envp)
@ -31,7 +32,7 @@ __feupdateenv (const fenv_t *envp)
/* If the CPU supports SSE we test the MXCSR as well. */
if (CPU_FEATURE_USABLE (SSE))
__asm__ ("%vstmxcsr %0" : "=m" (xtemp));
stmxcsr_inline_asm (&xtemp);
temp = (temp | xtemp) & FE_ALL_EXCEPT;

View File

@ -19,6 +19,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
@ -34,10 +35,9 @@ __fegetexceptflag (fexcept_t *flagp, int excepts)
/* If the CPU supports SSE, we clear the MXCSR as well. */
if (CPU_FEATURE_USABLE (SSE))
{
unsigned int sse_exc;
/* Get the current MXCSR. */
__asm__ ("%vstmxcsr %0" : "=m" (sse_exc));
unsigned int sse_exc;
stmxcsr_inline_asm (&sse_exc);
*flagp |= sse_exc & excepts & FE_ALL_EXCEPT;
}

View File

@ -18,6 +18,7 @@
#include <fenv.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
__fesetexceptflag (const fexcept_t *flagp, int excepts)
@ -50,13 +51,13 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
__asm__ ("fldenv %0" : : "m" (temp));
/* And now similarly for SSE. */
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
/* Clear or set relevant flags. */
mxcsr ^= (mxcsr ^ *flagp) & excepts;
/* Put the new data in effect. */
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
else
{

View File

@ -19,19 +19,20 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
int
__fetestexcept (int excepts)
{
short temp;
int xtemp = 0;
unsigned int xtemp = 0;
/* Get current exceptions. */
__asm__ ("fnstsw %0" : "=a" (temp));
/* If the CPU supports SSE we test the MXCSR as well. */
if (CPU_FEATURE_USABLE (SSE))
__asm__ ("%vstmxcsr %0" : "=m" (xtemp));
stmxcsr_inline_asm (&xtemp);
return (temp | xtemp) & excepts & FE_ALL_EXCEPT;
}

View File

@ -21,6 +21,7 @@
#include <fenv.h>
#include <unistd.h>
#include <ldsodefs.h>
#include <math-inline-asm.h>
void
__setfpucw (fpu_control_t set)
@ -40,14 +41,14 @@ __setfpucw (fpu_control_t set)
/* If the CPU supports SSE, we set the MXCSR as well. */
if (CPU_FEATURE_USABLE (SSE))
{
/* Get the current MXCSR. */
unsigned int xnew_exc;
/* Get the current MXCSR. */
__asm__ ("%vstmxcsr %0" : "=m" (xnew_exc));
stmxcsr_inline_asm (&xnew_exc);
xnew_exc &= ~((0xc00 << 3) | (FE_ALL_EXCEPT << 7));
xnew_exc |= ((set & 0xc00) << 3) | ((set & FE_ALL_EXCEPT) << 7);
__asm__ ("%vldmxcsr %0" : : "m" (xnew_exc));
ldmxcsr_inline_asm (&xnew_exc);
}
}

View File

@ -4,6 +4,7 @@
#include <bits/floatn.h>
#include <fenv.h>
#include <fpu_control.h>
#include <math-inline-asm.h>
/* This file is used by both the 32- and 64-bit ports. The 64-bit port
has a field in the fenv_t for the mxcsr; the 32-bit port does not.
@ -22,10 +23,10 @@ static __always_inline void
libc_feholdexcept_sse (fenv_t *e)
{
unsigned int mxcsr;
asm ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
e->__mxcsr = mxcsr;
mxcsr = (mxcsr | 0x1f80) & ~0x3f;
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
static __always_inline void
@ -43,9 +44,9 @@ static __always_inline void
libc_fesetround_sse (int r)
{
unsigned int mxcsr;
asm ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
mxcsr = (mxcsr & ~0x6000) | (r << 3);
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
static __always_inline void
@ -61,10 +62,10 @@ static __always_inline void
libc_feholdexcept_setround_sse (fenv_t *e, int r)
{
unsigned int mxcsr;
asm ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
e->__mxcsr = mxcsr;
mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
/* Set both rounding mode and precision. A convenience function for use
@ -96,7 +97,7 @@ static __always_inline int
libc_fetestexcept_sse (int e)
{
unsigned int mxcsr;
asm volatile ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
return mxcsr & e & FE_ALL_EXCEPT;
}
@ -111,7 +112,7 @@ libc_fetestexcept_387 (int ex)
static __always_inline void
libc_fesetenv_sse (fenv_t *e)
{
asm volatile ("%vldmxcsr %0" : : "m" (e->__mxcsr));
ldmxcsr_inline_asm (&e->__mxcsr);
}
static __always_inline void
@ -129,13 +130,13 @@ static __always_inline int
libc_feupdateenv_test_sse (fenv_t *e, int ex)
{
unsigned int mxcsr, old_mxcsr, cur_ex;
asm volatile ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
cur_ex = mxcsr & FE_ALL_EXCEPT;
/* Merge current exceptions with the old environment. */
old_mxcsr = e->__mxcsr;
mxcsr = old_mxcsr | cur_ex;
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
/* Raise SIGFPE for any new exceptions since the hold. Expect that
the normal environment has all exceptions masked. */
@ -181,10 +182,10 @@ static __always_inline void
libc_feholdsetround_sse (fenv_t *e, int r)
{
unsigned int mxcsr;
asm ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
e->__mxcsr = mxcsr;
mxcsr = (mxcsr & ~0x6000) | (r << 3);
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
static __always_inline void
@ -215,9 +216,9 @@ static __always_inline void
libc_feresetround_sse (fenv_t *e)
{
unsigned int mxcsr;
asm ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
asm volatile ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
}
static __always_inline void
@ -307,13 +308,13 @@ static __always_inline void
libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r)
{
unsigned int mxcsr, new_mxcsr;
asm ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
ctx->env.__mxcsr = mxcsr;
if (__glibc_unlikely (mxcsr != new_mxcsr))
{
asm volatile ("%vldmxcsr %0" : : "m" (new_mxcsr));
ldmxcsr_inline_asm (&new_mxcsr);
ctx->updated_status = true;
}
else
@ -404,13 +405,13 @@ libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r)
{
unsigned int mxcsr, new_mxcsr;
asm ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
new_mxcsr = (mxcsr & ~0x6000) | (r << 3);
ctx->env.__mxcsr = mxcsr;
if (__glibc_unlikely (new_mxcsr != mxcsr))
{
asm volatile ("%vldmxcsr %0" : : "m" (new_mxcsr));
ldmxcsr_inline_asm (&new_mxcsr);
ctx->updated_status = true;
}
else

View File

@ -0,0 +1,77 @@
/* Math inline asm compat layer
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef _MATH_INLINE_ASM
#define _MATH_INLINE_ASM
#include <sys/cdefs.h>
/* clang does not support the "%v" modifier to select the AVX encoding, nor
   the '%d' asm constraint modifier, and for AVX builds it requires all 3
   arguments.  */
#ifdef __clang__
#if defined __AVX__
# define VPREFIX "v"
# define VROUND_ARG ", %0"
# else
# define VPREFIX ""
# define VROUND_ARG ""
# endif
# define VARGPREFIX "%"
#else
# define VPREFIX "%v"
# define VARGPREFIX "%d"
# define VROUND_ARG ""
#endif
/* Truncate X toward zero with the SSE4.1 roundsd instruction.  The
   immediate 11 (0b1011) selects round-toward-zero and suppresses the
   inexact exception.  VPREFIX/VARGPREFIX/VROUND_ARG (defined above)
   expand the template to either the legacy-SSE form (gcc "%v"/"%d"
   modifiers) or the explicit 3-operand VEX form that clang requires.  */
__extern_always_inline double
trunc_inline_asm (double x)
{
asm (VPREFIX "roundsd $11, " VARGPREFIX "1, %0" VROUND_ARG : "=v" (x)
: "v" (x));
return x;
}
/* Single-precision counterpart of trunc_inline_asm: truncate X toward
   zero with roundss (immediate 11 = round-toward-zero, suppress the
   inexact exception), using the compiler-appropriate encoding selected
   by the VPREFIX/VARGPREFIX/VROUND_ARG macros above.  */
__extern_always_inline float
truncf_inline_asm (float x)
{
asm (VPREFIX "roundss $11, " VARGPREFIX "1, %0" VROUND_ARG : "=v" (x)
: "v" (x));
return x;
}
/* Store the SSE MXCSR control/status register into *MXCSR, emitting
   either stmxcsr or the VEX-encoded vstmxcsr depending on VPREFIX.
   volatile: reading MXCSR must not be reordered or elided, since callers
   pair it with a later ldmxcsr_inline_asm to update the register.  */
static __always_inline void
stmxcsr_inline_asm (unsigned int *mxcsr)
{
asm volatile (VPREFIX "stmxcsr %0" : "=m" (*mxcsr));
}
/* Load *MXCSR into the SSE MXCSR control/status register, emitting
   either ldmxcsr or the VEX-encoded vldmxcsr depending on VPREFIX.
   volatile: the write changes FP state (rounding mode, exception masks
   and flags), a side effect the compiler cannot see.  */
static __always_inline void
ldmxcsr_inline_asm (unsigned int *mxcsr)
{
asm volatile (VPREFIX "ldmxcsr %0" : : "m" (*mxcsr));
}
/* Compute X / Y with divss and return the quotient.  Used by
   __feraiseexcept to raise SSE exceptions as a side effect
   (0.0/0.0 for FE_INVALID, 1.0/0.0 for FE_DIVBYZERO), hence the
   volatile so the division is not optimized away even when the
   result is unused.  The macros above select the legacy or the
   3-operand VEX encoding as the compiler requires.  */
static __always_inline float
divss_inline_asm (float x, float y)
{
asm volatile (VPREFIX "divss %1, " VARGPREFIX "0" VROUND_ARG
: "+x" (x) : "x" (y));
return x;
}
#endif

View File

@ -20,8 +20,10 @@
#define X86_MATH_PRIVATE_H 1
#include <math.h>
#include <math-inline-asm.h>
#include_next <math_private.h>
__extern_always_inline long double
__NTH (__ieee754_atan2l (long double y, long double x))
{
@ -36,8 +38,7 @@ __trunc (double x)
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
return trunc (x);
#else
asm ("%vroundsd $11, %d1, %0" : "=v" (x) : "v" (x));
return x;
return trunc_inline_asm (x);
#endif
}
@ -47,8 +48,7 @@ __truncf (float x)
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
return truncf (x);
#else
asm ("%vroundss $11, %d1, %0" : "=v" (x) : "v" (x));
return x;
return truncf_inline_asm (x);
#endif
}

View File

@ -1,6 +1,8 @@
/* Configure soft-fp for building sqrtf128. Based on sfp-machine.h in
libgcc, with soft-float and other irrelevant parts removed. */
#include <math-inline-asm.h>
#if HAVE_X86_LIBGCC_CMP_RETURN_ATTR
/* The type of the result of a floating point comparison. This must
match `__libgcc_cmp_return__' in GCC for the target. */
@ -49,7 +51,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
# define FP_INIT_ROUNDMODE \
do { \
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (_fcw)); \
stmxcsr_inline_asm (&_fcw); \
} while (0)
#else
# define _FP_W_TYPE_SIZE 32

View File

@ -24,33 +24,22 @@
#include <stdio.h>
#include <cpu-features.h>
#include <support/check.h>
static uint32_t
get_sse_mxcsr (void)
{
uint32_t temp;
__asm__ __volatile__ ("%vstmxcsr %0" : "=m" (temp));
return temp;
}
static void
set_sse_mxcsr (uint32_t val)
{
__asm__ __volatile__ ("%vldmxcsr %0" : : "m" (val));
}
#include <math-inline-asm.h>
static void
set_sse_mxcsr_bits (uint32_t mask, uint32_t bits)
{
uint32_t mxcsr = get_sse_mxcsr ();
uint32_t mxcsr;
stmxcsr_inline_asm (&mxcsr);
mxcsr = (mxcsr & ~mask) | bits;
set_sse_mxcsr (mxcsr);
ldmxcsr_inline_asm (&mxcsr);
}
static int
test_sse_mxcsr_bits (const char *test, uint32_t mask, uint32_t bits)
{
uint32_t mxcsr = get_sse_mxcsr ();
uint32_t mxcsr;
stmxcsr_inline_asm (&mxcsr);
printf ("Testing %s: mxcsr = %x\n", test, mxcsr);
if ((mxcsr & mask) == bits)
{

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
__feclearexcept (int excepts)
@ -38,13 +39,13 @@ __feclearexcept (int excepts)
__asm__ ("fldenv %0" : : "m" (temp));
/* And the same procedure for SSE. */
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
/* Clear the relevant bits. */
mxcsr &= ~excepts;
/* And put them into effect. */
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
/* Success. */
return 0;

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
fedisableexcept (int excepts)
@ -35,11 +36,11 @@ fedisableexcept (int excepts)
__asm__ ("fldcw %0" : : "m" (new_exc));
/* And now the same for the SSE MXCSR register. */
__asm__ ("%vstmxcsr %0" : "=m" (new));
stmxcsr_inline_asm (&new);
/* The SSE exception masks are shifted by 7 bits. */
new |= excepts << 7;
__asm__ ("%vldmxcsr %0" : : "m" (new));
ldmxcsr_inline_asm (&new);
return old_exc;
}

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
feenableexcept (int excepts)
@ -35,11 +36,11 @@ feenableexcept (int excepts)
__asm__ ("fldcw %0" : : "m" (new_exc));
/* And now the same for the SSE MXCSR register. */
__asm__ ("%vstmxcsr %0" : "=m" (new));
stmxcsr_inline_asm (&new);
/* The SSE exception masks are shifted by 7 bits. */
new &= ~(excepts << 7);
__asm__ ("%vldmxcsr %0" : : "m" (new));
ldmxcsr_inline_asm (&new);
return old_exc;
}

View File

@ -17,15 +17,17 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
__fegetenv (fenv_t *envp)
{
__asm__ ("fnstenv %0\n"
/* fnstenv changes the exception mask, so load back the
stored environment. */
"fldenv %0\n"
"%vstmxcsr %1" : "=m" (*envp), "=m" (envp->__mxcsr));
asm volatile ("fnstenv %0\n"
/* fnstenv changes the exception mask, so load back the
stored environment. */
"fldenv %0"
: "=m" (*envp));
stmxcsr_inline_asm (&envp->__mxcsr);
/* Success. */
return 0;

View File

@ -18,11 +18,12 @@
#include <fenv.h>
#include <fpu_control.h>
#include <math-inline-asm.h>
int
fegetmode (femode_t *modep)
{
_FPU_GETCW (modep->__control_word);
__asm__ ("%vstmxcsr %0" : "=m" (modep->__mxcsr));
stmxcsr_inline_asm (&modep->__mxcsr);
return 0;
}

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
__feholdexcept (fenv_t *envp)
@ -25,14 +26,13 @@ __feholdexcept (fenv_t *envp)
/* Store the environment. Recall that fnstenv has a side effect of
masking all exceptions. Then clear all exceptions. */
__asm__ ("fnstenv %0\n\t"
"%vstmxcsr %1\n\t"
"fnclex"
: "=m" (*envp), "=m" (envp->__mxcsr));
asm volatile ("fnstenv %0" : "=m" (*envp));
stmxcsr_inline_asm (&envp->__mxcsr);
asm volatile ("fnclex" : "=m" (*envp));
/* Set the SSE MXCSR register. */
mxcsr = (envp->__mxcsr | 0x1f80) & ~0x3f;
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
return 0;
}

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
#include <fpu_control.h>
#include <assert.h>
@ -35,8 +36,8 @@ __fesetenv (const fenv_t *envp)
values which we do not want to come from the saved environment.
Therefore, we get the current environment and replace the values
we want to use from the environment specified by the parameter. */
__asm__ ("fnstenv %0\n"
"%vstmxcsr %1" : "=m" (temp), "=m" (temp.__mxcsr));
asm volatile ("fnstenv %0" : "=m" (temp));
stmxcsr_inline_asm (&temp.__mxcsr);
if (envp == FE_DFL_ENV)
{
@ -103,8 +104,8 @@ __fesetenv (const fenv_t *envp)
temp.__mxcsr = envp->__mxcsr;
}
__asm__ ("fldenv %0\n"
"%vldmxcsr %1" : : "m" (temp), "m" (temp.__mxcsr));
asm volatile ("fldenv %0" : "=m" (temp));
ldmxcsr_inline_asm (&temp.__mxcsr);
/* Success. */
return 0;

View File

@ -17,15 +17,15 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
fesetexcept (int excepts)
{
unsigned int mxcsr;
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
mxcsr |= excepts & FE_ALL_EXCEPT;
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
return 0;
}

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
#include <fpu_control.h>
/* All exceptions, including the x86-specific "denormal operand"
@ -28,7 +29,8 @@ fesetmode (const femode_t *modep)
{
fpu_control_t cw;
unsigned int mxcsr;
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
/* Preserve SSE exception flags but restore other state in
MXCSR. */
mxcsr &= FE_ALL_EXCEPT_X86;
@ -45,6 +47,6 @@ fesetmode (const femode_t *modep)
mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86;
}
_FPU_SETCW (cw);
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
return 0;
}

View File

@ -17,12 +17,13 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
__fesetround (int round)
{
unsigned short int cw;
int mxcsr;
unsigned int mxcsr;
if ((round & ~0xc00) != 0)
/* ROUND is no valid rounding mode. */
@ -36,10 +37,10 @@ __fesetround (int round)
/* And now the MSCSR register for SSE, the precision is at different bit
positions in the different units, we need to shift it 3 bits. */
asm ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
mxcsr &= ~ 0x6000;
mxcsr |= round << 3;
asm ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
return 0;
}

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
__feupdateenv (const fenv_t *envp)
@ -25,7 +26,8 @@ __feupdateenv (const fenv_t *envp)
unsigned int xtemp;
/* Save current exceptions. */
__asm__ ("fnstsw %0\n\t%vstmxcsr %1" : "=m" (temp), "=m" (xtemp));
asm volatile ("fnstsw %0" : "=m" (temp));
stmxcsr_inline_asm (&xtemp);
temp = (temp | xtemp) & FE_ALL_EXCEPT;
/* Install new environment. */

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
fegetexceptflag (fexcept_t *flagp, int excepts)
@ -25,8 +26,8 @@ fegetexceptflag (fexcept_t *flagp, int excepts)
unsigned int mxscr;
/* Get the current exceptions for the x87 FPU and SSE unit. */
__asm__ ("fnstsw %0\n"
"%vstmxcsr %1" : "=m" (temp), "=m" (mxscr));
__asm__ ("fnstsw %0" : "=m" (temp));
stmxcsr_inline_asm (&mxscr);
*flagp = (temp | mxscr) & FE_ALL_EXCEPT & excepts;

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
#include <math.h>
int
@ -29,23 +30,12 @@ __feraiseexcept (int excepts)
/* First: invalid exception. */
if ((FE_INVALID & excepts) != 0)
{
/* One example of an invalid operation is 0.0 / 0.0. */
float f = 0.0;
__asm__ __volatile__ ("%vdivss %0, %d0 " : "+x" (f));
(void) &f;
}
/* One example of an invalid operation is 0.0 / 0.0. */
divss_inline_asm (0.0f, 0.0f);
/* Next: division by zero. */
if ((FE_DIVBYZERO & excepts) != 0)
{
float f = 1.0;
float g = 0.0;
__asm__ __volatile__ ("%vdivss %1, %d0" : "+x" (f) : "x" (g));
(void) &f;
}
divss_inline_asm (1.0f, 0.0f);
/* Next: overflow. */
if ((FE_OVERFLOW & excepts) != 0)

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
#include <math.h>
int
@ -44,13 +45,13 @@ fesetexceptflag (const fexcept_t *flagp, int excepts)
__asm__ ("fldenv %0" : : "m" (temp));
/* And now similarly for SSE. */
__asm__ ("%vstmxcsr %0" : "=m" (mxcsr));
stmxcsr_inline_asm (&mxcsr);
/* Clear or set relevant flags. */
mxcsr ^= (mxcsr ^ *flagp) & excepts;
/* Put the new data in effect. */
__asm__ ("%vldmxcsr %0" : : "m" (mxcsr));
ldmxcsr_inline_asm (&mxcsr);
/* Success. */
return 0;

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <fenv.h>
#include <math-inline-asm.h>
int
__fetestexcept (int excepts)
@ -25,8 +26,8 @@ __fetestexcept (int excepts)
unsigned int mxscr;
/* Get current exceptions. */
__asm__ ("fnstsw %0\n"
"%vstmxcsr %1" : "=m" (temp), "=m" (mxscr));
asm volatile ("fnstsw %0" : "=m" (temp));
stmxcsr_inline_asm (&mxscr);
return (temp | mxscr) & excepts & FE_ALL_EXCEPT;
}