x86: Don't use asm statement for trunc/truncf

The compiler inlines trunc and truncf with SSE4.1, but older versions of
GCC don't inline them with -Os:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121861

Don't use an asm statement for trunc and truncf if the compiler can inline
them with -Os.  This removes one register move with GCC 16:

__modff_sse41:                        __modff_sse41:
.LFB23:                               .LFB23:
   .cfi_startproc                        .cfi_startproc
   endbr64                               endbr64
   subq  $24, %rsp                       subq  $24, %rsp
   .cfi_def_cfa_offset 32                .cfi_def_cfa_offset 32
   movq  %fs:40, %rax                    movq  %fs:40, %rax
   movq  %rax, 8(%rsp)                   movq  %rax, 8(%rsp)
   xorl  %eax, %eax                      xorl  %eax, %eax
   movd  %xmm0, %eax                     movd  %xmm0, %eax
   addl  %eax, %eax                      addl  %eax, %eax
   cmpl  $-16777216, %eax                cmpl  $-16777216, %eax
   je .L7                                je .L7
                                   >     movaps   %xmm0, %xmm3
   movaps   %xmm0, %xmm4                 movaps   %xmm0, %xmm4
   movss .LC0(%rip), %xmm2         |     movss .LC0(%rip), %xmm1
   movaps   %xmm2, %xmm3           |     movaps   %xmm1, %xmm2
   andps %xmm0, %xmm2              |     roundss  $11, %xmm3, %xmm3
   roundss $11, %xmm0, %xmm1       |     subss %xmm3, %xmm4
   subss %xmm1, %xmm4              |     andps %xmm0, %xmm1
   andnps   %xmm4, %xmm3           |     andnps   %xmm4, %xmm2
   orps  %xmm3, %xmm2              |     orps  %xmm2, %xmm1
.L3:                                  .L3:
   movss %xmm1, (%rdi)             |     movss %xmm3, (%rdi)
   movq  8(%rsp), %rax                   movq  8(%rsp), %rax
   subq  %fs:40, %rax                    subq  %fs:40, %rax
   jne   .L8                             jne   .L8
   movaps   %xmm2, %xmm0           |     movaps   %xmm1, %xmm0
   addq  $24, %rsp                       addq  $24, %rsp
   .cfi_remember_state                   .cfi_remember_state
   .cfi_def_cfa_offset 8                 .cfi_def_cfa_offset 8
   ret                                   ret

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
This commit is contained in:
H.J. Lu 2025-09-15 18:52:18 -07:00
parent 13d67746cb
commit 1fa5773eb1
4 changed files with 94 additions and 12 deletions

View File

@ -308,4 +308,7 @@
/* Define if -mapxf is enabled by default on x86. */
#undef HAVE_X86_APX
/* Define to 1 if the compiler inlines trunc with -Os on x86, 0 otherwise. */
#undef HAVE_X86_INLINE_TRUNC
#endif

52
sysdeps/x86/configure vendored
View File

@ -340,6 +340,58 @@ fi
# Append the test-cc-cflags-no-direct-extern-access setting to config_vars.
config_vars="$config_vars
test-cc-cflags-no-direct-extern-access = $libc_cv_test_cc_cflags_no_direct_extern_access"
# Test program for the -Os inlining check: one function that returns
# truncf of its argument.  truncf is declared locally, so no header is
# included.
conftest_code="
extern float truncf (float __x) __attribute__ ((__nothrow__,__const__));
float
tf (float x)
{
return truncf (x);
}
"
# Write the test program to conftest.c.
cat > conftest.c <<EOF
$conftest_code
EOF
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Os inlines trunc" >&5
printf %s "checking if -Os inlines trunc... " >&6; }
# Reuse libc_cv_cc_x86_inline_trunc when it is already set.  Otherwise
# compile conftest.c to assembly with -S -Os -msse4.1 and search the
# output file for roundss: the result is yes when it is found and no
# when it is not.  If the compile command fails, print a message,
# remove the conftest files and exit with status 1.
if test ${libc_cv_cc_x86_inline_trunc+y}
then :
printf %s "(cached) " >&6
else case e in #(
e) if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -Os -msse4.1 conftest.c -o conftest 1>&5'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }
then
libc_cv_cc_x86_inline_trunc=no
if grep -E -q "roundss" conftest; then
libc_cv_cc_x86_inline_trunc=yes
fi
else
echo "failed to check if -Os inlines trunc."
rm -f conftest*
exit 1
fi ;;
esac
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_x86_inline_trunc" >&5
printf "%s\n" "$libc_cv_cc_x86_inline_trunc" >&6; }
rm -f conftest*
# Record the result in confdefs.h.  HAVE_X86_INLINE_TRUNC is always
# defined, to 1 or to 0, rather than being left undefined in the
# negative case.
if test "$libc_cv_cc_x86_inline_trunc" = yes; then
printf "%s\n" "#define HAVE_X86_INLINE_TRUNC 1" >>confdefs.h
else
printf "%s\n" "#define HAVE_X86_INLINE_TRUNC 0" >>confdefs.h
fi
# Set libc_cv_protected_data to no when the two cache variables below
# concatenate to exactly "yes".
if test "${libc_cv_cc_no_direct_extern_access}${libc_cv_test_cc_cflags_no_direct_extern_access}" = yes; then
libc_cv_protected_data=no
fi

View File

@ -192,6 +192,37 @@ fi
LIBC_CONFIG_VAR(test-cc-cflags-no-direct-extern-access,
$libc_cv_test_cc_cflags_no_direct_extern_access)
dnl Test program for the check below: one function that returns truncf
dnl of its argument.  truncf is declared locally, so no header is
dnl included.
conftest_code="
extern float truncf (float __x) __attribute__ ((__nothrow__,__const__));
float
tf (float x)
{
return truncf (x);
}
"
dnl Check if CC inlines trunc with -Os.
dnl The test program is compiled to assembly with -S -Os -msse4.1 and
dnl the output file is searched for roundss.  The cache variable
dnl libc_cv_cc_x86_inline_trunc is set to yes when roundss is found and
dnl to no when it is not.  The final argument prints a message, removes
dnl the conftest files and exits with status 1; in the generated
dnl configure script it runs when the compile command fails.
LIBC_TRY_CC_COMMAND([if -Os inlines trunc],
[$conftest_code],
[-S -Os -msse4.1],
libc_cv_cc_x86_inline_trunc,
[
libc_cv_cc_x86_inline_trunc=no
if grep -E -q "roundss" conftest; then
libc_cv_cc_x86_inline_trunc=yes
fi
],
[
echo "failed to check if -Os inlines trunc."
rm -f conftest*
exit 1
])
dnl HAVE_X86_INLINE_TRUNC is always defined, to 1 or to 0, rather than
dnl being left undefined in the negative case.
if test "$libc_cv_cc_x86_inline_trunc" = yes; then
AC_DEFINE(HAVE_X86_INLINE_TRUNC, 1)
else
AC_DEFINE(HAVE_X86_INLINE_TRUNC, 0)
fi
dnl If the building compiler enables no direct external data access by
dnl default, access to protected data in shared libraries from executables
dnl must be compiled with no direct external data access. If the testing

View File

@ -33,27 +33,23 @@ __NTH (__ieee754_atan2l (long double y, long double x))
/* Return X rounded toward zero to an integral value.
   trunc is called directly when HAVE_X86_INLINE_TRUNC is nonzero or
   SSE4.1 is not enabled; otherwise ROUNDSD is emitted through an asm
   statement with immediate 11 (round toward zero, precision exception
   suppressed).
   NOTE(review): this listing appears to interleave the pre-change and
   post-change lines of the commit without +/- markers -- confirm
   against the actual header before relying on the directive nesting
   shown here.  */
__extern_always_inline double
__trunc (double x)
{
#ifdef __AVX__
asm ("vroundsd $11, %1, %1, %0" : "=v" (x) : "v" (x));
#elif defined __SSE4_1__
asm ("roundsd $11, %1, %0" : "=x" (x) : "x" (x));
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
return trunc (x);
#else
x = trunc (x);
#endif
/* NOTE(review): %v and %d1 presumably select the VEX-encoded mnemonic
   and repeat operand 1 when AVX is enabled, so that one template
   covers both the vroundsd and roundsd forms -- TODO confirm against
   the GCC x86 operand modifier documentation.  */
asm ("%vroundsd $11, %d1, %0" : "=v" (x) : "v" (x));
return x;
#endif
}
/* Return X rounded toward zero to an integral value, single precision.
   truncf is called directly when HAVE_X86_INLINE_TRUNC is nonzero or
   SSE4.1 is not enabled; otherwise ROUNDSS is emitted through an asm
   statement with immediate 11 (round toward zero, precision exception
   suppressed).
   NOTE(review): this listing appears to interleave the pre-change and
   post-change lines of the commit without +/- markers -- confirm
   against the actual header before relying on the directive nesting
   shown here.  */
__extern_always_inline float
__truncf (float x)
{
#ifdef __AVX__
asm ("vroundss $11, %1, %1, %0" : "=v" (x) : "v" (x));
#elif defined __SSE4_1__
asm ("roundss $11, %1, %0" : "=x" (x) : "x" (x));
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
return truncf (x);
#else
x = truncf (x);
#endif
/* NOTE(review): %v and %d1 presumably select the VEX-encoded mnemonic
   and repeat operand 1 when AVX is enabled, so that one template
   covers both the vroundss and roundss forms -- TODO confirm against
   the GCC x86 operand modifier documentation.  */
asm ("%vroundss $11, %d1, %0" : "=v" (x) : "v" (x));
return x;
#endif
}
#endif