mirror of git://sourceware.org/git/glibc.git
x86: Don't use asm statement for trunc/truncf
The compiler inlines trunc and truncf with SSE4.1. But older versions of GCC don't inline them with -Os: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121861 Don't use an asm statement for trunc and truncf if the compiler can inline them with -Os. It removes one register move with GCC 16: __modff_sse41: __modff_sse41: .LFB23: .LFB23: .cfi_startproc .cfi_startproc endbr64 endbr64 subq $24, %rsp subq $24, %rsp .cfi_def_cfa_offset 32 .cfi_def_cfa_offset 32 movq %fs:40, %rax movq %fs:40, %rax movq %rax, 8(%rsp) movq %rax, 8(%rsp) xorl %eax, %eax xorl %eax, %eax movd %xmm0, %eax movd %xmm0, %eax addl %eax, %eax addl %eax, %eax cmpl $-16777216, %eax cmpl $-16777216, %eax je .L7 je .L7 > movaps %xmm0, %xmm3 movaps %xmm0, %xmm4 movaps %xmm0, %xmm4 movss .LC0(%rip), %xmm2 | movss .LC0(%rip), %xmm1 movaps %xmm2, %xmm3 | movaps %xmm1, %xmm2 andps %xmm0, %xmm2 | roundss $11, %xmm3, %xmm3 roundss $11, %xmm0, %xmm1 | subss %xmm3, %xmm4 subss %xmm1, %xmm4 | andps %xmm0, %xmm1 andnps %xmm4, %xmm3 | andnps %xmm4, %xmm2 orps %xmm3, %xmm2 | orps %xmm2, %xmm1 .L3: .L3: movss %xmm1, (%rdi) | movss %xmm3, (%rdi) movq 8(%rsp), %rax movq 8(%rsp), %rax subq %fs:40, %rax subq %fs:40, %rax jne .L8 jne .L8 movaps %xmm2, %xmm0 | movaps %xmm1, %xmm0 addq $24, %rsp addq $24, %rsp .cfi_remember_state .cfi_remember_state .cfi_def_cfa_offset 8 .cfi_def_cfa_offset 8 ret ret Signed-off-by: H.J. Lu <hjl.tools@gmail.com> Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
This commit is contained in:
parent
13d67746cb
commit
1fa5773eb1
|
|
@ -308,4 +308,7 @@
|
|||
/* Define if -mapxf is enabled by default on x86. */
|
||||
#undef HAVE_X86_APX
|
||||
|
||||
/* Define to 1 if the compiler inlines trunc with -Os -msse4.1 on x86, to 0 otherwise. */
|
||||
#undef HAVE_X86_INLINE_TRUNC
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -340,6 +340,58 @@ fi
|
|||
config_vars="$config_vars
|
||||
test-cc-cflags-no-direct-extern-access = $libc_cv_test_cc_cflags_no_direct_extern_access"
|
||||
|
||||
conftest_code="
|
||||
extern float truncf (float __x) __attribute__ ((__nothrow__,__const__));
|
||||
|
||||
float
|
||||
tf (float x)
|
||||
{
|
||||
return truncf (x);
|
||||
}
|
||||
"
|
||||
|
||||
cat > conftest.c <<EOF
|
||||
$conftest_code
|
||||
EOF
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Os inlines trunc" >&5
|
||||
printf %s "checking if -Os inlines trunc... " >&6; }
|
||||
if test ${libc_cv_cc_x86_inline_trunc+y}
|
||||
then :
|
||||
printf %s "(cached) " >&6
|
||||
else case e in #(
|
||||
e) if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -Os -msse4.1 conftest.c -o conftest 1>&5'
|
||||
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||
(eval $ac_try) 2>&5
|
||||
ac_status=$?
|
||||
printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
test $ac_status = 0; }; }
|
||||
then
|
||||
|
||||
libc_cv_cc_x86_inline_trunc=no
|
||||
if grep -E -q "roundss" conftest; then
|
||||
libc_cv_cc_x86_inline_trunc=yes
|
||||
fi
|
||||
|
||||
else
|
||||
|
||||
echo "failed to check if -Os inlines trunc."
|
||||
rm -f conftest*
|
||||
exit 1
|
||||
|
||||
fi ;;
|
||||
esac
|
||||
fi
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_x86_inline_trunc" >&5
|
||||
printf "%s\n" "$libc_cv_cc_x86_inline_trunc" >&6; }
|
||||
rm -f conftest*
|
||||
if test "$libc_cv_cc_x86_inline_trunc" = yes; then
|
||||
printf "%s\n" "#define HAVE_X86_INLINE_TRUNC 1" >>confdefs.h
|
||||
|
||||
else
|
||||
printf "%s\n" "#define HAVE_X86_INLINE_TRUNC 0" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
if test "${libc_cv_cc_no_direct_extern_access}${libc_cv_test_cc_cflags_no_direct_extern_access}" = yes; then
|
||||
libc_cv_protected_data=no
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -192,6 +192,37 @@ fi
|
|||
LIBC_CONFIG_VAR(test-cc-cflags-no-direct-extern-access,
|
||||
$libc_cv_test_cc_cflags_no_direct_extern_access)
|
||||
|
||||
conftest_code="
|
||||
extern float truncf (float __x) __attribute__ ((__nothrow__,__const__));
|
||||
|
||||
float
|
||||
tf (float x)
|
||||
{
|
||||
return truncf (x);
|
||||
}
|
||||
"
|
||||
dnl Check if CC inlines trunc with -Os.
|
||||
LIBC_TRY_CC_COMMAND([if -Os inlines trunc],
|
||||
[$conftest_code],
|
||||
[-S -Os -msse4.1],
|
||||
libc_cv_cc_x86_inline_trunc,
|
||||
[
|
||||
libc_cv_cc_x86_inline_trunc=no
|
||||
if grep -E -q "roundss" conftest; then
|
||||
libc_cv_cc_x86_inline_trunc=yes
|
||||
fi
|
||||
],
|
||||
[
|
||||
echo "failed to check if -Os inlines trunc."
|
||||
rm -f conftest*
|
||||
exit 1
|
||||
])
|
||||
if test "$libc_cv_cc_x86_inline_trunc" = yes; then
|
||||
AC_DEFINE(HAVE_X86_INLINE_TRUNC, 1)
|
||||
else
|
||||
AC_DEFINE(HAVE_X86_INLINE_TRUNC, 0)
|
||||
fi
|
||||
|
||||
dnl If the building compiler enables no direct external data access by
|
||||
dnl default, access to protected data in shared libraries from executables
|
||||
dnl must be compiled with no direct external data access. If the testing
|
||||
|
|
|
|||
|
|
@ -33,27 +33,23 @@ __NTH (__ieee754_atan2l (long double y, long double x))
|
|||
__extern_always_inline double
|
||||
__trunc (double x)
|
||||
{
|
||||
#ifdef __AVX__
|
||||
asm ("vroundsd $11, %1, %1, %0" : "=v" (x) : "v" (x));
|
||||
#elif defined __SSE4_1__
|
||||
asm ("roundsd $11, %1, %0" : "=x" (x) : "x" (x));
|
||||
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
|
||||
return trunc (x);
|
||||
#else
|
||||
x = trunc (x);
|
||||
#endif
|
||||
asm ("%vroundsd $11, %d1, %0" : "=v" (x) : "v" (x));
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
__extern_always_inline float
|
||||
__truncf (float x)
|
||||
{
|
||||
#ifdef __AVX__
|
||||
asm ("vroundss $11, %1, %1, %0" : "=v" (x) : "v" (x));
|
||||
#elif defined __SSE4_1__
|
||||
asm ("roundss $11, %1, %0" : "=x" (x) : "x" (x));
|
||||
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
|
||||
return truncf (x);
|
||||
#else
|
||||
x = truncf (x);
|
||||
#endif
|
||||
asm ("%vroundss $11, %d1, %0" : "=v" (x) : "v" (x));
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue