mirror of git://sourceware.org/git/glibc.git
X86-64: Prepare memset-vec-unaligned-erms.S
Prepare memset-vec-unaligned-erms.S to make the SSE2 version the default memset. * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S (MEMSET_CHK_SYMBOL): New. Define if not defined. (__bzero): Check VEC_SIZE == 16 instead of USE_MULTIARCH. Disabled for now. Replace MEMSET_SYMBOL with MEMSET_CHK_SYMBOL on __memset_chk symbols. Properly check USE_MULTIARCH on __memset symbols.
This commit is contained in:
parent
a25322f4e8
commit
4af1bb06c5
|
@ -1,3 +1,12 @@
|
||||||
|
2016-04-06 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||||||
|
(MEMSET_CHK_SYMBOL): New. Define if not defined.
|
||||||
|
(__bzero): Check VEC_SIZE == 16 instead of USE_MULTIARCH.
|
||||||
|
Disabled for now.
|
||||||
|
Replace MEMSET_SYMBOL with MEMSET_CHK_SYMBOL on __memset_chk
|
||||||
|
symbols. Properly check USE_MULTIARCH on __memset symbols.
|
||||||
|
|
||||||
2016-04-06 H.J. Lu <hongjiu.lu@intel.com>
|
2016-04-06 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
* benchtests/Makefile (string-benchset): Add memcpy-large,
|
* benchtests/Makefile (string-benchset): Add memcpy-large,
|
||||||
|
|
|
@ -28,6 +28,10 @@
|
||||||
|
|
||||||
#include <sysdep.h>
|
#include <sysdep.h>
|
||||||
|
|
||||||
|
#ifndef MEMSET_CHK_SYMBOL
|
||||||
|
# define MEMSET_CHK_SYMBOL(p,s) MEMSET_SYMBOL(p, s)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef VZEROUPPER
|
#ifndef VZEROUPPER
|
||||||
# if VEC_SIZE > 16
|
# if VEC_SIZE > 16
|
||||||
# define VZEROUPPER vzeroupper
|
# define VZEROUPPER vzeroupper
|
||||||
|
@ -66,8 +70,8 @@
|
||||||
# error SECTION is not defined!
|
# error SECTION is not defined!
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined USE_MULTIARCH && IS_IN (libc)
|
|
||||||
.section SECTION(.text),"ax",@progbits
|
.section SECTION(.text),"ax",@progbits
|
||||||
|
#if VEC_SIZE == 16 && IS_IN (libc) && 0
|
||||||
ENTRY (__bzero)
|
ENTRY (__bzero)
|
||||||
movq %rdi, %rax /* Set return value. */
|
movq %rdi, %rax /* Set return value. */
|
||||||
movq %rsi, %rdx /* Set n. */
|
movq %rsi, %rdx /* Set n. */
|
||||||
|
@ -78,10 +82,10 @@ weak_alias (__bzero, bzero)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined SHARED && IS_IN (libc)
|
#if defined SHARED && IS_IN (libc)
|
||||||
ENTRY_CHK (MEMSET_SYMBOL (__memset_chk, unaligned))
|
ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
|
||||||
cmpq %rdx, %rcx
|
cmpq %rdx, %rcx
|
||||||
jb HIDDEN_JUMPTARGET (__chk_fail)
|
jb HIDDEN_JUMPTARGET (__chk_fail)
|
||||||
END_CHK (MEMSET_SYMBOL (__memset_chk, unaligned))
|
END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ENTRY (MEMSET_SYMBOL (__memset, unaligned))
|
ENTRY (MEMSET_SYMBOL (__memset, unaligned))
|
||||||
|
@ -97,15 +101,16 @@ L(entry_from_bzero):
|
||||||
VMOVU %VEC(0), (%rdi)
|
VMOVU %VEC(0), (%rdi)
|
||||||
VZEROUPPER
|
VZEROUPPER
|
||||||
ret
|
ret
|
||||||
|
#if defined USE_MULTIARCH && IS_IN (libc)
|
||||||
END (MEMSET_SYMBOL (__memset, unaligned))
|
END (MEMSET_SYMBOL (__memset, unaligned))
|
||||||
|
|
||||||
#if VEC_SIZE == 16
|
# if VEC_SIZE == 16
|
||||||
/* Only used to measure performance of REP STOSB. */
|
/* Only used to measure performance of REP STOSB. */
|
||||||
ENTRY (__memset_erms)
|
ENTRY (__memset_erms)
|
||||||
#else
|
# else
|
||||||
/* Provide a symbol to debugger. */
|
/* Provide a symbol to debugger. */
|
||||||
ENTRY (MEMSET_SYMBOL (__memset, erms))
|
ENTRY (MEMSET_SYMBOL (__memset, erms))
|
||||||
#endif
|
# endif
|
||||||
L(stosb):
|
L(stosb):
|
||||||
movq %rdx, %rcx
|
movq %rdx, %rcx
|
||||||
movzbl %sil, %eax
|
movzbl %sil, %eax
|
||||||
|
@ -113,18 +118,18 @@ L(stosb):
|
||||||
rep stosb
|
rep stosb
|
||||||
movq %rdx, %rax
|
movq %rdx, %rax
|
||||||
ret
|
ret
|
||||||
#if VEC_SIZE == 16
|
# if VEC_SIZE == 16
|
||||||
END (__memset_erms)
|
END (__memset_erms)
|
||||||
#else
|
# else
|
||||||
END (MEMSET_SYMBOL (__memset, erms))
|
END (MEMSET_SYMBOL (__memset, erms))
|
||||||
#endif
|
# endif
|
||||||
|
|
||||||
#if defined SHARED && IS_IN (libc)
|
# if defined SHARED && IS_IN (libc)
|
||||||
ENTRY_CHK (MEMSET_SYMBOL (__memset_chk, unaligned_erms))
|
ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
|
||||||
cmpq %rdx, %rcx
|
cmpq %rdx, %rcx
|
||||||
jb HIDDEN_JUMPTARGET (__chk_fail)
|
jb HIDDEN_JUMPTARGET (__chk_fail)
|
||||||
END_CHK (MEMSET_SYMBOL (__memset_chk, unaligned_erms))
|
END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
|
||||||
#endif
|
# endif
|
||||||
|
|
||||||
ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
|
ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
|
||||||
VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
|
VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
|
||||||
|
@ -144,6 +149,7 @@ L(stosb_more_2x_vec):
|
||||||
/* Force 32-bit displacement to avoid long nop between
|
/* Force 32-bit displacement to avoid long nop between
|
||||||
instructions. */
|
instructions. */
|
||||||
ja.d32 L(stosb)
|
ja.d32 L(stosb)
|
||||||
|
#endif
|
||||||
.p2align 4
|
.p2align 4
|
||||||
L(more_2x_vec):
|
L(more_2x_vec):
|
||||||
cmpq $(VEC_SIZE * 4), %rdx
|
cmpq $(VEC_SIZE * 4), %rdx
|
||||||
|
|
Loading…
Reference in New Issue