mirror of git://sourceware.org/git/glibc.git
x86: Add support to build strcmp/strlen/strchr with explicit ISA level
1. Add default ISA level selection in non-multiarch/rtld implementations. 2. Add ISA level build guards to different implementations. - E.g., strcmp-avx2.S, which is ISA level 3, will only build if the compiled ISA level is <= 3. Otherwise there is no reason to include it, as we will always use one of the ISA level 4 implementations (strcmp-evex.S). 3. Refactor the ifunc selector and ifunc implementation list to use the ISA-level-aware wrapper macros that allow functions below the compiled ISA level (with a guaranteed replacement) to be skipped. Tested with and without multiarch on x86_64 for ISA levels: {generic, x86-64-v2, x86-64-v3, x86-64-v4}, and on m32 with and without multiarch.
This commit is contained in:
parent
c353689e49
commit
ceabdcd130
|
@ -84,6 +84,7 @@
|
|||
|
||||
/* ISA level >= 2 guaranteed includes. */
|
||||
#define SSE4_2_X86_ISA_LEVEL 2
|
||||
#define SSE4_1_X86_ISA_LEVEL 2
|
||||
#define SSSE3_X86_ISA_LEVEL 2
|
||||
|
||||
|
||||
|
@ -101,9 +102,18 @@
|
|||
when ISA level < 3. */
|
||||
#define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
|
||||
|
||||
/* NB: This feature is disabled when ISA level >= 3. All CPUs with
|
||||
this feature don't run on glibc built with ISA level >= 3. */
|
||||
#define Slow_SSE42_X86_ISA_LEVEL 3
|
||||
|
||||
/* Feature(s) enabled when ISA level >= 2. */
|
||||
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
|
||||
|
||||
/* NB: This feature is disabled when ISA level >= 2, which was enabled
|
||||
for the early Atom CPUs. */
|
||||
#define Slow_BSF_X86_ISA_LEVEL 2
|
||||
|
||||
|
||||
/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
|
||||
macros are wrappers for the respective CPU_FEATURE{S}_{USABLE|ARCH}_P
|
||||
runtime checks. They differ in two ways.
|
||||
|
|
|
@ -197,6 +197,12 @@ gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
|
|||
endif
|
||||
|
||||
ifeq ($(subdir),wcsmbs)
|
||||
|
||||
sysdep_routines += \
|
||||
wcsncmp-generic \
|
||||
wcsnlen-generic \
|
||||
# sysdep_routines
|
||||
|
||||
tests += \
|
||||
tst-rsi-wcslen
|
||||
endif
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using
|
||||
/* memrchr dispatch for RTLD and non-multiarch build
|
||||
|
||||
Copyright (C) 2011-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
@ -18,5 +18,11 @@
|
|||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define MEMRCHR __memrchr
|
||||
#include "multiarch/memrchr-sse2.S"
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/memrchr-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/memrchr-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/memrchr-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
weak_alias (__memrchr, memrchr)
|
||||
|
|
|
@ -144,11 +144,9 @@ sysdep_routines += \
|
|||
wcslen-sse4_1 \
|
||||
wcsncmp-avx2 \
|
||||
wcsncmp-avx2-rtm \
|
||||
wcsncmp-generic \
|
||||
wcsncmp-evex \
|
||||
wcsnlen-avx2 \
|
||||
wcsnlen-avx2-rtm \
|
||||
wcsnlen-generic \
|
||||
wcsnlen-evex \
|
||||
wcsnlen-evex512 \
|
||||
wcsnlen-sse4_1 \
|
||||
|
|
|
@ -23,28 +23,32 @@
|
|||
# define GENERIC sse2
|
||||
#endif
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
|
||||
|
||||
static inline void *
|
||||
IFUNC_SELECTOR (void)
|
||||
{
|
||||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||||
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
AVX_Fast_Unaligned_Load, ))
|
||||
{
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
return OPTIMIZE (evex);
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||||
return OPTIMIZE (avx2_rtm);
|
||||
|
||||
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
Prefer_No_VZEROUPPER, !))
|
||||
return OPTIMIZE (avx2);
|
||||
}
|
||||
|
||||
|
|
|
@ -205,19 +205,22 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||
|
||||
/* Support sysdeps/x86_64/multiarch/memrchr.c. */
|
||||
IFUNC_IMPL (i, name, memrchr,
|
||||
IFUNC_IMPL_ADD (array, i, memrchr,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__memrchr_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, memrchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__memrchr_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, memrchr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__memrchr_evex)
|
||||
|
||||
IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__memrchr_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__memrchr_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__memrchr_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, memrchr,
|
||||
1,
|
||||
__memrchr_sse2))
|
||||
|
||||
#ifdef SHARED
|
||||
/* Support sysdeps/x86_64/multiarch/memset_chk.c. */
|
||||
|
@ -346,49 +349,57 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||
|
||||
/* Support sysdeps/x86_64/multiarch/strlen.c. */
|
||||
IFUNC_IMPL (i, name, strlen,
|
||||
IFUNC_IMPL_ADD (array, i, strlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strlen_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strlen_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strlen_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strlen_evex512)
|
||||
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strlen_evex)
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strlen_evex512)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strlen_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strlen_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strlen,
|
||||
1,
|
||||
__strlen_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strnlen.c. */
|
||||
IFUNC_IMPL (i, name, strnlen,
|
||||
IFUNC_IMPL_ADD (array, i, strnlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strnlen_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strnlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strnlen_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strnlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strnlen_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strnlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strnlen_evex512)
|
||||
IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strnlen_evex)
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strnlen_evex512)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strnlen_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strnlen_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strnlen,
|
||||
1,
|
||||
__strnlen_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/stpncpy.c. */
|
||||
IFUNC_IMPL (i, name, stpncpy,
|
||||
|
@ -422,40 +433,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||
|
||||
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
|
||||
IFUNC_IMPL (i, name, strcasecmp,
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strcasecmp_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strcasecmp_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strcasecmp_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strcasecmp_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strcasecmp_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strcasecmp_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strcasecmp_avx2_rtm)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strcasecmp_sse42)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
|
||||
1,
|
||||
__strcasecmp_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
|
||||
IFUNC_IMPL (i, name, strcasecmp_l,
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strcasecmp_l_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strcasecmp_l_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strcasecmp_l_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp_l,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strcasecmp_l_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
|
||||
__strcasecmp_l_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strcasecmp_l_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strcasecmp_l_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strcasecmp_l_avx2_rtm)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strcasecmp_l_sse42)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
|
||||
1,
|
||||
__strcasecmp_l_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strcat.c. */
|
||||
IFUNC_IMPL (i, name, strcat,
|
||||
|
@ -474,74 +492,95 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||
|
||||
/* Support sysdeps/x86_64/multiarch/strchr.c. */
|
||||
IFUNC_IMPL (i, name, strchr,
|
||||
IFUNC_IMPL_ADD (array, i, strchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strchr_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strchr_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strchr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strchr_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
|
||||
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strchr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strchr_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strchr_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strchr_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strchr,
|
||||
1,
|
||||
__strchr_sse2)
|
||||
X86_IFUNC_IMPL_ADD_V1 (array, i, strchr,
|
||||
1,
|
||||
__strchr_sse2_no_bsf))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strchrnul.c. */
|
||||
IFUNC_IMPL (i, name, strchrnul,
|
||||
IFUNC_IMPL_ADD (array, i, strchrnul,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strchrnul_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strchrnul,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strchrnul_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strchrnul,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strchrnul_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strchrnul,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strchrnul_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strchrnul_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strchrnul_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strchrnul,
|
||||
1,
|
||||
__strchrnul_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strrchr.c. */
|
||||
IFUNC_IMPL (i, name, strrchr,
|
||||
IFUNC_IMPL_ADD (array, i, strrchr,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strrchr_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strrchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strrchr_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strrchr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strrchr_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strrchr_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strrchr_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strrchr_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strrchr,
|
||||
1,
|
||||
__strrchr_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strcmp.c. */
|
||||
IFUNC_IMPL (i, name, strcmp,
|
||||
IFUNC_IMPL_ADD (array, i, strcmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strcmp_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strcmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strcmp_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strcmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strcmp_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strcmp_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned)
|
||||
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strcmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__strcmp_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strcmp_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strcmp_avx2_rtm)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strcmp_sse42)
|
||||
/* ISA V2 wrapper for SSE2 implementations because the SSE2
|
||||
implementations are also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
|
||||
1,
|
||||
__strcmp_sse2_unaligned)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
|
||||
1,
|
||||
__strcmp_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strcpy.c. */
|
||||
IFUNC_IMPL (i, name, strcpy,
|
||||
|
@ -568,41 +607,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||
|
||||
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
|
||||
IFUNC_IMPL (i, name, strncasecmp,
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strncasecmp_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strncasecmp_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strncasecmp_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strncasecmp_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
|
||||
__strncasecmp_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strncasecmp_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strncasecmp_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strncasecmp_avx2_rtm)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strncasecmp_sse42)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
|
||||
1,
|
||||
__strncasecmp_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
|
||||
IFUNC_IMPL (i, name, strncasecmp_l,
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strncasecmp_l_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strncasecmp_l_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strncasecmp_l_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp_l,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strncasecmp_l_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
|
||||
__strncasecmp_l_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strncasecmp_l_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strncasecmp_l_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strncasecmp_l_avx2_rtm)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strncasecmp_l_sse42)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
|
||||
1,
|
||||
__strncasecmp_l_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strncat.c. */
|
||||
IFUNC_IMPL (i, name, strncat,
|
||||
|
@ -664,69 +709,85 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||
|
||||
/* Support sysdeps/x86_64/multiarch/wcschr.c. */
|
||||
IFUNC_IMPL (i, name, wcschr,
|
||||
IFUNC_IMPL_ADD (array, i, wcschr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcschr_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, wcschr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcschr_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, wcschr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcschr_evex)
|
||||
IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, wcschr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcschr_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcschr_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcschr_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcschr,
|
||||
1,
|
||||
__wcschr_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/wcsrchr.c. */
|
||||
IFUNC_IMPL (i, name, wcsrchr,
|
||||
IFUNC_IMPL_ADD (array, i, wcsrchr,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__wcsrchr_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, wcsrchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcsrchr_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, wcsrchr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsrchr_evex)
|
||||
IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsrchr_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__wcsrchr_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcsrchr_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcsrchr,
|
||||
1,
|
||||
__wcsrchr_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/wcscmp.c. */
|
||||
IFUNC_IMPL (i, name, wcscmp,
|
||||
IFUNC_IMPL_ADD (array, i, wcscmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__wcscmp_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, wcscmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcscmp_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, wcscmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcscmp_evex)
|
||||
IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, wcscmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcscmp_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__wcscmp_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcscmp_avx2_rtm)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcscmp,
|
||||
1,
|
||||
__wcscmp_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/wcsncmp.c. */
|
||||
IFUNC_IMPL (i, name, wcsncmp,
|
||||
IFUNC_IMPL_ADD (array, i, wcsncmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__wcsncmp_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, wcsncmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcsncmp_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, wcsncmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsncmp_evex)
|
||||
IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsncmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsncmp_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__wcsncmp_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcsncmp_avx2_rtm)
|
||||
/* ISA V2 wrapper for GENERIC implementation because the
|
||||
GENERIC implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncmp,
|
||||
1,
|
||||
__wcsncmp_generic))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/wcscpy.c. */
|
||||
IFUNC_IMPL (i, name, wcscpy,
|
||||
|
@ -736,55 +797,59 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||
|
||||
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
|
||||
IFUNC_IMPL (i, name, wcslen,
|
||||
IFUNC_IMPL_ADD (array, i, wcslen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcslen_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, wcslen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcslen_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, wcslen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcslen_evex)
|
||||
IFUNC_IMPL_ADD (array, i, wcslen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcslen_evex512)
|
||||
IFUNC_IMPL_ADD (array, i, wcslen,
|
||||
CPU_FEATURE_USABLE (SSE4_1),
|
||||
__wcslen_sse4_1)
|
||||
IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcslen_evex)
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcslen_evex512)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcslen_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcslen_avx2_rtm)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcslen,
|
||||
CPU_FEATURE_USABLE (SSE4_1),
|
||||
__wcslen_sse4_1)
|
||||
X86_IFUNC_IMPL_ADD_V1 (array, i, wcslen,
|
||||
1,
|
||||
__wcslen_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
|
||||
IFUNC_IMPL (i, name, wcsnlen,
|
||||
IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsnlen_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcsnlen_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsnlen_evex)
|
||||
IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsnlen_evex512)
|
||||
IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||||
CPU_FEATURE_USABLE (SSE4_1),
|
||||
__wcsnlen_sse4_1)
|
||||
IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsnlen_evex)
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsnlen_evex512)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsnlen_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__wcsnlen_avx2_rtm)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcsnlen,
|
||||
CPU_FEATURE_USABLE (SSE4_1),
|
||||
__wcsnlen_sse4_1)
|
||||
X86_IFUNC_IMPL_ADD_V1 (array, i, wcsnlen,
|
||||
1,
|
||||
__wcsnlen_generic))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
|
||||
IFUNC_IMPL (i, name, wmemchr,
|
||||
|
@ -1050,20 +1115,25 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||
|
||||
/* Support sysdeps/x86_64/multiarch/strncmp.c. */
|
||||
IFUNC_IMPL (i, name, strncmp,
|
||||
IFUNC_IMPL_ADD (array, i, strncmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strncmp_avx2)
|
||||
IFUNC_IMPL_ADD (array, i, strncmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strncmp_avx2_rtm)
|
||||
IFUNC_IMPL_ADD (array, i, strncmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strncmp_evex)
|
||||
IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strncmp_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
|
||||
X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
|
||||
(CPU_FEATURE_USABLE (AVX512VL)
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)),
|
||||
__strncmp_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
|
||||
CPU_FEATURE_USABLE (AVX2),
|
||||
__strncmp_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (RTM)),
|
||||
__strncmp_avx2_rtm)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
|
||||
CPU_FEATURE_USABLE (SSE4_2),
|
||||
__strncmp_sse42)
|
||||
/* ISA V2 wrapper for SSE2 implementation because the SSE2
|
||||
implementation is also used at ISA level 2. */
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
|
||||
1,
|
||||
__strncmp_sse2))
|
||||
|
||||
#ifdef SHARED
|
||||
/* Support sysdeps/x86_64/multiarch/wmemset_chk.c. */
|
||||
|
|
|
@ -19,32 +19,39 @@
|
|||
|
||||
#include <init-arch.h>
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||||
|
||||
static inline void *
|
||||
IFUNC_SELECTOR (void)
|
||||
{
|
||||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
AVX_Fast_Unaligned_Load, ))
|
||||
{
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
return OPTIMIZE (evex);
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
return OPTIMIZE (evex);
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||||
return OPTIMIZE (avx2_rtm);
|
||||
return OPTIMIZE (avx2_rtm);
|
||||
|
||||
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||||
return OPTIMIZE (avx2);
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
Prefer_No_VZEROUPPER, !))
|
||||
return OPTIMIZE (avx2);
|
||||
}
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
|
||||
/* Keep this as a runtime check as it's not guaranteed at ISA
|
||||
level 2. */
|
||||
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
|
||||
return OPTIMIZE (sse42);
|
||||
|
||||
|
|
|
@ -23,33 +23,38 @@
|
|||
# define GENERIC sse2
|
||||
#endif
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
|
||||
|
||||
static inline void *
|
||||
IFUNC_SELECTOR (void)
|
||||
{
|
||||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||||
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
AVX_Fast_Unaligned_Load, ))
|
||||
{
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
return OPTIMIZE (evex);
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||||
return OPTIMIZE (avx2_rtm);
|
||||
|
||||
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
Prefer_No_VZEROUPPER, !))
|
||||
return OPTIMIZE (avx2);
|
||||
}
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
|
||||
return OPTIMIZE (sse4_1);
|
||||
|
||||
return OPTIMIZE (GENERIC);
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (4)
|
||||
|
||||
# include <sysdep.h>
|
||||
# include "evex256-vecs.h"
|
||||
|
|
|
@ -16,22 +16,26 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
|
||||
so we need this to build for ISA V2 builds. */
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# ifndef MEMRCHR
|
||||
# define MEMRCHR __memrchr_sse2
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <sysdep.h>
|
||||
#define VEC_SIZE 16
|
||||
#define PAGE_SIZE 4096
|
||||
# include <sysdep.h>
|
||||
# define VEC_SIZE 16
|
||||
# define PAGE_SIZE 4096
|
||||
|
||||
.text
|
||||
ENTRY_P2ALIGN(MEMRCHR, 6)
|
||||
#ifdef __ILP32__
|
||||
# ifdef __ILP32__
|
||||
/* Clear upper bits. */
|
||||
mov %RDX_LP, %RDX_LP
|
||||
#endif
|
||||
# endif
|
||||
movd %esi, %xmm0
|
||||
|
||||
/* Get end pointer. */
|
||||
|
@ -352,3 +356,4 @@ L(zero_3):
|
|||
ret
|
||||
/* 2-bytes from next cache line. */
|
||||
END(MEMRCHR)
|
||||
#endif
|
||||
|
|
|
@ -1,15 +1,2 @@
|
|||
#ifndef STRCMP
|
||||
# define STRCMP __strcasecmp_l_avx2_rtm
|
||||
#endif
|
||||
|
||||
#define _GLABEL(x) x ## _rtm
|
||||
#define GLABEL(x) _GLABEL(x)
|
||||
|
||||
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||||
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||||
|
||||
#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||||
|
||||
#define SECTION(p) p##.avx.rtm
|
||||
|
||||
#include "strcasecmp_l-avx2.S"
|
||||
#define USE_AS_STRCASECMP_L
|
||||
#include "strcmp-avx2-rtm.S"
|
||||
|
|
|
@ -16,8 +16,5 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef STRCMP
|
||||
# define STRCMP __strcasecmp_l_avx2
|
||||
#endif
|
||||
#define USE_AS_STRCASECMP_L
|
||||
#include "strcmp-avx2.S"
|
||||
|
|
|
@ -16,8 +16,5 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef STRCMP
|
||||
# define STRCMP __strcasecmp_l_evex
|
||||
#endif
|
||||
#define USE_AS_STRCASECMP_L
|
||||
#include "strcmp-evex.S"
|
||||
|
|
|
@ -17,4 +17,5 @@
|
|||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define USE_AS_STRCASECMP_L
|
||||
|
||||
#include "strcmp-sse2.S"
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (4)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
|
|
@ -16,7 +16,11 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
/* NB: atom builds with ISA level == 1 so no reason to hold onto this
|
||||
at ISA level >= 2. */
|
||||
#if ISA_SHOULD_BUILD (1)
|
||||
|
||||
# include <sysdep.h>
|
||||
# include "asm-syntax.h"
|
||||
|
|
|
@ -16,7 +16,12 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc) || defined STRCHR
|
||||
#include <isa-level.h>
|
||||
|
||||
/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
|
||||
so we need this to build for ISA V2 builds. */
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# ifndef STRCHR
|
||||
# define STRCHR __strchr_sse2
|
||||
# endif
|
||||
|
|
|
@ -26,36 +26,40 @@
|
|||
# define SYMBOL_NAME strchr
|
||||
# include <init-arch.h>
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
|
||||
|
||||
static inline void *
|
||||
IFUNC_SELECTOR (void)
|
||||
{
|
||||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||||
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
AVX_Fast_Unaligned_Load, ))
|
||||
{
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
return OPTIMIZE (evex);
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||||
return OPTIMIZE (avx2_rtm);
|
||||
|
||||
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
Prefer_No_VZEROUPPER, !))
|
||||
return OPTIMIZE (avx2);
|
||||
}
|
||||
|
||||
if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
|
||||
return OPTIMIZE (sse2_no_bsf);
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF, !))
|
||||
return OPTIMIZE (sse2);
|
||||
|
||||
return OPTIMIZE (sse2);
|
||||
return OPTIMIZE (sse2_no_bsf);
|
||||
}
|
||||
|
||||
libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
#define STRCHR __strchrnul_avx2
|
||||
#ifndef STRCHRNUL
|
||||
# define STRCHRNUL __strchrnul_avx2
|
||||
#endif
|
||||
|
||||
#define STRCHR STRCHRNUL
|
||||
#define USE_AS_STRCHRNUL 1
|
||||
|
||||
#include "strchr-avx2.S"
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
#define STRCHR __strchrnul_evex
|
||||
#ifndef STRCHRNUL
|
||||
# define STRCHRNUL __strchrnul_evex
|
||||
#endif
|
||||
|
||||
#define STRCHR STRCHRNUL
|
||||
#define USE_AS_STRCHRNUL 1
|
||||
|
||||
#include "strchr-evex.S"
|
||||
|
|
|
@ -16,12 +16,10 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
# ifndef STRCHR
|
||||
# define STRCHR __strchrnul_sse2
|
||||
# endif
|
||||
#ifndef STRCHRNUL
|
||||
# define STRCHRNUL __strchrnul_sse2
|
||||
#endif
|
||||
|
||||
#define AS_STRCHRNUL
|
||||
#define STRCHR STRCHRNUL
|
||||
|
||||
#include "strchr-sse2.S"
|
||||
|
|
|
@ -1,12 +1,9 @@
|
|||
#ifndef STRCMP
|
||||
# define STRCMP __strcmp_avx2_rtm
|
||||
#endif
|
||||
|
||||
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||||
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||||
|
||||
#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||||
|
||||
#define SECTION(p) p##.avx.rtm
|
||||
#define STRCMP_ISA _avx2_rtm
|
||||
|
||||
#include "strcmp-avx2.S"
|
||||
|
|
|
@ -16,7 +16,15 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
|
||||
# ifndef STRCMP_ISA
|
||||
# define STRCMP_ISA _avx2
|
||||
# endif
|
||||
|
||||
# include "strcmp-naming.h"
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
@ -86,15 +94,11 @@
|
|||
|
||||
# ifdef USE_AS_STRCASECMP_L
|
||||
# ifdef USE_AS_STRNCMP
|
||||
# define STRCASECMP __strncasecmp_avx2
|
||||
# define LOCALE_REG rcx
|
||||
# define LOCALE_REG_LP RCX_LP
|
||||
# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii
|
||||
# else
|
||||
# define STRCASECMP __strcasecmp_avx2
|
||||
# define LOCALE_REG rdx
|
||||
# define LOCALE_REG_LP RDX_LP
|
||||
# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii
|
||||
# endif
|
||||
# endif
|
||||
|
||||
|
@ -185,18 +189,14 @@
|
|||
.type STRCMP, @function
|
||||
.globl STRCMP
|
||||
|
||||
# ifndef GLABEL
|
||||
# define GLABEL(...) __VA_ARGS__
|
||||
# endif
|
||||
|
||||
# ifdef USE_AS_STRCASECMP_L
|
||||
ENTRY (GLABEL(STRCASECMP))
|
||||
ENTRY (STRCASECMP)
|
||||
movq __libc_tsd_LOCALE@gottpoff(%rip), %rax
|
||||
mov %fs:(%rax), %LOCALE_REG_LP
|
||||
|
||||
/* Either 1 or 5 bytes (depending on whether CET is enabled). */
|
||||
.p2align 4
|
||||
END (GLABEL(STRCASECMP))
|
||||
END (STRCASECMP)
|
||||
/* FALLTHROUGH to strcasecmp/strncasecmp_l. */
|
||||
# endif
|
||||
|
||||
|
|
|
@ -16,7 +16,12 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (4)
|
||||
|
||||
# define STRCMP_ISA _evex
|
||||
# include "strcmp-naming.h"
|
||||
|
||||
# include <sysdep.h>
|
||||
# if defined USE_AS_STRCASECMP_L
|
||||
|
@ -37,10 +42,6 @@
|
|||
# define VMOVA vmovdqa64
|
||||
|
||||
# ifdef USE_AS_WCSCMP
|
||||
# ifndef OVERFLOW_STRCMP
|
||||
# define OVERFLOW_STRCMP __wcscmp_evex
|
||||
# endif
|
||||
|
||||
# define TESTEQ subl $0xff,
|
||||
/* Compare packed dwords. */
|
||||
# define VPCMP vpcmpd
|
||||
|
@ -50,10 +51,6 @@
|
|||
/* 1 dword char == 4 bytes. */
|
||||
# define SIZE_OF_CHAR 4
|
||||
# else
|
||||
# ifndef OVERFLOW_STRCMP
|
||||
# define OVERFLOW_STRCMP __strcmp_evex
|
||||
# endif
|
||||
|
||||
# define TESTEQ incl
|
||||
/* Compare packed bytes. */
|
||||
# define VPCMP vpcmpb
|
||||
|
@ -120,15 +117,11 @@
|
|||
|
||||
# ifdef USE_AS_STRCASECMP_L
|
||||
# ifdef USE_AS_STRNCMP
|
||||
# define STRCASECMP __strncasecmp_evex
|
||||
# define LOCALE_REG rcx
|
||||
# define LOCALE_REG_LP RCX_LP
|
||||
# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii
|
||||
# else
|
||||
# define STRCASECMP __strcasecmp_evex
|
||||
# define LOCALE_REG rdx
|
||||
# define LOCALE_REG_LP RDX_LP
|
||||
# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii
|
||||
# endif
|
||||
# endif
|
||||
|
||||
|
@ -214,7 +207,6 @@
|
|||
.align 16
|
||||
.type STRCMP, @function
|
||||
.globl STRCMP
|
||||
|
||||
# ifdef USE_AS_STRCASECMP_L
|
||||
ENTRY (STRCASECMP)
|
||||
movq __libc_tsd_LOCALE@gottpoff(%rip), %rax
|
||||
|
|
|
@ -16,11 +16,20 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#include "sysdep.h"
|
||||
/* Continue building as ISA level 2. We use this as ISA V2 default
|
||||
because strcmp-sse42 uses pcmpstri (slow on some SSE4.2
|
||||
processors) and this implementation is potentially faster than
|
||||
strcmp-sse42 (aside from the slower page cross case). */
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
ENTRY ( __strcmp_sse2_unaligned)
|
||||
# define STRCMP_ISA _sse2_unaligned
|
||||
# include "strcmp-naming.h"
|
||||
|
||||
# include "sysdep.h"
|
||||
|
||||
ENTRY (STRCMP)
|
||||
movl %edi, %eax
|
||||
xorl %edx, %edx
|
||||
pxor %xmm7, %xmm7
|
||||
|
@ -208,6 +217,5 @@ L(cross_page):
|
|||
L(different):
|
||||
subl %ecx, %eax
|
||||
ret
|
||||
END (__strcmp_sse2_unaligned)
|
||||
|
||||
END (STRCMP)
|
||||
#endif
|
||||
|
|
|
@ -16,7 +16,11 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc) || IS_IN (rtld)
|
||||
#include <isa-level.h>
|
||||
|
||||
/* Continue building at ISA level 2 as the strcmp-sse42 is not always
|
||||
preferable for ISA level == 2 CPUs. */
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# define STRCMP_ISA _sse2
|
||||
# include "strcmp-naming.h"
|
||||
|
|
|
@ -16,7 +16,10 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
# define STRCMP_ISA _sse42
|
||||
|
@ -1766,7 +1769,6 @@ LABEL(unaligned_table):
|
|||
.int LABEL(ashr_0) - LABEL(unaligned_table)
|
||||
|
||||
# undef LABEL
|
||||
# undef GLABEL
|
||||
# undef SECTION
|
||||
# undef movdqa
|
||||
# undef movdqu
|
||||
|
|
|
@ -26,37 +26,50 @@
|
|||
# define SYMBOL_NAME strcmp
|
||||
# include <init-arch.h>
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME)
|
||||
OPTIMIZE (sse2_unaligned) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||||
|
||||
|
||||
|
||||
static inline void *
|
||||
IFUNC_SELECTOR (void)
|
||||
{
|
||||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
AVX_Fast_Unaligned_Load, ))
|
||||
{
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
||||
return OPTIMIZE (evex);
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||||
return OPTIMIZE (avx2_rtm);
|
||||
|
||||
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
Prefer_No_VZEROUPPER, !))
|
||||
return OPTIMIZE (avx2);
|
||||
}
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
|
||||
/* Keep this as runtime check. Some ISA level >= 2 CPUs such as
|
||||
Tremont, Silvermont, and more check this. */
|
||||
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
|
||||
return OPTIMIZE (sse42);
|
||||
|
||||
/* Keep this as runtime check. The standard SSE2 version has
|
||||
meaningful optimizations around keeping all loads aligned in the
|
||||
main loop which can benefit some ISA level >= 2 CPUs. */
|
||||
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
|
||||
return OPTIMIZE (sse2_unaligned);
|
||||
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
|
|
@ -16,7 +16,11 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
/* UNUSED. Exists purely as reference implementation. */
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (4)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (4)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
|
|
@ -16,15 +16,20 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc) || defined STRLEN
|
||||
|
||||
# ifndef STRLEN
|
||||
# define STRLEN __strlen_sse2
|
||||
# endif
|
||||
#include <isa-level.h>
|
||||
|
||||
/* ISA level >= 2 for both strlen and wcslen. wcslen uses `pminud`
|
||||
which is SSE4.1. strlen doesn't have an ISA level == 2
|
||||
implementation so the SSE2 implementation must be built with ISA
|
||||
level == 2. */
|
||||
# if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
# ifndef STRLEN
|
||||
# define STRLEN __strlen_sse2
|
||||
# endif
|
||||
|
||||
# ifdef AS_WCSLEN
|
||||
# define PMINU pminud
|
||||
# define PCMPEQ pcmpeqd
|
||||
|
@ -82,7 +87,7 @@ L(n_nonzero):
|
|||
suffice. */
|
||||
mov %RSI_LP, %R10_LP
|
||||
sar $62, %R10_LP
|
||||
jnz __wcslen_sse4_1
|
||||
jnz OVERFLOW_STRLEN
|
||||
sal $2, %RSI_LP
|
||||
# endif
|
||||
|
||||
|
|
|
@ -1,16 +1,4 @@
|
|||
#ifndef STRCMP
|
||||
# define STRCMP __strncasecmp_l_avx2_rtm
|
||||
#endif
|
||||
#define USE_AS_STRCASECMP_L
|
||||
#define USE_AS_STRNCMP
|
||||
|
||||
#define _GLABEL(x) x ## _rtm
|
||||
#define GLABEL(x) _GLABEL(x)
|
||||
|
||||
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||||
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||||
|
||||
#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||||
|
||||
#define SECTION(p) p##.avx.rtm
|
||||
#define OVERFLOW_STRCMP __strcasecmp_l_avx2_rtm
|
||||
|
||||
#include "strncase_l-avx2.S"
|
||||
#include "strcmp-avx2-rtm.S"
|
||||
|
|
|
@ -16,12 +16,7 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef STRCMP
|
||||
# define STRCMP __strncasecmp_l_avx2
|
||||
#endif
|
||||
#define USE_AS_STRCASECMP_L
|
||||
#define USE_AS_STRNCMP
|
||||
#ifndef OVERFLOW_STRCMP
|
||||
# define OVERFLOW_STRCMP __strcasecmp_l_avx2
|
||||
#endif
|
||||
|
||||
#include "strcmp-avx2.S"
|
||||
|
|
|
@ -16,10 +16,6 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef STRCMP
|
||||
# define STRCMP __strncasecmp_l_evex
|
||||
#endif
|
||||
#define OVERFLOW_STRCMP __strcasecmp_l_evex
|
||||
#define USE_AS_STRCASECMP_L
|
||||
#define USE_AS_STRNCMP
|
||||
#include "strcmp-evex.S"
|
||||
|
|
|
@ -1,4 +1,2 @@
|
|||
#define STRCMP __strncmp_avx2_rtm
|
||||
#define USE_AS_STRNCMP 1
|
||||
#define OVERFLOW_STRCMP __strcmp_avx2_rtm
|
||||
#include "strcmp-avx2-rtm.S"
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
#define STRCMP __strncmp_avx2
|
||||
#define USE_AS_STRNCMP 1
|
||||
#define OVERFLOW_STRCMP __strcmp_avx2
|
||||
|
||||
#include "strcmp-avx2.S"
|
||||
|
|
|
@ -1,3 +1,2 @@
|
|||
#define STRCMP __strncmp_evex
|
||||
#define USE_AS_STRNCMP 1
|
||||
#include "strcmp-evex.S"
|
||||
|
|
|
@ -26,33 +26,38 @@
|
|||
# define SYMBOL_NAME strncmp
|
||||
# include <init-arch.h>
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||||
|
||||
static inline void *
|
||||
IFUNC_SELECTOR (void)
|
||||
{
|
||||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||
const struct cpu_features *cpu_features = __get_cpu_features ();
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||||
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
AVX_Fast_Unaligned_Load, ))
|
||||
{
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||||
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
||||
return OPTIMIZE (evex);
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||||
return OPTIMIZE (avx2_rtm);
|
||||
|
||||
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
Prefer_No_VZEROUPPER, !))
|
||||
return OPTIMIZE (avx2);
|
||||
}
|
||||
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
|
||||
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
|
||||
return OPTIMIZE (sse42);
|
||||
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#define STRLEN __strnlen_avx2
|
||||
#ifndef STRNLEN
|
||||
# define STRNLEN __strnlen_avx2
|
||||
#endif
|
||||
|
||||
#define USE_AS_STRNLEN 1
|
||||
#define STRLEN STRNLEN
|
||||
|
||||
#include "strlen-avx2.S"
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#define STRLEN __strnlen_evex
|
||||
#ifndef STRNLEN
|
||||
# define STRNLEN __strnlen_evex
|
||||
#endif
|
||||
|
||||
#define USE_AS_STRNLEN 1
|
||||
#define STRLEN STRNLEN
|
||||
|
||||
#include "strlen-evex.S"
|
||||
|
|
|
@ -16,11 +16,11 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
# ifndef STRLEN
|
||||
# define STRLEN __strnlen_sse2
|
||||
# endif
|
||||
#ifndef STRNLEN
|
||||
# define STRNLEN __strnlen_sse2
|
||||
#endif
|
||||
|
||||
#define AS_STRNLEN
|
||||
#define AS_STRNLEN 1
|
||||
#define STRLEN STRNLEN
|
||||
|
||||
#include "strlen-sse2.S"
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (4)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
|
|
|
@ -16,36 +16,40 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
#include <isa-level.h>
|
||||
|
||||
/* ISA level >= 2 because there are no {wcs|str}rchr-sse4
|
||||
implementations. */
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
# ifndef STRRCHR
|
||||
# define STRRCHR __strrchr_sse2
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <sysdep.h>
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
# define PCMPEQ pcmpeqd
|
||||
# define CHAR_SIZE 4
|
||||
# define PMINU pminud
|
||||
# else
|
||||
# define PCMPEQ pcmpeqb
|
||||
# define CHAR_SIZE 1
|
||||
# define PMINU pminub
|
||||
# endif
|
||||
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# define PCMPEQ pcmpeqd
|
||||
# define CHAR_SIZE 4
|
||||
# define PMINU pminud
|
||||
#else
|
||||
# define PCMPEQ pcmpeqb
|
||||
# define CHAR_SIZE 1
|
||||
# define PMINU pminub
|
||||
#endif
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define VEC_SIZE 16
|
||||
# define PAGE_SIZE 4096
|
||||
# define VEC_SIZE 16
|
||||
|
||||
.text
|
||||
ENTRY(STRRCHR)
|
||||
movd %esi, %xmm0
|
||||
movq %rdi, %rax
|
||||
andl $(PAGE_SIZE - 1), %eax
|
||||
#ifndef USE_AS_WCSRCHR
|
||||
# ifndef USE_AS_WCSRCHR
|
||||
punpcklbw %xmm0, %xmm0
|
||||
punpcklwd %xmm0, %xmm0
|
||||
#endif
|
||||
# endif
|
||||
pshufd $0, %xmm0, %xmm0
|
||||
cmpl $(PAGE_SIZE - VEC_SIZE), %eax
|
||||
ja L(cross_page)
|
||||
|
@ -69,9 +73,9 @@ L(cross_page_continue):
|
|||
/* We are off by 3 for wcsrchr if search CHAR is non-zero. If
|
||||
search CHAR is zero we are correct. Either way `andq
|
||||
-CHAR_SIZE, %rax` gets the correct result. */
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
L(ret0):
|
||||
ret
|
||||
|
||||
|
@ -85,9 +89,9 @@ L(first_vec_x0_test):
|
|||
jz L(ret0)
|
||||
bsrl %eax, %eax
|
||||
addq %r8, %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
|
@ -100,9 +104,9 @@ L(first_vec_x1):
|
|||
jz L(first_vec_x0_test)
|
||||
bsrl %eax, %eax
|
||||
leaq (VEC_SIZE)(%rdi, %rax), %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
|
@ -113,9 +117,9 @@ L(first_vec_x1_test):
|
|||
jz L(first_vec_x0_test)
|
||||
bsrl %eax, %eax
|
||||
leaq (VEC_SIZE)(%rdi, %rax), %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
|
@ -128,9 +132,9 @@ L(first_vec_x2):
|
|||
jz L(first_vec_x1_test)
|
||||
bsrl %eax, %eax
|
||||
leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
|
@ -165,27 +169,27 @@ L(first_loop):
|
|||
/* Since SSE2 has no pminud, wcsrchr needs separate logic for
|
||||
detecting zero. Note if this is found to be a bottleneck it
|
||||
may be worth adding an SSE4.1 wcsrchr implementation. */
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
movaps %xmm5, %xmm6
|
||||
pxor %xmm8, %xmm8
|
||||
|
||||
PCMPEQ %xmm8, %xmm5
|
||||
PCMPEQ %xmm4, %xmm8
|
||||
por %xmm5, %xmm8
|
||||
#else
|
||||
# else
|
||||
movaps %xmm5, %xmm6
|
||||
PMINU %xmm4, %xmm5
|
||||
#endif
|
||||
# endif
|
||||
|
||||
movaps %xmm4, %xmm9
|
||||
PCMPEQ %xmm0, %xmm4
|
||||
PCMPEQ %xmm0, %xmm6
|
||||
movaps %xmm6, %xmm7
|
||||
por %xmm4, %xmm6
|
||||
#ifndef USE_AS_WCSRCHR
|
||||
# ifndef USE_AS_WCSRCHR
|
||||
pxor %xmm8, %xmm8
|
||||
PCMPEQ %xmm5, %xmm8
|
||||
#endif
|
||||
# endif
|
||||
pmovmskb %xmm8, %ecx
|
||||
pmovmskb %xmm6, %eax
|
||||
|
||||
|
@ -219,9 +223,9 @@ L(first_loop_old_match):
|
|||
|
||||
bsrl %eax, %eax
|
||||
addq %rsi, %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
|
@ -247,9 +251,9 @@ L(new_match):
|
|||
jz L(first_loop_old_match)
|
||||
bsrl %eax, %eax
|
||||
addq %rdi, %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
ret
|
||||
|
||||
/* Save minimum state for getting most recent match. We can
|
||||
|
@ -267,27 +271,27 @@ L(second_loop):
|
|||
/* Since SSE2 has no pminud, wcsrchr needs separate logic for
|
||||
detecting zero. Note if this is found to be a bottleneck it
|
||||
may be worth adding an SSE4.1 wcsrchr implementation. */
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
movaps %xmm5, %xmm6
|
||||
pxor %xmm8, %xmm8
|
||||
|
||||
PCMPEQ %xmm8, %xmm5
|
||||
PCMPEQ %xmm4, %xmm8
|
||||
por %xmm5, %xmm8
|
||||
#else
|
||||
# else
|
||||
movaps %xmm5, %xmm6
|
||||
PMINU %xmm4, %xmm5
|
||||
#endif
|
||||
# endif
|
||||
|
||||
movaps %xmm4, %xmm9
|
||||
PCMPEQ %xmm0, %xmm4
|
||||
PCMPEQ %xmm0, %xmm6
|
||||
movaps %xmm6, %xmm7
|
||||
por %xmm4, %xmm6
|
||||
#ifndef USE_AS_WCSRCHR
|
||||
# ifndef USE_AS_WCSRCHR
|
||||
pxor %xmm8, %xmm8
|
||||
PCMPEQ %xmm5, %xmm8
|
||||
#endif
|
||||
# endif
|
||||
|
||||
pmovmskb %xmm8, %ecx
|
||||
pmovmskb %xmm6, %eax
|
||||
|
@ -312,9 +316,9 @@ L(second_loop_old_match):
|
|||
orl %ecx, %eax
|
||||
bsrl %eax, %eax
|
||||
addq %rsi, %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
|
@ -340,9 +344,9 @@ L(second_loop_new_match):
|
|||
jz L(second_loop_old_match)
|
||||
bsrl %eax, %eax
|
||||
addq %rdi, %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
ret
|
||||
|
||||
.p2align 4,, 4
|
||||
|
@ -366,9 +370,10 @@ L(cross_page):
|
|||
jz L(ret1)
|
||||
bsrl %eax, %eax
|
||||
addq %rdi, %rax
|
||||
#ifdef USE_AS_WCSRCHR
|
||||
# ifdef USE_AS_WCSRCHR
|
||||
andq $-CHAR_SIZE, %rax
|
||||
#endif
|
||||
# endif
|
||||
L(ret1):
|
||||
ret
|
||||
END(STRRCHR)
|
||||
#endif
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "../strchr-isa-default-impl.h"
|
||||
|
||||
ENTRY(__strstr_sse2_unaligned)
|
||||
movzbl (%rsi), %eax
|
||||
|
@ -75,7 +76,7 @@ L(next_pair_index):
|
|||
.p2align 4
|
||||
L(strchr):
|
||||
movzbl %al, %esi
|
||||
jmp __strchr_sse2
|
||||
jmp DEFAULT_STRCHR
|
||||
|
||||
.p2align 4
|
||||
L(pair_loop):
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
#define STRCHR __wcschr_avx2
|
||||
#ifndef WCSCHR
|
||||
# define WCSCHR __wcschr_avx2
|
||||
#endif
|
||||
|
||||
#define STRCHR WCSCHR
|
||||
#define USE_AS_WCSCHR 1
|
||||
|
||||
#include "strchr-avx2.S"
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
#define STRCHR __wcschr_evex
|
||||
#ifndef WCSCHR
|
||||
# define WCSCHR __wcschr_evex
|
||||
#endif
|
||||
|
||||
#define STRCHR WCSCHR
|
||||
#define USE_AS_WCSCHR 1
|
||||
|
||||
#include "strchr-evex.S"
|
||||
|
|
|
@ -16,13 +16,17 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
/* ISA level >= 2 because there is no wcschr-sse4 implementation. */
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# ifndef WCSCHR
|
||||
# define WCSCHR __wcschr_sse2
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <sysdep.h>
|
||||
# include <sysdep.h>
|
||||
|
||||
.text
|
||||
ENTRY (WCSCHR)
|
||||
|
@ -155,3 +159,4 @@ L(return_null):
|
|||
ret
|
||||
|
||||
END (WCSCHR)
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
#define STRCMP __wcscmp_avx2_rtm
|
||||
#define USE_AS_WCSCMP 1
|
||||
|
||||
#include "strcmp-avx2-rtm.S"
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
#define STRCMP __wcscmp_avx2
|
||||
#define USE_AS_WCSCMP 1
|
||||
|
||||
#include "strcmp-avx2.S"
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
#define STRCMP __wcscmp_evex
|
||||
#define USE_AS_WCSCMP 1
|
||||
|
||||
#include "strcmp-evex.S"
|
||||
|
|
|
@ -16,11 +16,16 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define USE_AS_WCSCMP
|
||||
#define STRCMP_ISA _sse2
|
||||
#include "strcmp-naming.h"
|
||||
#include <isa-level.h>
|
||||
|
||||
#include <sysdep.h>
|
||||
/* ISA level >= 2 because there is no wcscmp-sse4 implementation. */
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
# include <sysdep.h>
|
||||
|
||||
/* Needed to get right name. */
|
||||
# define USE_AS_WCSCMP
|
||||
# define STRCMP_ISA _sse2
|
||||
# include "strcmp-naming.h"
|
||||
|
||||
/* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */
|
||||
|
||||
|
@ -949,3 +954,4 @@ L(equal):
|
|||
ret
|
||||
|
||||
END (STRCMP)
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#define STRLEN __wcslen_avx2
|
||||
#ifndef WCSLEN
|
||||
# define WCSLEN __wcslen_avx2
|
||||
#endif
|
||||
|
||||
#define STRLEN WCSLEN
|
||||
#define USE_AS_WCSLEN 1
|
||||
|
||||
#include "strlen-avx2.S"
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#define STRLEN __wcslen_evex
|
||||
#ifndef WCSLEN
|
||||
# define WCSLEN __wcslen_evex
|
||||
#endif
|
||||
|
||||
#define STRLEN WCSLEN
|
||||
#define USE_AS_WCSLEN 1
|
||||
|
||||
#include "strlen-evex.S"
|
||||
|
|
|
@ -16,13 +16,16 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
# ifndef WCSLEN
|
||||
# define WCSLEN __wcslen_sse2
|
||||
# endif
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (1)
|
||||
|
||||
# include <sysdep.h>
|
||||
|
||||
#ifndef WCSLEN
|
||||
# define WCSLEN __wcslen_sse2
|
||||
#endif
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
ENTRY (WCSLEN)
|
||||
|
@ -235,3 +238,5 @@ L(exit_tail7):
|
|||
ret
|
||||
|
||||
END (WCSLEN)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
#define AS_WCSLEN
|
||||
#define STRLEN __wcslen_sse4_1
|
||||
#define SECTION(p) p##.sse4.1
|
||||
#ifndef WCSLEN
|
||||
# define WCSLEN __wcslen_sse4_1
|
||||
#endif
|
||||
|
||||
#define AS_WCSLEN 1
|
||||
#define STRLEN WCSLEN
|
||||
#define SECTION(p) p##.sse4.1
|
||||
|
||||
#include "strlen-sse2.S"
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
#define STRCMP __wcsncmp_avx2_rtm
|
||||
#define USE_AS_STRNCMP 1
|
||||
#define USE_AS_WCSCMP 1
|
||||
#define OVERFLOW_STRCMP __wcscmp_avx2_rtm
|
||||
#include "strcmp-avx2-rtm.S"
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#define STRCMP __wcsncmp_avx2
|
||||
#define USE_AS_STRNCMP 1
|
||||
#define USE_AS_WCSCMP 1
|
||||
#define OVERFLOW_STRCMP __wcscmp_avx2
|
||||
|
||||
#include "strcmp-avx2.S"
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
#define STRCMP __wcsncmp_evex
|
||||
#define USE_AS_STRNCMP 1
|
||||
#define USE_AS_WCSCMP 1
|
||||
|
||||
|
|
|
@ -16,5 +16,10 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define WCSNCMP __wcsncmp_generic
|
||||
#include <wcsmbs/wcsncmp.c>
|
||||
#include <isa-level.h>
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# define WCSNCMP __wcsncmp_generic
|
||||
# include <wcsmbs/wcsncmp.c>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#define STRLEN __wcsnlen_avx2
|
||||
#ifndef WCSNLEN
|
||||
# define WCSNLEN __wcsnlen_avx2
|
||||
#endif
|
||||
|
||||
#define STRLEN WCSNLEN
|
||||
#define USE_AS_WCSLEN 1
|
||||
#define USE_AS_STRNLEN 1
|
||||
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#define STRLEN __wcsnlen_evex
|
||||
#ifndef WCSNLEN
|
||||
# define WCSNLEN __wcsnlen_evex
|
||||
#endif
|
||||
|
||||
#define STRLEN WCSNLEN
|
||||
#define USE_AS_WCSLEN 1
|
||||
#define USE_AS_STRNLEN 1
|
||||
|
||||
|
|
|
@ -16,13 +16,18 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if ISA_SHOULD_BUILD (1)
|
||||
|
||||
#if IS_IN (libc)
|
||||
# include <wchar.h>
|
||||
|
||||
# define WCSNLEN __wcsnlen_generic
|
||||
# ifndef WCSNLEN
|
||||
# define WCSNLEN __wcsnlen_generic
|
||||
# endif
|
||||
|
||||
extern __typeof (wcsnlen) __wcsnlen_generic;
|
||||
#endif
|
||||
|
||||
#include "wcsmbs/wcsnlen.c"
|
||||
# include "wcsmbs/wcsnlen.c"
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
#ifndef WCSNLEN
|
||||
# define WCSNLEN __wcsnlen_sse4_1
|
||||
# define OVERFLOW_STRLEN __wcslen_sse4_1
|
||||
#endif
|
||||
|
||||
#define AS_WCSLEN
|
||||
#define AS_STRNLEN
|
||||
#define STRLEN __wcsnlen_sse4_1
|
||||
#define STRLEN WCSNLEN
|
||||
#define SECTION(p) p##.sse4.1
|
||||
|
||||
#include "strlen-sse2.S"
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
#define STRRCHR __wcsrchr_avx2
|
||||
#ifndef WCSRCHR
|
||||
# define WCSRCHR __wcsrchr_avx2
|
||||
#endif
|
||||
|
||||
#define STRRCHR WCSRCHR
|
||||
#define USE_AS_WCSRCHR 1
|
||||
|
||||
#include "strrchr-avx2.S"
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
#define STRRCHR __wcsrchr_evex
|
||||
#ifndef WCSRCHR
|
||||
# define WCSRCHR __wcsrchr_evex
|
||||
#endif
|
||||
|
||||
#define STRRCHR WCSRCHR
|
||||
#define USE_AS_WCSRCHR 1
|
||||
#include "strrchr-evex.S"
|
||||
|
|
|
@ -16,12 +16,11 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
# ifndef STRRCHR
|
||||
# define STRRCHR __wcsrchr_sse2
|
||||
# endif
|
||||
#ifndef WCSRCHR
|
||||
# define WCSRCHR __wcsrchr_sse2
|
||||
#endif
|
||||
|
||||
#define STRRCHR WCSRCHR
|
||||
#define USE_AS_WCSRCHR 1
|
||||
#define NO_PMINU 1
|
||||
|
||||
|
|
|
@ -1,11 +1,35 @@
|
|||
/* strcasecmp_l dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Symbols = __strcasecmp_l and __strcasecmp. */
|
||||
|
||||
#include "multiarch/strcasecmp_l-sse2.S"
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strcasecmp_l-sse2.S"
|
||||
/* This may cause regressions on some processors that heavily prefer
|
||||
aligned loads or have a slow implementation of the `pcmpstri`
|
||||
instruction. */
|
||||
#define DEFAULT_IMPL_V2 "multiarch/strcasecmp_l-sse4_2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strcasecmp_l-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strcasecmp_l-evex.S"
|
||||
|
||||
libc_hidden_builtin_def (__strcasecmp_l)
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
libc_hidden_def (__strcasecmp_l)
|
||||
weak_alias (__strcasecmp_l, strcasecmp_l)
|
||||
libc_hidden_def (strcasecmp_l)
|
||||
|
||||
weak_alias (__strcasecmp, strcasecmp)
|
||||
libc_hidden_def (__strcasecmp)
|
||||
weak_alias (__strcasecmp, strcasecmp)
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
/* Set default strchr impl based on ISA level.
|
||||
Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <isa-level.h>
|
||||
#if MINIMUM_X86_ISA_LEVEL == 1 || MINIMUM_X86_ISA_LEVEL == 2
|
||||
# define DEFAULT_STRCHR __strchr_sse2
|
||||
#elif MINIMUM_X86_ISA_LEVEL == 3
|
||||
# define DEFAULT_STRCHR __strchr_avx2
|
||||
#elif MINIMUM_X86_ISA_LEVEL == 4
|
||||
# define DEFAULT_STRCHR __strchr_evex
|
||||
#else
|
||||
# error "Unknown default strchr implementation"
|
||||
#endif
|
|
@ -1,5 +1,4 @@
|
|||
/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
|
||||
For AMD x86-64.
|
||||
/* strchr dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2009-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -17,8 +16,13 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define STRCHR strchr
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strchr-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strchr-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strchr-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
#define STRCHR strchr
|
||||
#include "multiarch/strchr-sse2.S"
|
||||
weak_alias (strchr, index)
|
||||
libc_hidden_builtin_def (strchr)
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
/* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR
|
||||
or terminating NUL byte.
|
||||
For AMD x86-64.
|
||||
/* strchrnul dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2009-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -18,7 +16,12 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define STRCHR __strchrnul
|
||||
#include "multiarch/strchrnul-sse2.S"
|
||||
#define STRCHRNUL __strchrnul
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strchrnul-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strchrnul-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strchrnul-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
weak_alias (__strchrnul, strchrnul)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Highly optimized version for x86-64.
|
||||
/* strcmp dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 1999-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -18,5 +18,14 @@
|
|||
|
||||
/* Symbol = strcmp. */
|
||||
|
||||
#include "multiarch/strcmp-sse2.S"
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strcmp-sse2.S"
|
||||
/* strcmp-sse2-unaligned.S is often faster than strcmp-sse42.S and
|
||||
doesn't have the drawback of using the `pcmpstri` instruction
|
||||
which can be very slow on some CPUs. */
|
||||
#define DEFAULT_IMPL_V2 "multiarch/strcmp-sse2-unaligned.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strcmp-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strcmp-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
libc_hidden_builtin_def (strcmp)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* SSE2 version of strlen.
|
||||
/* strlen dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2021-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -17,6 +17,11 @@
|
|||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define STRLEN strlen
|
||||
#include "multiarch/strlen-sse2.S"
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strlen-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strlen-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strlen-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
libc_hidden_builtin_def (strlen)
|
||||
|
|
|
@ -1,11 +1,35 @@
|
|||
/* strncasecmp_l dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Symbols = __strncasecmp_l and __strncasecmp. */
|
||||
|
||||
#include "multiarch/strncase_l-sse2.S"
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strncase_l-sse2.S"
|
||||
/* This may cause regressions on some processors that heavily prefer
|
||||
aligned loads or have a slow implementation of the `pcmpstri`
|
||||
instruction. */
|
||||
#define DEFAULT_IMPL_V2 "multiarch/strncase_l-sse4_2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strncase_l-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strncase_l-evex.S"
|
||||
|
||||
libc_hidden_builtin_def (__strncasecmp_l)
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
libc_hidden_def (__strncasecmp_l)
|
||||
weak_alias (__strncasecmp_l, strncasecmp_l)
|
||||
libc_hidden_def (strncasecmp_l)
|
||||
|
||||
weak_alias (__strncasecmp, strncasecmp)
|
||||
libc_hidden_def (__strncasecmp)
|
||||
weak_alias (__strncasecmp, strncasecmp)
|
||||
|
|
|
@ -1,4 +1,31 @@
|
|||
/* strncmp dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 1999-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Symbol = strncmp. */
|
||||
|
||||
#include "multiarch/strncmp-sse2.S"
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strncmp-sse2.S"
|
||||
/* This may cause regressions on some processors that heavily prefer
|
||||
aligned loads or have a slow implementation of the `pcmpstri`
|
||||
instruction. */
|
||||
#define DEFAULT_IMPL_V2 "multiarch/strncmp-sse4_2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strncmp-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strncmp-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
libc_hidden_builtin_def (strncmp)
|
||||
|
|
|
@ -1,6 +1,29 @@
|
|||
#define STRLEN __strnlen
|
||||
#include "multiarch/strnlen-sse2.S"
|
||||
/* strnlen dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define STRNLEN __strnlen
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strnlen-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strnlen-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strnlen-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
weak_alias (__strnlen, strnlen)
|
||||
libc_hidden_def (__strnlen)
|
||||
weak_alias (__strnlen, strnlen);
|
||||
libc_hidden_builtin_def (strnlen)
|
||||
libc_hidden_def (strnlen)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
|
||||
/* strrchr dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2013-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -17,6 +17,12 @@
|
|||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define STRRCHR strrchr
|
||||
#include "multiarch/strrchr-sse2.S"
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/strrchr-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/strrchr-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/strrchr-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
weak_alias (strrchr, rindex)
|
||||
libc_hidden_builtin_def (strrchr)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* wcschr with SSSE3
|
||||
/* wcschr dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2011-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -16,9 +16,14 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
|
||||
#define WCSCHR __wcschr
|
||||
#include "multiarch/wcschr-sse2.S"
|
||||
libc_hidden_def(__wcschr)
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/wcschr-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/wcschr-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/wcschr-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
libc_hidden_def (__wcschr)
|
||||
weak_alias (__wcschr, wcschr)
|
||||
libc_hidden_weak (wcschr)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Optimized wcscmp for x86-64 with SSE2.
|
||||
/* wcscmp dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2011-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -18,6 +18,11 @@
|
|||
|
||||
/* Symbol = __wcscmp. */
|
||||
|
||||
#include "multiarch/wcscmp-sse2.S"
|
||||
#define DEFAULT_IMPL_V1 "multiarch/wcscmp-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/wcscmp-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/wcscmp-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
libc_hidden_def (__wcscmp)
|
||||
weak_alias (__wcscmp, wcscmp)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Optimized wcslen for x86-64 with SSE2.
|
||||
/* wcslen dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2011-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -17,5 +17,18 @@
|
|||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define WCSLEN __wcslen
|
||||
#include "multiarch/wcslen-sse2.S"
|
||||
weak_alias(__wcslen, wcslen)
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/wcslen-sse2.S"
|
||||
#define DEFAULT_IMPL_V2 "multiarch/wcslen-sse4_1.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/wcslen-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/wcslen-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
||||
weak_alias (__wcslen, wcslen)
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL == 2 && !IS_IN (rtld)
|
||||
/* Hidden def so it can be used as overflow fallback in
|
||||
wcsnlen-sse4_1.S. */
|
||||
libc_hidden_def (__wcslen)
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
/* wcsncmp dispatch for RTLD and non-multiarch .c ISA level <= 2 build.
|
||||
Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* wcsncmp non-multiarch build is split into two files,
|
||||
wcsncmp-generic.c and wcsncmp.S. The wcsncmp-generic.c build is for
|
||||
ISA level <= 2 and just uses wcsmbs/wcsncmp.c. This must be split
|
||||
into two files because we cannot include C code from assembly or
|
||||
vice versa. */
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 2
|
||||
# include "wcsmbs/wcsncmp.c"
|
||||
#endif
|
|
@ -0,0 +1,40 @@
|
|||
/* wcsncmp dispatch for RTLD and non-multiarch .c files
|
||||
Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* wcsncmp non-multiarch build is split into two files,
|
||||
wcsncmp-generic.c and wcsncmp.S. The wcsncmp.S build is for
|
||||
ISA level >= 3 and uses the optimized assembly implementations in
|
||||
multiarch/wcsncmp*.S. This must be split into two files because
|
||||
we cannot include C code from assembly or vice versa. */
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 3
|
||||
|
||||
/* Symbol = wcsncmp. */
|
||||
|
||||
# define DEFAULT_IMPL_V3 "multiarch/wcsncmp-avx2.S"
|
||||
# define DEFAULT_IMPL_V4 "multiarch/wcsncmp-evex.S"
|
||||
|
||||
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
|
||||
should never be used from here. */
|
||||
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
|
||||
|
||||
# include "isa-default-impl.h"
|
||||
|
||||
#endif
|
|
@ -0,0 +1,29 @@
|
|||
/* wcsnlen dispatch for RTLD and non-multiarch .c ISA level 1 build.
|
||||
Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* wcsnlen non-multiarch build is split into two files,
|
||||
wcsnlen-generic.c and wcsnlen.S. The wcsnlen-generic.c build is for
|
||||
ISA level <= 1 and just uses wcsmbs/wcsnlen.c. This must be split
|
||||
into two files because we cannot include C code from assembly or
|
||||
vice versa. */
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 1
|
||||
# include "wcsmbs/wcsnlen.c"
|
||||
#endif
|
|
@ -0,0 +1,49 @@
|
|||
/* wcsnlen dispatch for RTLD and non-multiarch .c files
|
||||
Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* wcsnlen non-multiarch build is split into two files,
|
||||
wcsnlen-generic.c and wcsnlen.S. The wcsnlen.S build is for
|
||||
ISA level >= 2 and uses the optimized assembly implementations in
|
||||
multiarch/wcsnlen*.S. This must be split into two files because
|
||||
we cannot include C code from assembly or vice versa. */
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 2
|
||||
|
||||
# define WCSNLEN __wcsnlen
|
||||
/* This symbol must stay linked to the name in wcslen.S. */
|
||||
#if IS_IN (rtld)
|
||||
# define OVERFLOW_STRLEN __wcslen
|
||||
#else
|
||||
# define OVERFLOW_STRLEN HIDDEN_JUMPTARGET (__wcslen)
|
||||
#endif
|
||||
|
||||
# define DEFAULT_IMPL_V2 "multiarch/wcsnlen-sse4_1.S"
|
||||
# define DEFAULT_IMPL_V3 "multiarch/wcsnlen-avx2.S"
|
||||
# define DEFAULT_IMPL_V4 "multiarch/wcsnlen-evex.S"
|
||||
|
||||
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
|
||||
should never be used from here. */
|
||||
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
|
||||
|
||||
# include "isa-default-impl.h"
|
||||
|
||||
weak_alias (__wcsnlen, wcsnlen)
|
||||
libc_hidden_def (__wcsnlen)
|
||||
#endif
|
|
@ -1,4 +1,4 @@
|
|||
/* wcsrchr optimized with SSE2.
|
||||
/* wcsrchr dispatch for RTLD and non-multiarch build
|
||||
Copyright (C) 2011-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
|
@ -16,5 +16,10 @@
|
|||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define STRRCHR wcsrchr
|
||||
#include "multiarch/wcsrchr-sse2.S"
|
||||
#define WCSRCHR wcsrchr
|
||||
|
||||
#define DEFAULT_IMPL_V1 "multiarch/wcsrchr-sse2.S"
|
||||
#define DEFAULT_IMPL_V3 "multiarch/wcsrchr-avx2.S"
|
||||
#define DEFAULT_IMPL_V4 "multiarch/wcsrchr-evex.S"
|
||||
|
||||
#include "isa-default-impl.h"
|
||||
|
|
Loading…
Reference in New Issue