x86: Add support to build strcmp/strlen/strchr with explicit ISA level

1. Add default ISA level selection in non-multiarch/rtld
   implementations.

2. Add ISA level build guards to different implementations.
    - E.g. strcmp-avx2.S, which is ISA level 3, will only build if the
      compiled ISA level is <= 3. Otherwise there is no reason to
      include it, as we will always use one of the ISA level 4
      implementations (strcmp-evex.S).

3. Refactor the ifunc selector and ifunc implementation list to use
   the ISA level aware wrapper macros that allow functions below the
   compiled ISA level (with a guaranteed replacement) to be skipped.

Tested with and without multiarch on x86_64 for ISA levels:
{generic, x86-64-v2, x86-64-v3, x86-64-v4}

And m32 with and without multiarch.
This commit is contained in:
Noah Goldstein 2022-07-13 16:32:59 -07:00
parent c353689e49
commit ceabdcd130
88 changed files with 1157 additions and 618 deletions

View File

@ -84,6 +84,7 @@
/* ISA level >= 2 guaranteed includes. */
#define SSE4_2_X86_ISA_LEVEL 2
#define SSE4_1_X86_ISA_LEVEL 2
#define SSSE3_X86_ISA_LEVEL 2
@ -101,9 +102,18 @@
when ISA level < 3. */
#define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
/* NB: This feature is disabled when ISA level >= 3. All CPUs with
this feature don't run on glibc built with ISA level >= 3. */
#define Slow_SSE42_X86_ISA_LEVEL 3
/* Feature(s) enabled when ISA level >= 2. */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
/* NB: This feature is disabled when ISA level >= 2, which was enabled
for the early Atom CPUs. */
#define Slow_BSF_X86_ISA_LEVEL 2
/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
macros are wrappers for the respective CPU_FEATURE{S}_{USABLE|ARCH}_P
runtime checks. They differ in two ways.

View File

@ -197,6 +197,12 @@ gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
endif
ifeq ($(subdir),wcsmbs)
sysdep_routines += \
wcsncmp-generic \
wcsnlen-generic \
# sysdep_routines
tests += \
tst-rsi-wcslen
endif

View File

@ -1,4 +1,4 @@
/* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using
/* memrchr dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -18,5 +18,11 @@
<https://www.gnu.org/licenses/>. */
#define MEMRCHR __memrchr
#include "multiarch/memrchr-sse2.S"
#define DEFAULT_IMPL_V1 "multiarch/memrchr-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/memrchr-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/memrchr-evex.S"
#include "isa-default-impl.h"
weak_alias (__memrchr, memrchr)

View File

@ -144,11 +144,9 @@ sysdep_routines += \
wcslen-sse4_1 \
wcsncmp-avx2 \
wcsncmp-avx2-rtm \
wcsncmp-generic \
wcsncmp-evex \
wcsnlen-avx2 \
wcsnlen-avx2-rtm \
wcsnlen-generic \
wcsnlen-evex \
wcsnlen-evex512 \
wcsnlen-sse4_1 \

View File

@ -23,28 +23,32 @@
# define GENERIC sse2
#endif
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}

View File

@ -205,19 +205,22 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/memrchr.c. */
IFUNC_IMPL (i, name, memrchr,
IFUNC_IMPL_ADD (array, i, memrchr,
CPU_FEATURE_USABLE (AVX2),
__memrchr_avx2)
IFUNC_IMPL_ADD (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__memrchr_avx2_rtm)
IFUNC_IMPL_ADD (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__memrchr_evex)
IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__memrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
CPU_FEATURE_USABLE (AVX2),
__memrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__memrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, memrchr,
1,
__memrchr_sse2))
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/memset_chk.c. */
@ -346,49 +349,57 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strlen.c. */
IFUNC_IMPL (i, name, strlen,
IFUNC_IMPL_ADD (array, i, strlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__strlen_avx2)
IFUNC_IMPL_ADD (array, i, strlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strlen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strlen_evex)
IFUNC_IMPL_ADD (array, i, strlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strlen_evex512)
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strlen_evex)
X86_IFUNC_IMPL_ADD_V4 (array, i, strlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strlen_evex512)
X86_IFUNC_IMPL_ADD_V3 (array, i, strlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__strlen_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strlen_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strlen,
1,
__strlen_sse2))
/* Support sysdeps/x86_64/multiarch/strnlen.c. */
IFUNC_IMPL (i, name, strnlen,
IFUNC_IMPL_ADD (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__strnlen_avx2)
IFUNC_IMPL_ADD (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strnlen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strnlen_evex)
IFUNC_IMPL_ADD (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strnlen_evex512)
IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strnlen_evex)
X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strnlen_evex512)
X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__strnlen_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strnlen_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strnlen,
1,
__strnlen_sse2))
/* Support sysdeps/x86_64/multiarch/stpncpy.c. */
IFUNC_IMPL (i, name, stpncpy,
@ -422,40 +433,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
IFUNC_IMPL (i, name, strcasecmp,
IFUNC_IMPL_ADD (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strcasecmp_evex)
IFUNC_IMPL_ADD (array, i, strcasecmp,
CPU_FEATURE_USABLE (AVX2),
__strcasecmp_avx2)
IFUNC_IMPL_ADD (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strcasecmp_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strcasecmp,
CPU_FEATURE_USABLE (SSE4_2),
__strcasecmp_sse42)
IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strcasecmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
CPU_FEATURE_USABLE (AVX2),
__strcasecmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strcasecmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
CPU_FEATURE_USABLE (SSE4_2),
__strcasecmp_sse42)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
1,
__strcasecmp_sse2))
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
IFUNC_IMPL (i, name, strcasecmp_l,
IFUNC_IMPL_ADD (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strcasecmp_l_evex)
IFUNC_IMPL_ADD (array, i, strcasecmp,
CPU_FEATURE_USABLE (AVX2),
__strcasecmp_l_avx2)
IFUNC_IMPL_ADD (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strcasecmp_l_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strcasecmp_l,
CPU_FEATURE_USABLE (SSE4_2),
__strcasecmp_l_sse42)
IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
__strcasecmp_l_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strcasecmp_l_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
CPU_FEATURE_USABLE (AVX2),
__strcasecmp_l_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strcasecmp_l_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
CPU_FEATURE_USABLE (SSE4_2),
__strcasecmp_l_sse42)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
1,
__strcasecmp_l_sse2))
/* Support sysdeps/x86_64/multiarch/strcat.c. */
IFUNC_IMPL (i, name, strcat,
@ -474,74 +492,95 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strchr.c. */
IFUNC_IMPL (i, name, strchr,
IFUNC_IMPL_ADD (array, i, strchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__strchr_avx2)
IFUNC_IMPL_ADD (array, i, strchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strchr_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strchr_evex)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__strchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strchr,
1,
__strchr_sse2)
X86_IFUNC_IMPL_ADD_V1 (array, i, strchr,
1,
__strchr_sse2_no_bsf))
/* Support sysdeps/x86_64/multiarch/strchrnul.c. */
IFUNC_IMPL (i, name, strchrnul,
IFUNC_IMPL_ADD (array, i, strchrnul,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__strchrnul_avx2)
IFUNC_IMPL_ADD (array, i, strchrnul,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strchrnul_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strchrnul,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strchrnul_evex)
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strchrnul,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strchrnul_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__strchrnul_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strchrnul_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strchrnul,
1,
__strchrnul_sse2))
/* Support sysdeps/x86_64/multiarch/strrchr.c. */
IFUNC_IMPL (i, name, strrchr,
IFUNC_IMPL_ADD (array, i, strrchr,
CPU_FEATURE_USABLE (AVX2),
__strrchr_avx2)
IFUNC_IMPL_ADD (array, i, strrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strrchr_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strrchr_evex)
IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
CPU_FEATURE_USABLE (AVX2),
__strrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strrchr,
1,
__strrchr_sse2))
/* Support sysdeps/x86_64/multiarch/strcmp.c. */
IFUNC_IMPL (i, name, strcmp,
IFUNC_IMPL_ADD (array, i, strcmp,
CPU_FEATURE_USABLE (AVX2),
__strcmp_avx2)
IFUNC_IMPL_ADD (array, i, strcmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strcmp_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strcmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strcmp_evex)
IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2),
__strcmp_sse42)
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strcmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strcmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
CPU_FEATURE_USABLE (AVX2),
__strcmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strcmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
CPU_FEATURE_USABLE (SSE4_2),
__strcmp_sse42)
/* ISA V2 wrapper for SSE2 implementations because the SSE2
implementations are also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
1,
__strcmp_sse2_unaligned)
X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
1,
__strcmp_sse2))
/* Support sysdeps/x86_64/multiarch/strcpy.c. */
IFUNC_IMPL (i, name, strcpy,
@ -568,41 +607,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp,
IFUNC_IMPL_ADD (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strncasecmp_evex)
IFUNC_IMPL_ADD (array, i, strncasecmp,
CPU_FEATURE_USABLE (AVX2),
__strncasecmp_avx2)
IFUNC_IMPL_ADD (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strncasecmp_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strncasecmp,
CPU_FEATURE_USABLE (SSE4_2),
__strncasecmp_sse42)
IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
__strncasecmp_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strncasecmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
CPU_FEATURE_USABLE (AVX2),
__strncasecmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strncasecmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
CPU_FEATURE_USABLE (SSE4_2),
__strncasecmp_sse42)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
1,
__strncasecmp_sse2))
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp_l,
IFUNC_IMPL_ADD (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strncasecmp_l_evex)
IFUNC_IMPL_ADD (array, i, strncasecmp,
CPU_FEATURE_USABLE (AVX2),
__strncasecmp_l_avx2)
IFUNC_IMPL_ADD (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strncasecmp_l_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strncasecmp_l,
CPU_FEATURE_USABLE (SSE4_2),
__strncasecmp_l_sse42)
IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
__strncasecmp_l_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strncasecmp_l_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
CPU_FEATURE_USABLE (AVX2),
__strncasecmp_l_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strncasecmp_l_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
CPU_FEATURE_USABLE (SSE4_2),
__strncasecmp_l_sse42)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
1,
__strncasecmp_l_sse2))
/* Support sysdeps/x86_64/multiarch/strncat.c. */
IFUNC_IMPL (i, name, strncat,
@ -664,69 +709,85 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wcschr.c. */
IFUNC_IMPL (i, name, wcschr,
IFUNC_IMPL_ADD (array, i, wcschr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wcschr_avx2)
IFUNC_IMPL_ADD (array, i, wcschr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcschr_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcschr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcschr_evex)
IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, wcschr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcschr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wcschr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcschr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, wcschr,
1,
__wcschr_sse2))
/* Support sysdeps/x86_64/multiarch/wcsrchr.c. */
IFUNC_IMPL (i, name, wcsrchr,
IFUNC_IMPL_ADD (array, i, wcsrchr,
CPU_FEATURE_USABLE (AVX2),
__wcsrchr_avx2)
IFUNC_IMPL_ADD (array, i, wcsrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsrchr_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcsrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsrchr_evex)
IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
CPU_FEATURE_USABLE (AVX2),
__wcsrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, wcsrchr,
1,
__wcsrchr_sse2))
/* Support sysdeps/x86_64/multiarch/wcscmp.c. */
IFUNC_IMPL (i, name, wcscmp,
IFUNC_IMPL_ADD (array, i, wcscmp,
CPU_FEATURE_USABLE (AVX2),
__wcscmp_avx2)
IFUNC_IMPL_ADD (array, i, wcscmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__wcscmp_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcscmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcscmp_evex)
IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, wcscmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcscmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
CPU_FEATURE_USABLE (AVX2),
__wcscmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__wcscmp_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, wcscmp,
1,
__wcscmp_sse2))
/* Support sysdeps/x86_64/multiarch/wcsncmp.c. */
IFUNC_IMPL (i, name, wcsncmp,
IFUNC_IMPL_ADD (array, i, wcsncmp,
CPU_FEATURE_USABLE (AVX2),
__wcsncmp_avx2)
IFUNC_IMPL_ADD (array, i, wcsncmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsncmp_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcsncmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_evex)
IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic))
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsncmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
CPU_FEATURE_USABLE (AVX2),
__wcsncmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsncmp_avx2_rtm)
/* ISA V2 wrapper for GENERIC implementation because the
GENERIC implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncmp,
1,
__wcsncmp_generic))
/* Support sysdeps/x86_64/multiarch/wcscpy.c. */
IFUNC_IMPL (i, name, wcscpy,
@ -736,55 +797,59 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
IFUNC_IMPL (i, name, wcslen,
IFUNC_IMPL_ADD (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wcslen_avx2)
IFUNC_IMPL_ADD (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcslen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcslen_evex)
IFUNC_IMPL_ADD (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcslen_evex512)
IFUNC_IMPL_ADD (array, i, wcslen,
CPU_FEATURE_USABLE (SSE4_1),
__wcslen_sse4_1)
IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcslen_evex)
X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcslen_evex512)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wcslen_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcslen_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, wcslen,
CPU_FEATURE_USABLE (SSE4_1),
__wcslen_sse4_1)
X86_IFUNC_IMPL_ADD_V1 (array, i, wcslen,
1,
__wcslen_sse2))
/* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
IFUNC_IMPL (i, name, wcsnlen,
IFUNC_IMPL_ADD (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsnlen_avx2)
IFUNC_IMPL_ADD (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsnlen_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsnlen_evex)
IFUNC_IMPL_ADD (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsnlen_evex512)
IFUNC_IMPL_ADD (array, i, wcsnlen,
CPU_FEATURE_USABLE (SSE4_1),
__wcsnlen_sse4_1)
IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsnlen_evex)
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsnlen_evex512)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsnlen_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsnlen_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, wcsnlen,
CPU_FEATURE_USABLE (SSE4_1),
__wcsnlen_sse4_1)
X86_IFUNC_IMPL_ADD_V1 (array, i, wcsnlen,
1,
__wcsnlen_generic))
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
IFUNC_IMPL (i, name, wmemchr,
@ -1050,20 +1115,25 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strncmp.c. */
IFUNC_IMPL (i, name, strncmp,
IFUNC_IMPL_ADD (array, i, strncmp,
CPU_FEATURE_USABLE (AVX2),
__strncmp_avx2)
IFUNC_IMPL_ADD (array, i, strncmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strncmp_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strncmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strncmp_evex)
IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2),
__strncmp_sse42)
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strncmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
CPU_FEATURE_USABLE (AVX2),
__strncmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (RTM)),
__strncmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
CPU_FEATURE_USABLE (SSE4_2),
__strncmp_sse42)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
implementation is also used at ISA level 2. */
X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
1,
__strncmp_sse2))
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/wmemset_chk.c. */

View File

@ -19,32 +19,39 @@
#include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
return OPTIMIZE (avx2_rtm);
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
return OPTIMIZE (avx2);
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
/* Keep this as a runtime check as it is not guaranteed at ISA
level 2. */
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
return OPTIMIZE (sse42);

View File

@ -23,33 +23,38 @@
# define GENERIC sse2
#endif
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
return OPTIMIZE (sse4_1);
return OPTIMIZE (GENERIC);

View File

@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>

View File

@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>
# include "evex256-vecs.h"

View File

@ -16,22 +16,26 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
so we need this to build for ISA V2 builds. */
#if ISA_SHOULD_BUILD (2)
# ifndef MEMRCHR
# define MEMRCHR __memrchr_sse2
# endif
#endif
#include <sysdep.h>
#define VEC_SIZE 16
#define PAGE_SIZE 4096
# include <sysdep.h>
# define VEC_SIZE 16
# define PAGE_SIZE 4096
.text
ENTRY_P2ALIGN(MEMRCHR, 6)
#ifdef __ILP32__
# ifdef __ILP32__
/* Clear upper bits. */
mov %RDX_LP, %RDX_LP
#endif
# endif
movd %esi, %xmm0
/* Get end pointer. */
@ -352,3 +356,4 @@ L(zero_3):
ret
/* 2-bytes from next cache line. */
END(MEMRCHR)
#endif

View File

@ -1,15 +1,2 @@
#ifndef STRCMP
# define STRCMP __strcasecmp_l_avx2_rtm
#endif
#define _GLABEL(x) x ## _rtm
#define GLABEL(x) _GLABEL(x)
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
#define SECTION(p) p##.avx.rtm
#include "strcasecmp_l-avx2.S"
#define USE_AS_STRCASECMP_L
#include "strcmp-avx2-rtm.S"

View File

@ -16,8 +16,5 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef STRCMP
# define STRCMP __strcasecmp_l_avx2
#endif
#define USE_AS_STRCASECMP_L
#include "strcmp-avx2.S"

View File

@ -16,8 +16,5 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef STRCMP
# define STRCMP __strcasecmp_l_evex
#endif
#define USE_AS_STRCASECMP_L
#include "strcmp-evex.S"

View File

@ -17,4 +17,5 @@
<https://www.gnu.org/licenses/>. */
#define USE_AS_STRCASECMP_L
#include "strcmp-sse2.S"

View File

@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>

View File

@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>

View File

@ -16,7 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
/* NB: atom builds with ISA level == 1 so no reason to hold onto this
at ISA level >= 2. */
#if ISA_SHOULD_BUILD (1)
# include <sysdep.h>
# include "asm-syntax.h"

View File

@ -16,7 +16,12 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc) || defined STRCHR
#include <isa-level.h>
/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
so we need this to build for ISA V2 builds. */
#if ISA_SHOULD_BUILD (2)
# ifndef STRCHR
# define STRCHR __strchr_sse2
# endif

View File

@ -26,36 +26,40 @@
# define SYMBOL_NAME strchr
# include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
return OPTIMIZE (sse2_no_bsf);
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF, !))
return OPTIMIZE (sse2);
return OPTIMIZE (sse2);
return OPTIMIZE (sse2_no_bsf);
}
libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());

View File

@ -1,3 +1,8 @@
#define STRCHR __strchrnul_avx2
#ifndef STRCHRNUL
# define STRCHRNUL __strchrnul_avx2
#endif
#define STRCHR STRCHRNUL
#define USE_AS_STRCHRNUL 1
#include "strchr-avx2.S"

View File

@ -1,3 +1,8 @@
#define STRCHR __strchrnul_evex
#ifndef STRCHRNUL
# define STRCHRNUL __strchrnul_evex
#endif
#define STRCHR STRCHRNUL
#define USE_AS_STRCHRNUL 1
#include "strchr-evex.S"

View File

@ -16,12 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
# ifndef STRCHR
# define STRCHR __strchrnul_sse2
# endif
#ifndef STRCHRNUL
# define STRCHRNUL __strchrnul_sse2
#endif
#define AS_STRCHRNUL
#define STRCHR STRCHRNUL
#include "strchr-sse2.S"

View File

@ -1,12 +1,9 @@
#ifndef STRCMP
# define STRCMP __strcmp_avx2_rtm
#endif
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
#define SECTION(p) p##.avx.rtm
#define STRCMP_ISA _avx2_rtm
#include "strcmp-avx2.S"

View File

@ -16,7 +16,15 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (3)
# ifndef STRCMP_ISA
# define STRCMP_ISA _avx2
# endif
# include "strcmp-naming.h"
# include <sysdep.h>
@ -86,15 +94,11 @@
# ifdef USE_AS_STRCASECMP_L
# ifdef USE_AS_STRNCMP
# define STRCASECMP __strncasecmp_avx2
# define LOCALE_REG rcx
# define LOCALE_REG_LP RCX_LP
# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii
# else
# define STRCASECMP __strcasecmp_avx2
# define LOCALE_REG rdx
# define LOCALE_REG_LP RDX_LP
# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii
# endif
# endif
@ -185,18 +189,14 @@
.type STRCMP, @function
.globl STRCMP
# ifndef GLABEL
# define GLABEL(...) __VA_ARGS__
# endif
# ifdef USE_AS_STRCASECMP_L
ENTRY (GLABEL(STRCASECMP))
ENTRY (STRCASECMP)
movq __libc_tsd_LOCALE@gottpoff(%rip), %rax
mov %fs:(%rax), %LOCALE_REG_LP
/* Either 1 or 5 bytes (depending on whether CET is enabled). */
.p2align 4
END (GLABEL(STRCASECMP))
END (STRCASECMP)
/* FALLTHROUGH to strcasecmp/strncasecmp_l. */
# endif

View File

@ -16,7 +16,12 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (4)
# define STRCMP_ISA _evex
# include "strcmp-naming.h"
# include <sysdep.h>
# if defined USE_AS_STRCASECMP_L
@ -37,10 +42,6 @@
# define VMOVA vmovdqa64
# ifdef USE_AS_WCSCMP
# ifndef OVERFLOW_STRCMP
# define OVERFLOW_STRCMP __wcscmp_evex
# endif
# define TESTEQ subl $0xff,
/* Compare packed dwords. */
# define VPCMP vpcmpd
@ -50,10 +51,6 @@
/* 1 dword char == 4 bytes. */
# define SIZE_OF_CHAR 4
# else
# ifndef OVERFLOW_STRCMP
# define OVERFLOW_STRCMP __strcmp_evex
# endif
# define TESTEQ incl
/* Compare packed bytes. */
# define VPCMP vpcmpb
@ -120,15 +117,11 @@
# ifdef USE_AS_STRCASECMP_L
# ifdef USE_AS_STRNCMP
# define STRCASECMP __strncasecmp_evex
# define LOCALE_REG rcx
# define LOCALE_REG_LP RCX_LP
# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii
# else
# define STRCASECMP __strcasecmp_evex
# define LOCALE_REG rdx
# define LOCALE_REG_LP RDX_LP
# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii
# endif
# endif
@ -214,7 +207,6 @@
.align 16
.type STRCMP, @function
.globl STRCMP
# ifdef USE_AS_STRCASECMP_L
ENTRY (STRCASECMP)
movq __libc_tsd_LOCALE@gottpoff(%rip), %rax

View File

@ -16,11 +16,20 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#include "sysdep.h"
/* Continue building as ISA level 2. We use this as ISA V2 default
because strcmp-sse42 uses pcmpstri (slow on some SSE4.2
processors) and this implementation is potentially faster than
strcmp-sse42 (aside from the slower page cross case). */
#if ISA_SHOULD_BUILD (2)
ENTRY ( __strcmp_sse2_unaligned)
# define STRCMP_ISA _sse2_unaligned
# include "strcmp-naming.h"
# include "sysdep.h"
ENTRY (STRCMP)
movl %edi, %eax
xorl %edx, %edx
pxor %xmm7, %xmm7
@ -208,6 +217,5 @@ L(cross_page):
L(different):
subl %ecx, %eax
ret
END (__strcmp_sse2_unaligned)
END (STRCMP)
#endif

View File

@ -16,7 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc) || IS_IN (rtld)
#include <isa-level.h>
/* Continue building at ISA level 2 as the strcmp-sse42 is not always
preferable for ISA level == 2 CPUs. */
#if ISA_SHOULD_BUILD (2)
# define STRCMP_ISA _sse2
# include "strcmp-naming.h"

View File

@ -16,7 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (2)
# include <sysdep.h>
# define STRCMP_ISA _sse42
@ -1766,7 +1769,6 @@ LABEL(unaligned_table):
.int LABEL(ashr_0) - LABEL(unaligned_table)
# undef LABEL
# undef GLABEL
# undef SECTION
# undef movdqa
# undef movdqu

View File

@ -26,37 +26,50 @@
# define SYMBOL_NAME strcmp
# include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
extern __typeof (REDIRECT_NAME)
OPTIMIZE (sse2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
/* Keep this as runtime check. Some ISA level >= 2 CPUs such as
Tremont, Silvermont, and more check this. */
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
return OPTIMIZE (sse42);
/* Keep this as runtime check. The standard SSE2 version has
meaningful optimizations around keeping all loads aligned in the
main loop which can benefit some ISA level >= 2 CPUs. */
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
return OPTIMIZE (sse2_unaligned);

View File

@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>

View File

@ -16,7 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
/* UNUSED. Exists purely as reference implementation. */
#include <isa-level.h>
#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>

View File

@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>

View File

@ -16,15 +16,20 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc) || defined STRLEN
# ifndef STRLEN
# define STRLEN __strlen_sse2
# endif
#include <isa-level.h>
/* ISA level >= 2 for both strlen and wcslen. wcslen uses `pminud`
which is SSE4.1. strlen doesn't have an ISA level == 2
implementation so the SSE2 implementation must be built with ISA
level == 2. */
# if ISA_SHOULD_BUILD (2)
# include <sysdep.h>
# ifndef STRLEN
# define STRLEN __strlen_sse2
# endif
# ifdef AS_WCSLEN
# define PMINU pminud
# define PCMPEQ pcmpeqd
@ -82,7 +87,7 @@ L(n_nonzero):
suffice. */
mov %RSI_LP, %R10_LP
sar $62, %R10_LP
jnz __wcslen_sse4_1
jnz OVERFLOW_STRLEN
sal $2, %RSI_LP
# endif

View File

@ -1,16 +1,4 @@
#ifndef STRCMP
# define STRCMP __strncasecmp_l_avx2_rtm
#endif
#define USE_AS_STRCASECMP_L
#define USE_AS_STRNCMP
#define _GLABEL(x) x ## _rtm
#define GLABEL(x) _GLABEL(x)
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
#define SECTION(p) p##.avx.rtm
#define OVERFLOW_STRCMP __strcasecmp_l_avx2_rtm
#include "strncase_l-avx2.S"
#include "strcmp-avx2-rtm.S"

View File

@ -16,12 +16,7 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef STRCMP
# define STRCMP __strncasecmp_l_avx2
#endif
#define USE_AS_STRCASECMP_L
#define USE_AS_STRNCMP
#ifndef OVERFLOW_STRCMP
# define OVERFLOW_STRCMP __strcasecmp_l_avx2
#endif
#include "strcmp-avx2.S"

View File

@ -16,10 +16,6 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef STRCMP
# define STRCMP __strncasecmp_l_evex
#endif
#define OVERFLOW_STRCMP __strcasecmp_l_evex
#define USE_AS_STRCASECMP_L
#define USE_AS_STRNCMP
#include "strcmp-evex.S"

View File

@ -1,4 +1,2 @@
#define STRCMP __strncmp_avx2_rtm
#define USE_AS_STRNCMP 1
#define OVERFLOW_STRCMP __strcmp_avx2_rtm
#include "strcmp-avx2-rtm.S"

View File

@ -1,4 +1,3 @@
#define STRCMP __strncmp_avx2
#define USE_AS_STRNCMP 1
#define OVERFLOW_STRCMP __strcmp_avx2
#include "strcmp-avx2.S"

View File

@ -1,3 +1,2 @@
#define STRCMP __strncmp_evex
#define USE_AS_STRNCMP 1
#include "strcmp-evex.S"

View File

@ -26,33 +26,38 @@
# define SYMBOL_NAME strncmp
# include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2))
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
return OPTIMIZE (sse42);

View File

@ -1,4 +1,8 @@
#define STRLEN __strnlen_avx2
#ifndef STRNLEN
# define STRNLEN __strnlen_avx2
#endif
#define USE_AS_STRNLEN 1
#define STRLEN STRNLEN
#include "strlen-avx2.S"

View File

@ -1,4 +1,8 @@
#define STRLEN __strnlen_evex
#ifndef STRNLEN
# define STRNLEN __strnlen_evex
#endif
#define USE_AS_STRNLEN 1
#define STRLEN STRNLEN
#include "strlen-evex.S"

View File

@ -16,11 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
# ifndef STRLEN
# define STRLEN __strnlen_sse2
# endif
#ifndef STRNLEN
# define STRNLEN __strnlen_sse2
#endif
#define AS_STRNLEN
#define AS_STRNLEN 1
#define STRLEN STRNLEN
#include "strlen-sse2.S"

View File

@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>

View File

@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>

View File

@ -16,36 +16,40 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
/* ISA level >= 2 because there are no {wcs|str}rchr-sse4
implementations. */
#if ISA_SHOULD_BUILD (2)
# include <sysdep.h>
# ifndef STRRCHR
# define STRRCHR __strrchr_sse2
# endif
#endif
#include <sysdep.h>
# ifdef USE_AS_WCSRCHR
# define PCMPEQ pcmpeqd
# define CHAR_SIZE 4
# define PMINU pminud
# else
# define PCMPEQ pcmpeqb
# define CHAR_SIZE 1
# define PMINU pminub
# endif
#ifdef USE_AS_WCSRCHR
# define PCMPEQ pcmpeqd
# define CHAR_SIZE 4
# define PMINU pminud
#else
# define PCMPEQ pcmpeqb
# define CHAR_SIZE 1
# define PMINU pminub
#endif
#define PAGE_SIZE 4096
#define VEC_SIZE 16
# define PAGE_SIZE 4096
# define VEC_SIZE 16
.text
ENTRY(STRRCHR)
movd %esi, %xmm0
movq %rdi, %rax
andl $(PAGE_SIZE - 1), %eax
#ifndef USE_AS_WCSRCHR
# ifndef USE_AS_WCSRCHR
punpcklbw %xmm0, %xmm0
punpcklwd %xmm0, %xmm0
#endif
# endif
pshufd $0, %xmm0, %xmm0
cmpl $(PAGE_SIZE - VEC_SIZE), %eax
ja L(cross_page)
@ -69,9 +73,9 @@ L(cross_page_continue):
/* We are off by 3 for wcsrchr if search CHAR is non-zero. If
search CHAR is zero we are correct. Either way `andq
-CHAR_SIZE, %rax` gets the correct result. */
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
L(ret0):
ret
@ -85,9 +89,9 @@ L(first_vec_x0_test):
jz L(ret0)
bsrl %eax, %eax
addq %r8, %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
ret
.p2align 4
@ -100,9 +104,9 @@ L(first_vec_x1):
jz L(first_vec_x0_test)
bsrl %eax, %eax
leaq (VEC_SIZE)(%rdi, %rax), %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
ret
.p2align 4
@ -113,9 +117,9 @@ L(first_vec_x1_test):
jz L(first_vec_x0_test)
bsrl %eax, %eax
leaq (VEC_SIZE)(%rdi, %rax), %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
ret
.p2align 4
@ -128,9 +132,9 @@ L(first_vec_x2):
jz L(first_vec_x1_test)
bsrl %eax, %eax
leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
ret
.p2align 4
@ -165,27 +169,27 @@ L(first_loop):
/* Since SSE2 has no pminud, wcsrchr needs separate logic for
detecting zero. Note if this is found to be a bottleneck it
may be worth adding an SSE4.1 wcsrchr implementation. */
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
movaps %xmm5, %xmm6
pxor %xmm8, %xmm8
PCMPEQ %xmm8, %xmm5
PCMPEQ %xmm4, %xmm8
por %xmm5, %xmm8
#else
# else
movaps %xmm5, %xmm6
PMINU %xmm4, %xmm5
#endif
# endif
movaps %xmm4, %xmm9
PCMPEQ %xmm0, %xmm4
PCMPEQ %xmm0, %xmm6
movaps %xmm6, %xmm7
por %xmm4, %xmm6
#ifndef USE_AS_WCSRCHR
# ifndef USE_AS_WCSRCHR
pxor %xmm8, %xmm8
PCMPEQ %xmm5, %xmm8
#endif
# endif
pmovmskb %xmm8, %ecx
pmovmskb %xmm6, %eax
@ -219,9 +223,9 @@ L(first_loop_old_match):
bsrl %eax, %eax
addq %rsi, %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
ret
.p2align 4
@ -247,9 +251,9 @@ L(new_match):
jz L(first_loop_old_match)
bsrl %eax, %eax
addq %rdi, %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
ret
/* Save minimum state for getting most recent match. We can
@ -267,27 +271,27 @@ L(second_loop):
/* Since SSE2 has no pminud, wcsrchr needs separate logic for
detecting zero. Note if this is found to be a bottleneck it
may be worth adding an SSE4.1 wcsrchr implementation. */
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
movaps %xmm5, %xmm6
pxor %xmm8, %xmm8
PCMPEQ %xmm8, %xmm5
PCMPEQ %xmm4, %xmm8
por %xmm5, %xmm8
#else
# else
movaps %xmm5, %xmm6
PMINU %xmm4, %xmm5
#endif
# endif
movaps %xmm4, %xmm9
PCMPEQ %xmm0, %xmm4
PCMPEQ %xmm0, %xmm6
movaps %xmm6, %xmm7
por %xmm4, %xmm6
#ifndef USE_AS_WCSRCHR
# ifndef USE_AS_WCSRCHR
pxor %xmm8, %xmm8
PCMPEQ %xmm5, %xmm8
#endif
# endif
pmovmskb %xmm8, %ecx
pmovmskb %xmm6, %eax
@ -312,9 +316,9 @@ L(second_loop_old_match):
orl %ecx, %eax
bsrl %eax, %eax
addq %rsi, %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
ret
.p2align 4
@ -340,9 +344,9 @@ L(second_loop_new_match):
jz L(second_loop_old_match)
bsrl %eax, %eax
addq %rdi, %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
ret
.p2align 4,, 4
@ -366,9 +370,10 @@ L(cross_page):
jz L(ret1)
bsrl %eax, %eax
addq %rdi, %rax
#ifdef USE_AS_WCSRCHR
# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
#endif
# endif
L(ret1):
ret
END(STRRCHR)
#endif

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "../strchr-isa-default-impl.h"
ENTRY(__strstr_sse2_unaligned)
movzbl (%rsi), %eax
@ -75,7 +76,7 @@ L(next_pair_index):
.p2align 4
L(strchr):
movzbl %al, %esi
jmp __strchr_sse2
jmp DEFAULT_STRCHR
.p2align 4
L(pair_loop):

View File

@ -1,3 +1,8 @@
#define STRCHR __wcschr_avx2
#ifndef WCSCHR
# define WCSCHR __wcschr_avx2
#endif
#define STRCHR WCSCHR
#define USE_AS_WCSCHR 1
#include "strchr-avx2.S"

View File

@ -1,3 +1,8 @@
#define STRCHR __wcschr_evex
#ifndef WCSCHR
# define WCSCHR __wcschr_evex
#endif
#define STRCHR WCSCHR
#define USE_AS_WCSCHR 1
#include "strchr-evex.S"

View File

@ -16,13 +16,17 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
#include <isa-level.h>
/* ISA level >= 2 because there are no wcschr-sse4 implementations. */
#if ISA_SHOULD_BUILD (2)
# ifndef WCSCHR
# define WCSCHR __wcschr_sse2
# endif
#endif
#include <sysdep.h>
# include <sysdep.h>
.text
ENTRY (WCSCHR)
@ -155,3 +159,4 @@ L(return_null):
ret
END (WCSCHR)
#endif

View File

@ -1,4 +1,3 @@
#define STRCMP __wcscmp_avx2_rtm
#define USE_AS_WCSCMP 1
#include "strcmp-avx2-rtm.S"

View File

@ -1,4 +1,3 @@
#define STRCMP __wcscmp_avx2
#define USE_AS_WCSCMP 1
#include "strcmp-avx2.S"

View File

@ -1,4 +1,3 @@
#define STRCMP __wcscmp_evex
#define USE_AS_WCSCMP 1
#include "strcmp-evex.S"

View File

@ -16,11 +16,16 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define USE_AS_WCSCMP
#define STRCMP_ISA _sse2
#include "strcmp-naming.h"
#include <isa-level.h>
#include <sysdep.h>
/* ISA level >= 2 because there are no wcscmp-sse4 implementations. */
#if ISA_SHOULD_BUILD (2)
# include <sysdep.h>
/* Needed to get right name. */
# define USE_AS_WCSCMP
# define STRCMP_ISA _sse2
# include "strcmp-naming.h"
/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */
@ -949,3 +954,4 @@ L(equal):
ret
END (STRCMP)
#endif

View File

@ -1,4 +1,8 @@
#define STRLEN __wcslen_avx2
#ifndef WCSLEN
# define WCSLEN __wcslen_avx2
#endif
#define STRLEN WCSLEN
#define USE_AS_WCSLEN 1
#include "strlen-avx2.S"

View File

@ -1,4 +1,8 @@
#define STRLEN __wcslen_evex
#ifndef WCSLEN
# define WCSLEN __wcslen_evex
#endif
#define STRLEN WCSLEN
#define USE_AS_WCSLEN 1
#include "strlen-evex.S"

View File

@ -16,13 +16,16 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
# ifndef WCSLEN
# define WCSLEN __wcslen_sse2
# endif
#include <isa-level.h>
#if ISA_SHOULD_BUILD (1)
# include <sysdep.h>
#ifndef WCSLEN
# define WCSLEN __wcslen_sse2
#endif
#include <sysdep.h>
.text
ENTRY (WCSLEN)
@ -235,3 +238,5 @@ L(exit_tail7):
ret
END (WCSLEN)
#endif

View File

@ -1,5 +1,9 @@
#define AS_WCSLEN
#define STRLEN __wcslen_sse4_1
#define SECTION(p) p##.sse4.1
#ifndef WCSLEN
# define WCSLEN __wcslen_sse4_1
#endif
#define AS_WCSLEN 1
#define STRLEN WCSLEN
#define SECTION(p) p##.sse4.1
#include "strlen-sse2.S"

View File

@ -1,5 +1,3 @@
#define STRCMP __wcsncmp_avx2_rtm
#define USE_AS_STRNCMP 1
#define USE_AS_WCSCMP 1
#define OVERFLOW_STRCMP __wcscmp_avx2_rtm
#include "strcmp-avx2-rtm.S"

View File

@ -1,5 +1,4 @@
#define STRCMP __wcsncmp_avx2
#define USE_AS_STRNCMP 1
#define USE_AS_WCSCMP 1
#define OVERFLOW_STRCMP __wcscmp_avx2
#include "strcmp-avx2.S"

View File

@ -1,4 +1,3 @@
#define STRCMP __wcsncmp_evex
#define USE_AS_STRNCMP 1
#define USE_AS_WCSCMP 1

View File

@ -16,5 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define WCSNCMP __wcsncmp_generic
#include <wcsmbs/wcsncmp.c>
#include <isa-level.h>
#if ISA_SHOULD_BUILD (2)
# define WCSNCMP __wcsncmp_generic
# include <wcsmbs/wcsncmp.c>
#endif

View File

@ -1,4 +1,8 @@
#define STRLEN __wcsnlen_avx2
#ifndef WCSNLEN
# define WCSNLEN __wcsnlen_avx2
#endif
#define STRLEN WCSNLEN
#define USE_AS_WCSLEN 1
#define USE_AS_STRNLEN 1

View File

@ -1,4 +1,8 @@
#define STRLEN __wcsnlen_evex
#ifndef WCSNLEN
# define WCSNLEN __wcsnlen_evex
#endif
#define STRLEN WCSNLEN
#define USE_AS_WCSLEN 1
#define USE_AS_STRNLEN 1

View File

@ -16,13 +16,18 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <isa-level.h>
#if ISA_SHOULD_BUILD (1)
#if IS_IN (libc)
# include <wchar.h>
# define WCSNLEN __wcsnlen_generic
# ifndef WCSNLEN
# define WCSNLEN __wcsnlen_generic
# endif
extern __typeof (wcsnlen) __wcsnlen_generic;
#endif
#include "wcsmbs/wcsnlen.c"
# include "wcsmbs/wcsnlen.c"
#endif

View File

@ -1,6 +1,11 @@
#ifndef WCSNLEN
# define WCSNLEN __wcsnlen_sse4_1
# define OVERFLOW_STRLEN __wcslen_sse4_1
#endif
#define AS_WCSLEN
#define AS_STRNLEN
#define STRLEN __wcsnlen_sse4_1
#define STRLEN WCSNLEN
#define SECTION(p) p##.sse4.1
#include "strlen-sse2.S"

View File

@ -1,3 +1,8 @@
#define STRRCHR __wcsrchr_avx2
#ifndef WCSRCHR
# define WCSRCHR __wcsrchr_avx2
#endif
#define STRRCHR WCSRCHR
#define USE_AS_WCSRCHR 1
#include "strrchr-avx2.S"

View File

@ -1,3 +1,7 @@
#define STRRCHR __wcsrchr_evex
#ifndef WCSRCHR
# define WCSRCHR __wcsrchr_evex
#endif
#define STRRCHR WCSRCHR
#define USE_AS_WCSRCHR 1
#include "strrchr-evex.S"

View File

@ -16,12 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
# ifndef STRRCHR
# define STRRCHR __wcsrchr_sse2
# endif
#ifndef WCSRCHR
# define WCSRCHR __wcsrchr_sse2
#endif
#define STRRCHR WCSRCHR
#define USE_AS_WCSRCHR 1
#define NO_PMINU 1

View File

@ -1,11 +1,35 @@
/* strcasecmp_l dispatch for RTLD and non-multiarch build
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Symbols = __strcasecmp_l and __strcasecmp. */
#include "multiarch/strcasecmp_l-sse2.S"
#define DEFAULT_IMPL_V1 "multiarch/strcasecmp_l-sse2.S"
/* This may cause regressions on some processors that heavily prefer
aligned loads or have a slow implementation of the `pcmpstri`
instruction. */
#define DEFAULT_IMPL_V2 "multiarch/strcasecmp_l-sse4_2.S"
#define DEFAULT_IMPL_V3 "multiarch/strcasecmp_l-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strcasecmp_l-evex.S"
libc_hidden_builtin_def (__strcasecmp_l)
#include "isa-default-impl.h"
libc_hidden_def (__strcasecmp_l)
weak_alias (__strcasecmp_l, strcasecmp_l)
libc_hidden_def (strcasecmp_l)
weak_alias (__strcasecmp, strcasecmp)
libc_hidden_def (__strcasecmp)
weak_alias (__strcasecmp, strcasecmp)

View File

@ -0,0 +1,28 @@
/* Set default strchr impl based on ISA level.
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <isa-level.h>
#if MINIMUM_X86_ISA_LEVEL == 1 || MINIMUM_X86_ISA_LEVEL == 2
# define DEFAULT_STRCHR __strchr_sse2
#elif MINIMUM_X86_ISA_LEVEL == 3
# define DEFAULT_STRCHR __strchr_avx2
#elif MINIMUM_X86_ISA_LEVEL == 4
# define DEFAULT_STRCHR __strchr_evex
#else
# error "Unknown default strchr implementation"
#endif

View File

@ -1,5 +1,4 @@
/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
For AMD x86-64.
/* strchr dispatch for RTLD and non-multiarch build
Copyright (C) 2009-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -17,8 +16,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define STRCHR strchr
#define DEFAULT_IMPL_V1 "multiarch/strchr-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/strchr-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strchr-evex.S"
#include "isa-default-impl.h"
#define STRCHR strchr
#include "multiarch/strchr-sse2.S"
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)

View File

@ -1,6 +1,4 @@
/* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR
or terminating NUL byte.
For AMD x86-64.
/* strchrnul dispatch for RTLD and non-multiarch build
Copyright (C) 2009-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -18,7 +16,12 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define STRCHR __strchrnul
#include "multiarch/strchrnul-sse2.S"
#define STRCHRNUL __strchrnul
#define DEFAULT_IMPL_V1 "multiarch/strchrnul-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/strchrnul-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strchrnul-evex.S"
#include "isa-default-impl.h"
weak_alias (__strchrnul, strchrnul)

View File

@ -1,4 +1,4 @@
/* Highly optimized version for x86-64.
/* strcmp dispatch for RTLD and non-multiarch build
Copyright (C) 1999-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -18,5 +18,14 @@
/* Symbol = strcmp. */
#include "multiarch/strcmp-sse2.S"
#define DEFAULT_IMPL_V1 "multiarch/strcmp-sse2.S"
/* strcmp-sse2-unaligned.S is often faster than strcmp-sse42.S and
doesn't have the drawback of using the `pcmpstri` instruction
which can be very slow on some CPUs. */
#define DEFAULT_IMPL_V2 "multiarch/strcmp-sse2-unaligned.S"
#define DEFAULT_IMPL_V3 "multiarch/strcmp-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strcmp-evex.S"
#include "isa-default-impl.h"
libc_hidden_builtin_def (strcmp)

View File

@ -1,4 +1,4 @@
/* SSE2 version of strlen.
/* strlen dispatch for RTLD and non-multiarch build
Copyright (C) 2021-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -17,6 +17,11 @@
<https://www.gnu.org/licenses/>. */
#define STRLEN strlen
#include "multiarch/strlen-sse2.S"
#define DEFAULT_IMPL_V1 "multiarch/strlen-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/strlen-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strlen-evex.S"
#include "isa-default-impl.h"
libc_hidden_builtin_def (strlen)

View File

@ -1,11 +1,35 @@
/* strncasecmp_l dispatch for RTLD and non-multiarch build
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Symbols = __strncasecmp_l and __strncasecmp. */
#include "multiarch/strncase_l-sse2.S"
#define DEFAULT_IMPL_V1 "multiarch/strncase_l-sse2.S"
/* This may cause regressions on some processors that heavily prefer
aligned loads or have a slow implementation of the `pcmpstri`
instruction. */
#define DEFAULT_IMPL_V2 "multiarch/strncase_l-sse4_2.S"
#define DEFAULT_IMPL_V3 "multiarch/strncase_l-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strncase_l-evex.S"
libc_hidden_builtin_def (__strncasecmp_l)
#include "isa-default-impl.h"
libc_hidden_def (__strncasecmp_l)
weak_alias (__strncasecmp_l, strncasecmp_l)
libc_hidden_def (strncasecmp_l)
weak_alias (__strncasecmp, strncasecmp)
libc_hidden_def (__strncasecmp)
weak_alias (__strncasecmp, strncasecmp)

View File

@ -1,4 +1,31 @@
/* strncmp dispatch for RTLD and non-multiarch build
Copyright (C) 1999-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Symbol = strncmp. */
#include "multiarch/strncmp-sse2.S"
#define DEFAULT_IMPL_V1 "multiarch/strncmp-sse2.S"
/* This may cause regressions on some processors that heavily prefer
aligned loads or have a slow implementation of the `pcmpstri`
instruction. */
#define DEFAULT_IMPL_V2 "multiarch/strncmp-sse4_2.S"
#define DEFAULT_IMPL_V3 "multiarch/strncmp-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strncmp-evex.S"
#include "isa-default-impl.h"
libc_hidden_builtin_def (strncmp)

View File

@ -1,6 +1,29 @@
#define STRLEN __strnlen
#include "multiarch/strnlen-sse2.S"
/* strnlen dispatch for RTLD and non-multiarch build
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define STRNLEN __strnlen
#define DEFAULT_IMPL_V1 "multiarch/strnlen-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/strnlen-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strnlen-evex.S"
#include "isa-default-impl.h"
weak_alias (__strnlen, strnlen)
libc_hidden_def (__strnlen)
weak_alias (__strnlen, strnlen);
libc_hidden_builtin_def (strnlen)
libc_hidden_def (strnlen)

View File

@ -1,4 +1,4 @@
/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
/* strrchr dispatch for RTLD and non-multiarch build
Copyright (C) 2013-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -17,6 +17,12 @@
<https://www.gnu.org/licenses/>. */
#define STRRCHR strrchr
#include "multiarch/strrchr-sse2.S"
#define DEFAULT_IMPL_V1 "multiarch/strrchr-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/strrchr-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/strrchr-evex.S"
#include "isa-default-impl.h"
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)

View File

@ -1,4 +1,4 @@
/* wcschr with SSSE3
/* wcschr dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -16,9 +16,14 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define WCSCHR __wcschr
#include "multiarch/wcschr-sse2.S"
libc_hidden_def(__wcschr)
#define DEFAULT_IMPL_V1 "multiarch/wcschr-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/wcschr-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/wcschr-evex.S"
#include "isa-default-impl.h"
libc_hidden_def (__wcschr)
weak_alias (__wcschr, wcschr)
libc_hidden_weak (wcschr)

View File

@ -1,4 +1,4 @@
/* Optimized wcscmp for x86-64 with SSE2.
/* wcscmp dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -18,6 +18,11 @@
/* Symbol = __wcscmp. */
#include "multiarch/wcscmp-sse2.S"
#define DEFAULT_IMPL_V1 "multiarch/wcscmp-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/wcscmp-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/wcscmp-evex.S"
#include "isa-default-impl.h"
libc_hidden_def (__wcscmp)
weak_alias (__wcscmp, wcscmp)

View File

@ -1,4 +1,4 @@
/* Optimized wcslen for x86-64 with SSE2.
/* wcslen dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -17,5 +17,18 @@
<https://www.gnu.org/licenses/>. */
#define WCSLEN __wcslen
#include "multiarch/wcslen-sse2.S"
weak_alias(__wcslen, wcslen)
#define DEFAULT_IMPL_V1 "multiarch/wcslen-sse2.S"
#define DEFAULT_IMPL_V2 "multiarch/wcslen-sse4_1.S"
#define DEFAULT_IMPL_V3 "multiarch/wcslen-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/wcslen-evex.S"
#include "isa-default-impl.h"
weak_alias (__wcslen, wcslen)
#if MINIMUM_X86_ISA_LEVEL == 2 && !IS_IN (rtld)
/* Hidden def so it can be used as overflow fallback in
wcsnlen-sse4_1.S. */
libc_hidden_def (__wcslen)
#endif

View File

@ -0,0 +1,29 @@
/* wcsncmp dispatch for RTLD and non-multiarch .c ISA level <= 2 build.
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* wcsncmp non-multiarch build is split into two files,
wcsncmp-generic.c and wcsncmp.S. The wcsncmp-generic.c build is for
ISA level <= 2 and just uses wcsmbs/wcsncmp.c. This must be split
into two files because we cannot include C code from assembly or
vice versa. */
#include <isa-level.h>
/* wcsncmp.S only selects an assembly implementation for ISA level
>= 3, so every lower level falls back to the generic C version. */
#if MINIMUM_X86_ISA_LEVEL <= 2
# include "wcsmbs/wcsncmp.c"
#endif

40
sysdeps/x86_64/wcsncmp.S Normal file
View File

@ -0,0 +1,40 @@
/* wcsncmp dispatch for RTLD and non-multiarch .c files
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* wcsncmp non-multiarch build is split into two files,
wcsncmp-generic.c and wcsncmp.S. The wcsncmp.S build is for
ISA level >= 3 and uses the optimized assembly implementations in
multiarch/wcsncmp*.S. This must be split into two files because
we cannot include C code from assembly or vice versa. */
#include <isa-level.h>
/* Below ISA level 3 nothing is selected here; wcsncmp-generic.c
supplies the implementation for those builds instead. */
#if MINIMUM_X86_ISA_LEVEL >= 3
/* Symbol = wcsncmp. */
# define DEFAULT_IMPL_V3 "multiarch/wcsncmp-avx2.S"
# define DEFAULT_IMPL_V4 "multiarch/wcsncmp-evex.S"
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
should never be used from here. */
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
# include "isa-default-impl.h"
#endif

View File

@ -0,0 +1,29 @@
/* wcsnlen dispatch for RTLD and non-multiarch .c ISA level 1 build.
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* wcsnlen non-multiarch build is split into two files,
wcsnlen-generic.c and wcsnlen.S. The wcsnlen-generic.c build is for
ISA level <= 1 and just uses wcsmbs/wcsnlen.c. This must be split
into two files because we cannot include C code from assembly or
vice versa. */
#include <isa-level.h>
/* wcsnlen.S takes over for ISA level >= 2, so the generic C version
is only pulled in below that. */
#if MINIMUM_X86_ISA_LEVEL <= 1
# include "wcsmbs/wcsnlen.c"
#endif

49
sysdeps/x86_64/wcsnlen.S Normal file
View File

@ -0,0 +1,49 @@
/* wcsnlen dispatch for RTLD and non-multiarch .c files
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* wcsnlen non-multiarch build is split into two files,
wcsnlen-generic.c and wcsnlen.S. The wcsnlen.S build is for
ISA level >= 2 and uses the optimized assembly implementations in
multiarch/wcsnlen*.S. This must be split into two files because
we cannot include C code from assembly or vice versa. */
#include <isa-level.h>
/* Below ISA level 2 nothing is selected here; wcsnlen-generic.c
supplies the implementation for those builds instead. */
#if MINIMUM_X86_ISA_LEVEL >= 2
# define WCSNLEN __wcsnlen
/* This symbol must stay linked to the name in wcslen.S. The rtld
build names __wcslen directly; every other build goes through
HIDDEN_JUMPTARGET.
NOTE(review): wcslen.S only emits the hidden definition of __wcslen
when MINIMUM_X86_ISA_LEVEL == 2; presumably only wcsnlen-sse4_1.S
consumes OVERFLOW_STRLEN -- confirm for the avx2/evex variants. */
# if IS_IN (rtld)
#  define OVERFLOW_STRLEN __wcslen
# else
#  define OVERFLOW_STRLEN HIDDEN_JUMPTARGET (__wcslen)
# endif
# define DEFAULT_IMPL_V2 "multiarch/wcsnlen-sse4_1.S"
# define DEFAULT_IMPL_V3 "multiarch/wcsnlen-avx2.S"
# define DEFAULT_IMPL_V4 "multiarch/wcsnlen-evex.S"
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
should never be used from here. */
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
# include "isa-default-impl.h"
weak_alias (__wcsnlen, wcsnlen)
libc_hidden_def (__wcsnlen)
#endif

View File

@ -1,4 +1,4 @@
/* wcsrchr optimized with SSE2.
/* wcsrchr dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@ -16,5 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define STRRCHR wcsrchr
#include "multiarch/wcsrchr-sse2.S"
#define WCSRCHR wcsrchr
#define DEFAULT_IMPL_V1 "multiarch/wcsrchr-sse2.S"
#define DEFAULT_IMPL_V3 "multiarch/wcsrchr-avx2.S"
#define DEFAULT_IMPL_V4 "multiarch/wcsrchr-evex.S"
#include "isa-default-impl.h"