mirror of git://sourceware.org/git/glibc.git
This patch changes the condition for the 4x VEC copy case so that a length exactly equal to 4 * VEC_SIZE uses the 4x VEC case instead of the 8x VEC case. Results for Skylake memcpy-avx2-erms (size, al1, al2, Cur T, New T, Win, New / Cur): 128, 0, 0, 9.137, 6.873, New, 75.22; 128, 7, 0, 12.933, 7.732, New, 59.79; 128, 0, 7, 11.852, 6.76, New, 57.04; 128, 7, 7, 12.587, 6.808, New, 54.09. Results for Icelake memcpy-evex-erms (size, al1, al2, Cur T, New T, Win, New / Cur): 128, 0, 0, 9.963, 5.416, New, 54.36; 128, 7, 0, 16.467, 8.061, New, 48.95; 128, 0, 7, 14.388, 7.644, New, 53.13; 128, 7, 7, 14.546, 7.642, New, 52.54. Results for Tigerlake memcpy-evex-erms (size, al1, al2, Cur T, New T, Win, New / Cur): 128, 0, 0, 8.979, 4.95, New, 55.13; 128, 7, 0, 14.245, 7.122, New, 50.0; 128, 0, 7, 12.668, 6.675, New, 52.69; 128, 7, 7, 13.042, 6.802, New, 52.15. Results for Skylake memmove-avx2-erms (size, al1, al2, Cur T, New T, Win, New / Cur): 128, 0, 32, 6.181, 5.691, New, 92.07; 128, 32, 0, 6.165, 5.752, New, 93.3; 128, 0, 7, 13.923, 9.37, New, 67.3; 128, 7, 0, 12.049, 10.182, New, 84.5. Results for Icelake memmove-evex-erms (size, al1, al2, Cur T, New T, Win, New / Cur): 128, 0, 32, 5.479, 4.889, New, 89.23; 128, 32, 0, 5.127, 4.911, New, 95.79; 128, 0, 7, 18.885, 13.547, New, 71.73; 128, 7, 0, 15.565, 14.436, New, 92.75. Results for Tigerlake memmove-evex-erms (size, al1, al2, Cur T, New T, Win, New / Cur): 128, 0, 32, 5.275, 4.815, New, 91.28; 128, 32, 0, 5.376, 4.565, New, 84.91; 128, 0, 7, 19.426, 14.273, New, 73.47; 128, 7, 0, 15.924, 14.951, New, 93.89. Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> |
||
|---|---|---|
| .. | ||
| Makefile | ||
| bcopy.S | ||
| ifunc-avx2.h | ||
| ifunc-evex.h | ||
| ifunc-impl-list.c | ||
| ifunc-memcmp.h | ||
| ifunc-memmove.h | ||
| ifunc-memset.h | ||
| ifunc-sse4_2.h | ||
| ifunc-strcasecmp.h | ||
| ifunc-strcpy.h | ||
| ifunc-wmemset.h | ||
| memchr-avx2-rtm.S | ||
| memchr-avx2.S | ||
| memchr-evex-rtm.S | ||
| memchr-evex.S | ||
| memchr-sse2.S | ||
| memchr.c | ||
| memcmp-avx2-movbe-rtm.S | ||
| memcmp-avx2-movbe.S | ||
| memcmp-evex-movbe.S | ||
| memcmp-sse2.S | ||
| memcmp-sse4.S | ||
| memcmp-ssse3.S | ||
| memcmp.c | ||
| memcpy-ssse3-back.S | ||
| memcpy-ssse3.S | ||
| memcpy.c | ||
| memcpy_chk-nonshared.S | ||
| memcpy_chk.c | ||
| memmove-avx-unaligned-erms-rtm.S | ||
| memmove-avx-unaligned-erms.S | ||
| memmove-avx512-no-vzeroupper.S | ||
| memmove-avx512-unaligned-erms.S | ||
| memmove-evex-unaligned-erms.S | ||
| memmove-sse2-unaligned-erms.S | ||
| memmove-ssse3-back.S | ||
| memmove-ssse3.S | ||
| memmove-vec-unaligned-erms.S | ||
| memmove.c | ||
| memmove_chk-nonshared.S | ||
| memmove_chk.c | ||
| mempcpy.c | ||
| mempcpy_chk-nonshared.S | ||
| mempcpy_chk.c | ||
| memrchr-avx2-rtm.S | ||
| memrchr-avx2.S | ||
| memrchr-evex.S | ||
| memrchr-sse2.S | ||
| memrchr.c | ||
| memset-avx2-unaligned-erms-rtm.S | ||
| memset-avx2-unaligned-erms.S | ||
| memset-avx512-no-vzeroupper.S | ||
| memset-avx512-unaligned-erms.S | ||
| memset-evex-unaligned-erms.S | ||
| memset-sse2-unaligned-erms.S | ||
| memset-vec-unaligned-erms.S | ||
| memset.c | ||
| memset_chk-nonshared.S | ||
| memset_chk.c | ||
| rawmemchr-avx2-rtm.S | ||
| rawmemchr-avx2.S | ||
| rawmemchr-evex-rtm.S | ||
| rawmemchr-evex.S | ||
| rawmemchr-sse2.S | ||
| rawmemchr.c | ||
| stpcpy-avx2-rtm.S | ||
| stpcpy-avx2.S | ||
| stpcpy-evex.S | ||
| stpcpy-sse2-unaligned.S | ||
| stpcpy-sse2.S | ||
| stpcpy-ssse3.S | ||
| stpcpy.c | ||
| stpncpy-avx2-rtm.S | ||
| stpncpy-avx2.S | ||
| stpncpy-c.c | ||
| stpncpy-evex.S | ||
| stpncpy-sse2-unaligned.S | ||
| stpncpy-ssse3.S | ||
| stpncpy.c | ||
| strcasecmp.c | ||
| strcasecmp_l-avx.S | ||
| strcasecmp_l-sse2.S | ||
| strcasecmp_l-sse4_2.S | ||
| strcasecmp_l-ssse3.S | ||
| strcasecmp_l.c | ||
| strcat-avx2-rtm.S | ||
| strcat-avx2.S | ||
| strcat-evex.S | ||
| strcat-sse2-unaligned.S | ||
| strcat-sse2.S | ||
| strcat-ssse3.S | ||
| strcat.c | ||
| strchr-avx2-rtm.S | ||
| strchr-avx2.S | ||
| strchr-evex.S | ||
| strchr-sse2-no-bsf.S | ||
| strchr-sse2.S | ||
| strchr.c | ||
| strchrnul-avx2-rtm.S | ||
| strchrnul-avx2.S | ||
| strchrnul-evex.S | ||
| strchrnul-sse2.S | ||
| strchrnul.c | ||
| strcmp-avx2-rtm.S | ||
| strcmp-avx2.S | ||
| strcmp-evex.S | ||
| strcmp-sse2-unaligned.S | ||
| strcmp-sse2.S | ||
| strcmp-sse4_2.S | ||
| strcmp-sse42.S | ||
| strcmp-ssse3.S | ||
| strcmp.c | ||
| strcpy-avx2-rtm.S | ||
| strcpy-avx2.S | ||
| strcpy-evex.S | ||
| strcpy-sse2-unaligned.S | ||
| strcpy-sse2.S | ||
| strcpy-ssse3.S | ||
| strcpy.c | ||
| strcspn-c.c | ||
| strcspn-sse2.S | ||
| strcspn.c | ||
| strlen-avx2-rtm.S | ||
| strlen-avx2.S | ||
| strlen-evex.S | ||
| strlen-sse2.S | ||
| strlen.c | ||
| strncase.c | ||
| strncase_l-avx.S | ||
| strncase_l-sse2.S | ||
| strncase_l-sse4_2.S | ||
| strncase_l-ssse3.S | ||
| strncase_l.c | ||
| strncat-avx2-rtm.S | ||
| strncat-avx2.S | ||
| strncat-c.c | ||
| strncat-evex.S | ||
| strncat-sse2-unaligned.S | ||
| strncat-ssse3.S | ||
| strncat.c | ||
| strncmp-avx2-rtm.S | ||
| strncmp-avx2.S | ||
| strncmp-evex.S | ||
| strncmp-sse2.S | ||
| strncmp-sse4_2.S | ||
| strncmp-ssse3.S | ||
| strncmp.c | ||
| strncpy-avx2-rtm.S | ||
| strncpy-avx2.S | ||
| strncpy-c.c | ||
| strncpy-evex.S | ||
| strncpy-sse2-unaligned.S | ||
| strncpy-ssse3.S | ||
| strncpy.c | ||
| strnlen-avx2-rtm.S | ||
| strnlen-avx2.S | ||
| strnlen-evex.S | ||
| strnlen-sse2.S | ||
| strnlen.c | ||
| strpbrk-c.c | ||
| strpbrk-sse2.S | ||
| strpbrk.c | ||
| strrchr-avx2-rtm.S | ||
| strrchr-avx2.S | ||
| strrchr-evex.S | ||
| strrchr-sse2.S | ||
| strrchr.c | ||
| strspn-c.c | ||
| strspn-sse2.S | ||
| strspn.c | ||
| strstr-sse2-unaligned.S | ||
| strstr.c | ||
| varshift.c | ||
| varshift.h | ||
| wcschr-avx2-rtm.S | ||
| wcschr-avx2.S | ||
| wcschr-evex.S | ||
| wcschr-sse2.S | ||
| wcschr.c | ||
| wcscmp-avx2-rtm.S | ||
| wcscmp-avx2.S | ||
| wcscmp-evex.S | ||
| wcscmp-sse2.S | ||
| wcscmp.c | ||
| wcscpy-c.c | ||
| wcscpy-ssse3.S | ||
| wcscpy.c | ||
| wcslen-avx2-rtm.S | ||
| wcslen-avx2.S | ||
| wcslen-evex.S | ||
| wcslen-sse2.S | ||
| wcslen.c | ||
| wcsncmp-avx2-rtm.S | ||
| wcsncmp-avx2.S | ||
| wcsncmp-evex.S | ||
| wcsncmp-sse2.c | ||
| wcsncmp.c | ||
| wcsnlen-avx2-rtm.S | ||
| wcsnlen-avx2.S | ||
| wcsnlen-c.c | ||
| wcsnlen-evex.S | ||
| wcsnlen-sse4_1.S | ||
| wcsnlen.c | ||
| wcsrchr-avx2-rtm.S | ||
| wcsrchr-avx2.S | ||
| wcsrchr-evex.S | ||
| wcsrchr-sse2.S | ||
| wcsrchr.c | ||
| wmemchr-avx2-rtm.S | ||
| wmemchr-avx2.S | ||
| wmemchr-evex-rtm.S | ||
| wmemchr-evex.S | ||
| wmemchr-sse2.S | ||
| wmemchr.c | ||
| wmemcmp-avx2-movbe-rtm.S | ||
| wmemcmp-avx2-movbe.S | ||
| wmemcmp-c.c | ||
| wmemcmp-evex-movbe.S | ||
| wmemcmp-sse4.S | ||
| wmemcmp-ssse3.S | ||
| wmemcmp.c | ||
| wmemset.c | ||
| wmemset_chk-nonshared.S | ||
| wmemset_chk.c | ||