x86: Replace all SSE instructions with their VEX equivalents in AVX+ files

Most of these don't really matter, as there was no dirty upper state,
but we should generally avoid stray SSE instructions when they are not needed.

The one case that really matters is in svml_d_tanh4_core_avx2.S:

blendvps %xmm0, %xmm8, %xmm7

This legacy SSE instruction was executed while there was a dirty upper state.

Tested on x86_64-linux
This commit is contained in:
Noah Goldstein 2022-06-20 13:02:10 -07:00
parent 3edda6a0f0
commit 3079f652d7
75 changed files with 158 additions and 158 deletions

View File

@ -210,11 +210,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call acos@PLT call acos@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -232,11 +232,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call acos@PLT call acos@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -372,11 +372,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call acosh@PLT call acosh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -317,11 +317,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call acosh@PLT call acosh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -202,11 +202,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call asin@PLT call asin@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -224,11 +224,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call asin@PLT call asin@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -429,11 +429,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call asinh@PLT call asinh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -343,11 +343,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call asinh@PLT call asinh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -277,12 +277,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
movsd 64(%rsp, %r14, 8), %xmm1 vmovsd 64(%rsp, %r14, 8), %xmm1
call atan2@PLT call atan2@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 96(%rsp, %r14, 8) vmovsd %xmm0, 96(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -295,12 +295,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
movsd 128(%rsp, %r14, 8), %xmm1 vmovsd 128(%rsp, %r14, 8), %xmm1
call atan2@PLT call atan2@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 192(%rsp, %r14, 8) vmovsd %xmm0, 192(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -339,11 +339,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call atanh@PLT call atanh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -274,11 +274,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call atanh@PLT call atanh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -262,11 +262,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call cbrt@PLT call cbrt@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -282,11 +282,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call cosh@PLT call cosh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -231,11 +231,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call cosh@PLT call cosh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -258,11 +258,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call erfc@PLT call erfc@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -261,11 +261,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call erfc@PLT call erfc@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -231,11 +231,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call exp10@PLT call exp10@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -191,11 +191,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call exp10@PLT call exp10@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -223,11 +223,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call exp2@PLT call exp2@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -227,11 +227,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call exp2@PLT call exp2@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call expm1@PLT call expm1@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -211,11 +211,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call expm1@PLT call expm1@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -231,12 +231,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
movsd 64(%rsp, %r14, 8), %xmm1 vmovsd 64(%rsp, %r14, 8), %xmm1
call hypot@PLT call hypot@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 96(%rsp, %r14, 8) vmovsd %xmm0, 96(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -194,12 +194,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
movsd 128(%rsp, %r14, 8), %xmm1 vmovsd 128(%rsp, %r14, 8), %xmm1
call hypot@PLT call hypot@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 192(%rsp, %r14, 8) vmovsd %xmm0, 192(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -225,11 +225,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call log10@PLT call log10@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -207,11 +207,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call log10@PLT call log10@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -263,11 +263,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call log1p@PLT call log1p@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -225,11 +225,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call log1p@PLT call log1p@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -223,11 +223,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call log2@PLT call log2@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call log2@PLT call log2@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -280,11 +280,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call sinh@PLT call sinh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -271,11 +271,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call sinh@PLT call sinh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -267,11 +267,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call tan@PLT call tan@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -239,11 +239,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call tan@PLT call tan@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -110,7 +110,7 @@ ENTRY(_ZGVdN4v_tanh_avx2)
vpcmpgtd %xmm11, %xmm9, %xmm10 vpcmpgtd %xmm11, %xmm9, %xmm10
vpcmpgtd %xmm8, %xmm9, %xmm0 vpcmpgtd %xmm8, %xmm9, %xmm0
vpand %xmm10, %xmm9, %xmm7 vpand %xmm10, %xmm9, %xmm7
blendvps %xmm0, %xmm8, %xmm7 vblendvps %xmm0, %xmm8, %xmm7, %xmm7
/* /*
* VSHRIMM( I, iIndex, = iIndex, (17 - 4) ); * VSHRIMM( I, iIndex, = iIndex, (17 - 4) );
@ -272,11 +272,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0 vmovsd 32(%rsp, %r14, 8), %xmm0
call tanh@PLT call tanh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8) vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -286,11 +286,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0 vmovsd 64(%rsp, %r14, 8), %xmm0
call tanh@PLT call tanh@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8) vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call acosf@PLT call acosf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -198,11 +198,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call acosf@PLT call acosf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -290,11 +290,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call acoshf@PLT call acoshf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -286,11 +286,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call acoshf@PLT call acoshf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -198,11 +198,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call asinf@PLT call asinf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -187,11 +187,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call asinf@PLT call asinf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -313,11 +313,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call asinhf@PLT call asinhf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -361,11 +361,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call asinhf@PLT call asinhf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -257,12 +257,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
movss 128(%rsp, %r14, 4), %xmm1 vmovss 128(%rsp, %r14, 4), %xmm1
call atan2f@PLT call atan2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 192(%rsp, %r14, 4) vmovss %xmm0, 192(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -238,12 +238,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
movss 64(%rsp, %r14, 4), %xmm1 vmovss 64(%rsp, %r14, 4), %xmm1
call atan2f@PLT call atan2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 96(%rsp, %r14, 4) vmovss %xmm0, 96(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -222,13 +222,13 @@ L(SPECIAL_VALUES_LOOP):
tzcntl %ebx, %ebp tzcntl %ebx, %ebp
/* Scalar math fucntion call to process special input. */ /* Scalar math fucntion call to process special input. */
movss 64(%rsp, %rbp, 4), %xmm0 vmovss 64(%rsp, %rbp, 4), %xmm0
call atanhf@PLT call atanhf@PLT
/* No good way to avoid the store-forwarding fault this will cause on /* No good way to avoid the store-forwarding fault this will cause on
return. `lfence` avoids the SF fault but at greater cost as it return. `lfence` avoids the SF fault but at greater cost as it
serialized stack/callee save restoration. */ serialized stack/callee save restoration. */
movss %xmm0, (%rsp, %rbp, 4) vmovss %xmm0, (%rsp, %rbp, 4)
blsrl %ebx, %ebx blsrl %ebx, %ebx
jnz L(SPECIAL_VALUES_LOOP) jnz L(SPECIAL_VALUES_LOOP)

View File

@ -231,13 +231,13 @@ L(SPECIAL_VALUES_LOOP):
tzcntl %ebx, %ebp tzcntl %ebx, %ebp
/* Scalar math fucntion call to process special input. */ /* Scalar math fucntion call to process special input. */
movss 32(%rsp, %rbp, 4), %xmm0 vmovss 32(%rsp, %rbp, 4), %xmm0
call atanhf@PLT call atanhf@PLT
/* No good way to avoid the store-forwarding fault this will cause on /* No good way to avoid the store-forwarding fault this will cause on
return. `lfence` avoids the SF fault but at greater cost as it return. `lfence` avoids the SF fault but at greater cost as it
serialized stack/callee save restoration. */ serialized stack/callee save restoration. */
movss %xmm0, (%rsp, %rbp, 4) vmovss %xmm0, (%rsp, %rbp, 4)
blsrl %ebx, %ebx blsrl %ebx, %ebx
jnz L(SPECIAL_VALUES_LOOP) jnz L(SPECIAL_VALUES_LOOP)

View File

@ -304,11 +304,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call cbrtf@PLT call cbrtf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -228,11 +228,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call coshf@PLT call coshf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -242,11 +242,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call coshf@PLT call coshf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -218,11 +218,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call erfcf@PLT call erfcf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -243,11 +243,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call erfcf@PLT call erfcf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -186,11 +186,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call exp10f@PLT call exp10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -238,11 +238,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call exp10f@PLT call exp10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -209,11 +209,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call exp2f@PLT call exp2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -188,11 +188,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call exp2f@PLT call exp2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -194,11 +194,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call expm1f@PLT call expm1f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -212,11 +212,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call expm1f@PLT call expm1f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -202,12 +202,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
movss 128(%rsp, %r14, 4), %xmm1 vmovss 128(%rsp, %r14, 4), %xmm1
call hypotf@PLT call hypotf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 192(%rsp, %r14, 4) vmovss %xmm0, 192(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -226,12 +226,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
movss 64(%rsp, %r14, 4), %xmm1 vmovss 64(%rsp, %r14, 4), %xmm1
call hypotf@PLT call hypotf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 96(%rsp, %r14, 4) vmovss %xmm0, 96(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -161,11 +161,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call log10f@PLT call log10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -174,11 +174,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call log10f@PLT call log10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -207,11 +207,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call log1pf@PLT call log1pf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -190,11 +190,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call log1pf@PLT call log1pf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -158,11 +158,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call log2f@PLT call log2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -169,11 +169,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call log2f@PLT call log2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -252,11 +252,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call sinhf@PLT call sinhf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -243,11 +243,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0 vmovss 32(%rsp, %r14, 4), %xmm0
call sinhf@PLT call sinhf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4) vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -235,11 +235,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %r12d, %r14d movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0 vmovss 64(%rsp, %r14, 4), %xmm0
call tanf@PLT call tanf@PLT
# LOE rbx r14 r15 r12d r13d xmm0 # LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4) vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -261,11 +261,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL): L(SCALAR_MATH_CALL):
movl %ebx, %r13d movl %ebx, %r13d
movss 32(%rsp, %r13, 4), %xmm0 vmovss 32(%rsp, %r13, 4), %xmm0
call tanf@PLT call tanf@PLT
# LOE r13 r14 r15 ebx r12d xmm0 # LOE r13 r14 r15 ebx r12d xmm0
movss %xmm0, 64(%rsp, %r13, 4) vmovss %xmm0, 64(%rsp, %r13, 4)
/* Process special inputs in loop */ /* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP) jmp L(SPECIAL_VALUES_LOOP)

View File

@ -221,13 +221,13 @@ L(SPECIAL_VALUES_LOOP):
tzcntl %ebx, %ebp tzcntl %ebx, %ebp
/* Scalar math function call to process special input. */ /* Scalar math function call to process special input. */
movss 64(%rsp, %rbp, 4), %xmm0 vmovss 64(%rsp, %rbp, 4), %xmm0
call tanhf@PLT call tanhf@PLT
/* No good way to avoid the store-forwarding fault this will cause on /* No good way to avoid the store-forwarding fault this will cause on
return. `lfence` avoids the SF fault but at greater cost as it return. `lfence` avoids the SF fault but at greater cost as it
serializes stack/callee save restoration. */ serializes stack/callee save restoration. */
movss %xmm0, (%rsp, %rbp, 4) vmovss %xmm0, (%rsp, %rbp, 4)
blsrl %ebx, %ebx blsrl %ebx, %ebx
jnz L(SPECIAL_VALUES_LOOP) jnz L(SPECIAL_VALUES_LOOP)

View File

@ -240,13 +240,13 @@ L(SPECIAL_VALUES_LOOP):
tzcntl %ebx, %ebp tzcntl %ebx, %ebp
/* Scalar math function call to process special input. */ /* Scalar math function call to process special input. */
movss 32(%rsp, %rbp, 4), %xmm0 vmovss 32(%rsp, %rbp, 4), %xmm0
call tanhf@PLT call tanhf@PLT
/* No good way to avoid the store-forwarding fault this will cause on /* No good way to avoid the store-forwarding fault this will cause on
return. `lfence` avoids the SF fault but at greater cost as it return. `lfence` avoids the SF fault but at greater cost as it
serializes stack/callee save restoration. */ serializes stack/callee save restoration. */
movss %xmm0, (%rsp, %rbp, 4) vmovss %xmm0, (%rsp, %rbp, 4)
blsrl %ebx, %ebx blsrl %ebx, %ebx
jnz L(SPECIAL_VALUES_LOOP) jnz L(SPECIAL_VALUES_LOOP)

View File

@ -49,7 +49,7 @@
.section SECTION(.text), "ax", @progbits .section SECTION(.text), "ax", @progbits
ENTRY(STRRCHR) ENTRY(STRRCHR)
movd %esi, %xmm7 vmovd %esi, %xmm7
movl %edi, %eax movl %edi, %eax
/* Broadcast CHAR to YMM7. */ /* Broadcast CHAR to YMM7. */
VPBROADCAST %xmm7, %ymm7 VPBROADCAST %xmm7, %ymm7