x86: Replace all SSE instructions with VEX equivalents in AVX+ files

Most of these don't really matter, as there was no dirty upper state,
but we should generally avoid stray SSE when it's not needed.

The one case that really matters is in svml_d_tanh4_core_avx2.S:

blendvps %xmm0, %xmm8, %xmm7

This instruction was executed when there was a dirty upper state.

Tested on x86_64-linux
This commit is contained in:
Noah Goldstein 2022-06-20 13:02:10 -07:00
parent 3edda6a0f0
commit 3079f652d7
75 changed files with 158 additions and 158 deletions

View File

@ -210,11 +210,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call acos@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -232,11 +232,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call acos@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -372,11 +372,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call acosh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -317,11 +317,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call acosh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -202,11 +202,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call asin@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -224,11 +224,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call asin@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -429,11 +429,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call asinh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -343,11 +343,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call asinh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -277,12 +277,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
movsd 64(%rsp, %r14, 8), %xmm1
vmovsd 32(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm1
call atan2@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 96(%rsp, %r14, 8)
vmovsd %xmm0, 96(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -295,12 +295,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
movsd 128(%rsp, %r14, 8), %xmm1
vmovsd 64(%rsp, %r14, 8), %xmm0
vmovsd 128(%rsp, %r14, 8), %xmm1
call atan2@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 192(%rsp, %r14, 8)
vmovsd %xmm0, 192(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -339,11 +339,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call atanh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -274,11 +274,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call atanh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -262,11 +262,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call cbrt@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -282,11 +282,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call cosh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -231,11 +231,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call cosh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -258,11 +258,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call erfc@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -261,11 +261,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call erfc@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -231,11 +231,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call exp10@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -191,11 +191,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call exp10@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -223,11 +223,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call exp2@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -227,11 +227,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call exp2@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call expm1@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -211,11 +211,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call expm1@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -231,12 +231,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
movsd 64(%rsp, %r14, 8), %xmm1
vmovsd 32(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm1
call hypot@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 96(%rsp, %r14, 8)
vmovsd %xmm0, 96(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -194,12 +194,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
movsd 128(%rsp, %r14, 8), %xmm1
vmovsd 64(%rsp, %r14, 8), %xmm0
vmovsd 128(%rsp, %r14, 8), %xmm1
call hypot@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 192(%rsp, %r14, 8)
vmovsd %xmm0, 192(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -225,11 +225,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call log10@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -207,11 +207,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call log10@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -263,11 +263,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call log1p@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -225,11 +225,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call log1p@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -223,11 +223,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call log2@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call log2@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -280,11 +280,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call sinh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -271,11 +271,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call sinh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -267,11 +267,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call tan@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -239,11 +239,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call tan@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -110,7 +110,7 @@ ENTRY(_ZGVdN4v_tanh_avx2)
vpcmpgtd %xmm11, %xmm9, %xmm10
vpcmpgtd %xmm8, %xmm9, %xmm0
vpand %xmm10, %xmm9, %xmm7
blendvps %xmm0, %xmm8, %xmm7
vblendvps %xmm0, %xmm8, %xmm7, %xmm7
/*
* VSHRIMM( I, iIndex, = iIndex, (17 - 4) );
@ -272,11 +272,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 32(%rsp, %r14, 8), %xmm0
vmovsd 32(%rsp, %r14, 8), %xmm0
call tanh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 64(%rsp, %r14, 8)
vmovsd %xmm0, 64(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -286,11 +286,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp, %r14, 8), %xmm0
vmovsd 64(%rsp, %r14, 8), %xmm0
call tanh@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp, %r14, 8)
vmovsd %xmm0, 128(%rsp, %r14, 8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call acosf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -198,11 +198,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call acosf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -290,11 +290,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call acoshf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -286,11 +286,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call acoshf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -198,11 +198,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call asinf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -187,11 +187,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call asinf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -313,11 +313,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call asinhf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -361,11 +361,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call asinhf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -257,12 +257,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
movss 128(%rsp, %r14, 4), %xmm1
vmovss 64(%rsp, %r14, 4), %xmm0
vmovss 128(%rsp, %r14, 4), %xmm1
call atan2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 192(%rsp, %r14, 4)
vmovss %xmm0, 192(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -238,12 +238,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
movss 64(%rsp, %r14, 4), %xmm1
vmovss 32(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm1
call atan2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 96(%rsp, %r14, 4)
vmovss %xmm0, 96(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -222,13 +222,13 @@ L(SPECIAL_VALUES_LOOP):
tzcntl %ebx, %ebp
/* Scalar math function call to process special input. */
movss 64(%rsp, %rbp, 4), %xmm0
vmovss 64(%rsp, %rbp, 4), %xmm0
call atanhf@PLT
/* No good way to avoid the store-forwarding fault this will cause on
return. `lfence` avoids the SF fault but at greater cost as it
serializes stack/callee save restoration. */
movss %xmm0, (%rsp, %rbp, 4)
vmovss %xmm0, (%rsp, %rbp, 4)
blsrl %ebx, %ebx
jnz L(SPECIAL_VALUES_LOOP)

View File

@ -231,13 +231,13 @@ L(SPECIAL_VALUES_LOOP):
tzcntl %ebx, %ebp
/* Scalar math function call to process special input. */
movss 32(%rsp, %rbp, 4), %xmm0
vmovss 32(%rsp, %rbp, 4), %xmm0
call atanhf@PLT
/* No good way to avoid the store-forwarding fault this will cause on
return. `lfence` avoids the SF fault but at greater cost as it
serializes stack/callee save restoration. */
movss %xmm0, (%rsp, %rbp, 4)
vmovss %xmm0, (%rsp, %rbp, 4)
blsrl %ebx, %ebx
jnz L(SPECIAL_VALUES_LOOP)

View File

@ -304,11 +304,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call cbrtf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -228,11 +228,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call coshf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -242,11 +242,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call coshf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -218,11 +218,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call erfcf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -243,11 +243,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call erfcf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -186,11 +186,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call exp10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -238,11 +238,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call exp10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -209,11 +209,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call exp2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -188,11 +188,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call exp2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -194,11 +194,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call expm1f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -212,11 +212,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call expm1f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -202,12 +202,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
movss 128(%rsp, %r14, 4), %xmm1
vmovss 64(%rsp, %r14, 4), %xmm0
vmovss 128(%rsp, %r14, 4), %xmm1
call hypotf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 192(%rsp, %r14, 4)
vmovss %xmm0, 192(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -226,12 +226,12 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
movss 64(%rsp, %r14, 4), %xmm1
vmovss 32(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm1
call hypotf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 96(%rsp, %r14, 4)
vmovss %xmm0, 96(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -161,11 +161,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call log10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -174,11 +174,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call log10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -207,11 +207,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call log1pf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -190,11 +190,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call log1pf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -158,11 +158,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call log2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -169,11 +169,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call log2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -252,11 +252,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call sinhf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -243,11 +243,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp, %r14, 4), %xmm0
vmovss 32(%rsp, %r14, 4), %xmm0
call sinhf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp, %r14, 4)
vmovss %xmm0, 64(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -235,11 +235,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp, %r14, 4), %xmm0
vmovss 64(%rsp, %r14, 4), %xmm0
call tanf@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp, %r14, 4)
vmovss %xmm0, 128(%rsp, %r14, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -261,11 +261,11 @@ L(SPECIAL_VALUES_LOOP):
L(SCALAR_MATH_CALL):
movl %ebx, %r13d
movss 32(%rsp, %r13, 4), %xmm0
vmovss 32(%rsp, %r13, 4), %xmm0
call tanf@PLT
# LOE r13 r14 r15 ebx r12d xmm0
movss %xmm0, 64(%rsp, %r13, 4)
vmovss %xmm0, 64(%rsp, %r13, 4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)

View File

@ -221,13 +221,13 @@ L(SPECIAL_VALUES_LOOP):
tzcntl %ebx, %ebp
/* Scalar math function call to process special input. */
movss 64(%rsp, %rbp, 4), %xmm0
vmovss 64(%rsp, %rbp, 4), %xmm0
call tanhf@PLT
/* No good way to avoid the store-forwarding fault this will cause on
return. `lfence` avoids the SF fault but at greater cost as it
serializes stack/callee save restoration. */
movss %xmm0, (%rsp, %rbp, 4)
vmovss %xmm0, (%rsp, %rbp, 4)
blsrl %ebx, %ebx
jnz L(SPECIAL_VALUES_LOOP)

View File

@ -240,13 +240,13 @@ L(SPECIAL_VALUES_LOOP):
tzcntl %ebx, %ebp
/* Scalar math function call to process special input. */
movss 32(%rsp, %rbp, 4), %xmm0
vmovss 32(%rsp, %rbp, 4), %xmm0
call tanhf@PLT
/* No good way to avoid the store-forwarding fault this will cause on
return. `lfence` avoids the SF fault but at greater cost as it
serializes stack/callee save restoration. */
movss %xmm0, (%rsp, %rbp, 4)
vmovss %xmm0, (%rsp, %rbp, 4)
blsrl %ebx, %ebx
jnz L(SPECIAL_VALUES_LOOP)

View File

@ -49,7 +49,7 @@
.section SECTION(.text), "ax", @progbits
ENTRY(STRRCHR)
movd %esi, %xmm7
vmovd %esi, %xmm7
movl %edi, %eax
/* Broadcast CHAR to YMM4. */
VPBROADCAST %xmm7, %ymm7