mirror of git://sourceware.org/git/glibc.git
x86: Replace all SSE instructions with VEX equivalents in AVX+ files
Most of these don't really matter, as there was no dirty upper state, but we should generally avoid stray SSE instructions when they are not needed. The one case that really matters is in svml_d_tanh4_core_avx2.S: `blendvps %xmm0, %xmm8, %xmm7`, which was executed when there was a dirty upper state. Tested on x86_64-linux.
This commit is contained in:
parent
3edda6a0f0
commit
3079f652d7
|
@ -210,11 +210,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call acos@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -232,11 +232,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call acos@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -372,11 +372,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call acosh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -317,11 +317,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call acosh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -202,11 +202,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call asin@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -224,11 +224,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call asin@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -429,11 +429,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call asinh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -343,11 +343,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call asinh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -277,12 +277,12 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
movsd 64(%rsp, %r14, 8), %xmm1
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm1
|
||||
call atan2@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 96(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 96(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -295,12 +295,12 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
movsd 128(%rsp, %r14, 8), %xmm1
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 128(%rsp, %r14, 8), %xmm1
|
||||
call atan2@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 192(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 192(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -339,11 +339,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call atanh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -274,11 +274,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call atanh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -262,11 +262,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call cbrt@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -282,11 +282,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call cosh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -231,11 +231,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call cosh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -258,11 +258,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call erfc@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -261,11 +261,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call erfc@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -231,11 +231,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call exp10@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -191,11 +191,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call exp10@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -223,11 +223,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call exp2@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -227,11 +227,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call exp2@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call expm1@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -211,11 +211,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call expm1@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -231,12 +231,12 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
movsd 64(%rsp, %r14, 8), %xmm1
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm1
|
||||
call hypot@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 96(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 96(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -194,12 +194,12 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
movsd 128(%rsp, %r14, 8), %xmm1
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 128(%rsp, %r14, 8), %xmm1
|
||||
call hypot@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 192(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 192(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -225,11 +225,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call log10@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -207,11 +207,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call log10@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -263,11 +263,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call log1p@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -225,11 +225,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call log1p@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -223,11 +223,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call log2@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call log2@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -280,11 +280,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call sinh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -271,11 +271,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call sinh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -267,11 +267,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call tan@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -239,11 +239,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call tan@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -110,7 +110,7 @@ ENTRY(_ZGVdN4v_tanh_avx2)
|
|||
vpcmpgtd %xmm11, %xmm9, %xmm10
|
||||
vpcmpgtd %xmm8, %xmm9, %xmm0
|
||||
vpand %xmm10, %xmm9, %xmm7
|
||||
blendvps %xmm0, %xmm8, %xmm7
|
||||
vblendvps %xmm0, %xmm8, %xmm7, %xmm7
|
||||
|
||||
/*
|
||||
* VSHRIMM( I, iIndex, = iIndex, (17 - 4) );
|
||||
|
@ -272,11 +272,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 32(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 32(%rsp, %r14, 8), %xmm0
|
||||
call tanh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 64(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 64(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -286,11 +286,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movsd 64(%rsp, %r14, 8), %xmm0
|
||||
vmovsd 64(%rsp, %r14, 8), %xmm0
|
||||
call tanh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movsd %xmm0, 128(%rsp, %r14, 8)
|
||||
vmovsd %xmm0, 128(%rsp, %r14, 8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -205,11 +205,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call acosf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -198,11 +198,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call acosf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -290,11 +290,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call acoshf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -286,11 +286,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call acoshf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -198,11 +198,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call asinf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -187,11 +187,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call asinf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -313,11 +313,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call asinhf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -361,11 +361,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call asinhf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -257,12 +257,12 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
movss 128(%rsp, %r14, 4), %xmm1
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 128(%rsp, %r14, 4), %xmm1
|
||||
call atan2f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 192(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 192(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -238,12 +238,12 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
movss 64(%rsp, %r14, 4), %xmm1
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm1
|
||||
call atan2f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 96(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 96(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -222,13 +222,13 @@ L(SPECIAL_VALUES_LOOP):
|
|||
tzcntl %ebx, %ebp
|
||||
|
||||
/* Scalar math function call to process special input. */
|
||||
movss 64(%rsp, %rbp, 4), %xmm0
|
||||
vmovss 64(%rsp, %rbp, 4), %xmm0
|
||||
call atanhf@PLT
|
||||
|
||||
/* No good way to avoid the store-forwarding fault this will cause on
|
||||
return. `lfence` avoids the SF fault but at greater cost as it
|
||||
serialized stack/callee save restoration. */
|
||||
movss %xmm0, (%rsp, %rbp, 4)
|
||||
vmovss %xmm0, (%rsp, %rbp, 4)
|
||||
|
||||
blsrl %ebx, %ebx
|
||||
jnz L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -231,13 +231,13 @@ L(SPECIAL_VALUES_LOOP):
|
|||
tzcntl %ebx, %ebp
|
||||
|
||||
/* Scalar math function call to process special input. */
|
||||
movss 32(%rsp, %rbp, 4), %xmm0
|
||||
vmovss 32(%rsp, %rbp, 4), %xmm0
|
||||
call atanhf@PLT
|
||||
|
||||
/* No good way to avoid the store-forwarding fault this will cause on
|
||||
return. `lfence` avoids the SF fault but at greater cost as it
|
||||
serialized stack/callee save restoration. */
|
||||
movss %xmm0, (%rsp, %rbp, 4)
|
||||
vmovss %xmm0, (%rsp, %rbp, 4)
|
||||
|
||||
blsrl %ebx, %ebx
|
||||
jnz L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -304,11 +304,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call cbrtf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -228,11 +228,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call coshf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -242,11 +242,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call coshf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -218,11 +218,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call erfcf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -243,11 +243,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call erfcf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -186,11 +186,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call exp10f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -238,11 +238,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call exp10f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -209,11 +209,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call exp2f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -188,11 +188,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call exp2f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -194,11 +194,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call expm1f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -212,11 +212,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call expm1f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -202,12 +202,12 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
movss 128(%rsp, %r14, 4), %xmm1
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 128(%rsp, %r14, 4), %xmm1
|
||||
call hypotf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 192(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 192(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -226,12 +226,12 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
movss 64(%rsp, %r14, 4), %xmm1
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm1
|
||||
call hypotf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 96(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 96(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -161,11 +161,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call log10f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -174,11 +174,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call log10f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -207,11 +207,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call log1pf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -190,11 +190,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call log1pf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -158,11 +158,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call log2f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -169,11 +169,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call log2f@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -252,11 +252,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call sinhf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -243,11 +243,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 32(%rsp, %r14, 4), %xmm0
|
||||
vmovss 32(%rsp, %r14, 4), %xmm0
|
||||
call sinhf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -235,11 +235,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp, %r14, 4), %xmm0
|
||||
vmovss 64(%rsp, %r14, 4), %xmm0
|
||||
call tanf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp, %r14, 4)
|
||||
vmovss %xmm0, 128(%rsp, %r14, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -261,11 +261,11 @@ L(SPECIAL_VALUES_LOOP):
|
|||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %ebx, %r13d
|
||||
movss 32(%rsp, %r13, 4), %xmm0
|
||||
vmovss 32(%rsp, %r13, 4), %xmm0
|
||||
call tanf@PLT
|
||||
# LOE r13 r14 r15 ebx r12d xmm0
|
||||
|
||||
movss %xmm0, 64(%rsp, %r13, 4)
|
||||
vmovss %xmm0, 64(%rsp, %r13, 4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -221,13 +221,13 @@ L(SPECIAL_VALUES_LOOP):
|
|||
tzcntl %ebx, %ebp
|
||||
|
||||
/* Scalar math function call to process special input. */
|
||||
movss 64(%rsp, %rbp, 4), %xmm0
|
||||
vmovss 64(%rsp, %rbp, 4), %xmm0
|
||||
call tanhf@PLT
|
||||
|
||||
/* No good way to avoid the store-forwarding fault this will cause on
|
||||
return. `lfence` avoids the SF fault but at greater cost as it
|
||||
serialized stack/callee save restoration. */
|
||||
movss %xmm0, (%rsp, %rbp, 4)
|
||||
vmovss %xmm0, (%rsp, %rbp, 4)
|
||||
|
||||
blsrl %ebx, %ebx
|
||||
jnz L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -240,13 +240,13 @@ L(SPECIAL_VALUES_LOOP):
|
|||
tzcntl %ebx, %ebp
|
||||
|
||||
/* Scalar math function call to process special input. */
|
||||
movss 32(%rsp, %rbp, 4), %xmm0
|
||||
vmovss 32(%rsp, %rbp, 4), %xmm0
|
||||
call tanhf@PLT
|
||||
|
||||
/* No good way to avoid the store-forwarding fault this will cause on
|
||||
return. `lfence` avoids the SF fault but at greater cost as it
|
||||
serialized stack/callee save restoration. */
|
||||
movss %xmm0, (%rsp, %rbp, 4)
|
||||
vmovss %xmm0, (%rsp, %rbp, 4)
|
||||
|
||||
blsrl %ebx, %ebx
|
||||
jnz L(SPECIAL_VALUES_LOOP)
|
||||
|
|
|
@ -49,7 +49,7 @@
|
|||
|
||||
.section SECTION(.text), "ax", @progbits
|
||||
ENTRY(STRRCHR)
|
||||
movd %esi, %xmm7
|
||||
vmovd %esi, %xmm7
|
||||
movl %edi, %eax
|
||||
/* Broadcast CHAR to YMM4. */
|
||||
VPBROADCAST %xmm7, %ymm7
|
||||
|
|
Loading…
Reference in New Issue