mirror of git://sourceware.org/git/glibc.git
PowerPC: remove branch prediction from rint implementation
The branch prediction hints actually hurt performance in this case. The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52' is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a general floating-point function, the expected input is not bounded, so it is better to let the hardware handle the branches.
This commit is contained in:
parent
6142896d53
commit
60c414c346
|
|
@ -43,7 +43,7 @@
|
||||||
# See pow-inputs for an example.
|
# See pow-inputs for an example.
|
||||||
|
|
||||||
subdir := benchtests
|
subdir := benchtests
|
||||||
bench := exp pow
|
bench := exp pow rint
|
||||||
|
|
||||||
exp-ITER = 100000
|
exp-ITER = 100000
|
||||||
exp-ARGLIST = double
|
exp-ARGLIST = double
|
||||||
|
|
@ -55,5 +55,10 @@ pow-ARGLIST = double:double
|
||||||
pow-RET = double
|
pow-RET = double
|
||||||
LDFLAGS-bench-pow = -lm
|
LDFLAGS-bench-pow = -lm
|
||||||
|
|
||||||
|
rint-ITER = 250000000
|
||||||
|
rint-ARGLIST = double
|
||||||
|
rint-RET = double
|
||||||
|
LDFLAGS-bench-rint = -lm
|
||||||
|
|
||||||
include ../Makeconfig
|
include ../Makeconfig
|
||||||
include ../Rules
|
include ../Rules
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
78.5
|
||||||
|
-78.5
|
||||||
|
4503599627370497.0
|
||||||
|
-4503599627370497.0
|
||||||
|
|
@ -45,14 +45,14 @@ ENTRY (__rint)
|
||||||
fsub fp12,fp13,fp13 /* generate 0.0 */
|
fsub fp12,fp13,fp13 /* generate 0.0 */
|
||||||
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
||||||
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||||||
bnllr- cr7
|
bnllr cr7
|
||||||
bng- cr6,.L4
|
bng cr6,.L4
|
||||||
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||||||
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||||||
fabs fp1,fp1 /* if (x == 0.0) */
|
fabs fp1,fp1 /* if (x == 0.0) */
|
||||||
blr /* x = 0.0; */
|
blr /* x = 0.0; */
|
||||||
.L4:
|
.L4:
|
||||||
bnllr- cr6 /* if (x < 0.0) */
|
bnllr cr6 /* if (x < 0.0) */
|
||||||
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||||||
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||||||
fnabs fp1,fp1 /* if (x == 0.0) */
|
fnabs fp1,fp1 /* if (x == 0.0) */
|
||||||
|
|
|
||||||
|
|
@ -41,14 +41,14 @@ ENTRY (__rintf)
|
||||||
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
||||||
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
||||||
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||||||
bnllr- cr7
|
bnllr cr7
|
||||||
bng- cr6,.L4
|
bng cr6,.L4
|
||||||
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||||||
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||||||
fabs fp1,fp1 /* if (x == 0.0) */
|
fabs fp1,fp1 /* if (x == 0.0) */
|
||||||
blr /* x = 0.0; */
|
blr /* x = 0.0; */
|
||||||
.L4:
|
.L4:
|
||||||
bnllr- cr6 /* if (x < 0.0) */
|
bnllr cr6 /* if (x < 0.0) */
|
||||||
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||||||
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||||||
fnabs fp1,fp1 /* if (x == 0.0) */
|
fnabs fp1,fp1 /* if (x == 0.0) */
|
||||||
|
|
|
||||||
|
|
@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
|
||||||
fsub fp12,fp13,fp13 /* generate 0.0 */
|
fsub fp12,fp13,fp13 /* generate 0.0 */
|
||||||
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
||||||
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||||||
bnllr- cr7
|
bnllr cr7
|
||||||
bng- cr6,.L4
|
bng cr6,.L4
|
||||||
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||||||
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||||||
fabs fp1,fp1 /* if (x == 0.0) */
|
fabs fp1,fp1 /* if (x == 0.0) */
|
||||||
blr /* x = 0.0; */
|
blr /* x = 0.0; */
|
||||||
.L4:
|
.L4:
|
||||||
bnllr- cr6 /* if (x < 0.0) */
|
bnllr cr6 /* if (x < 0.0) */
|
||||||
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||||||
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||||||
fnabs fp1,fp1 /* if (x == 0.0) */
|
fnabs fp1,fp1 /* if (x == 0.0) */
|
||||||
|
|
|
||||||
|
|
@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
|
||||||
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
||||||
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
||||||
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||||||
bnllr- cr7
|
bnllr cr7
|
||||||
bng- cr6,.L4
|
bng cr6,.L4
|
||||||
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||||||
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||||||
fabs fp1,fp1 /* if (x == 0.0) */
|
fabs fp1,fp1 /* if (x == 0.0) */
|
||||||
blr /* x = 0.0; */
|
blr /* x = 0.0; */
|
||||||
.L4:
|
.L4:
|
||||||
bnllr- cr6 /* if (x < 0.0) */
|
bnllr cr6 /* if (x < 0.0) */
|
||||||
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||||||
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||||||
fnabs fp1,fp1 /* if (x == 0.0) */
|
fnabs fp1,fp1 /* if (x == 0.0) */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue