mirror of git://sourceware.org/git/glibc.git
The generic implementation is slightly more optimized than the powerpc one: it has a more optimized inf/nan check (it avoids FP unit checks and uses branch prediction hints), and it removes one branch by issuing trunc instead of a combination of floor/ceil (which also generates less code). On power10 with gcc 14.2.1 (columns: master, patch, difference): reciprocal-throughput: workload-0_1 1.1351 0.9067 20.12%; workload-1_maxint 1.4230 0.9040 36.47%; workload-maxint_maxfloat 1.5038 0.9076 39.65%; workload-integral 1.1280 0.9111 19.23%. latency: workload-0_1 1.1440 2.7117 -137.03%; workload-1_maxint 4.0556 2.7070 33.25%; workload-maxint_maxfloat 3.2122 2.7164 15.43%; workload-integral 3.2381 2.7281 15.75%. Checked on powerpc64le-linux-gnu. Reviewed-by: Sachin Monga <smonga@linux.ibm.com> |
||
---|---|---|
.. | ||
405 | ||
440 | ||
464 | ||
476 | ||
970 | ||
a2 | ||
bits | ||
cell | ||
fpu | ||
power4 | ||
power5 | ||
power5+ | ||
power6 | ||
power6x | ||
power7 | ||
power8 | ||
power9 | ||
power10 | ||
power11 | ||
Implies | ||
Implies-after | ||
Makefile | ||
Versions | ||
__longjmp-common.S | ||
__longjmp.S | ||
add_n.S | ||
addmul_1.S | ||
atomic-machine.h | ||
bsd-_setjmp.S | ||
bsd-setjmp.S | ||
compat-ppc-mcount.S | ||
configure | ||
configure.ac | ||
crti.S | ||
crtn.S | ||
dl-dtprocnum.h | ||
dl-irel.h | ||
dl-machine.c | ||
dl-machine.h | ||
dl-start.S | ||
dl-trampoline.S | ||
gprrest0.S | ||
gprrest1.S | ||
gprsave0.S | ||
gprsave1.S | ||
libgcc-compat.S | ||
lshift.S | ||
mcount.c | ||
memset.S | ||
mul_1.S | ||
ppc-mcount.S | ||
rshift.S | ||
rtld-memset.c | ||
setjmp-common.S | ||
setjmp.S | ||
stackguard-macros.h | ||
stackinfo.h | ||
start.S | ||
stpcpy.S | ||
strchr.S | ||
strcmp.S | ||
strcpy.S | ||
strlen.S | ||
sub_n.S | ||
submul_1.S | ||
symbol-hacks.h | ||
sysdep.h | ||
tst-audit.h | ||
unwind-arch.h |