mirror of git://sourceware.org/git/glibc.git
This patch adds an optimized memset implementation for POWER8. For sizes from 0 to 255 bytes, a word/doubleword algorithm similar to the POWER7-optimized one is used. For sizes greater than 255 bytes, two strategies are used: 1. If the constant is nonzero, the memory is written with AltiVec vector instructions; 2. If the constant is 0, dcbz instructions are used. The loop is unrolled to clear 512 bytes at a time. Using vector instructions increases throughput considerably, doubling performance for sizes larger than 1024 bytes. Unrolling the dcbz loop also improves performance, doubling throughput for sizes larger than 8192 bytes. |
||
|---|---|---|
| .. | ||
| bits | ||
| fpu | ||
| nofpu | ||
| nptl | ||
| power4 | ||
| power5+/fpu | ||
| power6 | ||
| power7/fpu | ||
| powerpc32 | ||
| powerpc64 | ||
| soft-fp | ||
| sys/platform | ||
| Implies | ||
| Makefile | ||
| Versions | ||
| abort-instr.h | ||
| configure | ||
| configure.ac | ||
| dl-procinfo.c | ||
| dl-procinfo.h | ||
| dl-tls.h | ||
| ffs.c | ||
| fpu_control.h | ||
| gccframe.h | ||
| ifunc-sel.h | ||
| jmpbuf-offsets.h | ||
| jmpbuf-unwind.h | ||
| ldsodefs.h | ||
| locale-defines.sym | ||
| longjmp.c | ||
| machine-gmon.h | ||
| math-tests.h | ||
| memusage.h | ||
| mp_clz_tab.c | ||
| novmx-longjmp.c | ||
| novmx-sigjmp.c | ||
| novmxsetjmp.h | ||
| preconfigure | ||
| rtld-global-offsets.sym | ||
| sched_cpucount.c | ||
| sigjmp.c | ||
| sotruss-lib.c | ||
| stackinfo.h | ||
| strcat.c | ||
| sysdep.h | ||
| test-arith.c | ||
| test-arithf.c | ||
| test-gettimebase.c | ||
| tls-macros.h | ||
| tst-stack-align.h | ||