mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-27 11:43:34 +08:00
9a421348cd
Unroll slightly and enforce good instruction scheduling. This improves performance on out-of-order machines. The unrolling allows for pipelined multiplies. As well, as an optional sysdep, reorder the operations and prevent reassociation for better scheduling and higher ILP. This commit only adds the barrier for x86, although it should be either no change or a win for any architecture. Unrolling further started to induce slowdowns for sizes [0, 4] but can help the loop so if larger sizes are the target further unrolling can be beneficial. Results for _dl_new_hash Benchmarked on Tigerlake: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz Time as Geometric Mean of N=30 runs Geometric mean of all benchmarks New / Old: 0.674 type, length, New Time, Old Time, New Time / Old Time fixed, 0, 2.865, 2.72, 1.053 fixed, 1, 3.567, 2.489, 1.433 fixed, 2, 2.577, 3.649, 0.706 fixed, 3, 3.644, 5.983, 0.609 fixed, 4, 4.211, 6.833, 0.616 fixed, 5, 4.741, 9.372, 0.506 fixed, 6, 5.415, 9.561, 0.566 fixed, 7, 6.649, 10.789, 0.616 fixed, 8, 8.081, 11.808, 0.684 fixed, 9, 8.427, 12.935, 0.651 fixed, 10, 8.673, 14.134, 0.614 fixed, 11, 10.69, 15.408, 0.694 fixed, 12, 10.789, 16.982, 0.635 fixed, 13, 12.169, 18.411, 0.661 fixed, 14, 12.659, 19.914, 0.636 fixed, 15, 13.526, 21.541, 0.628 fixed, 16, 14.211, 23.088, 0.616 fixed, 32, 29.412, 52.722, 0.558 fixed, 64, 65.41, 142.351, 0.459 fixed, 128, 138.505, 295.625, 0.469 fixed, 256, 291.707, 601.983, 0.485 random, 2, 12.698, 12.849, 0.988 random, 4, 16.065, 15.857, 1.013 random, 8, 19.564, 21.105, 0.927 random, 16, 23.919, 26.823, 0.892 random, 32, 31.987, 39.591, 0.808 random, 64, 49.282, 71.487, 0.689 random, 128, 82.23, 145.364, 0.566 random, 256, 152.209, 298.434, 0.51 Co-authored-by: Alexander Monakov <amonakov@ispras.ru> Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org> |
||
---|---|---|
.. | ||
bits | ||
fpu | ||
include | ||
nptl | ||
sys/platform | ||
__longjmp_cancel.S | ||
abi-note.c | ||
atomic-machine.h | ||
cacheinfo.c | ||
cacheinfo.h | ||
cet-control.h | ||
check-cet.awk | ||
configure | ||
configure.ac | ||
cpu-features-offsets.sym | ||
cpu-features.c | ||
cpu-tunables.c | ||
dl-cacheinfo.h | ||
dl-cet.c | ||
dl-diagnostics-cpu.c | ||
dl-get-cpu-features.c | ||
dl-hwcap.h | ||
dl-isa-level.h | ||
dl-lookupcfg.h | ||
dl-minsigstacksize.h | ||
dl-new-hash.h | ||
dl-procinfo.c | ||
dl-procinfo.h | ||
dl-procruntime.c | ||
dl-prop.h | ||
dl-tunables.list | ||
elf-initfini.h | ||
elide.h | ||
float128-abi.h | ||
fpu_control.h | ||
get-cpuid-feature-leaf.c | ||
get-isa-level.h | ||
hp-timing.h | ||
init-arch.h | ||
isa-level.c | ||
jmp_buf-ssp.sym | ||
ldbl2mpn.c | ||
ldsodefs.h | ||
libc-start.c | ||
libc-start.h | ||
link_map.h | ||
linkmap.h | ||
longjmp.c | ||
Makeconfig | ||
Makefile | ||
string_private.h | ||
sysdep.h | ||
tininess.h | ||
tst-cet-legacy-1.c | ||
tst-cet-legacy-1a.c | ||
tst-cet-legacy-2.c | ||
tst-cet-legacy-2a.c | ||
tst-cet-legacy-3.c | ||
tst-cet-legacy-4.c | ||
tst-cet-legacy-4a.c | ||
tst-cet-legacy-4b.c | ||
tst-cet-legacy-4c.c | ||
tst-cet-legacy-5.c | ||
tst-cet-legacy-5a.c | ||
tst-cet-legacy-5b.c | ||
tst-cet-legacy-6.c | ||
tst-cet-legacy-6a.c | ||
tst-cet-legacy-6b.c | ||
tst-cet-legacy-7.c | ||
tst-cet-legacy-8.c | ||
tst-cet-legacy-9-static.c | ||
tst-cet-legacy-9.c | ||
tst-cet-legacy-10-static.c | ||
tst-cet-legacy-10.c | ||
tst-cet-legacy-mod-1.c | ||
tst-cet-legacy-mod-2.c | ||
tst-cet-legacy-mod-4.c | ||
tst-cet-legacy-mod-5.c | ||
tst-cet-legacy-mod-5a.c | ||
tst-cet-legacy-mod-5b.c | ||
tst-cet-legacy-mod-5c.c | ||
tst-cet-legacy-mod-6.c | ||
tst-cet-legacy-mod-6a.c | ||
tst-cet-legacy-mod-6b.c | ||
tst-cet-legacy-mod-6c.c | ||
tst-cet-legacy-mod-6d.c | ||
tst-cpu-features-cpuinfo-static.c | ||
tst-cpu-features-cpuinfo.c | ||
tst-cpu-features-supports-static.c | ||
tst-cpu-features-supports.c | ||
tst-get-cpu-features-static.c | ||
tst-get-cpu-features.c | ||
tst-ifunc-isa-1-static.c | ||
tst-ifunc-isa-1.c | ||
tst-ifunc-isa-2-static.c | ||
tst-ifunc-isa-2.c | ||
tst-ifunc-isa.h | ||
tst-isa-level-1.c | ||
tst-isa-level-mod-1-baseline.c | ||
tst-isa-level-mod-1-v2.c | ||
tst-isa-level-mod-1-v3.c | ||
tst-isa-level-mod-1-v4.c | ||
tst-isa-level-mod-1.c | ||
tst-ldbl-nonnormal-printf.c | ||
tst-memchr-rtm.c | ||
tst-memcmp-rtm.c | ||
tst-memmove-rtm.c | ||
tst-memrchr-rtm.c | ||
tst-memset-rtm.c | ||
tst-setjmp-cet.c | ||
tst-stack-align.h | ||
tst-strchr-rtm.c | ||
tst-strcpy-rtm.c | ||
tst-string-rtm.h | ||
tst-strlen-rtm.c | ||
tst-strncmp-rtm.c | ||
tst-strrchr-rtm.c | ||
tst-sysconf-cache-linesize-static.c | ||
tst-sysconf-cache-linesize.c | ||
tst-wcsncmp-rtm.c | ||
Versions |