mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-24 10:22:41 +08:00
x86-64: Compile branred.c with -mprefer-vector-width=128 [BZ #24603]
When compiled with -O3 and AVX, GCC 8 and 9 optimize some loops in sysdeps/ieee754/dbl-64/branred.c with 256-bit vector instructions, which leads to store-forwarding stalls: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579 There is no easy fix in the compiler. This patch limits the vector width to 128 bits to work around this issue. It improves the performance of sin and cos by more than 40% on Skylake when compiled with -O3 -march=skylake. Tested with GCC 7/8/9 on x86-64. [BZ #24603] * sysdeps/x86_64/configure.ac: Check if -mprefer-vector-width=128 works. * sysdeps/x86_64/configure: Regenerated. * sysdeps/x86_64/fpu/Makefile (CFLAGS-branred.c): New. Set to -mprefer-vector-width=128 if supported.
This commit is contained in:
parent
82c664ed75
commit
7e681561a3
@ -1,3 +1,12 @@
|
||||
2019-07-24 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
[BZ #24603]
|
||||
* sysdeps/x86_64/configure.ac: Check if -mprefer-vector-width=128
|
||||
works.
|
||||
* sysdeps/x86_64/configure: Regenerated.
|
||||
* sysdeps/x86_64/fpu/Makefile (CFLAGS-branred.c): New. Set
|
||||
to -mprefer-vector-width=128 if supported.
|
||||
|
||||
2019-07-24 Florian Weimer <fweimer@redhat.com>
|
||||
|
||||
* scripts/build-many-glibcs.py (Context.checkout): Default to
|
||||
|
22
sysdeps/x86_64/configure
vendored
22
sysdeps/x86_64/configure
vendored
@ -54,6 +54,28 @@ fi
|
||||
config_vars="$config_vars
|
||||
config-cflags-avx512 = $libc_cv_cc_avx512"
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking -mprefer-vector-width=128" >&5
|
||||
$as_echo_n "checking -mprefer-vector-width=128... " >&6; }
|
||||
if ${libc_cv_cc_mprefer_vector_width+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
if { ac_try='${CC-cc} -mprefer-vector-width=128 -xc /dev/null -S -o /dev/null'
|
||||
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||
(eval $ac_try) 2>&5
|
||||
ac_status=$?
|
||||
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
test $ac_status = 0; }; }; then :
|
||||
libc_cv_cc_mprefer_vector_width=yes
|
||||
else
|
||||
libc_cv_cc_mprefer_vector_width=no
|
||||
fi
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_mprefer_vector_width" >&5
|
||||
$as_echo "$libc_cv_cc_mprefer_vector_width" >&6; }
|
||||
config_vars="$config_vars
|
||||
config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
|
||||
$as_echo_n "checking for Intel MPX support... " >&6; }
|
||||
if ${libc_cv_asm_mpx+:} false; then :
|
||||
|
@ -25,6 +25,15 @@ if test $libc_cv_cc_avx512 = yes; then
|
||||
fi
|
||||
LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512])
|
||||
|
||||
dnl Check if -mprefer-vector-width=128 works.
dnl The generated test invokes the compiler with this option on an empty
dnl input; the yes/no outcome is cached in libc_cv_cc_mprefer_vector_width.
|
||||
AC_CACHE_CHECK(-mprefer-vector-width=128, libc_cv_cc_mprefer_vector_width, [dnl
|
||||
LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
|
||||
[libc_cv_cc_mprefer_vector_width=yes],
|
||||
[libc_cv_cc_mprefer_vector_width=no])
|
||||
])
|
||||
dnl Publish the result to the makefiles as config-cflags-mprefer-vector-width,
dnl which sysdeps/x86_64/fpu/Makefile tests before adding the flag to
dnl CFLAGS-branred.c.
LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
|
||||
[$libc_cv_cc_mprefer_vector_width])
|
||||
|
||||
dnl Check whether asm supports Intel MPX
|
||||
AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
|
||||
cat > conftest.s <<\EOF
|
||||
|
@ -237,3 +237,15 @@ CFLAGS-test-float-libmvec-sincosf-avx512.c = -DREQUIRE_AVX512F
|
||||
CFLAGS-test-float-libmvec-sincosf-avx512-main.c = $(libmvec-sincos-cflags) $(float-vlen16-arch-ext-cflags)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
|
||||
# The condition concatenates $(subdir) with the configure result, so it
# holds only in the math subdirectory and only when configure reported
# that the compiler accepts -mprefer-vector-width=128 ("yes").
#
# When compiled with -O3 -march=skylake, GCC 8 and 9 optimize some loops
|
||||
# in branred.c with 256-bit vector instructions, which leads to store
|
||||
# forward stall:
|
||||
#
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579
|
||||
#
|
||||
# Limit vector width to 128 bits to work around this issue. It improves
|
||||
# performance of sin and cos by more than 40% on Skylake.
|
||||
CFLAGS-branred.c = -mprefer-vector-width=128
|
||||
endif
|
||||
|
Loading…
Reference in New Issue
Block a user