mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-26 09:43:50 +08:00
7e681561a3
When compiled with -O3 and AVX, GCC 8 and 9 optimize some loops in sysdeps/ieee754/dbl-64/branred.c with 256-bit vector instructions, which leads to store forward stall: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579 There is no easy fix in compiler. This patch limits vector width to 128 bits to work around this issue. It improves performance of sin and cos by more than 40% on Skylake compiled with -O3 -march=skylake. Tested with GCC 7/8/9 on x86-64. [BZ #24603] * sysdeps/x86_64/configure.ac: Check if -mprefer-vector-width=128 works. * sysdeps/x86_64/configure: Regenerated. * sysdeps/x86_64/fpu/Makefile (CFLAGS-branred.c): New. Set to -mprefer-vector-width=128 if supported.
148 lines
4.6 KiB
Plaintext
148 lines
4.6 KiB
Plaintext
# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
|
|
# Local configure fragment for sysdeps/x86_64.
|
|
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512DQ support in assembler" >&5
|
|
$as_echo_n "checking for AVX512DQ support in assembler... " >&6; }
|
|
if ${libc_cv_asm_avx512dq+:} false; then :
|
|
$as_echo_n "(cached) " >&6
|
|
else
|
|
cat > conftest.s <<\EOF
|
|
vandpd (%rax), %zmm6, %zmm1
|
|
EOF
|
|
if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
|
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
|
(eval $ac_try) 2>&5
|
|
ac_status=$?
|
|
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
|
test $ac_status = 0; }; }; then
|
|
libc_cv_asm_avx512dq=yes
|
|
else
|
|
libc_cv_asm_avx512dq=no
|
|
fi
|
|
rm -f conftest*
|
|
fi
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512dq" >&5
|
|
$as_echo "$libc_cv_asm_avx512dq" >&6; }
|
|
if test $libc_cv_asm_avx512dq = yes; then
|
|
$as_echo "#define HAVE_AVX512DQ_ASM_SUPPORT 1" >>confdefs.h
|
|
|
|
fi
|
|
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support" >&5
|
|
$as_echo_n "checking for AVX512 support... " >&6; }
|
|
if ${libc_cv_cc_avx512+:} false; then :
|
|
$as_echo_n "(cached) " >&6
|
|
else
|
|
if { ac_try='${CC-cc} -mavx512f -xc /dev/null -S -o /dev/null'
|
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
|
(eval $ac_try) 2>&5
|
|
ac_status=$?
|
|
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
|
test $ac_status = 0; }; }; then :
|
|
libc_cv_cc_avx512=$libc_cv_asm_avx512dq
|
|
else
|
|
libc_cv_cc_avx512=no
|
|
fi
|
|
|
|
fi
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx512" >&5
|
|
$as_echo "$libc_cv_cc_avx512" >&6; }
|
|
if test $libc_cv_cc_avx512 = yes; then
|
|
$as_echo "#define HAVE_AVX512_SUPPORT 1" >>confdefs.h
|
|
|
|
fi
|
|
config_vars="$config_vars
|
|
config-cflags-avx512 = $libc_cv_cc_avx512"
|
|
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking -mprefer-vector-width=128" >&5
|
|
$as_echo_n "checking -mprefer-vector-width=128... " >&6; }
|
|
if ${libc_cv_cc_mprefer_vector_width+:} false; then :
|
|
$as_echo_n "(cached) " >&6
|
|
else
|
|
if { ac_try='${CC-cc} -mprefer-vector-width=128 -xc /dev/null -S -o /dev/null'
|
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
|
(eval $ac_try) 2>&5
|
|
ac_status=$?
|
|
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
|
test $ac_status = 0; }; }; then :
|
|
libc_cv_cc_mprefer_vector_width=yes
|
|
else
|
|
libc_cv_cc_mprefer_vector_width=no
|
|
fi
|
|
|
|
fi
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_mprefer_vector_width" >&5
|
|
$as_echo "$libc_cv_cc_mprefer_vector_width" >&6; }
|
|
config_vars="$config_vars
|
|
config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
|
|
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
|
|
$as_echo_n "checking for Intel MPX support... " >&6; }
|
|
if ${libc_cv_asm_mpx+:} false; then :
|
|
$as_echo_n "(cached) " >&6
|
|
else
|
|
cat > conftest.s <<\EOF
|
|
bndmov %bnd0,(%rsp)
|
|
EOF
|
|
if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
|
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
|
(eval $ac_try) 2>&5
|
|
ac_status=$?
|
|
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
|
test $ac_status = 0; }; }; then
|
|
libc_cv_asm_mpx=yes
|
|
else
|
|
libc_cv_asm_mpx=no
|
|
fi
|
|
rm -f conftest*
|
|
fi
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
|
|
$as_echo "$libc_cv_asm_mpx" >&6; }
|
|
if test $libc_cv_asm_mpx = yes; then
|
|
$as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
|
|
|
|
fi
|
|
|
|
if test x"$build_mathvec" = xnotset; then
|
|
build_mathvec=yes
|
|
fi
|
|
|
|
if test "$static_pie" = yes; then
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5
|
|
$as_echo_n "checking for linker static PIE support... " >&6; }
|
|
if ${libc_cv_ld_static_pie+:} false; then :
|
|
$as_echo_n "(cached) " >&6
|
|
else
|
|
cat > conftest.s <<\EOF
|
|
.text
|
|
.global _start
|
|
.weak foo
|
|
_start:
|
|
leaq foo(%rip), %rax
|
|
EOF
|
|
libc_cv_pie_option="-Wl,-pie"
|
|
if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5'
|
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
|
(eval $ac_try) 2>&5
|
|
ac_status=$?
|
|
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
|
test $ac_status = 0; }; }; then
|
|
libc_cv_ld_static_pie=yes
|
|
else
|
|
libc_cv_ld_static_pie=no
|
|
fi
|
|
rm -f conftest*
|
|
fi
|
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5
|
|
$as_echo "$libc_cv_ld_static_pie" >&6; }
|
|
if test "$libc_cv_ld_static_pie" != yes; then
|
|
as_fn_error $? "linker support for static PIE needed" "$LINENO" 5
|
|
fi
|
|
fi
|
|
|
|
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
|
|
|
|
|
|
test -n "$critic_missing" && as_fn_error $? "
|
|
*** $critic_missing" "$LINENO" 5
|