mirror of
https://gcc.gnu.org/git/gcc.git
synced 2025-01-01 16:34:06 +08:00
Add a separate function to calculate cost for WIDEN_MULT_EXPR.
gcc/ChangeLog: PR target/39821 * config/i386/i386.c (ix86_widen_mult_cost): New function. (ix86_add_stmt_cost): Use ix86_widen_mult_cost for WIDEN_MULT_EXPR. gcc/testsuite/ChangeLog: PR target/39821 * gcc.target/i386/sse2-pr39821.c: New test. * gcc.target/i386/sse4-pr39821.c: New test.
This commit is contained in:
parent
aafa38b5bf
commit
231bcc77b9
@ -19845,6 +19845,44 @@ ix86_vec_cost (machine_mode mode, int cost)
|
||||
return cost;
|
||||
}
|
||||
|
||||
/* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
|
||||
vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2. */
|
||||
static int
|
||||
ix86_widen_mult_cost (const struct processor_costs *cost,
|
||||
enum machine_mode mode, bool uns_p)
|
||||
{
|
||||
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
|
||||
int extra_cost = 0;
|
||||
int basic_cost = 0;
|
||||
switch (mode)
|
||||
{
|
||||
case V8HImode:
|
||||
case V16HImode:
|
||||
if (!uns_p || mode == V16HImode)
|
||||
extra_cost = cost->sse_op * 2;
|
||||
basic_cost = cost->mulss * 2 + cost->sse_op * 4;
|
||||
break;
|
||||
case V4SImode:
|
||||
case V8SImode:
|
||||
/* pmulhw/pmullw can be used. */
|
||||
basic_cost = cost->mulss * 2 + cost->sse_op * 2;
|
||||
break;
|
||||
case V2DImode:
|
||||
/* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
|
||||
require extra 4 mul, 4 add, 4 cmp and 2 shift. */
|
||||
if (!TARGET_SSE4_1 && !uns_p)
|
||||
extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
|
||||
+ cost->sse_op * 2;
|
||||
/* Fallthru. */
|
||||
case V4DImode:
|
||||
basic_cost = cost->mulss * 2 + cost->sse_op * 4;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable();
|
||||
}
|
||||
return ix86_vec_cost (mode, basic_cost + extra_cost);
|
||||
}
|
||||
|
||||
/* Return cost of multiplication in MODE. */
|
||||
|
||||
static int
|
||||
@ -22575,10 +22613,18 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
|
||||
break;
|
||||
|
||||
case MULT_EXPR:
|
||||
case WIDEN_MULT_EXPR:
|
||||
/* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
|
||||
take it as MULT_EXPR. */
|
||||
case MULT_HIGHPART_EXPR:
|
||||
stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
|
||||
break;
|
||||
/* There's no direct instruction for WIDEN_MULT_EXPR,
|
||||
take emulation into account. */
|
||||
case WIDEN_MULT_EXPR:
|
||||
stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
|
||||
TYPE_UNSIGNED (vectype));
|
||||
break;
|
||||
|
||||
case NEGATE_EXPR:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
stmt_cost = ix86_cost->sse_op;
|
||||
|
45
gcc/testsuite/gcc.target/i386/sse2-pr39821.c
Normal file
45
gcc/testsuite/gcc.target/i386/sse2-pr39821.c
Normal file
@ -0,0 +1,45 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-msse2 -mno-sse4.1 -O3 -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
|
||||
#include<stdint.h>
|
||||
/* Signed 8-bit case: for each of ORDER elements, multiply *v1 by *v2
   (the first operand is cast to int16_t before the multiply) and store
   the product through v3, advancing all three pointers.  Only v3 is
   __restrict-qualified.  The loop counts ORDER down to zero, so ORDER
   is expected to be non-negative.
   NOTE(review): presumably one of the loops the dg-final scan expects
   to be vectorized as a widening multiply -- confirm.  */
void
|
||||
vec_widen_smul8 (int16_t* __restrict v3, int8_t *v1, int8_t *v2, int order)
|
||||
{
|
||||
while (order--)
|
||||
*v3++ = (int16_t) *v1++ * *v2++;
|
||||
}
|
||||
|
||||
/* Unsigned 8-bit case: for each of ORDER elements, multiply *v1 by *v2
   (the first operand is cast to uint16_t before the multiply) and
   store the product through v3, advancing all three pointers.  Only v3
   is __restrict-qualified.  The loop counts ORDER down to zero, so
   ORDER is expected to be non-negative.
   NOTE(review): presumably one of the loops the dg-final scan expects
   to be vectorized as a widening multiply -- confirm.  */
void
|
||||
vec_widen_umul8(uint16_t* __restrict v3, uint8_t *v1, uint8_t *v2, int order)
|
||||
{
|
||||
while (order--)
|
||||
*v3++ = (uint16_t) *v1++ * *v2++;
|
||||
}
|
||||
|
||||
/* Signed 16-bit case: for each of ORDER elements, multiply *v1 by *v2
   (the first operand is cast to int32_t before the multiply) and store
   the product through v3, advancing all three pointers.  Only v3 is
   __restrict-qualified.  The loop counts ORDER down to zero, so ORDER
   is expected to be non-negative.
   NOTE(review): presumably one of the loops the dg-final scan expects
   to be vectorized as a widening multiply -- confirm.  */
void
|
||||
vec_widen_smul16(int32_t* __restrict v3, int16_t *v1, int16_t *v2, int order)
|
||||
{
|
||||
while (order--)
|
||||
*v3++ = (int32_t) *v1++ * *v2++;
|
||||
}
|
||||
|
||||
/* Unsigned 16-bit case: for each of ORDER elements, multiply *v1 by
   *v2 (the first operand is cast to uint32_t before the multiply) and
   store the product through v3, advancing all three pointers.  Only v3
   is __restrict-qualified.  The loop counts ORDER down to zero, so
   ORDER is expected to be non-negative.
   NOTE(review): presumably one of the loops the dg-final scan expects
   to be vectorized as a widening multiply -- confirm.  */
void
|
||||
vec_widen_umul16(uint32_t* __restrict v3, uint16_t *v1, uint16_t *v2, int order)
|
||||
{
|
||||
while (order--)
|
||||
*v3++ = (uint32_t) *v1++ * *v2++;
|
||||
}
|
||||
|
||||
/* Signed 32-bit case: for each of ORDER elements, multiply *v1 by *v2
   (the first operand is cast to int64_t before the multiply) and store
   the product through v3, advancing all three pointers.  Only v3 is
   __restrict-qualified.  The loop counts ORDER down to zero, so ORDER
   is expected to be non-negative.
   NOTE(review): the dg-final in this file expects 5 vectorized loops
   while the SSE4.1 variant that includes it expects 6; presumably this
   signed 32->64 loop is the one left unvectorized without SSE4.1 --
   confirm.  */
void
|
||||
vec_widen_smul32(int64_t* __restrict v3, int32_t *v1, int32_t *v2, int order)
|
||||
{
|
||||
while (order--)
|
||||
*v3++ = (int64_t) *v1++ * *v2++;
|
||||
}
|
||||
|
||||
/* Unsigned 32-bit case: for each of ORDER elements, multiply *v1 by
   *v2 (the first operand is cast to uint64_t before the multiply) and
   store the product through v3, advancing all three pointers.  Only v3
   is __restrict-qualified.  The loop counts ORDER down to zero, so
   ORDER is expected to be non-negative.
   NOTE(review): presumably one of the loops the dg-final scan expects
   to be vectorized as a widening multiply -- confirm.  */
void
|
||||
vec_widen_umul32(uint64_t* __restrict v3, uint32_t *v1, uint32_t *v2, int order)
|
||||
{
|
||||
while (order--)
|
||||
*v3++ = (uint64_t) *v1++ * *v2++;
|
||||
}
|
4
gcc/testsuite/gcc.target/i386/sse4-pr39821.c
Normal file
4
gcc/testsuite/gcc.target/i386/sse4-pr39821.c
Normal file
@ -0,0 +1,4 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-msse4.1 -O3 -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect"} } */
|
||||
#include "sse2-pr39821.c"
|
Loading…
Reference in New Issue
Block a user