mirror of
https://gcc.gnu.org/git/gcc.git
synced 2025-01-07 11:33:45 +08:00
tree-optimization/92645 - improve SLP with existing vectors
This improves SLP discovery in the face of existing vectors, allowing punning of the vector shape (or even punning from an integer type). For punning from integer types this does not yet handle lane zero extraction being represented as a conversion rather than a BIT_FIELD_REF.

2021-01-13  Richard Biener  <rguenther@suse.de>

PR tree-optimization/92645
* tree-vect-slp.c (vect_build_slp_tree_1): Relax supported BIT_FIELD_REF argument.
(vect_build_slp_tree_2): Record the desired vector type on the external vector def.
(vectorizable_slp_permutation): Handle required punning of existing vector defs.

* gcc.target/i386/pr92645-6.c: New testcase.
This commit is contained in:
parent
5ab67cdee6
commit
3ddc18251a
34
gcc/testsuite/gcc.target/i386/pr92645-6.c
Normal file
34
gcc/testsuite/gcc.target/i386/pr92645-6.c
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-require-effective-target lp64 } */
|
||||||
|
/* { dg-options "-O3 -msse2" } */
|
||||||
|
|
||||||
|
typedef long v2di __attribute__((vector_size(16)));
|
||||||
|
typedef int v4si __attribute__((vector_size(16)));
|
||||||
|
|
||||||
|
/* Pun a v2di load from *q as v4si through a union and store its lanes to
   *p in the order 0, 2, 1, 3, i.e. a permute of an existing vector whose
   element type differs from that of the stores.  */
void foo (v4si *p, v2di *q)
|
||||||
|
{
|
||||||
|
union { v2di a; v4si b; } u;
|
||||||
|
u.a = *q;
|
||||||
|
(*p)[0] = u.b[0];
|
||||||
|
(*p)[1] = u.b[2];
|
||||||
|
(*p)[2] = u.b[1];
|
||||||
|
(*p)[3] = u.b[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Like foo, but the 16-byte source is the integer type __int128_t rather
   than a vector; lanes 1, 2, 1, 3 of its v4si view are stored to *p.
   NOTE(review): lane 0 is never read here, presumably because lane zero
   extraction from an integer may be represented as a conversion instead
   of a BIT_FIELD_REF, which the commit message says is not yet handled
   -- confirm.  */
void bar (v4si *p, __int128_t *q)
|
||||||
|
{
|
||||||
|
union { __int128_t a; v4si b; } u;
|
||||||
|
u.a = *q;
|
||||||
|
(*p)[0] = u.b[1];
|
||||||
|
(*p)[1] = u.b[2];
|
||||||
|
(*p)[2] = u.b[1];
|
||||||
|
(*p)[3] = u.b[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Both functions should end up with something like
|
||||||
|
[v]pshufd $val, (%rsi), %xmm0
|
||||||
|
[v]movdqa %xmm0, (%rdi)
|
||||||
|
ret
|
||||||
|
recognized by SLP vectorization involving an existing "vector". */
|
||||||
|
/* { dg-final { scan-assembler-not "punpck" } } */
|
||||||
|
/* { dg-final { scan-assembler-times "pshufd" 2 } } */
|
@ -1105,7 +1105,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|
|||||||
tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
|
tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
|
||||||
if (!is_a <bb_vec_info> (vinfo)
|
if (!is_a <bb_vec_info> (vinfo)
|
||||||
|| TREE_CODE (vec) != SSA_NAME
|
|| TREE_CODE (vec) != SSA_NAME
|
||||||
|| !types_compatible_p (vectype, TREE_TYPE (vec)))
|
|| !operand_equal_p (TYPE_SIZE (vectype),
|
||||||
|
TYPE_SIZE (TREE_TYPE (vec))))
|
||||||
{
|
{
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||||
@ -1642,7 +1643,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
|
|||||||
lperm.safe_push (std::make_pair (0, (unsigned)lane));
|
lperm.safe_push (std::make_pair (0, (unsigned)lane));
|
||||||
}
|
}
|
||||||
slp_tree vnode = vect_create_new_slp_node (vNULL);
|
slp_tree vnode = vect_create_new_slp_node (vNULL);
|
||||||
SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);
|
/* ??? We record vectype here but we hide eventually necessary
|
||||||
|
punning and instead rely on code generation to materialize
|
||||||
|
VIEW_CONVERT_EXPRs as necessary. We instead should make
|
||||||
|
this explicit somehow. */
|
||||||
|
SLP_TREE_VECTYPE (vnode) = vectype;
|
||||||
SLP_TREE_VEC_DEFS (vnode).safe_push (vec);
|
SLP_TREE_VEC_DEFS (vnode).safe_push (vec);
|
||||||
/* We are always building a permutation node even if it is an identity
|
/* We are always building a permutation node even if it is an identity
|
||||||
permute to shield the rest of the vectorizer from the odd node
|
permute to shield the rest of the vectorizer from the odd node
|
||||||
@ -5671,6 +5676,18 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||||||
slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first];
|
slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first];
|
||||||
tree first_def
|
tree first_def
|
||||||
= vect_get_slp_vect_def (first_node, first_vec.second);
|
= vect_get_slp_vect_def (first_node, first_vec.second);
|
||||||
|
/* ??? We SLP match existing vector element extracts but
|
||||||
|
allow punning which we need to re-instantiate at uses
|
||||||
|
but have no good way of explicitly representing. */
|
||||||
|
if (!types_compatible_p (TREE_TYPE (first_def), vectype))
|
||||||
|
{
|
||||||
|
gassign *conv_stmt;
|
||||||
|
conv_stmt = gimple_build_assign (make_ssa_name (vectype),
|
||||||
|
build1 (VIEW_CONVERT_EXPR,
|
||||||
|
vectype, first_def));
|
||||||
|
vect_finish_stmt_generation (vinfo, NULL, conv_stmt, gsi);
|
||||||
|
first_def = gimple_assign_lhs (conv_stmt);
|
||||||
|
}
|
||||||
gassign *perm_stmt;
|
gassign *perm_stmt;
|
||||||
tree perm_dest = make_ssa_name (vectype);
|
tree perm_dest = make_ssa_name (vectype);
|
||||||
if (!identity_p)
|
if (!identity_p)
|
||||||
@ -5679,6 +5696,16 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||||||
= SLP_TREE_CHILDREN (node)[second_vec.first];
|
= SLP_TREE_CHILDREN (node)[second_vec.first];
|
||||||
tree second_def
|
tree second_def
|
||||||
= vect_get_slp_vect_def (second_node, second_vec.second);
|
= vect_get_slp_vect_def (second_node, second_vec.second);
|
||||||
|
if (!types_compatible_p (TREE_TYPE (second_def), vectype))
|
||||||
|
{
|
||||||
|
gassign *conv_stmt;
|
||||||
|
conv_stmt = gimple_build_assign (make_ssa_name (vectype),
|
||||||
|
build1
|
||||||
|
(VIEW_CONVERT_EXPR,
|
||||||
|
vectype, second_def));
|
||||||
|
vect_finish_stmt_generation (vinfo, NULL, conv_stmt, gsi);
|
||||||
|
second_def = gimple_assign_lhs (conv_stmt);
|
||||||
|
}
|
||||||
tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
|
tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
|
||||||
perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
|
perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
|
||||||
first_def, second_def,
|
first_def, second_def,
|
||||||
|
Loading…
Reference in New Issue
Block a user