vect: Avoid duplicate_and_interleave for uniform vectors [PR112661]

can_duplicate_and_interleave_p checks whether we know a way of
building a particular VLA SLP invariant.  g:60034ecf25597bd515f
skipped that test for booleans, to support MASK_LEN_GATHER_LOAD
calls with a dummy all-ones mask.  But there's nothing fundamentally
different about VLA masks vs VLA data vectors.  If we have a VLA mask
that isn't all-ones, we need some way of loading it.  This ultimately
led to the ICE in the PR.

This patch fixes it by applying can_duplicate_and_interleave_p
to masks, while also adding a special path for uniform vectors
(of all kinds) to support the MASK_LEN_GATHER_LOAD usage.  This
also fixes an XFAIL in pr36648.cc for SVE.

The patch is mostly Richard Biener's.  My only changes were to skip
redundant conversions and to use gimple_build_vector_from_val
for all eligible vectors.

2023-11-27  Richard Biener  <rguenther@suse.de>
	    Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/112661
	* tree-vect-slp.cc (vect_get_and_check_slp_defs): Defer duplicate-and-
	interleave test to...
	(vect_build_slp_tree_2): ...here, once we have all the operands.
	Skip the test for uniform vectors.
	(vect_create_constant_vectors): Detect uniform vectors.  Avoid
	redundant conversions in that case.  Use gimple_build_vector_from_val
	to build the vector.

gcc/testsuite/
	* g++.dg/vect/pr36648.cc: Remove XFAIL for VLA load-lanes.
This commit is contained in:
Richard Sandiford 2023-11-27 13:38:16 +00:00
parent 5b33cf3a3a
commit 061a82fa2b
2 changed files with 40 additions and 18 deletions

View File

@ -25,6 +25,6 @@ int main() { }
targets, ! vect_no_align is a sufficient test. */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } xfail { vect_variable_length && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */

View File

@ -763,18 +763,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
{
tree type = TREE_TYPE (oprnd);
dt = dts[i];
if ((dt == vect_constant_def
|| dt == vect_external_def)
&& !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
&& TREE_CODE (type) != BOOLEAN_TYPE
&& !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: invalid type of def "
"for variable-length SLP %T\n", oprnd);
return -1;
}
/* For the swapping logic below force vect_reduction_def
for the reduction op in a SLP reduction group. */
@ -2395,7 +2383,7 @@ out:
/* Create SLP_TREE nodes for the definition node/s. */
FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
{
slp_tree child;
slp_tree child = nullptr;
unsigned int j;
/* We're skipping certain operands from processing, for example
@ -2443,6 +2431,29 @@ out:
if (oprnd_info->first_dt == vect_external_def
|| oprnd_info->first_dt == vect_constant_def)
{
if (!GET_MODE_SIZE (vinfo->vector_mode).is_constant ())
{
tree op0;
tree uniform_val = op0 = oprnd_info->ops[0];
for (j = 1; j < oprnd_info->ops.length (); ++j)
if (!operand_equal_p (uniform_val, oprnd_info->ops[j]))
{
uniform_val = NULL_TREE;
break;
}
if (!uniform_val
&& !can_duplicate_and_interleave_p (vinfo,
oprnd_info->ops.length (),
TREE_TYPE (op0)))
{
matches[j] = false;
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: invalid type of def "
"for variable-length SLP %T\n", op0);
goto fail;
}
}
slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
oprnd_info->ops = vNULL;
@ -8157,6 +8168,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
number_of_places_left_in_vector = nunits;
constant_p = true;
tree uniform_elt = NULL_TREE;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
stmt_vec_info insert_after = NULL;
@ -8166,8 +8178,14 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
{
/* Create 'vect_ = {op0,op1,...,opn}'. */
number_of_places_left_in_vector--;
tree orig_op = op;
if (number_of_places_left_in_vector == nunits)
uniform_elt = op;
else if (uniform_elt && operand_equal_p (uniform_elt, op))
op = elts[number_of_places_left_in_vector];
else
uniform_elt = NULL_TREE;
number_of_places_left_in_vector--;
if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
{
if (CONSTANT_CLASS_P (op))
@ -8236,9 +8254,13 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
if (number_of_places_left_in_vector == 0)
{
if (constant_p
? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
: known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
auto type_nunits = TYPE_VECTOR_SUBPARTS (vector_type);
if (uniform_elt)
vec_cst = gimple_build_vector_from_val (&ctor_seq, vector_type,
elts[0]);
else if (constant_p
? multiple_p (type_nunits, nunits)
: known_eq (type_nunits, nunits))
vec_cst = gimple_build_vector (&ctor_seq, &elts);
else
{