mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-30 23:35:00 +08:00
vect: Avoid duplicate_and_interleave for uniform vectors [PR112661]
can_duplicate_and_interleave_p checks whether we know a way of building a particular VLA SLP invariant. g:60034ecf25597bd515f skipped that test for booleans, to support MASK_LEN_GATHER_LOAD calls with a dummy all-ones mask.

But there's nothing fundamentally different about VLA masks vs VLA data vectors. If we have a VLA mask that isn't all-ones, we need some way of loading it. This ultimately led to the ICE in the PR.

This patch fixes it by applying can_duplicate_and_interleave_p to masks, while also adding a special path for uniform vectors (of all kinds) to support the MASK_LEN_GATHER_LOAD usage. This also fixes an XFAIL in pr36648.cc for SVE.

The patch is mostly Richard's. My only changes were to skip redundant conversions and to use gimple_build_vector_from_val for all eligible vectors.

2023-11-27  Richard Biener  <rguenther@suse.de>
            Richard Sandiford  <richard.sandiford@arm.com>

gcc/
    PR tree-optimization/112661
    * tree-vect-slp.cc (vect_get_and_check_slp_defs): Defer duplicate-and-interleave test to...
    (vect_build_slp_tree_2): ...here, once we have all the operands. Skip the test for uniform vectors.
    (vect_create_constant_vectors): Detect uniform vectors. Avoid redundant conversions in that case. Use gimple_build_vector_from_val to build the vector.

gcc/testsuite/
    * g++.dg/vect/pr36648.cc: Remove XFAIL for VLA load-lanes.
This commit is contained in:
parent
5b33cf3a3a
commit
061a82fa2b
@ -25,6 +25,6 @@ int main() { }
|
||||
targets, ! vect_no_align is a sufficient test. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } xfail { vect_variable_length && vect_load_lanes } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
|
||||
|
||||
|
||||
|
@ -763,18 +763,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
|
||||
{
|
||||
tree type = TREE_TYPE (oprnd);
|
||||
dt = dts[i];
|
||||
if ((dt == vect_constant_def
|
||||
|| dt == vect_external_def)
|
||||
&& !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
|
||||
&& TREE_CODE (type) != BOOLEAN_TYPE
|
||||
&& !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"Build SLP failed: invalid type of def "
|
||||
"for variable-length SLP %T\n", oprnd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* For the swapping logic below force vect_reduction_def
|
||||
for the reduction op in a SLP reduction group. */
|
||||
@ -2395,7 +2383,7 @@ out:
|
||||
/* Create SLP_TREE nodes for the definition node/s. */
|
||||
FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
|
||||
{
|
||||
slp_tree child;
|
||||
slp_tree child = nullptr;
|
||||
unsigned int j;
|
||||
|
||||
/* We're skipping certain operands from processing, for example
|
||||
@ -2443,6 +2431,29 @@ out:
|
||||
if (oprnd_info->first_dt == vect_external_def
|
||||
|| oprnd_info->first_dt == vect_constant_def)
|
||||
{
|
||||
if (!GET_MODE_SIZE (vinfo->vector_mode).is_constant ())
|
||||
{
|
||||
tree op0;
|
||||
tree uniform_val = op0 = oprnd_info->ops[0];
|
||||
for (j = 1; j < oprnd_info->ops.length (); ++j)
|
||||
if (!operand_equal_p (uniform_val, oprnd_info->ops[j]))
|
||||
{
|
||||
uniform_val = NULL_TREE;
|
||||
break;
|
||||
}
|
||||
if (!uniform_val
|
||||
&& !can_duplicate_and_interleave_p (vinfo,
|
||||
oprnd_info->ops.length (),
|
||||
TREE_TYPE (op0)))
|
||||
{
|
||||
matches[j] = false;
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"Build SLP failed: invalid type of def "
|
||||
"for variable-length SLP %T\n", op0);
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
|
||||
SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
|
||||
oprnd_info->ops = vNULL;
|
||||
@ -8157,6 +8168,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
|
||||
|
||||
number_of_places_left_in_vector = nunits;
|
||||
constant_p = true;
|
||||
tree uniform_elt = NULL_TREE;
|
||||
tree_vector_builder elts (vector_type, nunits, 1);
|
||||
elts.quick_grow (nunits);
|
||||
stmt_vec_info insert_after = NULL;
|
||||
@ -8166,8 +8178,14 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
|
||||
for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
|
||||
{
|
||||
/* Create 'vect_ = {op0,op1,...,opn}'. */
|
||||
number_of_places_left_in_vector--;
|
||||
tree orig_op = op;
|
||||
if (number_of_places_left_in_vector == nunits)
|
||||
uniform_elt = op;
|
||||
else if (uniform_elt && operand_equal_p (uniform_elt, op))
|
||||
op = elts[number_of_places_left_in_vector];
|
||||
else
|
||||
uniform_elt = NULL_TREE;
|
||||
number_of_places_left_in_vector--;
|
||||
if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
|
||||
{
|
||||
if (CONSTANT_CLASS_P (op))
|
||||
@ -8236,9 +8254,13 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
|
||||
|
||||
if (number_of_places_left_in_vector == 0)
|
||||
{
|
||||
if (constant_p
|
||||
? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
|
||||
: known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
|
||||
auto type_nunits = TYPE_VECTOR_SUBPARTS (vector_type);
|
||||
if (uniform_elt)
|
||||
vec_cst = gimple_build_vector_from_val (&ctor_seq, vector_type,
|
||||
elts[0]);
|
||||
else if (constant_p
|
||||
? multiple_p (type_nunits, nunits)
|
||||
: known_eq (type_nunits, nunits))
|
||||
vec_cst = gimple_build_vector (&ctor_seq, &elts);
|
||||
else
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user