tree-optimization/97832 - handle associatable chains in SLP discovery

This makes SLP discovery handle associatable (including mixed
plus/minus) chains better by swapping operands across the whole
chain.  To work this adds caching of the 'matches' lanes for
failed SLP discovery attempts, thereby fixing a failed SLP
discovery for the slp-pr98855.cc testcase which results in
building an operand from scalars as expected.  Unfortunately
this makes us trip over the cost threshold so I'm XFAILing the
testcase for now.

For BB vectorization all this doesn't work because we have no way
to distinguish good from bad associations as we eventually build
operands from scalars and thus not fail in the classical sense.

2021-05-31  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/97832
	* tree-vectorizer.h (_slp_tree::failed): New.
	* tree-vect-slp.c (_slp_tree::_slp_tree): Initialize
	failed member.
	(_slp_tree::~_slp_tree): Free failed.
	(vect_build_slp_tree): Retain failed nodes and record
	matches in them, copying that back out when running
	into a cached fail.  Dump start and end of discovery.
	(dt_sort_cmp): New.
	(vect_build_slp_tree_2): Handle associatable chains
	together doing more aggressive operand swapping.

	* gcc.dg/vect/pr97832-1.c: New testcase.
	* gcc.dg/vect/pr97832-2.c: Likewise.
	* gcc.dg/vect/pr97832-3.c: Likewise.
	* g++.dg/vect/slp-pr98855.cc: XFAIL.
This commit is contained in:
Richard Biener 2020-11-18 14:17:34 +01:00
parent 13a3988694
commit ce670e4faa
7 changed files with 560 additions and 10 deletions

View File

@ -81,4 +81,6 @@ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, uint32_t *EK)
}
}
// { dg-final { scan-tree-dump-times "not vectorized: vectorization is not profitable" 2 "slp1" { target x86_64-*-* i?86-*-* } } }
// This used to work on { target x86_64-*-* i?86-*-* } but a fix in SLP
// discovery makes us trip over the threshold again.
// { dg-final { scan-tree-dump-times "not vectorized: vectorization is not profitable" 2 "slp1" { xfail *-*-* } } }

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-additional-options "-Ofast" } */
/* { dg-require-effective-target vect_double } */
double a[1024], b[1024], c[1024];
void foo()
{
for (int i = 0; i < 256; ++i)
{
a[2*i] = a[2*i] + b[2*i] - c[2*i];
a[2*i+1] = a[2*i+1] - b[2*i+1] - c[2*i+1];
}
}
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */

View File

@ -0,0 +1,29 @@
/* { dg-do compile } */
/* { dg-additional-options "-Ofast" } */
/* { dg-require-effective-target vect_double } */
void foo1x1(double* restrict y, const double* restrict x, int clen)
{
int xi = clen & 2;
double f_re = x[0+xi+0];
double f_im = x[4+xi+0];
int clen2 = (clen+xi) * 2;
#pragma GCC unroll 0
for (int c = 0; c < clen2; c += 8) {
// y[c] = y[c] - x[c]*conj(f);
#pragma GCC unroll 4
for (int k = 0; k < 4; ++k) {
double x_re = x[c+0+k];
double x_im = x[c+4+k];
double y_re = y[c+0+k];
double y_im = y[c+4+k];
y_re = y_re - x_re * f_re - x_im * f_im;;
y_im = y_im + x_re * f_im - x_im * f_re;
y[c+0+k] = y_re;
y[c+4+k] = y_im;
}
}
}
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */

View File

@ -0,0 +1,50 @@
/* { dg-do compile } */
/* { dg-additional-options "-Ofast" } */
/* { dg-require-effective-target vect_double } */
void foo(double* restrict y, const double* restrict x0, const double* restrict x1, int clen)
{
int xi = clen & 2;
double f00_re = x0[0+xi+0];
double f10_re = x1[0+xi+0];
double f01_re = x0[0+xi+1];
double f11_re = x1[0+xi+1];
double f00_im = x0[4+xi+0];
double f10_im = x1[4+xi+0];
double f01_im = x0[4+xi+1];
double f11_im = x1[4+xi+1];
int clen2 = (clen+xi) * 2;
double* y0 = &y[0];
double* y1 = &y[clen2];
#pragma GCC unroll 0
for (int c = 0; c < clen2; c += 8) {
// y0[c] = y0[c] - x0[c]*conj(f00) - x1[c]*conj(f10);
// y1[c] = y1[c] - x0[c]*conj(f01) - x1[c]*conj(f11);
#pragma GCC unroll 4
for (int k = 0; k < 4; ++k) {
double x0_re = x0[c+0+k];
double x0_im = x0[c+4+k];
double y0_re = y0[c+0+k];
double y0_im = y0[c+4+k];
double y1_re = y1[c+0+k];
double y1_im = y1[c+4+k];
y0_re = y0_re - x0_re * f00_re - x0_im * f00_im;
y0_im = y0_im + x0_re * f00_im - x0_im * f00_re;
y1_re = y1_re - x0_re * f01_re - x0_im * f01_im;
y1_im = y1_im + x0_re * f01_im - x0_im * f01_re;
double x1_re = x1[c+0+k];
double x1_im = x1[c+4+k];
y0_re = y0_re - x1_re * f10_re - x1_im * f10_im;
y0_im = y0_im + x1_re * f10_im - x1_im * f10_re;
y1_re = y1_re - x1_re * f11_re - x1_im * f11_im;
y1_im = y1_im + x1_re * f11_im - x1_im * f11_re;
y0[c+0+k] = y0_re;
y0[c+4+k] = y0_im;
y1[c+0+k] = y1_re;
y1[c+4+k] = y1_im;
}
}
}
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */

View File

@ -0,0 +1,20 @@
/* { dg-do compile } */
/* { dg-additional-options "-ffast-math" } */
typedef int Quantum;
typedef struct {
Quantum blue, green;
} PixelPacket;
PixelPacket *EnhanceImage_image_q;
int EnhanceImage_image_x;
float EnhanceImage_image_distance_squared_total_weight;
void EnhanceImage_image_distance_squared()
{
float zero_1;
for (; EnhanceImage_image_x; EnhanceImage_image_x++) {
EnhanceImage_image_distance_squared_total_weight += 5.0;
EnhanceImage_image_q->green = EnhanceImage_image_q->blue =
zero_1 + EnhanceImage_image_distance_squared_total_weight / 2 - 1;
EnhanceImage_image_q++;
}
}

View File

@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
slp_tree, stmt_vector_for_cost *);
static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree);
static object_allocator<_slp_tree> *slp_tree_pool;
static slp_tree slp_first_node;
@ -108,6 +109,7 @@ _slp_tree::_slp_tree ()
SLP_TREE_VECTYPE (this) = NULL_TREE;
SLP_TREE_REPRESENTATIVE (this) = NULL;
SLP_TREE_REF_COUNT (this) = 1;
this->failed = NULL;
this->max_nunits = 1;
this->lanes = 0;
}
@ -129,6 +131,8 @@ _slp_tree::~_slp_tree ()
SLP_TREE_VEC_DEFS (this).release ();
SLP_TREE_LOAD_PERMUTATION (this).release ();
SLP_TREE_LANE_PERMUTATION (this).release ();
if (this->failed)
free (failed);
}
/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
@ -1414,6 +1418,30 @@ bst_traits::equal (value_type existing, value_type candidate)
return true;
}
/* ??? This was std::pair<std::pair<tree_code, vect_def_type>, tree>
but then vec::insert does memmove and that's not compatible with
std::pair. */
struct chain_op_t
{
chain_op_t (tree_code code_, vect_def_type dt_, tree op_)
: code (code_), dt (dt_), op (op_) {}
tree_code code;
vect_def_type dt;
tree op;
};
/* Comparator for sorting associatable chains. */
static int
dt_sort_cmp (const void *op1_, const void *op2_, void *)
{
auto *op1 = (const chain_op_t *) op1_;
auto *op2 = (const chain_op_t *) op2_;
if (op1->dt != op2->dt)
return (int)op1->dt - (int)op2->dt;
return (int)op1->code - (int)op2->code;
}
typedef hash_map <vec <stmt_vec_info>, slp_tree,
simple_hashmap_traits <bst_traits, slp_tree> >
scalar_stmts_to_slp_tree_map_t;
@ -1436,14 +1464,16 @@ vect_build_slp_tree (vec_info *vinfo,
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
*leader ? "" : "failed ", *leader);
if (*leader)
!(*leader)->failed ? "" : "failed ", *leader);
if (!(*leader)->failed)
{
SLP_TREE_REF_COUNT (*leader)++;
vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
stmts.release ();
return *leader;
}
return *leader;
memcpy (matches, (*leader)->failed, sizeof (bool) * group_size);
return NULL;
}
/* Seed the bst_map with a stub node to be filled by vect_build_slp_tree_2
@ -1458,34 +1488,42 @@ vect_build_slp_tree (vec_info *vinfo,
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"SLP discovery limit exceeded\n");
bool existed_p = bst_map->put (stmts, NULL);
gcc_assert (existed_p);
/* Mark the node invalid so we can detect those when still in use
as backedge destinations. */
SLP_TREE_SCALAR_STMTS (res) = vNULL;
SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
vect_free_slp_tree (res);
res->failed = XNEWVEC (bool, group_size);
memset (res->failed, 0, sizeof (bool) * group_size);
memset (matches, 0, sizeof (bool) * group_size);
return NULL;
}
--*limit;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"starting SLP discovery for node %p\n", res);
poly_uint64 this_max_nunits = 1;
slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size,
&this_max_nunits,
matches, limit, tree_size, bst_map);
if (!res_)
{
bool existed_p = bst_map->put (stmts, NULL);
gcc_assert (existed_p);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"SLP discovery for node %p failed\n", res);
/* Mark the node invalid so we can detect those when still in use
as backedge destinations. */
SLP_TREE_SCALAR_STMTS (res) = vNULL;
SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
vect_free_slp_tree (res);
res->failed = XNEWVEC (bool, group_size);
memcpy (res->failed, matches, sizeof (bool) * group_size);
}
else
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"SLP discovery for node %p succeeded\n", res);
gcc_assert (res_ == res);
res->max_nunits = this_max_nunits;
vect_update_max_nunits (max_nunits, this_max_nunits);
@ -1495,6 +1533,48 @@ vect_build_slp_tree (vec_info *vinfo,
return res_;
}
/* Helper for building an associated SLP node chain. */
static void
vect_slp_build_two_operator_nodes (slp_tree perm,
slp_tree op0, slp_tree op1,
stmt_vec_info oper1, stmt_vec_info oper2,
vec<std::pair<unsigned, unsigned> > lperm)
{
unsigned group_size = SLP_TREE_LANES (op1);
tree vectype = SLP_TREE_VECTYPE (op1);
slp_tree child1 = new _slp_tree;
SLP_TREE_DEF_TYPE (child1) = vect_internal_def;
SLP_TREE_VECTYPE (child1) = vectype;
SLP_TREE_LANES (child1) = group_size;
SLP_TREE_CHILDREN (child1).create (2);
SLP_TREE_CHILDREN (child1).quick_push (op0);
SLP_TREE_CHILDREN (child1).quick_push (op1);
SLP_TREE_REPRESENTATIVE (child1) = oper1;
slp_tree child2 = new _slp_tree;
SLP_TREE_DEF_TYPE (child2) = vect_internal_def;
SLP_TREE_VECTYPE (child2) = vectype;
SLP_TREE_LANES (child2) = group_size;
SLP_TREE_CHILDREN (child2).create (2);
SLP_TREE_CHILDREN (child2).quick_push (op0);
SLP_TREE_REF_COUNT (op0)++;
SLP_TREE_CHILDREN (child2).quick_push (op1);
SLP_TREE_REF_COUNT (op1)++;
SLP_TREE_REPRESENTATIVE (child2) = oper2;
SLP_TREE_DEF_TYPE (perm) = vect_internal_def;
SLP_TREE_CODE (perm) = VEC_PERM_EXPR;
SLP_TREE_VECTYPE (perm) = vectype;
SLP_TREE_LANES (perm) = group_size;
/* ??? We should set this NULL but that's not expected. */
SLP_TREE_REPRESENTATIVE (perm) = oper1;
SLP_TREE_LANE_PERMUTATION (perm) = lperm;
SLP_TREE_CHILDREN (perm).quick_push (child1);
SLP_TREE_CHILDREN (perm).quick_push (child2);
}
/* Recursively build an SLP tree starting from NODE.
Fail (and return a value not equal to zero) if def-stmts are not
isomorphic, require data permutation or are of unsupported types of
@ -1672,6 +1752,353 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
SLP_TREE_CHILDREN (node).quick_push (vnode);
return node;
}
/* When discovery reaches an associatable operation see whether we can
improve that to match up lanes in a way superior to the operand
swapping code which at most looks at two defs.
??? For BB vectorization we cannot do the brute-force search
for matching as we can succeed by means of builds from scalars
and have no good way to "cost" one build against another. */
else if (is_a <loop_vec_info> (vinfo)
/* ??? We don't handle !vect_internal_def defs below. */
&& STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
&& is_gimple_assign (stmt_info->stmt)
&& (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt))
|| gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)
&& ((FLOAT_TYPE_P (vectype) && flag_associative_math)
|| (INTEGRAL_TYPE_P (TREE_TYPE (vectype))
&& TYPE_OVERFLOW_WRAPS (TREE_TYPE (vectype)))))
{
/* See if we have a chain of (mixed) adds or subtracts or other
associatable ops. */
enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt);
if (code == MINUS_EXPR)
code = PLUS_EXPR;
stmt_vec_info other_op_stmt_info = NULL;
stmt_vec_info op_stmt_info = NULL;
unsigned chain_len = 0;
auto_vec<chain_op_t> chain;
auto_vec<std::pair<tree_code, gimple *> > worklist;
auto_vec<vec<chain_op_t> > chains (group_size);
auto_vec<slp_tree, 4> children;
bool hard_fail = true;
for (unsigned lane = 0; lane < group_size; ++lane)
{
/* For each lane linearize the addition/subtraction (or other
uniform associatable operation) expression tree. */
worklist.safe_push (std::make_pair (code, stmts[lane]->stmt));
while (!worklist.is_empty ())
{
auto entry = worklist.pop ();
gassign *stmt = as_a <gassign *> (entry.second);
enum tree_code in_code = entry.first;
enum tree_code this_code = gimple_assign_rhs_code (stmt);
/* Pick some stmts suitable for SLP_TREE_REPRESENTATIVE. */
if (!op_stmt_info
&& gimple_assign_rhs_code (stmt) == code)
op_stmt_info = vinfo->lookup_stmt (stmt);
else if (!other_op_stmt_info
&& gimple_assign_rhs_code (stmt) == MINUS_EXPR)
other_op_stmt_info = vinfo->lookup_stmt (stmt);
for (unsigned opnum = 1; opnum <= 2; ++opnum)
{
tree op = gimple_op (stmt, opnum);
vect_def_type dt;
stmt_vec_info def_stmt_info;
bool res = vect_is_simple_use (op, vinfo, &dt, &def_stmt_info);
gcc_assert (res);
gimple *use_stmt;
use_operand_p use_p;
if (dt == vect_internal_def
&& single_imm_use (op, &use_p, &use_stmt)
&& is_gimple_assign (def_stmt_info->stmt)
&& (gimple_assign_rhs_code (def_stmt_info->stmt) == code
|| (code == PLUS_EXPR
&& (gimple_assign_rhs_code (def_stmt_info->stmt)
== MINUS_EXPR))))
{
tree_code op_def_code = this_code;
if (op_def_code == MINUS_EXPR && opnum == 1)
op_def_code = PLUS_EXPR;
if (in_code == MINUS_EXPR)
op_def_code
= op_def_code == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR;
worklist.safe_push (std::make_pair (op_def_code,
def_stmt_info->stmt));
}
else
{
tree_code op_def_code = this_code;
if (op_def_code == MINUS_EXPR && opnum == 1)
op_def_code = PLUS_EXPR;
if (in_code == MINUS_EXPR)
op_def_code
= op_def_code == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR;
chain.safe_push (chain_op_t (op_def_code, dt, op));
}
}
}
if (chain.length () == 2)
{
/* In a chain of just two elements resort to the regular
operand swapping scheme. If we run into a length
mismatch still hard-FAIL. */
if (chain_len == 0)
hard_fail = false;
break;
}
else if (chain_len == 0)
chain_len = chain.length ();
else if (chain.length () != chain_len)
/* ??? Here we could slip in magic to compensate with
neutral operands. */
break;
chains.quick_push (chain.copy ());
chain.truncate (0);
}
if (chains.length () == group_size)
{
/* Now we have a set of chains with the same length. */
/* 1. pre-sort according to def_type and operation. */
for (unsigned lane = 0; lane < group_size; ++lane)
chains[lane].sort (dt_sort_cmp, vinfo);
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"pre-sorted chains of %s\n",
get_tree_code_name (code));
for (unsigned lane = 0; lane < group_size; ++lane)
{
for (unsigned opnum = 0; opnum < chain_len; ++opnum)
dump_printf (MSG_NOTE, "%s %T ",
get_tree_code_name (chains[lane][opnum].code),
chains[lane][opnum].op);
dump_printf (MSG_NOTE, "\n");
}
}
/* 2. try to build children nodes, associating as necessary. */
for (unsigned n = 0; n < chain_len; ++n)
{
vect_def_type dt = chains[0][n].dt;
unsigned lane;
for (lane = 0; lane < group_size; ++lane)
if (chains[lane][n].dt != dt)
{
if (dt == vect_constant_def
&& chains[lane][n].dt == vect_external_def)
dt = vect_external_def;
else if (dt == vect_external_def
&& chains[lane][n].dt == vect_constant_def)
;
else
break;
}
if (lane != group_size)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"giving up on chain due to mismatched "
"def types\n");
goto out;
}
if (dt == vect_constant_def
|| dt == vect_external_def)
{
/* We can always build those. Might want to sort last
or defer building. */
vec<tree> ops;
ops.create (group_size);
for (lane = 0; lane < group_size; ++lane)
ops.quick_push (chains[lane][n].op);
slp_tree child = vect_create_new_slp_node (ops);
SLP_TREE_DEF_TYPE (child) = dt;
children.safe_push (child);
}
else if (dt != vect_internal_def)
{
/* Not sure, we might need sth special.
gcc.dg/vect/pr96854.c,
gfortran.dg/vect/fast-math-pr37021.f90
and gfortran.dg/vect/pr61171.f trigger. */
/* Soft-fail for now. */
hard_fail = false;
goto out;
}
else
{
vec<stmt_vec_info> op_stmts;
op_stmts.create (group_size);
slp_tree child = NULL;
/* Brute-force our way. We have to consider a lane
failing after fixing an earlier fail up in the
SLP discovery recursion. So track the current
permute per lane. */
unsigned *perms = XALLOCAVEC (unsigned, group_size);
memset (perms, 0, sizeof (unsigned) * group_size);
do
{
op_stmts.truncate (0);
for (lane = 0; lane < group_size; ++lane)
op_stmts.quick_push
(vinfo->lookup_def (chains[lane][n].op));
child = vect_build_slp_tree (vinfo, op_stmts,
group_size, &this_max_nunits,
matches, limit,
&this_tree_size, bst_map);
/* ??? We're likely getting too many fatal mismatches
here so maybe we want to ignore them (but then we
have no idea which lanes fatally mismatched). */
if (child || !matches[0])
break;
/* Swap another lane we have not yet matched up into
lanes that did not match. If we run out of
permute possibilities for a lane terminate the
search. */
bool term = false;
for (lane = 1; lane < group_size; ++lane)
if (!matches[lane])
{
if (n + perms[lane] + 1 == chain_len)
{
term = true;
break;
}
std::swap (chains[lane][n],
chains[lane][n + perms[lane] + 1]);
perms[lane]++;
}
if (term)
break;
}
while (1);
if (!child)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"failed to match up op %d\n", n);
op_stmts.release ();
goto out;
}
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"matched up op %d to\n", n);
vect_print_slp_tree (MSG_NOTE, vect_location, child);
}
children.safe_push (child);
}
}
/* 3. build SLP nodes to combine the chain. */
for (unsigned lane = 0; lane < group_size; ++lane)
if (chains[lane][0].code != code)
{
/* See if there's any alternate all-PLUS entry. */
unsigned n;
for (n = 1; n < chain_len; ++n)
{
for (lane = 0; lane < group_size; ++lane)
if (chains[lane][n].code != code)
break;
if (lane == group_size)
break;
}
if (n != chain_len)
{
/* Swap that in at first position. */
std::swap (children[0], children[n]);
for (lane = 0; lane < group_size; ++lane)
std::swap (chains[lane][0], chains[lane][n]);
}
else
{
/* ??? When this triggers and we end up with two
vect_constant/external_def up-front things break (ICE)
spectacularly finding an insertion place for the
all-constant op. We should have a fully
vect_internal_def operand though(?) so we can swap
that into first place and then prepend the all-zero
constant. */
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"inserting constant zero to compensate "
"for (partially) negated first "
"operand\n");
chain_len++;
for (lane = 0; lane < group_size; ++lane)
chains[lane].safe_insert
(0, chain_op_t (code, vect_constant_def, NULL_TREE));
vec<tree> zero_ops;
zero_ops.create (group_size);
zero_ops.quick_push (build_zero_cst (TREE_TYPE (vectype)));
for (lane = 1; lane < group_size; ++lane)
zero_ops.quick_push (zero_ops[0]);
slp_tree zero = vect_create_new_slp_node (zero_ops);
SLP_TREE_DEF_TYPE (zero) = vect_constant_def;
children.safe_insert (0, zero);
}
break;
}
for (unsigned i = 1; i < children.length (); ++i)
{
slp_tree op0 = children[i - 1];
slp_tree op1 = children[i];
bool this_two_op = false;
for (unsigned lane = 0; lane < group_size; ++lane)
if (chains[lane][i].code != chains[0][i].code)
{
this_two_op = true;
break;
}
slp_tree child;
if (i == children.length () - 1)
child = vect_create_new_slp_node (node, stmts, 2);
else
child = vect_create_new_slp_node (2, ERROR_MARK);
if (this_two_op)
{
vec<std::pair<unsigned, unsigned> > lperm;
lperm.create (group_size);
for (unsigned lane = 0; lane < group_size; ++lane)
lperm.quick_push (std::make_pair
(chains[lane][i].code != chains[0][i].code, lane));
vect_slp_build_two_operator_nodes (child, op0, op1,
(chains[0][i].code == code
? op_stmt_info
: other_op_stmt_info),
(chains[0][i].code == code
? other_op_stmt_info
: op_stmt_info),
lperm);
}
else
{
SLP_TREE_DEF_TYPE (child) = vect_internal_def;
SLP_TREE_VECTYPE (child) = vectype;
SLP_TREE_LANES (child) = group_size;
SLP_TREE_CHILDREN (child).quick_push (op0);
SLP_TREE_CHILDREN (child).quick_push (op1);
SLP_TREE_REPRESENTATIVE (child)
= (chains[0][i].code == code
? op_stmt_info : other_op_stmt_info);
}
children[i] = child;
}
*tree_size += this_tree_size + 1;
*max_nunits = this_max_nunits;
while (!chains.is_empty ())
chains.pop ().release ();
return node;
}
out:
while (!children.is_empty ())
vect_free_slp_tree (children.pop ());
while (!chains.is_empty ())
chains.pop ().release ();
/* Hard-fail, otherwise we might run into quadratic processing of the
chains starting one stmt into the chain again. */
if (hard_fail)
return NULL;
/* Fall thru to normal processing. */
}
/* Get at the operands, verifying they are compatible. */
vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);

View File

@ -167,6 +167,11 @@ struct _slp_tree {
int vertex;
/* If not NULL this is a cached failed SLP discovery attempt with
the lanes that failed during SLP discovery as 'false'. This is
a copy of the matches array. */
bool *failed;
/* Allocate from slp_tree_pool. */
static void *operator new (size_t);