diff --git a/gcc/testsuite/gcc.target/i386/pr92645-6.c b/gcc/testsuite/gcc.target/i386/pr92645-6.c
new file mode 100644
index 000000000000..c5c5f8f8df2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645-6.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -msse2" } */
+
+typedef long v2di __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+/* Pun a v2di as v4si and store its lanes 0, 2, 1, 3 to *P.  */
+void foo (v4si *p, v2di *q)
+{
+  union { v2di a; v4si b; } u;
+  u.a = *q;
+  (*p)[0] = u.b[0];
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+/* Pun an __int128_t as v4si and store its lanes 1, 2, 1, 3 to *P.  */
+void bar (v4si *p, __int128_t *q)
+{
+  union { __int128_t a; v4si b; } u;
+  u.a = *q;
+  (*p)[0] = u.b[1];
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+/* Both functions should end up with something like
+     [v]pshufd $val, (%rsi), %xmm0
+     [v]movdqa %xmm0, (%rdi)
+     ret
+   recognized by SLP vectorization involving an existing "vector".  */
+/* { dg-final { scan-assembler-not "punpck" } } */
+/* { dg-final { scan-assembler-times "pshufd" 2 } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 65b7a27e1e88..f7f656a48102 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1105,7 +1105,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 	      tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
 	      if (!is_a <bb_vec_info> (vinfo)
 		  || TREE_CODE (vec) != SSA_NAME
-		  || !types_compatible_p (vectype, TREE_TYPE (vec)))
+		  || !operand_equal_p (TYPE_SIZE (vectype),
+				       TYPE_SIZE (TREE_TYPE (vec))))
 		{
 		  if (dump_enabled_p ())
 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1642,7 +1643,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 	      lperm.safe_push (std::make_pair (0, (unsigned)lane));
 	    }
 	  slp_tree vnode = vect_create_new_slp_node (vNULL);
-	  SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);
+	  /* ???  We record vectype here but we hide possibly necessary
+	     punning and instead rely on code generation to materialize
+	     VIEW_CONVERT_EXPRs as necessary.  We instead should make
+	     this explicit somehow.  */
+	  SLP_TREE_VECTYPE (vnode) = vectype;
 	  SLP_TREE_VEC_DEFS (vnode).safe_push (vec);
 	  /* We are always building a permutation node even if it is an identity
 	     permute to shield the rest of the vectorizer from the odd node
@@ -5671,6 +5676,18 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 	      slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first];
 	      tree first_def
 		= vect_get_slp_vect_def (first_node, first_vec.second);
+	      /* ???  We SLP match existing vector element extracts but
+		 allow punning which we need to re-instantiate at uses
+		 but have no good way of explicitly representing.  */
+	      if (!types_compatible_p (TREE_TYPE (first_def), vectype))
+		{
+		  gassign *conv_stmt;
+		  conv_stmt = gimple_build_assign (make_ssa_name (vectype),
+						   build1 (VIEW_CONVERT_EXPR,
+							   vectype, first_def));
+		  vect_finish_stmt_generation (vinfo, NULL, conv_stmt, gsi);
+		  first_def = gimple_assign_lhs (conv_stmt);
+		}
 	      gassign *perm_stmt;
 	      tree perm_dest = make_ssa_name (vectype);
 	      if (!identity_p)
@@ -5679,6 +5696,17 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 		    = SLP_TREE_CHILDREN (node)[second_vec.first];
 		  tree second_def
 		    = vect_get_slp_vect_def (second_node, second_vec.second);
+		  /* Likewise re-instantiate punning of the second input.  */
+		  if (!types_compatible_p (TREE_TYPE (second_def), vectype))
+		    {
+		      gassign *conv_stmt;
+		      conv_stmt = gimple_build_assign (make_ssa_name (vectype),
+						       build1
+							 (VIEW_CONVERT_EXPR,
+							  vectype, second_def));
+		      vect_finish_stmt_generation (vinfo, NULL, conv_stmt, gsi);
+		      second_def = gimple_assign_lhs (conv_stmt);
+		    }
 		  tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
 		  perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
 						   first_def, second_def,