SVE intrinsics: Fold constant operands for svdiv.
This patch implements constant folding for svdiv: the new function
aarch64_const_binop was created, which, in contrast to int_const_binop, does
not treat operations as overflowing. This function is passed as a callback to
vector_const_binop from the new gimple_folder method fold_const_binary, if the
predicate is ptrue or predication is _x. From svdiv_impl::fold,
fold_const_binary is called with TRUNC_DIV_EXPR as tree_code. In
aarch64_const_binop, a case was added for TRUNC_DIV_EXPR to return 0 for
division by 0, as defined in the semantics of svdiv. Tests were added to check
the produced assembly for different predicates, for signed and unsigned
integers, and for the svdiv_n_* case.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
	Try constant folding.
	* config/aarch64/aarch64-sve-builtins.h: Declare
	gimple_folder::fold_const_binary.
	* config/aarch64/aarch64-sve-builtins.cc (aarch64_const_binop): New
	function to fold binary SVE intrinsics without overflow.
	(gimple_folder::fold_const_binary): New helper function for constant
	folding of SVE intrinsics.

gcc/testsuite/
	* gcc.target/aarch64/sve/const_fold_div_1.c: New test.
parent 87217bea3a
commit ee8b7231b0
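For context before the diff, a minimal sketch of the user-visible effect (the function names here are illustrative; the cases mirror those in the new test file below): with constant operands and either a ptrue predicate or _x predication, svdiv is now evaluated at compile time, and division by zero folds to a zero vector.

#include "arm_sve.h"

/* With the fold in place, 5/3 is computed at compile time, so at -O2 this
   function reduces to a single mov of #1 into a Z register (compare
   s64_x_ptrue in the new test).  */
svint64_t example_fold (void)
{
  return svdiv_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
}

/* Division by zero folds to zero, matching the SDIV/UDIV semantics
   (compare s64_x_pg_by0 in the new test).  */
svint64_t example_fold_by_zero (svbool_t pg)
{
  return svdiv_x (pg, svdup_s64 (5), svdup_s64 (0));
}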
gcc/config/aarch64/aarch64-sve-builtins-base.cc

@@ -755,8 +755,13 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
-    tree divisor = gimple_call_arg (f.call, 2);
-    tree divisor_cst = uniform_integer_cst_p (divisor);
+    if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
+      return res;
 
+    /* If the divisor is a uniform power of 2, fold to a shift
+       instruction.  */
+    tree op2 = gimple_call_arg (f.call, 2);
+    tree divisor_cst = uniform_integer_cst_p (op2);
+
     if (!divisor_cst || !integer_pow2p (divisor_cst))
       return NULL;
@@ -770,7 +775,7 @@ public:
 			shapes::binary_uint_opt_n, MODE_n,
 			f.type_suffix_ids, GROUP_none, f.pred);
 	  call = f.redirect_call (instance);
-	  tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
+	  tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : divisor_cst;
 	  new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
 	}
       else
gcc/config/aarch64/aarch64-sve-builtins.cc

@@ -1132,6 +1132,30 @@ report_not_enum (location_t location, tree fndecl, unsigned int argno,
 	    " a valid %qT value", actual, argno + 1, fndecl, enumtype);
 }
 
+/* Try to fold constant arguments ARG1 and ARG2 using the given tree_code.
+   Operations are not treated as overflowing.  */
+static tree
+aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
+{
+  if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
+    {
+      poly_wide_int poly_res;
+      tree type = TREE_TYPE (arg1);
+      signop sign = TYPE_SIGN (type);
+      wi::overflow_type overflow = wi::OVF_NONE;
+
+      /* Return 0 for division by 0, like SDIV and UDIV do.  */
+      if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
+	return arg2;
+
+      if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
+	return NULL_TREE;
+      return force_fit_type (type, poly_res, false,
+			     TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2));
+    }
+  return NULL_TREE;
+}
+
 /* Return a hash code for a function_instance.  */
 hashval_t
 function_instance::hash () const
@@ -3593,6 +3617,25 @@ gimple_folder::fold_to_vl_pred (unsigned int vl)
   return gimple_build_assign (lhs, builder.build ());
 }
 
+/* Try to fold the call to a constant, given that, for integers, the call
+   is roughly equivalent to binary operation CODE.  aarch64_const_binop
+   handles any differences between CODE and the intrinsic.  */
+gimple *
+gimple_folder::fold_const_binary (enum tree_code code)
+{
+  gcc_assert (gimple_call_num_args (call) == 3);
+  tree pg = gimple_call_arg (call, 0);
+  tree op1 = gimple_call_arg (call, 1);
+  tree op2 = gimple_call_arg (call, 2);
+
+  if (type_suffix (0).integer_p
+      && (pred == PRED_x || is_ptrue (pg, type_suffix (0).element_bytes)))
+    if (tree res = vector_const_binop (code, op1, op2, aarch64_const_binop))
+      return gimple_build_assign (lhs, res);
+
+  return NULL;
+}
+
 /* Try to fold the call.  Return the new statement on success and null
    on failure.  */
 gimple *
gcc/config/aarch64/aarch64-sve-builtins.h

@@ -636,6 +636,7 @@ public:
   gimple *fold_to_pfalse ();
   gimple *fold_to_ptrue ();
   gimple *fold_to_vl_pred (unsigned int);
+  gimple *fold_const_binary (enum tree_code);
 
   gimple *fold ();
 
gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c (new file, 358 lines)

@@ -0,0 +1,358 @@
/* { dg-final { check-function-bodies "**" "" } } */
/* { dg-options "-O2" } */

#include "arm_sve.h"

/*
** s64_x_pg:
** mov z[0-9]+\.d, #1
** ret
*/
svint64_t s64_x_pg (svbool_t pg)
{
  return svdiv_x (pg, svdup_s64 (5), svdup_s64 (3));
}

/*
** s64_x_pg_0:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_x_pg_0 (svbool_t pg)
{
  return svdiv_x (pg, svdup_s64 (0), svdup_s64 (3));
}

/*
** s64_x_pg_by0:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_x_pg_by0 (svbool_t pg)
{
  return svdiv_x (pg, svdup_s64 (5), svdup_s64 (0));
}

/*
** s64_z_pg:
** mov z[0-9]+\.d, p[0-7]/z, #1
** ret
*/
svint64_t s64_z_pg (svbool_t pg)
{
  return svdiv_z (pg, svdup_s64 (5), svdup_s64 (3));
}

/*
** s64_z_pg_0:
** mov z[0-9]+\.d, p[0-7]/z, #0
** ret
*/
svint64_t s64_z_pg_0 (svbool_t pg)
{
  return svdiv_z (pg, svdup_s64 (0), svdup_s64 (3));
}

/*
** s64_z_pg_by0:
** mov (z[0-9]+\.d), #5
** mov (z[0-9]+)\.b, #0
** sdivr \2\.d, p[0-7]/m, \2\.d, \1
** ret
*/
svint64_t s64_z_pg_by0 (svbool_t pg)
{
  return svdiv_z (pg, svdup_s64 (5), svdup_s64 (0));
}

/*
** s64_m_pg:
** mov (z[0-9]+\.d), #3
** mov (z[0-9]+\.d), #5
** sdiv \2, p[0-7]/m, \2, \1
** ret
*/
svint64_t s64_m_pg (svbool_t pg)
{
  return svdiv_m (pg, svdup_s64 (5), svdup_s64 (3));
}

/*
** s64_x_ptrue:
** mov z[0-9]+\.d, #1
** ret
*/
svint64_t s64_x_ptrue ()
{
  return svdiv_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
}

/*
** s64_z_ptrue:
** mov z[0-9]+\.d, #1
** ret
*/
svint64_t s64_z_ptrue ()
{
  return svdiv_z (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
}

/*
** s64_m_ptrue:
** mov z[0-9]+\.d, #1
** ret
*/
svint64_t s64_m_ptrue ()
{
  return svdiv_m (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
}

/*
** s64_x_pg_n:
** mov z[0-9]+\.d, #1
** ret
*/
svint64_t s64_x_pg_n (svbool_t pg)
{
  return svdiv_n_s64_x (pg, svdup_s64 (5), 3);
}

/*
** s64_x_pg_n_s64_0:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_x_pg_n_s64_0 (svbool_t pg)
{
  return svdiv_n_s64_x (pg, svdup_s64 (0), 3);
}

/*
** s64_x_pg_n_s64_by0:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_x_pg_n_s64_by0 (svbool_t pg)
{
  return svdiv_n_s64_x (pg, svdup_s64 (5), 0);
}

/*
** s64_z_pg_n:
** mov z[0-9]+\.d, p[0-7]/z, #1
** ret
*/
svint64_t s64_z_pg_n (svbool_t pg)
{
  return svdiv_n_s64_z (pg, svdup_s64 (5), 3);
}

/*
** s64_z_pg_n_s64_0:
** mov z[0-9]+\.d, p[0-7]/z, #0
** ret
*/
svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
{
  return svdiv_n_s64_z (pg, svdup_s64 (0), 3);
}

/*
** s64_z_pg_n_s64_by0:
** mov (z[0-9]+\.d), #5
** mov (z[0-9]+)\.b, #0
** sdivr \2\.d, p[0-7]/m, \2\.d, \1
** ret
*/
svint64_t s64_z_pg_n_s64_by0 (svbool_t pg)
{
  return svdiv_n_s64_z (pg, svdup_s64 (5), 0);
}

/*
** s64_m_pg_n:
** mov (z[0-9]+\.d), #3
** mov (z[0-9]+\.d), #5
** sdiv \2, p[0-7]/m, \2, \1
** ret
*/
svint64_t s64_m_pg_n (svbool_t pg)
{
  return svdiv_n_s64_m (pg, svdup_s64 (5), 3);
}

/*
** s64_x_ptrue_n:
** mov z[0-9]+\.d, #1
** ret
*/
svint64_t s64_x_ptrue_n ()
{
  return svdiv_n_s64_x (svptrue_b64 (), svdup_s64 (5), 3);
}

/*
** s64_z_ptrue_n:
** mov z[0-9]+\.d, #1
** ret
*/
svint64_t s64_z_ptrue_n ()
{
  return svdiv_n_s64_z (svptrue_b64 (), svdup_s64 (5), 3);
}

/*
** s64_m_ptrue_n:
** mov z[0-9]+\.d, #1
** ret
*/
svint64_t s64_m_ptrue_n ()
{
  return svdiv_n_s64_m (svptrue_b64 (), svdup_s64 (5), 3);
}

/*
** s32_m_ptrue_dupq:
** mov z[0-9]+\.b, #0
** ret
*/
svint32_t s32_m_ptrue_dupq ()
{
  return svdiv_s32_m (svptrue_b32 (), svdupq_s32 (3, 0, -5, 11),
		      svdupq_s32 (4, 1, -6, 0));
}

/*
** s32_z_ptrue_dupq:
** mov z[0-9]+\.s, #-2
** ret
*/
svint32_t s32_z_ptrue_dupq ()
{
  return svdiv_s32_z (svptrue_b32 (), svdupq_s32 (6, -30, 100, -4),
		      svdupq_s32 (-3, 15, -50, 2));
}

/*
** u64_x_pg:
** mov z[0-9]+\.d, #1
** ret
*/
svuint64_t u64_x_pg (svbool_t pg)
{
  return svdiv_x (pg, svdup_u64 (5), svdup_u64 (3));
}

/*
** u64_z_pg:
** mov z[0-9]+\.d, p[0-7]/z, #1
** ret
*/
svuint64_t u64_z_pg (svbool_t pg)
{
  return svdiv_z (pg, svdup_u64 (5), svdup_u64 (3));
}

/*
** u64_m_pg:
** mov (z[0-9]+\.d), #3
** mov (z[0-9]+\.d), #5
** udiv \2, p[0-7]/m, \2, \1
** ret
*/
svuint64_t u64_m_pg (svbool_t pg)
{
  return svdiv_m (pg, svdup_u64 (5), svdup_u64 (3));
}

/*
** u64_x_ptrue:
** mov z[0-9]+\.d, #1
** ret
*/
svuint64_t u64_x_ptrue ()
{
  return svdiv_x (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
}

/*
** u64_z_ptrue:
** mov z[0-9]+\.d, #1
** ret
*/
svuint64_t u64_z_ptrue ()
{
  return svdiv_z (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
}

/*
** u64_m_ptrue:
** mov z[0-9]+\.d, #1
** ret
*/
svuint64_t u64_m_ptrue ()
{
  return svdiv_m (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
}

/*
** u64_x_pg_n:
** mov z[0-9]+\.d, #1
** ret
*/
svuint64_t u64_x_pg_n (svbool_t pg)
{
  return svdiv_n_u64_x (pg, svdup_u64 (5), 3);
}

/*
** u64_z_pg_n:
** mov z[0-9]+\.d, p[0-7]/z, #1
** ret
*/
svuint64_t u64_z_pg_n (svbool_t pg)
{
  return svdiv_n_u64_z (pg, svdup_u64 (5), 3);
}

/*
** u64_m_pg_n:
** mov (z[0-9]+\.d), #3
** mov (z[0-9]+\.d), #5
** udiv \2, p[0-7]/m, \2, \1
** ret
*/
svuint64_t u64_m_pg_n (svbool_t pg)
{
  return svdiv_n_u64_m (pg, svdup_u64 (5), 3);
}

/*
** u64_x_ptrue_n:
** mov z[0-9]+\.d, #1
** ret
*/
svuint64_t u64_x_ptrue_n ()
{
  return svdiv_n_u64_x (svptrue_b64 (), svdup_u64 (5), 3);
}

/*
** u64_z_ptrue_n:
** mov z[0-9]+\.d, #1
** ret
*/
svuint64_t u64_z_ptrue_n ()
{
  return svdiv_n_u64_z (svptrue_b64 (), svdup_u64 (5), 3);
}

/*
** u64_m_ptrue_n:
** mov z[0-9]+\.d, #1
** ret
*/
svuint64_t u64_m_ptrue_n ()
{
  return svdiv_n_u64_m (svptrue_b64 (), svdup_u64 (5), 3);
}