loop-niter: Recognize popcount idioms even with char, short and __int128 [PR95771]

As the testcase shows, we punt unnecessarily on popcount loop idioms if
the type is smaller than int or larger than long long.
Smaller type than int can be handled by zero-extending the argument to
unsigned int, and types twice as long as long long by doing
__builtin_popcountll on both halves of the __int128.

2020-01-04  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/95771
	* tree-ssa-loop-niter.c (number_of_iterations_popcount): Handle types
	with precision smaller than int's precision and types with precision
	twice as large as long long.  Formatting fixes.

	* gcc.target/i386/pr95771.c: New test.
This commit is contained in:
Jakub Jelinek 2021-01-04 14:36:06 +01:00
parent 39bd65faee
commit 24cd9afe61
2 changed files with 98 additions and 14 deletions

View File

@ -0,0 +1,67 @@
/* PR tree-optimization/95771 */
/* { dg-do compile } */
/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" { target int128 } } } */
/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" { target { ! int128 } } } } */
int
foo (unsigned char x)
{
int i = 0;
while (x)
{
x &= x - 1;
++i;
}
return i;
}
int
bar (unsigned short x)
{
int i = 0;
while (x)
{
x &= x - 1;
++i;
}
return i;
}
int
baz (unsigned int x)
{
int i = 0;
while (x)
{
x &= x - 1;
++i;
}
return i;
}
int
qux (unsigned long long x)
{
int i = 0;
while (x)
{
x &= x - 1;
++i;
}
return i;
}
#ifdef __SIZEOF_INT128__
int
corge (unsigned __int128 x)
{
int i = 0;
while (x)
{
x &= x - 1;
++i;
}
return i;
}
#endif

View File

@ -2666,27 +2666,45 @@ number_of_iterations_popcount (loop_p loop, edge exit,
/* We found a match. Get the corresponding popcount builtin. */
tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION (integer_type_node))
if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
(long_integer_type_node))
else if (TYPE_PRECISION (TREE_TYPE (src))
== TYPE_PRECISION (long_integer_type_node))
fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
(long_long_integer_type_node))
else if (TYPE_PRECISION (TREE_TYPE (src))
== TYPE_PRECISION (long_long_integer_type_node)
|| (TYPE_PRECISION (TREE_TYPE (src))
== 2 * TYPE_PRECISION (long_long_integer_type_node)))
fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
/* ??? Support promoting char/short to int. */
if (!fn)
return false;
/* Update NITER params accordingly */
tree utype = unsigned_type_for (TREE_TYPE (src));
src = fold_convert (utype, src);
tree call = fold_convert (utype, build_call_expr (fn, 1, src));
if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
src = fold_convert (unsigned_type_node, src);
tree call;
if (TYPE_PRECISION (TREE_TYPE (src))
== 2 * TYPE_PRECISION (long_long_integer_type_node))
{
int prec = TYPE_PRECISION (long_long_integer_type_node);
tree src1 = fold_convert (long_long_unsigned_type_node,
fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
unshare_expr (src),
build_int_cst (integer_type_node,
prec)));
tree src2 = fold_convert (long_long_unsigned_type_node, src);
call = build_call_expr (fn, 1, src1);
call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
build_call_expr (fn, 1, src2));
call = fold_convert (utype, call);
}
else
call = fold_convert (utype, build_call_expr (fn, 1, src));
if (adjust)
iter = fold_build2 (MINUS_EXPR, utype,
call,
build_int_cst (utype, 1));
iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
else
iter = call;
@ -2703,10 +2721,9 @@ number_of_iterations_popcount (loop_p loop, edge exit,
if (adjust)
{
tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
build_zero_cst
(TREE_TYPE (src)));
niter->may_be_zero =
simplify_using_initial_conditions (loop, may_be_zero);
build_zero_cst (TREE_TYPE (src)));
niter->may_be_zero
= simplify_using_initial_conditions (loop, may_be_zero);
}
else
niter->may_be_zero = boolean_false_node;