[omp, simt] Fix expand_GOMP_SIMT_*

When running the test-case included in this patch using an
nvptx accelerator, it fails in execution.

The problem is that the expansion of GOMP_SIMT_XCHG_BFLY is optimized away
during pass_jump as "trivially dead insns".

This is caused by this code in expand_GOMP_SIMT_XCHG_BFLY:
...
  class expand_operand ops[3];
  create_output_operand (&ops[0], target, mode);
  ...
  expand_insn (targetm.code_for_omp_simt_xchg_bfly, 3, ops);
...
which doesn't guarantee that target is assigned to by the expanded insn.

F.i., if target is:
...
(gdb) call debug_rtx ( target )
(subreg/s/u:QI (reg:SI 40 [ _61 ]) 0)
...
then after expand_insn, we have:
...
(gdb) call debug_rtx ( ops[0].value )
(reg:QI 57)
...

See commit 3af3bec2e4 "internal-fn: Avoid dropping the lhs of some
calls [PR94941]" for a similar problem.

Fix this in the same way, by adding:
...
  if (!rtx_equal_p (target, ops[0].value))
    emit_move_insn (target, ops[0].value);
...
where applicable in the expand_GOMP_SIMT_* functions.

Tested libgomp on x86_64 with nvptx accelerator.

gcc/ChangeLog:

2021-04-28  Tom de Vries  <tdevries@suse.de>

	PR target/100232
	* internal-fn.c (expand_GOMP_SIMT_ENTER_ALLOC)
	(expand_GOMP_SIMT_LAST_LANE, expand_GOMP_SIMT_ORDERED_PRED)
	(expand_GOMP_SIMT_VOTE_ANY, expand_GOMP_SIMT_XCHG_BFLY)
	(expand_GOMP_SIMT_XCHG_IDX): Ensure target is assigned to.
This commit is contained in:
Tom de Vries 2021-04-28 16:00:01 +02:00
parent b58dc0b803
commit 4d7c874e2c
2 changed files with 36 additions and 0 deletions

View File

@ -243,6 +243,8 @@ expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt)
create_input_operand (&ops[2], align, Pmode);
gcc_assert (targetm.have_omp_simt_enter ());
expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
if (!rtx_equal_p (target, ops[0].value))
emit_move_insn (target, ops[0].value);
}
/* Deallocate per-lane storage and leave non-uniform execution region. */
@ -300,6 +302,8 @@ expand_GOMP_SIMT_LAST_LANE (internal_fn, gcall *stmt)
create_input_operand (&ops[1], cond, mode);
gcc_assert (targetm.have_omp_simt_last_lane ());
expand_insn (targetm.code_for_omp_simt_last_lane, 2, ops);
if (!rtx_equal_p (target, ops[0].value))
emit_move_insn (target, ops[0].value);
}
/* Non-transparent predicate used in SIMT lowering of OpenMP "ordered". */
@ -319,6 +323,8 @@ expand_GOMP_SIMT_ORDERED_PRED (internal_fn, gcall *stmt)
create_input_operand (&ops[1], ctr, mode);
gcc_assert (targetm.have_omp_simt_ordered ());
expand_insn (targetm.code_for_omp_simt_ordered, 2, ops);
if (!rtx_equal_p (target, ops[0].value))
emit_move_insn (target, ops[0].value);
}
/* "Or" boolean reduction across SIMT lanes: return non-zero in all lanes if
@ -339,6 +345,8 @@ expand_GOMP_SIMT_VOTE_ANY (internal_fn, gcall *stmt)
create_input_operand (&ops[1], cond, mode);
gcc_assert (targetm.have_omp_simt_vote_any ());
expand_insn (targetm.code_for_omp_simt_vote_any, 2, ops);
if (!rtx_equal_p (target, ops[0].value))
emit_move_insn (target, ops[0].value);
}
/* Exchange between SIMT lanes with a "butterfly" pattern: source lane index
@ -361,6 +369,8 @@ expand_GOMP_SIMT_XCHG_BFLY (internal_fn, gcall *stmt)
create_input_operand (&ops[2], idx, SImode);
gcc_assert (targetm.have_omp_simt_xchg_bfly ());
expand_insn (targetm.code_for_omp_simt_xchg_bfly, 3, ops);
if (!rtx_equal_p (target, ops[0].value))
emit_move_insn (target, ops[0].value);
}
/* Exchange between SIMT lanes according to given source lane index. */
@ -382,6 +392,8 @@ expand_GOMP_SIMT_XCHG_IDX (internal_fn, gcall *stmt)
create_input_operand (&ops[2], idx, SImode);
gcc_assert (targetm.have_omp_simt_xchg_idx ());
expand_insn (targetm.code_for_omp_simt_xchg_idx, 3, ops);
if (!rtx_equal_p (target, ops[0].value))
emit_move_insn (target, ops[0].value);
}
/* This should get expanded in adjust_simduid_builtins. */

View File

@ -0,0 +1,24 @@
/* { dg-do run } */
#include <stdlib.h>
#define N 32
#define TYPE char
int
main (void)
{
TYPE result = 1;
TYPE a[N];
for (int x = 0; x < N; ++x)
a[x] = 1;
#pragma omp target map(tofrom: result) map(to:a)
#pragma omp for simd reduction(&&:result)
for (int x = 0; x < N; ++x)
result = result && a[x];
if (result != 1)
abort ();
return 0;
}