Allow mode-switching to introduce internal loops [PR113220]

In this PR, the SME mode-switching code needs to insert a stack-probe
loop for an alloca.  This patch allows the target to do that.

There are two parts to it: allowing loops for insertions in blocks,
and allowing them for insertions on edges.  The former can be handled
entirely within mode-switching itself, by recording which blocks have
had new branches inserted.  The latter requires an extension to
commit_one_edge_insertion.

I think the extension to commit_one_edge_insertion makes logical sense,
since it already explicitly allows internal loops during RTL expansion.
The single-block find_sub_basic_blocks is a relatively recent addition,
so wouldn't have been available when the code was originally written.

The patch also has a small and obvious fix to make the aarch64 emit
hook cope with labels.

I've added specific -fstack-clash-protection versions of all
aarch64-sme.exp tests that previously failed because of this bug.
I've also added -fno-stack-clash-protection to the original versions
of these tests if they contain scans that assume no protection.

gcc/
	PR target/113220
	* cfgrtl.cc (commit_one_edge_insertion): Handle sequences that
	contain jumps even if called after initial RTL expansion.
	* mode-switching.cc: Include cfgbuild.h.
	(optimize_mode_switching): Allow the sequence returned by the
	emit hook to contain internal jumps.  Record which blocks
	contain such jumps and split the blocks at the end.
	* config/aarch64/aarch64.cc (aarch64_mode_emit): Check for
	non-debug insns when scanning the sequence.

gcc/testsuite/
	PR target/113220
	* gcc.target/aarch64/sme/call_sm_switch_5.c: Add
	-fno-stack-clash-protection.
	* gcc.target/aarch64/sme/call_sm_switch_5_scp.c: New test.
	* gcc.target/aarch64/sme/sibcall_6_scp.c: New test.
	* gcc.target/aarch64/sme/za_state_4.c: Add
	-fno-stack-clash-protection.
	* gcc.target/aarch64/sme/za_state_4_scp.c: New test.
	* gcc.target/aarch64/sme/za_state_5.c: Add
	-fno-stack-clash-protection.
	* gcc.target/aarch64/sme/za_state_5_scp.c: New test.
This commit is contained in:
Richard Sandiford 2024-02-21 11:12:26 +00:00
parent 703d14132e
commit 4f7d4a2cd2
10 changed files with 53 additions and 9 deletions

View File

@ -2018,6 +2018,21 @@ commit_one_edge_insertion (edge e)
insns = e->insns.r;
e->insns.r = NULL;
/* Allow the sequence to contain internal jumps, such as a memcpy loop
or an allocation loop. If such a sequence is emitted during RTL
expansion, we'll create the appropriate basic blocks later,
at the end of the pass. But if such a sequence is emitted after
initial expansion, we'll need to find the subblocks ourselves. */
bool contains_jump = false;
if (!currently_expanding_to_rtl)
for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
if (JUMP_P (insn))
{
rebuild_jump_labels_chain (insns);
contains_jump = true;
break;
}
/* Figure out where to put these insns. If the destination has
one predecessor, insert there. Except for the exit block. */
if (single_pred_p (e->dest) && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
@ -2112,13 +2127,13 @@ commit_one_edge_insertion (edge e)
delete_insn (before);
}
else
/* Some builtin expanders, such as those for memset and memcpy,
may generate loops and conditionals, and those may get emitted
into edges. That's ok while expanding to rtl, basic block
boundaries will be identified and split afterwards. ??? Need
we check whether the destination labels of any inserted jumps
are also part of the inserted sequence? */
/* Sequences inserted after RTL expansion are expected to be SESE,
with only internal branches allowed. If the sequence jumps outside
itself then we do not know how to add the associated edges here. */
gcc_assert (!JUMP_P (last) || currently_expanding_to_rtl);
if (contains_jump)
find_sub_basic_blocks (bb);
}
/* Update the CFG for all queued instructions. */

View File

@ -29513,6 +29513,8 @@ aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live)
HARD_REG_SET clobbers = {};
for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
{
if (!NONDEBUG_INSN_P (insn))
continue;
vec_rtx_properties properties;
properties.add_insn (insn, false);
for (rtx_obj_reference ref : properties.refs ())

View File

@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
#include "lcm.h"
#include "cfgcleanup.h"
#include "tree-pass.h"
#include "cfgbuild.h"
/* We want target macros for the mode switching code to be able to refer
to instruction attribute values. */
@ -1105,6 +1106,8 @@ optimize_mode_switching (void)
edge_list = pre_edge_lcm_avs (n_entities * max_num_modes, transp, comp, antic,
kill, avin, avout, &insert, &del);
auto_sbitmap jumping_blocks (last_basic_block_for_fn (cfun));
bitmap_clear (jumping_blocks);
for (j = n_entities - 1; j >= 0; j--)
{
int no_mode = num_modes[entity_map[j]];
@ -1215,6 +1218,13 @@ optimize_mode_switching (void)
/* Insert MODE_SET only if it is nonempty. */
if (mode_set != NULL_RTX)
{
for (auto insn = mode_set; insn; insn = NEXT_INSN (insn))
if (JUMP_P (insn))
{
rebuild_jump_labels_chain (mode_set);
bitmap_set_bit (jumping_blocks, bb->index);
break;
}
emitted = true;
if (NOTE_INSN_BASIC_BLOCK_P (ptr->insn_ptr))
/* We need to emit the insns in a FIFO-like manner,
@ -1252,6 +1262,11 @@ optimize_mode_switching (void)
sbitmap_vector_free (avin);
sbitmap_vector_free (avout);
gcc_assert (SBITMAP_SIZE ((sbitmap) jumping_blocks)
== (unsigned int) last_basic_block_for_fn (cfun));
if (!bitmap_empty_p (jumping_blocks))
find_many_sub_basic_blocks (jumping_blocks);
if (need_commit)
commit_edge_insertions ();

View File

@ -1,4 +1,4 @@
// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -fno-stack-clash-protection" }
// { dg-final { check-function-bodies "**" "" } }
#include <arm_sve.h>

View File

@ -0,0 +1,3 @@
// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
#include "call_sm_switch_5.c"

View File

@ -0,0 +1,3 @@
/* { dg-options "-O2 -fstack-clash-protection" } */
#include "sibcall_6.c"

View File

@ -1,4 +1,4 @@
// { dg-options "-O -fno-optimize-sibling-calls" }
// { dg-options "-O -fno-optimize-sibling-calls -fno-stack-clash-protection" }
// { dg-final { check-function-bodies "**" "" } }
void private_za();

View File

@ -0,0 +1,3 @@
// { dg-options "-O -fno-optimize-sibling-calls -fstack-clash-protection" }
#include "za_state_4.c"

View File

@ -1,4 +1,4 @@
// { dg-options "-O2 -fno-optimize-sibling-calls" }
// { dg-options "-O2 -fno-optimize-sibling-calls -fno-stack-clash-protection" }
// { dg-final { check-function-bodies "**" "" } }
void private_za();

View File

@ -0,0 +1,3 @@
// { dg-options "-O -fno-optimize-sibling-calls -fstack-clash-protection" }
#include "za_state_5.c"