Allow mode-switching to introduce internal loops [PR113220]
In this PR, the SME mode-switching code needs to insert a stack-probe loop for an alloca.  This patch allows the target to do that.

There are two parts to it: allowing loops for insertions in blocks, and allowing them for insertions on edges.  The former can be handled entirely within mode-switching itself, by recording which blocks have had new branches inserted.  The latter requires an extension to commit_one_edge_insertion.

I think the extension to commit_one_edge_insertion makes logical sense, since it already explicitly allows internal loops during RTL expansion.  The single-block find_sub_basic_blocks is a relatively recent addition, so wouldn't have been available when the code was originally written.

The patch also has a small and obvious fix to make the aarch64 emit hook cope with labels.

I've added specific -fstack-clash-protection versions of all aarch64-sme.exp tests that previously failed because of this bug.  I've also added -fno-stack-clash-protection to the original versions of these tests if they contain scans that assume no protection.

gcc/
        PR target/113220
        * cfgrtl.cc (commit_one_edge_insertion): Handle sequences that
        contain jumps even if called after initial RTL expansion.
        * mode-switching.cc: Include cfgbuild.h.
        (optimize_mode_switching): Allow the sequence returned by the
        emit hook to contain internal jumps.  Record which blocks
        contain such jumps and split the blocks at the end.
        * config/aarch64/aarch64.cc (aarch64_mode_emit): Check for
        non-debug insns when scanning the sequence.

gcc/testsuite/
        PR target/113220
        * gcc.target/aarch64/sme/call_sm_switch_5.c: Add
        -fno-stack-clash-protection.
        * gcc.target/aarch64/sme/call_sm_switch_5_scp.c: New test.
        * gcc.target/aarch64/sme/sibcall_6_scp.c: New test.
        * gcc.target/aarch64/sme/za_state_4.c: Add
        -fno-stack-clash-protection.
        * gcc.target/aarch64/sme/za_state_4_scp.c: New test.
        * gcc.target/aarch64/sme/za_state_5.c: Add
        -fno-stack-clash-protection.
        * gcc.target/aarch64/sme/za_state_5_scp.c: New test.
This commit is contained in: parent 703d14132e, commit 4f7d4a2cd2
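For context, here is a minimal sketch of the kind of source that appears to hit this path, loosely modelled on the za_state tests changed below; it is not one of the tests added by this patch, and the declarations out_za, in_za and private_za are illustrative names only.  A function with live ZA state calls a private-ZA function, so the SME mode-switching emit hook has to allocate the runtime-sized lazy-save buffer, and under -fstack-clash-protection that allocation is probed in a loop, i.e. the emitted sequence contains internal jumps.

```c
/* Hypothetical reduction of the PR target/113220 situation (sketch only,
   not part of this commit).  Compile with something like
   -O -march=armv9-a+sme -fstack-clash-protection.  */

void private_za ();               /* May clobber ZA: caller needs a lazy save.  */
void out_za () __arm_out ("za");  /* Produces live ZA contents.  */
void in_za () __arm_in ("za");    /* Consumes ZA contents.  */

__arm_new ("za") void
test ()
{
  out_za ();
  /* ZA is live across this call, so the mode-switching emit hook sets up a
     lazy-save buffer.  Its size depends on the runtime vector length, so with
     -fstack-clash-protection the stack allocation is probed in a loop, which
     is exactly the internal-jump sequence this patch teaches the CFG code to
     accept.  */
  private_za ();
  in_za ();
}
```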
gcc/cfgrtl.cc
@@ -2018,6 +2018,21 @@ commit_one_edge_insertion (edge e)
   insns = e->insns.r;
   e->insns.r = NULL;
 
+  /* Allow the sequence to contain internal jumps, such as a memcpy loop
+     or an allocation loop.  If such a sequence is emitted during RTL
+     expansion, we'll create the appropriate basic blocks later,
+     at the end of the pass.  But if such a sequence is emitted after
+     initial expansion, we'll need to find the subblocks ourselves.  */
+  bool contains_jump = false;
+  if (!currently_expanding_to_rtl)
+    for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
+      if (JUMP_P (insn))
+	{
+	  rebuild_jump_labels_chain (insns);
+	  contains_jump = true;
+	  break;
+	}
+
   /* Figure out where to put these insns.  If the destination has
      one predecessor, insert there.  Except for the exit block.  */
   if (single_pred_p (e->dest) && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
@@ -2112,13 +2127,13 @@ commit_one_edge_insertion (edge e)
 	delete_insn (before);
     }
   else
-    /* Some builtin expanders, such as those for memset and memcpy,
-       may generate loops and conditionals, and those may get emitted
-       into edges.  That's ok while expanding to rtl, basic block
-       boundaries will be identified and split afterwards.  ??? Need
-       we check whether the destination labels of any inserted jumps
-       are also part of the inserted sequence?  */
+    /* Sequences inserted after RTL expansion are expected to be SESE,
+       with only internal branches allowed.  If the sequence jumps outside
+       itself then we do not know how to add the associated edges here.  */
     gcc_assert (!JUMP_P (last) || currently_expanding_to_rtl);
+
+  if (contains_jump)
+    find_sub_basic_blocks (bb);
 }
 
 /* Update the CFG for all queued instructions.  */
gcc/config/aarch64/aarch64.cc
@@ -29513,6 +29513,8 @@ aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live)
   HARD_REG_SET clobbers = {};
   for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
     {
+      if (!NONDEBUG_INSN_P (insn))
+	continue;
       vec_rtx_properties properties;
       properties.add_insn (insn, false);
       for (rtx_obj_reference ref : properties.refs ())
gcc/mode-switching.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "lcm.h"
 #include "cfgcleanup.h"
 #include "tree-pass.h"
+#include "cfgbuild.h"
 
 /* We want target macros for the mode switching code to be able to refer
    to instruction attribute values.  */
@@ -1105,6 +1106,8 @@ optimize_mode_switching (void)
   edge_list = pre_edge_lcm_avs (n_entities * max_num_modes, transp, comp, antic,
 				kill, avin, avout, &insert, &del);
 
+  auto_sbitmap jumping_blocks (last_basic_block_for_fn (cfun));
+  bitmap_clear (jumping_blocks);
   for (j = n_entities - 1; j >= 0; j--)
     {
       int no_mode = num_modes[entity_map[j]];
@@ -1215,6 +1218,13 @@ optimize_mode_switching (void)
 	      /* Insert MODE_SET only if it is nonempty.  */
 	      if (mode_set != NULL_RTX)
 		{
+		  for (auto insn = mode_set; insn; insn = NEXT_INSN (insn))
+		    if (JUMP_P (insn))
+		      {
+			rebuild_jump_labels_chain (mode_set);
+			bitmap_set_bit (jumping_blocks, bb->index);
+			break;
+		      }
 		  emitted = true;
 		  if (NOTE_INSN_BASIC_BLOCK_P (ptr->insn_ptr))
 		    /* We need to emit the insns in a FIFO-like manner,
@@ -1252,6 +1262,11 @@ optimize_mode_switching (void)
   sbitmap_vector_free (avin);
   sbitmap_vector_free (avout);
 
+  gcc_assert (SBITMAP_SIZE ((sbitmap) jumping_blocks)
+	      == (unsigned int) last_basic_block_for_fn (cfun));
+  if (!bitmap_empty_p (jumping_blocks))
+    find_many_sub_basic_blocks (jumping_blocks);
+
   if (need_commit)
     commit_edge_insertions ();
 
gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
@@ -1,4 +1,4 @@
-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 #include <arm_sve.h>
gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c (new file)
@@ -0,0 +1,3 @@
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -fstack-clash-protection" }
+
+#include "call_sm_switch_5.c"
gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c (new file)
@@ -0,0 +1,3 @@
+/* { dg-options "-O2 -fstack-clash-protection" } */
+
+#include "sibcall_6.c"
gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
@@ -1,4 +1,4 @@
-// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-options "-O -fno-optimize-sibling-calls -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 void private_za();
gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c (new file)
@@ -0,0 +1,3 @@
+// { dg-options "-O -fno-optimize-sibling-calls -fstack-clash-protection" }
+
+#include "za_state_4.c"
gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
@@ -1,4 +1,4 @@
-// { dg-options "-O2 -fno-optimize-sibling-calls" }
+// { dg-options "-O2 -fno-optimize-sibling-calls -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 void private_za();
gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c (new file)
@@ -0,0 +1,3 @@
+// { dg-options "-O -fno-optimize-sibling-calls -fstack-clash-protection" }
+
+#include "za_state_5.c"