mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-12-19 09:05:17 +08:00
lra: Canonicalize mult to shift in address reloads
Inside a (mem) RTX, it is canonical to write multiplications by powers
of two using a (mult) [0]. Outside of a (mem), the canonical way to
write multiplications by powers of two is using (ashift).

Now I observed that LRA does not quite respect this RTL canonicalization
rule. When compiling gcc/testsuite/gcc.dg/torture/pr34330.c with
-Os -ftree-vectorize, the RTL in the dump "281r.ira" has the insn:

    (set (reg:SI 111)
        (mem:SI (plus:DI (mult:DI (reg:DI 101 [ ivtmp.9 ])
                    (const_int 4 [0x4]))
                (reg/v/f:DI 105 [ b ]))))

but LRA then proceeds to generate a reload, and we get the following
non-canonical insn in "282r.reload":

    (set (reg:DI 7 x7 [121])
        (plus:DI (mult:DI (reg:DI 5 x5 [orig:101 ivtmp.9 ] [101])
                (const_int 4 [0x4]))
            (reg/v/f:DI 1 x1 [orig:105 b ] [105])))

This patch fixes LRA to ensure that we generate canonical RTL in this
case. After the patch, we get the following insn in "282r.reload":

    (set (reg:DI 7 x7 [121])
        (plus:DI (ashift:DI (reg:DI 5 x5 [orig:101 ivtmp.9 ] [101])
                (const_int 2 [0x2]))
            (reg/v/f:DI 1 x1 [orig:105 b ] [105])))

[0] : https://gcc.gnu.org/onlinedocs/gccint/Insn-Canonicalizations.html

gcc/ChangeLog:

    * lra-constraints.c (canonicalize_reload_addr): New.
    (curr_insn_transform): Use canonicalize_reload_addr to ensure we
    generate canonical RTL for an address reload.

gcc/testsuite/ChangeLog:

    * gcc.target/aarch64/mem-shift-canonical.c: New test.
This commit is contained in:
parent
795944c456
commit
6b3034eaba
@ -131,6 +131,7 @@
|
||||
#include "lra-int.h"
|
||||
#include "print-rtl.h"
|
||||
#include "function-abi.h"
|
||||
#include "rtl-iter.h"
|
||||
|
||||
/* Value of LRA_CURR_RELOAD_NUM at the beginning of BB of the current
|
||||
insn. Remember that LRA_CURR_RELOAD_NUM is the number of emitted
|
||||
@ -570,6 +571,33 @@ init_curr_insn_input_reloads (void)
|
||||
curr_insn_input_reloads_num = 0;
|
||||
}
|
||||
|
||||
/* The canonical form of an rtx inside a MEM is not necessarily the same as the
|
||||
canonical form of the rtx outside the MEM. Fix this up in the case that
|
||||
we're reloading an address (and therefore pulling it outside a MEM). */
|
||||
static rtx
|
||||
canonicalize_reload_addr (rtx addr)
|
||||
{
|
||||
subrtx_var_iterator::array_type array;
|
||||
FOR_EACH_SUBRTX_VAR (iter, array, addr, NONCONST)
|
||||
{
|
||||
rtx x = *iter;
|
||||
if (GET_CODE (x) == MULT && CONST_INT_P (XEXP (x, 1)))
|
||||
{
|
||||
const HOST_WIDE_INT ci = INTVAL (XEXP (x, 1));
|
||||
const int pwr2 = exact_log2 (ci);
|
||||
if (pwr2 > 0)
|
||||
{
|
||||
/* Rewrite this to use a shift instead, which is canonical when
|
||||
outside of a MEM. */
|
||||
PUT_CODE (x, ASHIFT);
|
||||
XEXP (x, 1) = GEN_INT (pwr2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already
|
||||
created input reload pseudo (only if TYPE is not OP_OUT). Don't
|
||||
reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be
|
||||
@ -4362,12 +4390,19 @@ curr_insn_transform (bool check_only_p)
|
||||
{
|
||||
rtx addr = *loc;
|
||||
enum rtx_code code = GET_CODE (addr);
|
||||
|
||||
bool align_p = false;
|
||||
|
||||
if (code == AND && CONST_INT_P (XEXP (addr, 1)))
|
||||
/* (and ... (const_int -X)) is used to align to X bytes. */
|
||||
addr = XEXP (*loc, 0);
|
||||
{
|
||||
/* (and ... (const_int -X)) is used to align to X bytes. */
|
||||
align_p = true;
|
||||
addr = XEXP (*loc, 0);
|
||||
}
|
||||
else
|
||||
addr = canonicalize_reload_addr (addr);
|
||||
|
||||
lra_emit_move (new_reg, addr);
|
||||
if (addr != *loc)
|
||||
if (align_p)
|
||||
emit_move_insn (new_reg, gen_rtx_AND (GET_MODE (new_reg), new_reg, XEXP (*loc, 1)));
|
||||
}
|
||||
before = get_insns ();
|
||||
|
27
gcc/testsuite/gcc.target/aarch64/mem-shift-canonical.c
Normal file
27
gcc/testsuite/gcc.target/aarch64/mem-shift-canonical.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* This test is a copy of gcc.dg/torture/pr34330.c: here we are looking for
|
||||
specific patterns being matched in the AArch64 backend. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Os -ftree-vectorize -dp" } */
|
||||
|
||||
|
||||
/* Fixture type for the test: foo reads T as its loop bound and indexes the
   records that S points to.  */
struct T
|
||||
{
|
||||
/* Upper bound of the loop in foo (iterations run while i < t).  */
int t;
|
||||
/* Records of four shorts; foo writes all four fields of s[i].  */
struct { short s1, s2, s3, s4; } *s;
|
||||
};
|
||||
|
||||
/* For each i from 0 up to (but not including) e->t, store a[i], b[i], c[i]
   and d[i] into fields s1, s2, s3 and s4 of e->s[i] respectively.  The
   sources are int and the destination fields are short, so each store
   converts the value.  The dg-final directive in this file counts
   "add_lsl_di" matches in the assembly for this code, so the statement
   form here is presumably significant and should not be restyled.  */
void
|
||||
foo (int *a, int *b, int *c, int *d, struct T *e)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < e->t; i++)
|
||||
{
|
||||
e->s[i].s1 = a[i];
|
||||
e->s[i].s2 = b[i];
|
||||
e->s[i].s3 = c[i];
|
||||
e->s[i].s4 = d[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "add_lsl_di" 3 } } */
|
Loading…
Reference in New Issue
Block a user