lra: Canonicalize mult to shift in address reloads

Inside a (mem) RTX, it is canonical to write multiplications by powers
of two using a (mult) [0]. Outside of a (mem), the canonical way to
write multiplications by powers of two is using (ashift).

Now I observed that LRA does not quite respect this RTL canonicalization
rule.  When compiling gcc/testsuite/gcc.dg/torture/pr34330.c with -Os
-ftree-vectorize, the RTL in the dump "281r.ira" has the insn:

(set (reg:SI 111)
     (mem:SI (plus:DI (mult:DI (reg:DI 101 [ ivtmp.9 ])
                 (const_int 4 [0x4]))
             (reg/v/f:DI 105 [ b ]))))

but LRA then proceeds to generate a reload, and we get the following
non-canonical insn in "282r.reload":

(set (reg:DI 7 x7 [121])
     (plus:DI (mult:DI (reg:DI 5 x5 [orig:101 ivtmp.9 ] [101])
             (const_int 4 [0x4]))
         (reg/v/f:DI 1 x1 [orig:105 b ] [105])))

This patch fixes LRA to ensure that we generate canonical RTL in this
case. After the patch, we get the following insn in "282r.reload":

(set (reg:DI 7 x7 [121])
        (plus:DI (ashift:DI (reg:DI 5 x5 [orig:101 ivtmp.9 ] [101])
                (const_int 2 [0x2]))
            (reg/v/f:DI 1 x1 [orig:105 b ] [105])))

[0] : https://gcc.gnu.org/onlinedocs/gccint/Insn-Canonicalizations.html

gcc/ChangeLog:

	* lra-constraints.c (canonicalize_reload_addr): New.
	(curr_insn_transform): Use canonicalize_reload_addr to ensure we
	generate canonical RTL for an address reload.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/mem-shift-canonical.c: New test.
This commit is contained in:
Alex Coplan 2020-08-27 09:49:57 +01:00
parent 795944c456
commit 6b3034eaba
2 changed files with 66 additions and 4 deletions

View File

@ -131,6 +131,7 @@
#include "lra-int.h"
#include "print-rtl.h"
#include "function-abi.h"
#include "rtl-iter.h"
/* Value of LRA_CURR_RELOAD_NUM at the beginning of BB of the current
insn. Remember that LRA_CURR_RELOAD_NUM is the number of emitted
@ -570,6 +571,33 @@ init_curr_insn_input_reloads (void)
curr_insn_input_reloads_num = 0;
}
/* Rewrite ADDR in place into the form expected outside of a MEM and
   return it.

   An address that is being reloaded is pulled out of its MEM, and the
   canonical form of an rtx outside a MEM is not necessarily the same as
   its canonical form inside one.  Each (mult X (const_int C)) visited by
   the NONCONST subrtx walk of ADDR, where exact_log2 (C) is positive,
   becomes (ashift X (const_int exact_log2 (C))).  */
static rtx
canonicalize_reload_addr (rtx addr)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, addr, NONCONST)
    {
      rtx sub = *iter;

      /* Only a multiplication by an integer constant is a candidate.  */
      if (GET_CODE (sub) != MULT)
        continue;
      if (!CONST_INT_P (XEXP (sub, 1)))
        continue;

      /* Leave the MULT alone unless the shift amount is positive
         (presumably this skips non-powers of two and a multiplication
         by one -- confirm against exact_log2).  */
      const HOST_WIDE_INT factor = INTVAL (XEXP (sub, 1));
      const int shift = exact_log2 (factor);
      if (shift <= 0)
        continue;

      /* A shift is the canonical spelling once outside of a MEM.  */
      PUT_CODE (sub, ASHIFT);
      XEXP (sub, 1) = GEN_INT (shift);
    }
  return addr;
}
/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already
created input reload pseudo (only if TYPE is not OP_OUT). Don't
reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be
@ -4362,12 +4390,19 @@ curr_insn_transform (bool check_only_p)
{
rtx addr = *loc;
enum rtx_code code = GET_CODE (addr);
bool align_p = false;
if (code == AND && CONST_INT_P (XEXP (addr, 1)))
/* (and ... (const_int -X)) is used to align to X bytes. */
addr = XEXP (*loc, 0);
{
/* (and ... (const_int -X)) is used to align to X bytes. */
align_p = true;
addr = XEXP (*loc, 0);
}
else
addr = canonicalize_reload_addr (addr);
lra_emit_move (new_reg, addr);
if (addr != *loc)
if (align_p)
emit_move_insn (new_reg, gen_rtx_AND (GET_MODE (new_reg), new_reg, XEXP (*loc, 1)));
}
before = get_insns ();

View File

@ -0,0 +1,27 @@
/* This test is a copy of gcc.dg/torture/pr34330.c: here we are looking for
specific patterns being matched in the AArch64 backend. */
/* { dg-do compile } */
/* { dg-options "-Os -ftree-vectorize -dp" } */
/* Container used by foo: T counts the elements reachable through S, and
   each element of S holds four short fields.  */
struct T
{
  int t;
  struct
  {
    short s1;
    short s2;
    short s3;
    short s4;
  } *s;
};
/* For each index I below E->t, store A[I], B[I], C[I] and D[I] into the
   s1, s2, s3 and s4 fields of E->s[I]; each int value is converted to
   short by the assignment.

   NOTE(review): the dg-final directive in this file counts "add_lsl_di"
   matches in the assembly, so the exact shape of this loop presumably
   matters -- do not restructure it without re-checking that count.  */
void
foo (int *a, int *b, int *c, int *d, struct T *e)
{
int i;
/* The bound E->t is written in the loop condition itself rather than
   cached in a local.  */
for (i = 0; i < e->t; i++)
{
/* Four independent int arrays and the array of structs are all
   indexed by the same I.  */
e->s[i].s1 = a[i];
e->s[i].s2 = b[i];
e->s[i].s3 = c[i];
e->s[i].s4 = d[i];
}
}
/* { dg-final { scan-assembler-times "add_lsl_di" 3 } } */