mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-26 21:33:59 +08:00
AVR: target/117726 - Better optimizations of ASHIFT:SI insns.
This patch improves the 4-byte ASHIFT insns. 1) It adds a "r,r,C15" alternative for improved long << 15. 2) It adds 3-operand alternatives (depending on options) and splits them after peephole2 / before avr-fuse-move into a 3-operand byte shift and a 2-operand residual bit shift. For better control, it introduces new option -msplit-bit-shift that's activated at -O2 and higher per default. 2) is even performed with -Os, but not with -Oz. PR target/117726 gcc/ * config/avr/avr.opt (-msplit-bit-shift): Add new optimization option. * common/config/avr/avr-common.cc (avr_option_optimization_table) [OPT_LEVELS_2_PLUS]: Turn on -msplit-bit-shift. * config/avr/avr.h (machine_function.n_avr_fuse_add_executed): New bool component. * config/avr/avr.md (attr "isa") <2op, 3op>: Add new values. (attr "enabled"): Handle them. (ashlsi3, *ashlsi3, *ashlsi3_const): Add "r,r,C15" alternative. Add "r,0,C4l" and "r,r,C4l" alternatives (depending on 2op / 3op). (define_split) [avr_split_bit_shift]: Add 2 new ashift:ALL4 splitters. (define_peephole2) [ashift:ALL4]: Add (match_dup 3) so that the scratch won't overlap with the output operand of the matched insn. (*ashl<mode>3_const_split): Remove unused ashift:ALL4 splitter. * config/avr/avr-passes.cc (emit_valid_insn) (emit_valid_move_clobbercc): Move out of anonymous namespace. (make_avr_pass_fuse_add) <gate>: Don't override. <execute>: Set n_avr_fuse_add_executed according to func->machine->n_avr_fuse_add_executed. (pass_data avr_pass_data_split_after_peephole2): New object. (avr_pass_split_after_peephole2): New rtl_opt_pass. (avr_emit_shift): New static function. (avr_shift_is_3op, avr_split_shift_p, avr_split_shift) (make_avr_pass_split_after_peephole2): New functions. * config/avr/avr-passes.def (avr_pass_split_after_peephole2): Insert new pass after pass_peephole2. 
* config/avr/avr-protos.h (n_avr_fuse_add_executed, avr_shift_is_3op, avr_split_shift_p) (avr_split_shift, avr_optimize_size_level) (make_avr_pass_split_after_peephole2): New prototypes. * config/avr/avr.cc (n_avr_fuse_add_executed): New global variable. (avr_optimize_size_level): New function. (avr_set_current_function): Set n_avr_fuse_add_executed according to cfun->machine->n_avr_fuse_add_executed. (ashlsi3_out) [case 15]: Output optimized code for this offset. (avr_rtx_costs_1) [ASHIFT, SImode]: Adjust costs of offsets 15, 16. * config/avr/constraints.md (C4a, C4l, C4r): New constraints. * pass_manager.h (pass_manager): Adjust comments.
This commit is contained in:
parent
938094abec
commit
873cffc792
@ -39,6 +39,7 @@ static const struct default_options avr_option_optimization_table[] =
|
||||
{ OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 },
|
||||
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
|
||||
// Stick to the "old" placement of the subreg lowering pass.
|
||||
{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
|
||||
/* Allow optimizer to introduce store data races. This used to be the
|
||||
|
@ -49,6 +49,34 @@
|
||||
|
||||
#define FIRST_GPR (AVR_TINY ? REG_18 : REG_2)
|
||||
|
||||
|
||||
// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
|
||||
|
||||
static rtx_insn *
|
||||
emit_valid_insn (rtx pat)
|
||||
{
|
||||
rtx_insn *insn = emit_insn (pat);
|
||||
|
||||
if (! valid_insn_p (insn)) // Also runs recog().
|
||||
fatal_insn ("emit unrecognizable insn", insn);
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
||||
// Emit a single_set with an optional scratch operand. This function
|
||||
// asserts that the new insn is valid and recognized.
|
||||
|
||||
static rtx_insn *
|
||||
emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
|
||||
{
|
||||
rtx pat = scratch
|
||||
? gen_gen_move_clobbercc_scratch (dest, src, scratch)
|
||||
: gen_gen_move_clobbercc (dest, src);
|
||||
|
||||
return emit_valid_insn (pat);
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -116,31 +144,6 @@ single_set_with_scratch (rtx_insn *insn, int ®no_scratch)
|
||||
return single_set (insn);
|
||||
}
|
||||
|
||||
// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
|
||||
|
||||
static rtx_insn *
|
||||
emit_valid_insn (rtx pat)
|
||||
{
|
||||
rtx_insn *insn = emit_insn (pat);
|
||||
|
||||
if (! valid_insn_p (insn)) // Also runs recog().
|
||||
fatal_insn ("emit unrecognizable insn", insn);
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
||||
// Emit a single_set with an optional scratch operand. This function
|
||||
// asserts that the new insn is valid and recognized.
|
||||
|
||||
static rtx_insn *
|
||||
emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
|
||||
{
|
||||
rtx pat = scratch
|
||||
? gen_gen_move_clobbercc_scratch (dest, src, scratch)
|
||||
: gen_gen_move_clobbercc (dest, src);
|
||||
|
||||
return emit_valid_insn (pat);
|
||||
}
|
||||
|
||||
// One bit for each GRP in REG_0 ... REG_31.
|
||||
using gprmask_t = uint32_t;
|
||||
@ -4213,12 +4216,17 @@ public:
|
||||
return make_avr_pass_fuse_add (m_ctxt);
|
||||
}
|
||||
|
||||
bool gate (function *) final override
|
||||
unsigned int execute (function *func) final override
|
||||
{
|
||||
return optimize && avr_fuse_add > 0;
|
||||
func->machine->n_avr_fuse_add_executed += 1;
|
||||
n_avr_fuse_add_executed = func->machine->n_avr_fuse_add_executed;
|
||||
|
||||
if (optimize && avr_fuse_add > 0)
|
||||
return execute1 (func);
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned int execute (function *) final override;
|
||||
unsigned int execute1 (function *);
|
||||
|
||||
struct Some_Insn
|
||||
{
|
||||
@ -4697,7 +4705,7 @@ avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn &add)
|
||||
as PRE_DEC + PRE_DEC for two adjacent locations. */
|
||||
|
||||
unsigned int
|
||||
avr_pass_fuse_add::execute (function *func)
|
||||
avr_pass_fuse_add::execute1 (function *func)
|
||||
{
|
||||
df_note_add_problem ();
|
||||
df_analyze ();
|
||||
@ -4769,6 +4777,146 @@ avr_pass_fuse_add::execute (function *func)
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Split insns after peephole2 / before avr-fuse-move.
|
||||
static const pass_data avr_pass_data_split_after_peephole2 =
|
||||
{
|
||||
RTL_PASS, // type
|
||||
"", // name (will be patched)
|
||||
OPTGROUP_NONE, // optinfo_flags
|
||||
TV_DF_SCAN, // tv_id
|
||||
0, // properties_required
|
||||
0, // properties_provided
|
||||
0, // properties_destroyed
|
||||
0, // todo_flags_start
|
||||
0 // todo_flags_finish
|
||||
};
|
||||
|
||||
class avr_pass_split_after_peephole2 : public rtl_opt_pass
|
||||
{
|
||||
public:
|
||||
avr_pass_split_after_peephole2 (gcc::context *ctxt, const char *name)
|
||||
: rtl_opt_pass (avr_pass_data_split_after_peephole2, ctxt)
|
||||
{
|
||||
this->name = name;
|
||||
}
|
||||
|
||||
unsigned int execute (function *) final override
|
||||
{
|
||||
if (avr_shift_is_3op ())
|
||||
split_all_insns ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
}; // avr_pass_split_after_peephole2
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
/* Whether some shift insn alternatives are a 3-operand insn or a
|
||||
2-operand insn. These 3op alternatives allow the source and the
|
||||
destination register of the shift to be different right from the
|
||||
start, because the splitter will split the 3op shift into a 3op byte
|
||||
shift and a 2op residual bit shift.
|
||||
(When the residual shift has an offset of one less than the bitsize,
|
||||
then the residual shift is also a 3op insn.) */
|
||||
|
||||
bool
|
||||
avr_shift_is_3op ()
|
||||
{
|
||||
// Don't split for OPTIMIZE_SIZE_MAX (-Oz).
|
||||
// For OPTIMIZE_SIZE_BALANCED (-Os), we still split because
|
||||
// the size overhead (if exists at all) is marginal.
|
||||
|
||||
return (avr_split_bit_shift
|
||||
&& optimize > 0
|
||||
&& avr_optimize_size_level () < OPTIMIZE_SIZE_MAX);
|
||||
}
|
||||
|
||||
|
||||
/* Implement constraints `C4a', `C4l' and `C4r'.
|
||||
Whether we split an N_BYTES shift of code CODE in { ASHIFTRT,
|
||||
LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift. */
|
||||
|
||||
bool
|
||||
avr_split_shift_p (int n_bytes, int offset, rtx_code)
|
||||
{
|
||||
gcc_assert (n_bytes == 4);
|
||||
|
||||
return (avr_shift_is_3op ()
|
||||
&& offset % 8 != 0 && IN_RANGE (offset, 17, 30));
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
|
||||
{
|
||||
machine_mode mode = GET_MODE (dest);
|
||||
rtx shift;
|
||||
|
||||
if (off == GET_MODE_BITSIZE (mode) - 1)
|
||||
{
|
||||
shift = gen_rtx_fmt_ee (code, mode, src, GEN_INT (off));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (REGNO (dest) != REGNO (src))
|
||||
emit_valid_move_clobbercc (dest, src);
|
||||
shift = gen_rtx_fmt_ee (code, mode, dest, GEN_INT (off));
|
||||
}
|
||||
|
||||
emit_valid_move_clobbercc (dest, shift, scratch);
|
||||
}
|
||||
|
||||
|
||||
/* Worker for define_split that run when -msplit-bit-shift is on.
|
||||
Split a shift of code CODE into a 3op byte shift and a residual bit shift.
|
||||
Return 'true' when a split has been performed and insns have been emitted.
|
||||
Otherwise, return 'false'. */
|
||||
|
||||
bool
|
||||
avr_split_shift (rtx xop[], rtx scratch, rtx_code code)
|
||||
{
|
||||
scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX;
|
||||
rtx dest = xop[0];
|
||||
rtx src = xop[1];
|
||||
int ioff = INTVAL (xop[2]);
|
||||
|
||||
gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4);
|
||||
|
||||
if (code == ASHIFT)
|
||||
{
|
||||
if (ioff >= 25)
|
||||
{
|
||||
rtx dst8 = avr_byte (dest, 3);
|
||||
rtx src8 = avr_byte (src, 0);
|
||||
avr_emit_shift (code, dst8, src8, ioff % 8, NULL_RTX);
|
||||
emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
|
||||
emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
|
||||
return true;
|
||||
}
|
||||
else if (ioff >= 17)
|
||||
{
|
||||
rtx dst16 = avr_word (dest, 2);
|
||||
rtx src16 = avr_word (src, 0);
|
||||
avr_emit_shift (code, dst16, src16, ioff % 16, scratch);
|
||||
emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Determine whether an ISR may use the __gcc_isr pseudo-instruction.
|
||||
@ -5125,3 +5273,11 @@ make_avr_pass_fuse_move (gcc::context *ctxt)
|
||||
{
|
||||
return new avr_pass_fuse_move (ctxt, "avr-fuse-move");
|
||||
}
|
||||
|
||||
// Split insns after peephole2 / before avr-fuse-move.
|
||||
|
||||
rtl_opt_pass *
|
||||
make_avr_pass_split_after_peephole2 (gcc::context *ctxt)
|
||||
{
|
||||
return new avr_pass_split_after_peephole2 (ctxt, "avr-split-after-peephole2");
|
||||
}
|
||||
|
@ -104,3 +104,10 @@ INSERT_PASS_BEFORE (pass_split_after_reload, 1, avr_pass_ifelse);
|
||||
- The RTL peepholer may optimize insns involving lower registers. */
|
||||
|
||||
INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_fuse_move);
|
||||
|
||||
/* Run an instance of post-reload split prior to avr-fuse-move.
|
||||
Purpose is to split 3-operand shift insns into a 3-operand shift
|
||||
with a byte offset, and a 2-operand residual shift after
|
||||
RTL peepholes but prior to the avr-fuse-move pass. */
|
||||
|
||||
INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_split_after_peephole2);
|
||||
|
@ -169,6 +169,13 @@ extern rtx cc_reg_rtx;
|
||||
extern rtx ccn_reg_rtx;
|
||||
extern rtx cczn_reg_rtx;
|
||||
|
||||
extern int n_avr_fuse_add_executed;
|
||||
extern bool avr_shift_is_3op ();
|
||||
extern bool avr_split_shift_p (int n_bytes, int offset, rtx_code);
|
||||
extern bool avr_split_shift (rtx xop[], rtx xscratch, rtx_code);
|
||||
|
||||
extern int avr_optimize_size_level ();
|
||||
|
||||
#endif /* RTX_CODE */
|
||||
|
||||
#ifdef REAL_VALUE_TYPE
|
||||
@ -188,6 +195,7 @@ extern rtl_opt_pass *make_avr_pass_pre_proep (gcc::context *);
|
||||
extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
|
||||
extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
|
||||
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
|
||||
extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
|
||||
#ifdef RTX_CODE
|
||||
extern bool avr_casei_sequence_check_operands (rtx *xop);
|
||||
extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
|
||||
|
@ -229,6 +229,12 @@ bool avr_need_clear_bss_p = false;
|
||||
bool avr_need_copy_data_p = false;
|
||||
bool avr_has_rodata_p = false;
|
||||
|
||||
/* Counts how often pass avr-fuse-add has been executed. It is kept in
|
||||
sync with cfun->machine->n_avr_fuse_add_executed and serves as an
|
||||
insn condition for shift insn splitters. */
|
||||
int n_avr_fuse_add_executed = 0;
|
||||
|
||||
|
||||
|
||||
/* Transform UP into lowercase and write the result to LO.
|
||||
You must provide enough space for LO. Return LO. */
|
||||
@ -526,6 +532,14 @@ avr_option_override (void)
|
||||
}
|
||||
|
||||
|
||||
int avr_optimize_size_level ()
|
||||
{
|
||||
return cfun && cfun->decl
|
||||
? opt_for_fn (cfun->decl, optimize_size)
|
||||
: optimize_size;
|
||||
}
|
||||
|
||||
|
||||
/* Implement `INIT_EXPANDERS'. */
|
||||
/* The function works like a singleton. */
|
||||
|
||||
@ -823,8 +837,12 @@ avr_set_current_function (tree decl)
|
||||
if (decl == NULL_TREE
|
||||
|| current_function_decl == NULL_TREE
|
||||
|| current_function_decl == error_mark_node
|
||||
|| ! cfun->machine
|
||||
|| cfun->machine->attributes_checked_p)
|
||||
|| ! cfun->machine)
|
||||
return;
|
||||
|
||||
n_avr_fuse_add_executed = cfun->machine->n_avr_fuse_add_executed;
|
||||
|
||||
if (cfun->machine->attributes_checked_p)
|
||||
return;
|
||||
|
||||
location_t loc = DECL_SOURCE_LOCATION (decl);
|
||||
@ -6590,7 +6608,7 @@ avr_out_cmp_ext (rtx xop[], rtx_code code, int *plen)
|
||||
|
||||
|
||||
/* Generate asm equivalent for various shifts. This only handles cases
|
||||
that are not already carefully hand-optimized in ?sh??i3_out.
|
||||
that are not already carefully hand-optimized in ?sh<mode>3_out.
|
||||
|
||||
OPERANDS[0] resp. %0 in TEMPL is the operand to be shifted.
|
||||
OPERANDS[2] is the shift count as CONST_INT, MEM or REG.
|
||||
@ -7042,6 +7060,7 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
|
||||
{
|
||||
int reg0 = true_regnum (operands[0]);
|
||||
int reg1 = true_regnum (operands[1]);
|
||||
bool reg1_unused_after_p = reg_unused_after (insn, operands[1]);
|
||||
|
||||
if (plen)
|
||||
*plen = 0;
|
||||
@ -7070,6 +7089,30 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
|
||||
"mov %B0,%A1" CR_TAB
|
||||
"mov %C0,%B1" CR_TAB
|
||||
"mov %D0,%C1", operands, plen, 4);
|
||||
case 15:
|
||||
avr_asm_len (reg1_unused_after_p
|
||||
? "lsr %C1"
|
||||
: "bst %C1,0", operands, plen, 1);
|
||||
if (reg0 + 2 != reg1)
|
||||
{
|
||||
if (AVR_HAVE_MOVW)
|
||||
avr_asm_len ("movw %C0,%A1", operands, plen, 1);
|
||||
else
|
||||
avr_asm_len ("mov %C0,%A1" CR_TAB
|
||||
"mov %D0,%B1", operands, plen, 2);
|
||||
}
|
||||
return reg1_unused_after_p
|
||||
? avr_asm_len ("clr %A0" CR_TAB
|
||||
"clr %B0" CR_TAB
|
||||
"ror %D0" CR_TAB
|
||||
"ror %C0" CR_TAB
|
||||
"ror %B0", operands, plen, 5)
|
||||
: avr_asm_len ("clr %A0" CR_TAB
|
||||
"clr %B0" CR_TAB
|
||||
"lsr %D0" CR_TAB
|
||||
"ror %C0" CR_TAB
|
||||
"ror %B0" CR_TAB
|
||||
"bld %D0,7", operands, plen, 6);
|
||||
case 16:
|
||||
if (reg0 + 2 == reg1)
|
||||
return avr_asm_len ("clr %B0" CR_TAB
|
||||
@ -12392,9 +12435,14 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
|
||||
break;
|
||||
case 1:
|
||||
case 8:
|
||||
case 16:
|
||||
*total = COSTS_N_INSNS (4);
|
||||
break;
|
||||
case 15:
|
||||
*total = COSTS_N_INSNS (8 - AVR_HAVE_MOVW);
|
||||
break;
|
||||
case 16:
|
||||
*total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
|
||||
break;
|
||||
case 31:
|
||||
*total = COSTS_N_INSNS (6);
|
||||
break;
|
||||
|
@ -610,6 +610,12 @@ struct GTY(()) machine_function
|
||||
/* 'true' if this function references .L__stack_usage like with
|
||||
__builtin_return_address. */
|
||||
bool use_L__stack_usage;
|
||||
|
||||
/* Counts how many times the execute() method of the avr-fuse-add
|
||||
has been invoked. The count is even increased when the optimization
|
||||
itself is not run. The purpose of this variable is to provide
|
||||
information about where in the pass sequence we are. */
|
||||
int n_avr_fuse_add_executed;
|
||||
};
|
||||
|
||||
/* AVR does not round pushes, but the existence of this macro is
|
||||
|
@ -184,73 +184,75 @@
|
||||
;; no_xmega: non-XMEGA core xmega : XMEGA core
|
||||
;; no_adiw: ISA has no ADIW, SBIW adiw : ISA has ADIW, SBIW
|
||||
|
||||
;; The following ISA attributes are actually not architecture specific,
|
||||
;; but depend on (optimization) options. This is because the "enabled"
|
||||
;; attribute can't depend on more than one other attribute. This means
|
||||
;; that 2op and 3op must work for all ISAs, and hence a 'flat' attribute
|
||||
;; scheme can be used (as opposed to a true cartesian product).
|
||||
|
||||
;; 2op : insn is a 2-operand insn 3op : insn is a 3-operand insn
|
||||
|
||||
(define_attr "isa"
|
||||
"mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
|
||||
no_adiw,adiw,
|
||||
2op,3op,
|
||||
standard"
|
||||
(const_string "standard"))
|
||||
|
||||
(define_attr "enabled" ""
|
||||
(cond [(eq_attr "isa" "standard")
|
||||
(const_int 1)
|
||||
(if_then_else
|
||||
(ior (eq_attr "isa" "standard")
|
||||
|
||||
(and (eq_attr "isa" "mov")
|
||||
(match_test "!AVR_HAVE_MOVW"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "movw")
|
||||
(match_test "AVR_HAVE_MOVW"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "rjmp")
|
||||
(match_test "!AVR_HAVE_JMP_CALL"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "jmp")
|
||||
(match_test "AVR_HAVE_JMP_CALL"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "ijmp")
|
||||
(match_test "!AVR_HAVE_EIJMP_EICALL"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "eijmp")
|
||||
(match_test "AVR_HAVE_EIJMP_EICALL"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "lpm")
|
||||
(match_test "!AVR_HAVE_LPMX"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "lpmx")
|
||||
(match_test "AVR_HAVE_LPMX"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "elpm")
|
||||
(match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "elpmx")
|
||||
(match_test "AVR_HAVE_ELPMX"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "xmega")
|
||||
(match_test "AVR_XMEGA"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "no_xmega")
|
||||
(match_test "!AVR_XMEGA"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "adiw")
|
||||
(match_test "AVR_HAVE_ADIW"))
|
||||
(const_int 1)
|
||||
|
||||
(and (eq_attr "isa" "no_adiw")
|
||||
(match_test "!AVR_HAVE_ADIW"))
|
||||
(const_int 1)
|
||||
|
||||
] (const_int 0)))
|
||||
(and (eq_attr "isa" "2op")
|
||||
(match_test "!avr_shift_is_3op ()"))
|
||||
|
||||
(and (eq_attr "isa" "3op")
|
||||
(match_test "avr_shift_is_3op ()"))
|
||||
)
|
||||
(const_int 1)
|
||||
(const_int 0)))
|
||||
|
||||
|
||||
;; Define mode iterators
|
||||
@ -5257,28 +5259,31 @@
|
||||
;; "ashlsq3" "ashlusq3"
|
||||
;; "ashlsa3" "ashlusa3"
|
||||
(define_insn_and_split "ashl<mode>3"
|
||||
[(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r ,r,r,r")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r ,0,0,0")
|
||||
(match_operand:QI 2 "nop_general_operand" "r,L,P,O C31,K,n,Qm")))]
|
||||
[(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0")
|
||||
(match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,C4l,n,Qm")))]
|
||||
""
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel [(set (match_dup 0)
|
||||
(ashift:ALL4 (match_dup 1)
|
||||
(match_dup 2)))
|
||||
(clobber (reg:CC REG_CC))])])
|
||||
(clobber (reg:CC REG_CC))])]
|
||||
""
|
||||
[(set_attr "isa" "*,*,*,2op,3op,*,*")])
|
||||
|
||||
(define_insn "*ashl<mode>3"
|
||||
[(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r ,r,r,r")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r ,0,0,0")
|
||||
(match_operand:QI 2 "nop_general_operand" "r,L,P,O C31,K,n,Qm")))
|
||||
[(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0")
|
||||
(match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,C4l,n,Qm")))
|
||||
(clobber (reg:CC REG_CC))]
|
||||
"reload_completed"
|
||||
{
|
||||
return ashlsi3_out (insn, operands, NULL);
|
||||
}
|
||||
[(set_attr "length" "8,0,4,5,8,10,12")
|
||||
(set_attr "adjust_len" "ashlsi")])
|
||||
[(set_attr "length" "12")
|
||||
(set_attr "adjust_len" "ashlsi")
|
||||
(set_attr "isa" "*,*,*,2op,3op,*,*")])
|
||||
|
||||
;; Optimize if a scratch register from LD_REGS happens to be available.
|
||||
|
||||
@ -5380,12 +5385,72 @@
|
||||
[(set_attr "length" "0,2,2,4,10")
|
||||
(set_attr "adjust_len" "ashlhi")])
|
||||
|
||||
|
||||
;; Split shift into a byte shift and a residual bit shift (without scratch)
|
||||
(define_split
|
||||
[(parallel [(set (match_operand:ALL4 0 "register_operand")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand")
|
||||
(match_operand:QI 2 "const_int_operand")))
|
||||
(clobber (reg:CC REG_CC))])]
|
||||
"avr_split_bit_shift
|
||||
&& n_avr_fuse_add_executed >= 1
|
||||
&& satisfies_constraint_C4l (operands[2])"
|
||||
[(parallel [(set (match_dup 0)
|
||||
(ashift:ALL4 (match_dup 1)
|
||||
(match_dup 3)))
|
||||
(clobber (reg:CC REG_CC))])
|
||||
(parallel [(set (match_dup 0)
|
||||
(ashift:ALL4 (match_dup 0)
|
||||
(match_dup 4)))
|
||||
(clobber (reg:CC REG_CC))])]
|
||||
{
|
||||
if (avr_split_shift (operands, NULL_RTX, ASHIFT))
|
||||
DONE;
|
||||
else if (REGNO (operands[0]) == REGNO (operands[1]))
|
||||
FAIL;
|
||||
int offset = INTVAL (operands[2]);
|
||||
operands[3] = GEN_INT (offset & ~7);
|
||||
operands[4] = GEN_INT (offset & 7);
|
||||
})
|
||||
|
||||
;; Split shift into a byte shift and a residual bit shift (with scratch)
|
||||
(define_split
|
||||
[(parallel [(set (match_operand:ALL4 0 "register_operand")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand")
|
||||
(match_operand:QI 2 "const_int_operand")))
|
||||
(clobber (match_operand:QI 3 "scratch_or_d_register_operand"))
|
||||
(clobber (reg:CC REG_CC))])]
|
||||
"avr_split_bit_shift
|
||||
&& n_avr_fuse_add_executed >= 1
|
||||
&& satisfies_constraint_C4l (operands[2])"
|
||||
[(parallel [(set (match_dup 0)
|
||||
(ashift:ALL4 (match_dup 1)
|
||||
(match_dup 4)))
|
||||
(clobber (reg:CC REG_CC))])
|
||||
(parallel [(set (match_dup 0)
|
||||
(ashift:ALL4 (match_dup 0)
|
||||
(match_dup 5)))
|
||||
(clobber (match_dup 3))
|
||||
(clobber (reg:CC REG_CC))])]
|
||||
{
|
||||
if (avr_split_shift (operands, operands[3], ASHIFT))
|
||||
DONE;
|
||||
else if (REGNO (operands[0]) == REGNO (operands[1]))
|
||||
FAIL;
|
||||
int offset = INTVAL (operands[2]);
|
||||
operands[4] = GEN_INT (offset & ~7);
|
||||
operands[5] = GEN_INT (offset & 7);
|
||||
})
|
||||
|
||||
|
||||
(define_peephole2
|
||||
[(match_scratch:QI 3 "d")
|
||||
(parallel [(set (match_operand:ALL4 0 "register_operand" "")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "")
|
||||
(match_operand:QI 2 "const_int_operand" "")))
|
||||
(clobber (reg:CC REG_CC))])]
|
||||
(clobber (reg:CC REG_CC))])
|
||||
;; $3 must not overlap with the output of the insn above.
|
||||
(match_dup 3)]
|
||||
""
|
||||
[(parallel [(set (match_dup 0)
|
||||
(ashift:ALL4 (match_dup 1)
|
||||
@ -5393,35 +5458,20 @@
|
||||
(clobber (match_dup 3))
|
||||
(clobber (reg:CC REG_CC))])])
|
||||
|
||||
;; "*ashlsi3_const"
|
||||
;; "*ashlsq3_const" "*ashlusq3_const"
|
||||
;; "*ashlsa3_const" "*ashlusa3_const"
|
||||
(define_insn_and_split "*ashl<mode>3_const_split"
|
||||
[(set (match_operand:ALL4 0 "register_operand" "=r,r,r ,r")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r ,0")
|
||||
(match_operand:QI 2 "const_int_operand" "L,P,O C31,n")))
|
||||
(clobber (match_scratch:QI 3 "=X,X,X ,&d"))]
|
||||
"reload_completed"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel [(set (match_dup 0)
|
||||
(ashift:ALL4 (match_dup 1)
|
||||
(match_dup 2)))
|
||||
(clobber (match_dup 3))
|
||||
(clobber (reg:CC REG_CC))])])
|
||||
|
||||
(define_insn "*ashl<mode>3_const"
|
||||
[(set (match_operand:ALL4 0 "register_operand" "=r,r,r ,r")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r ,0")
|
||||
(match_operand:QI 2 "const_int_operand" "L,P,O C31,n")))
|
||||
(clobber (match_scratch:QI 3 "=X,X,X ,&d"))
|
||||
[(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r ,r")
|
||||
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,0 ,r ,0")
|
||||
(match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4l,C4l,n")))
|
||||
(clobber (match_operand:QI 3 "scratch_or_d_register_operand" "=X ,X ,&d ,&d ,&d"))
|
||||
(clobber (reg:CC REG_CC))]
|
||||
"reload_completed"
|
||||
{
|
||||
return ashlsi3_out (insn, operands, NULL);
|
||||
}
|
||||
[(set_attr "length" "0,4,5,10")
|
||||
(set_attr "adjust_len" "ashlsi")])
|
||||
[(set_attr "length" "10")
|
||||
(set_attr "adjust_len" "ashlsi")
|
||||
(set_attr "isa" "*,*,2op,3op,*")])
|
||||
|
||||
(define_expand "ashlpsi3"
|
||||
[(parallel [(set (match_operand:PSI 0 "register_operand" "")
|
||||
|
@ -94,6 +94,10 @@ maccumulate-args
|
||||
Target Mask(ACCUMULATE_OUTGOING_ARGS) Optimization
|
||||
Optimization. Accumulate outgoing function arguments and acquire/release the needed stack space for outgoing function arguments in function prologue/epilogue. Without this option, outgoing arguments are pushed before calling a function and popped afterwards. This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf.
|
||||
|
||||
msplit-bit-shift
|
||||
Target Var(avr_split_bit_shift) Init(0) Optimization
|
||||
Optimization. Split shifts of 4-byte values into a byte shift and a residual bit shift.
|
||||
|
||||
mstrict-X
|
||||
Target Var(avr_strict_X) Init(0) Optimization
|
||||
Optimization. When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.
|
||||
|
@ -263,6 +263,22 @@
|
||||
(and (match_code "const_int,symbol_ref,const")
|
||||
(match_test "const_0mod256_operand (op, HImode)")))
|
||||
|
||||
(define_constraint "C4a"
|
||||
"A constant integer shift offset for a 4-byte ASHIFTRT that's opt to being split."
|
||||
(and (match_code "const_int")
|
||||
(match_test "avr_split_shift_p (4, ival, ASHIFTRT)")))
|
||||
|
||||
(define_constraint "C4r"
|
||||
"A constant integer shift offset for a 4-byte LSHIFTRT that's opt to being split."
|
||||
(and (match_code "const_int")
|
||||
(match_test "avr_split_shift_p (4, ival, LSHIFTRT)")))
|
||||
|
||||
(define_constraint "C4l"
|
||||
"A constant integer shift offset for a 4-byte ASHIFT that's opt to being split."
|
||||
(and (match_code "const_int")
|
||||
(match_test "avr_split_shift_p (4, ival, ASHIFT)")))
|
||||
|
||||
|
||||
;; CONST_FIXED is no element of 'n' so cook our own.
|
||||
;; "i" or "s" would match but because the insn uses iterators that cover
|
||||
;; INT_MODE, "i" or "s" is not always possible.
|
||||
|
@ -65,7 +65,7 @@ public:
|
||||
void execute_early_local_passes ();
|
||||
unsigned int execute_pass_mode_switching ();
|
||||
|
||||
/* Various passes are manually cloned by epiphany. */
|
||||
/* Various passes are manually cloned by avr and epiphany. */
|
||||
opt_pass *get_pass_split_all_insns () const {
|
||||
return pass_split_all_insns_1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user