AVR: target/117726 - Better optimizations of ASHIFT:SI insns.

This patch improves the 4-byte ASHIFT insns.
1) It adds a "r,r,C15" alternative for improved long << 15.
2) It adds 3-operand alternatives (depending on options) and
   splits them after peephole2 / before avr-fuse-move into
   a 3-operand byte shift and a 2-operand residual bit shift.
For better control, it introduces a new option -msplit-bit-shift
that is activated by default at -O2 and higher.  Splitting 2) is
also performed at -Os, but not at -Oz.
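
As an illustration (hypothetical C example, not part of the commit):

    unsigned long fun (unsigned long x)
    {
        // With -msplit-bit-shift, this shift may be split into a
        // 3-operand byte shift by 16 (just a word move) followed by a
        // 2-operand residual bit shift by 1, so the source and the
        // destination registers need not overlap.
        return x << 17;
    }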

	PR target/117726
gcc/
	* config/avr/avr.opt (-msplit-bit-shift): Add new optimization option.
	* common/config/avr/avr-common.cc (avr_option_optimization_table)
	[OPT_LEVELS_2_PLUS]: Turn on -msplit-bit-shift.
	* config/avr/avr.h (machine_function.n_avr_fuse_add_executed):
	New bool component.
	* config/avr/avr.md (attr "isa") <2op, 3op>: Add new values.
	(attr "enabled"): Handle them.
	(ashlsi3, *ashlsi3, *ashlsi3_const): Add "r,r,C15" alternative.
	Add "r,0,C4l" and "r,r,C4l" alternatives (depending on 2op / 3op).
	(define_split) [avr_split_bit_shift]: Add 2 new ashift:ALL4 splitters.
	(define_peephole2) [ashift:ALL4]: Add (match_dup 3) so that the scratch
	won't overlap with the output operand of the matched insn.
	(*ashl<mode>3_const_split): Remove unused ashift:ALL4 splitter.
	* config/avr/avr-passes.cc (emit_valid_insn)
	(emit_valid_move_clobbercc): Move out of anonymous namespace.
	(make_avr_pass_fuse_add) <gate>: Don't override.
	<execute>: Set n_avr_fuse_add_executed according to
	func->machine->n_avr_fuse_add_executed.
	(pass_data avr_pass_data_split_after_peephole2): New object.
	(avr_pass_split_after_peephole2): New rtl_opt_pass.
	(avr_emit_shift): New static function.
	(avr_shift_is_3op, avr_split_shift_p, avr_split_shift)
	(make_avr_pass_split_after_peephole2): New functions.
	* config/avr/avr-passes.def (avr_pass_split_after_peephole2):
	Insert new pass after pass_peephole2.
	* config/avr/avr-protos.h
	(n_avr_fuse_add_executed, avr_shift_is_3op, avr_split_shift_p)
	(avr_split_shift, avr_optimize_size_level)
	(make_avr_pass_split_after_peephole2): New prototypes.
	* config/avr/avr.cc (n_avr_fuse_add_executed): New global variable.
	(avr_optimize_size_level): New function.
	(avr_set_current_function): Set n_avr_fuse_add_executed
	according to cfun->machine->n_avr_fuse_add_executed.
	(ashlsi3_out) [case 15]: Output optimized code for this offset.
	(avr_rtx_costs_1) [ASHIFT, SImode]: Adjust costs of offsets 15, 16.
	* config/avr/constraints.md (C4a, C4l, C4r): New constraints.
	* pass_manager.h (pass_manager): Adjust comments.
Georg-Johann Lay  2024-11-20 12:25:18 +01:00
parent 938094abec, commit 873cffc792
10 changed files with 407 additions and 111 deletions

common/config/avr/avr-common.cc

@@ -39,6 +39,7 @@ static const struct default_options avr_option_optimization_table[] =
{ OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 },
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 },
{ OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
{ OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
// Stick to the "old" placement of the subreg lowering pass.
{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
/* Allow optimizer to introduce store data races. This used to be the

config/avr/avr-passes.cc

@@ -49,6 +49,34 @@
#define FIRST_GPR (AVR_TINY ? REG_18 : REG_2)
// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
static rtx_insn *
emit_valid_insn (rtx pat)
{
rtx_insn *insn = emit_insn (pat);
if (! valid_insn_p (insn)) // Also runs recog().
fatal_insn ("emit unrecognizable insn", insn);
return insn;
}
// Emit a single_set with an optional scratch operand. This function
// asserts that the new insn is valid and recognized.
static rtx_insn *
emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
{
rtx pat = scratch
? gen_gen_move_clobbercc_scratch (dest, src, scratch)
: gen_gen_move_clobbercc (dest, src);
return emit_valid_insn (pat);
}
namespace
{
@@ -116,31 +144,6 @@ single_set_with_scratch (rtx_insn *insn, int &regno_scratch)
return single_set (insn);
}
// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
static rtx_insn *
emit_valid_insn (rtx pat)
{
rtx_insn *insn = emit_insn (pat);
if (! valid_insn_p (insn)) // Also runs recog().
fatal_insn ("emit unrecognizable insn", insn);
return insn;
}
// Emit a single_set with an optional scratch operand. This function
// asserts that the new insn is valid and recognized.
static rtx_insn *
emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
{
rtx pat = scratch
? gen_gen_move_clobbercc_scratch (dest, src, scratch)
: gen_gen_move_clobbercc (dest, src);
return emit_valid_insn (pat);
}
// One bit for each GRP in REG_0 ... REG_31.
using gprmask_t = uint32_t;
@@ -4213,12 +4216,17 @@ public:
return make_avr_pass_fuse_add (m_ctxt);
}
bool gate (function *) final override
unsigned int execute (function *func) final override
{
return optimize && avr_fuse_add > 0;
func->machine->n_avr_fuse_add_executed += 1;
n_avr_fuse_add_executed = func->machine->n_avr_fuse_add_executed;
if (optimize && avr_fuse_add > 0)
return execute1 (func);
return 0;
}
unsigned int execute (function *) final override;
unsigned int execute1 (function *);
struct Some_Insn
{
@@ -4697,7 +4705,7 @@ avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn &add)
as PRE_DEC + PRE_DEC for two adjacent locations. */
unsigned int
avr_pass_fuse_add::execute (function *func)
avr_pass_fuse_add::execute1 (function *func)
{
df_note_add_problem ();
df_analyze ();
@@ -4769,6 +4777,146 @@ avr_pass_fuse_add::execute (function *func)
}
//////////////////////////////////////////////////////////////////////////////
// Split insns after peephole2 / before avr-fuse-move.
static const pass_data avr_pass_data_split_after_peephole2 =
{
RTL_PASS, // type
"", // name (will be patched)
OPTGROUP_NONE, // optinfo_flags
TV_DF_SCAN, // tv_id
0, // properties_required
0, // properties_provided
0, // properties_destroyed
0, // todo_flags_start
0 // todo_flags_finish
};
class avr_pass_split_after_peephole2 : public rtl_opt_pass
{
public:
avr_pass_split_after_peephole2 (gcc::context *ctxt, const char *name)
: rtl_opt_pass (avr_pass_data_split_after_peephole2, ctxt)
{
this->name = name;
}
unsigned int execute (function *) final override
{
if (avr_shift_is_3op ())
split_all_insns ();
return 0;
}
}; // avr_pass_split_after_peephole2
} // anonymous namespace
/* Whether some shift insn alternatives are a 3-operand insn or a
2-operand insn.  The 3op alternatives allow the source and the
destination register of the shift to be different right from the
start, because the splitter will split the 3op shift into a 3op byte
shift and a 2op residual bit shift.
(When the residual shift has an offset of one less than the bitsize,
then the residual shift is also a 3op insn.)  */
bool
avr_shift_is_3op ()
{
// Don't split for OPTIMIZE_SIZE_MAX (-Oz).
// For OPTIMIZE_SIZE_BALANCED (-Os), we still split because
// the size overhead (if any) is marginal.
return (avr_split_bit_shift
&& optimize > 0
&& avr_optimize_size_level () < OPTIMIZE_SIZE_MAX);
}
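// Illustration (added comment): this returns true at -O2 and higher,
// where -msplit-bit-shift is on by default, and also at -Os; it
// returns false at -Oz (OPTIMIZE_SIZE_MAX) and whenever the option
// is off, so only the classic 2-operand alternatives remain there.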
/* Implement constraints `C4a', `C4l' and `C4r'.
Whether we split a shift of N_BYTES bytes with code CODE in { ASHIFTRT,
LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift.  */
bool
avr_split_shift_p (int n_bytes, int offset, rtx_code)
{
gcc_assert (n_bytes == 4);
return (avr_shift_is_3op ()
&& offset % 8 != 0 && IN_RANGE (offset, 17, 30));
}
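// Illustration (added comment): for a 4-byte ASHIFT this accepts
// offsets 17..23 and 25..30.  Byte-aligned offsets (16, 24) are pure
// byte shifts that need no residual bit shift, and offsets 15 and 31
// have dedicated alternatives (C15, C31).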
static void
avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
{
machine_mode mode = GET_MODE (dest);
rtx shift;
if (off == GET_MODE_BITSIZE (mode) - 1)
{
shift = gen_rtx_fmt_ee (code, mode, src, GEN_INT (off));
}
else
{
if (REGNO (dest) != REGNO (src))
emit_valid_move_clobbercc (dest, src);
shift = gen_rtx_fmt_ee (code, mode, dest, GEN_INT (off));
}
emit_valid_move_clobbercc (dest, shift, scratch);
}
/* Worker for define_split that runs when -msplit-bit-shift is on.
Split a shift of code CODE into a 3op byte shift and a residual bit shift.
Return 'true' when a split has been performed and insns have been emitted.
Otherwise, return 'false'. */
bool
avr_split_shift (rtx xop[], rtx scratch, rtx_code code)
{
scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX;
rtx dest = xop[0];
rtx src = xop[1];
int ioff = INTVAL (xop[2]);
gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4);
if (code == ASHIFT)
{
if (ioff >= 25)
{
rtx dst8 = avr_byte (dest, 3);
rtx src8 = avr_byte (src, 0);
avr_emit_shift (code, dst8, src8, ioff % 8, NULL_RTX);
emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
return true;
}
else if (ioff >= 17)
{
rtx dst16 = avr_word (dest, 2);
rtx src16 = avr_word (src, 0);
avr_emit_shift (code, dst16, src16, ioff % 16, scratch);
emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
return true;
}
else
gcc_unreachable ();
}
else
gcc_unreachable ();
return false;
}
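// Worked example (added comment): for an offset of 26 the ioff >= 25
// path emits
//     byte 3 of dest = byte 0 of src << 2   (26 % 8 == 2)
//     byte 2 of dest = 0
//     low word of dest = 0
// which computes dest = src << 26 without requiring dest and src to
// overlap.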
namespace
{
//////////////////////////////////////////////////////////////////////////////
// Determine whether an ISR may use the __gcc_isr pseudo-instruction.
@@ -5125,3 +5273,11 @@ make_avr_pass_fuse_move (gcc::context *ctxt)
{
return new avr_pass_fuse_move (ctxt, "avr-fuse-move");
}
// Split insns after peephole2 / before avr-fuse-move.
rtl_opt_pass *
make_avr_pass_split_after_peephole2 (gcc::context *ctxt)
{
return new avr_pass_split_after_peephole2 (ctxt, "avr-split-after-peephole2");
}

config/avr/avr-passes.def

@@ -104,3 +104,10 @@ INSERT_PASS_BEFORE (pass_split_after_reload, 1, avr_pass_ifelse);
- The RTL peepholer may optimize insns involving lower registers. */
INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_fuse_move);
/* Run an instance of post-reload split prior to avr-fuse-move.
The purpose is to split 3-operand shift insns into a 3-operand shift
with a byte offset, and a 2-operand residual shift after
RTL peepholes but prior to the avr-fuse-move pass. */
INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_split_after_peephole2);

config/avr/avr-protos.h

@@ -169,6 +169,13 @@ extern rtx cc_reg_rtx;
extern rtx ccn_reg_rtx;
extern rtx cczn_reg_rtx;
extern int n_avr_fuse_add_executed;
extern bool avr_shift_is_3op ();
extern bool avr_split_shift_p (int n_bytes, int offset, rtx_code);
extern bool avr_split_shift (rtx xop[], rtx xscratch, rtx_code);
extern int avr_optimize_size_level ();
#endif /* RTX_CODE */
#ifdef REAL_VALUE_TYPE
@@ -188,6 +195,7 @@ extern rtl_opt_pass *make_avr_pass_pre_proep (gcc::context *);
extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
#ifdef RTX_CODE
extern bool avr_casei_sequence_check_operands (rtx *xop);
extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);

config/avr/avr.cc

@@ -229,6 +229,12 @@ bool avr_need_clear_bss_p = false;
bool avr_need_copy_data_p = false;
bool avr_has_rodata_p = false;
/* Counts how often pass avr-fuse-add has been executed.  It is kept in
sync with cfun->machine->n_avr_fuse_add_executed and serves as an
insn condition for shift insn splitters. */
int n_avr_fuse_add_executed = 0;
/* Transform UP into lowercase and write the result to LO.
You must provide enough space for LO. Return LO. */
@@ -526,6 +532,14 @@ avr_option_override (void)
}
int avr_optimize_size_level ()
{
return cfun && cfun->decl
? opt_for_fn (cfun->decl, optimize_size)
: optimize_size;
}
/* Implement `INIT_EXPANDERS'. */
/* The function works like a singleton. */
@@ -823,8 +837,12 @@ avr_set_current_function (tree decl)
if (decl == NULL_TREE
|| current_function_decl == NULL_TREE
|| current_function_decl == error_mark_node
|| ! cfun->machine
|| cfun->machine->attributes_checked_p)
|| ! cfun->machine)
return;
n_avr_fuse_add_executed = cfun->machine->n_avr_fuse_add_executed;
if (cfun->machine->attributes_checked_p)
return;
location_t loc = DECL_SOURCE_LOCATION (decl);
@@ -6590,7 +6608,7 @@ avr_out_cmp_ext (rtx xop[], rtx_code code, int *plen)
/* Generate asm equivalent for various shifts. This only handles cases
that are not already carefully hand-optimized in ?sh??i3_out.
that are not already carefully hand-optimized in ?sh<mode>3_out.
OPERANDS[0] resp. %0 in TEMPL is the operand to be shifted.
OPERANDS[2] is the shift count as CONST_INT, MEM or REG.
@@ -7042,6 +7060,7 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
{
int reg0 = true_regnum (operands[0]);
int reg1 = true_regnum (operands[1]);
bool reg1_unused_after_p = reg_unused_after (insn, operands[1]);
if (plen)
*plen = 0;
@@ -7070,6 +7089,30 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
"mov %B0,%A1" CR_TAB
"mov %C0,%B1" CR_TAB
"mov %D0,%C1", operands, plen, 4);
case 15:
avr_asm_len (reg1_unused_after_p
? "lsr %C1"
: "bst %C1,0", operands, plen, 1);
if (reg0 + 2 != reg1)
{
if (AVR_HAVE_MOVW)
avr_asm_len ("movw %C0,%A1", operands, plen, 1);
else
avr_asm_len ("mov %C0,%A1" CR_TAB
"mov %D0,%B1", operands, plen, 2);
}
return reg1_unused_after_p
? avr_asm_len ("clr %A0" CR_TAB
"clr %B0" CR_TAB
"ror %D0" CR_TAB
"ror %C0" CR_TAB
"ror %B0", operands, plen, 5)
: avr_asm_len ("clr %A0" CR_TAB
"clr %B0" CR_TAB
"lsr %D0" CR_TAB
"ror %C0" CR_TAB
"ror %B0" CR_TAB
"bld %D0,7", operands, plen, 6);
case 16:
if (reg0 + 2 == reg1)
return avr_asm_len ("clr %B0" CR_TAB
@@ -12392,9 +12435,14 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
break;
case 1:
case 8:
case 16:
*total = COSTS_N_INSNS (4);
break;
case 15:
*total = COSTS_N_INSNS (8 - AVR_HAVE_MOVW);
break;
case 16:
*total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
break;
case 31:
*total = COSTS_N_INSNS (6);
break;

config/avr/avr.h

@@ -610,6 +610,12 @@ struct GTY(()) machine_function
/* 'true' if this function references .L__stack_usage like with
__builtin_return_address. */
bool use_L__stack_usage;
/* Counts how many times the execute() method of the avr-fuse-add
pass has been invoked.  The count is increased even when the
optimization itself is not run.  The purpose of this variable is to
provide information about where in the pass sequence we are. */
int n_avr_fuse_add_executed;
};
/* AVR does not round pushes, but the existence of this macro is

config/avr/avr.md

@@ -184,73 +184,75 @@
;; no_xmega: non-XMEGA core xmega : XMEGA core
;; no_adiw: ISA has no ADIW, SBIW adiw : ISA has ADIW, SBIW
;; The following ISA attributes are actually not architecture specific,
;; but depend on (optimization) options. This is because the "enabled"
;; attribute can't depend on more than one other attribute. This means
;; that 2op and 3op must work for all ISAs, and hence a 'flat' attribute
;; scheme can be used (as opposed to a true cartesian product).
;; 2op : insn is a 2-operand insn 3op : insn is a 3-operand insn
(define_attr "isa"
"mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
no_adiw,adiw,
2op,3op,
standard"
(const_string "standard"))
(define_attr "enabled" ""
(cond [(eq_attr "isa" "standard")
(const_int 1)
(if_then_else
(ior (eq_attr "isa" "standard")
(and (eq_attr "isa" "mov")
(match_test "!AVR_HAVE_MOVW"))
(const_int 1)
(and (eq_attr "isa" "mov")
(match_test "!AVR_HAVE_MOVW"))
(and (eq_attr "isa" "movw")
(match_test "AVR_HAVE_MOVW"))
(const_int 1)
(and (eq_attr "isa" "movw")
(match_test "AVR_HAVE_MOVW"))
(and (eq_attr "isa" "rjmp")
(match_test "!AVR_HAVE_JMP_CALL"))
(const_int 1)
(and (eq_attr "isa" "rjmp")
(match_test "!AVR_HAVE_JMP_CALL"))
(and (eq_attr "isa" "jmp")
(match_test "AVR_HAVE_JMP_CALL"))
(const_int 1)
(and (eq_attr "isa" "jmp")
(match_test "AVR_HAVE_JMP_CALL"))
(and (eq_attr "isa" "ijmp")
(match_test "!AVR_HAVE_EIJMP_EICALL"))
(const_int 1)
(and (eq_attr "isa" "ijmp")
(match_test "!AVR_HAVE_EIJMP_EICALL"))
(and (eq_attr "isa" "eijmp")
(match_test "AVR_HAVE_EIJMP_EICALL"))
(const_int 1)
(and (eq_attr "isa" "eijmp")
(match_test "AVR_HAVE_EIJMP_EICALL"))
(and (eq_attr "isa" "lpm")
(match_test "!AVR_HAVE_LPMX"))
(const_int 1)
(and (eq_attr "isa" "lpm")
(match_test "!AVR_HAVE_LPMX"))
(and (eq_attr "isa" "lpmx")
(match_test "AVR_HAVE_LPMX"))
(const_int 1)
(and (eq_attr "isa" "lpmx")
(match_test "AVR_HAVE_LPMX"))
(and (eq_attr "isa" "elpm")
(match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
(const_int 1)
(and (eq_attr "isa" "elpm")
(match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
(and (eq_attr "isa" "elpmx")
(match_test "AVR_HAVE_ELPMX"))
(const_int 1)
(and (eq_attr "isa" "elpmx")
(match_test "AVR_HAVE_ELPMX"))
(and (eq_attr "isa" "xmega")
(match_test "AVR_XMEGA"))
(const_int 1)
(and (eq_attr "isa" "xmega")
(match_test "AVR_XMEGA"))
(and (eq_attr "isa" "no_xmega")
(match_test "!AVR_XMEGA"))
(const_int 1)
(and (eq_attr "isa" "no_xmega")
(match_test "!AVR_XMEGA"))
(and (eq_attr "isa" "adiw")
(match_test "AVR_HAVE_ADIW"))
(const_int 1)
(and (eq_attr "isa" "adiw")
(match_test "AVR_HAVE_ADIW"))
(and (eq_attr "isa" "no_adiw")
(match_test "!AVR_HAVE_ADIW"))
(const_int 1)
(and (eq_attr "isa" "no_adiw")
(match_test "!AVR_HAVE_ADIW"))
] (const_int 0)))
(and (eq_attr "isa" "2op")
(match_test "!avr_shift_is_3op ()"))
(and (eq_attr "isa" "3op")
(match_test "avr_shift_is_3op ()"))
)
(const_int 1)
(const_int 0)))
;; Define mode iterators
@@ -5257,28 +5259,31 @@
;; "ashlsq3" "ashlusq3"
;; "ashlsa3" "ashlusa3"
(define_insn_and_split "ashl<mode>3"
[(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r ,r,r,r")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r ,0,0,0")
(match_operand:QI 2 "nop_general_operand" "r,L,P,O C31,K,n,Qm")))]
[(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0")
(match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,C4l,n,Qm")))]
""
"#"
"&& reload_completed"
[(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 1)
(match_dup 2)))
(clobber (reg:CC REG_CC))])])
(clobber (reg:CC REG_CC))])]
""
[(set_attr "isa" "*,*,*,2op,3op,*,*")])
(define_insn "*ashl<mode>3"
[(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r ,r,r,r")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r ,0,0,0")
(match_operand:QI 2 "nop_general_operand" "r,L,P,O C31,K,n,Qm")))
[(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0")
(match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,C4l,n,Qm")))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
return ashlsi3_out (insn, operands, NULL);
}
[(set_attr "length" "8,0,4,5,8,10,12")
(set_attr "adjust_len" "ashlsi")])
[(set_attr "length" "12")
(set_attr "adjust_len" "ashlsi")
(set_attr "isa" "*,*,*,2op,3op,*,*")])
;; Optimize if a scratch register from LD_REGS happens to be available.
@@ -5380,12 +5385,72 @@
[(set_attr "length" "0,2,2,4,10")
(set_attr "adjust_len" "ashlhi")])
;; Split shift into a byte shift and a residual bit shift (without scratch)
(define_split
[(parallel [(set (match_operand:ALL4 0 "register_operand")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC REG_CC))])]
"avr_split_bit_shift
&& n_avr_fuse_add_executed >= 1
&& satisfies_constraint_C4l (operands[2])"
[(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 1)
(match_dup 3)))
(clobber (reg:CC REG_CC))])
(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 0)
(match_dup 4)))
(clobber (reg:CC REG_CC))])]
{
if (avr_split_shift (operands, NULL_RTX, ASHIFT))
DONE;
else if (REGNO (operands[0]) == REGNO (operands[1]))
FAIL;
int offset = INTVAL (operands[2]);
operands[3] = GEN_INT (offset & ~7);
operands[4] = GEN_INT (offset & 7);
})
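;; Illustration (added comment): the fall-through decomposes the offset
;; into a byte part and a bit part, e.g. offset 19 -> a byte shift by
;; 16 (match_dup 3) plus a residual bit shift by 3 (match_dup 4).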
;; Split shift into a byte shift and a residual bit shift (with scratch)
(define_split
[(parallel [(set (match_operand:ALL4 0 "register_operand")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(clobber (match_operand:QI 3 "scratch_or_d_register_operand"))
(clobber (reg:CC REG_CC))])]
"avr_split_bit_shift
&& n_avr_fuse_add_executed >= 1
&& satisfies_constraint_C4l (operands[2])"
[(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 1)
(match_dup 4)))
(clobber (reg:CC REG_CC))])
(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 0)
(match_dup 5)))
(clobber (match_dup 3))
(clobber (reg:CC REG_CC))])]
{
if (avr_split_shift (operands, operands[3], ASHIFT))
DONE;
else if (REGNO (operands[0]) == REGNO (operands[1]))
FAIL;
int offset = INTVAL (operands[2]);
operands[4] = GEN_INT (offset & ~7);
operands[5] = GEN_INT (offset & 7);
})
(define_peephole2
[(match_scratch:QI 3 "d")
(parallel [(set (match_operand:ALL4 0 "register_operand" "")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "")
(match_operand:QI 2 "const_int_operand" "")))
(clobber (reg:CC REG_CC))])]
(clobber (reg:CC REG_CC))])
;; $3 must not overlap with the output of the insn above.
(match_dup 3)]
""
[(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 1)
@@ -5393,35 +5458,20 @@
(clobber (match_dup 3))
(clobber (reg:CC REG_CC))])])
;; "*ashlsi3_const"
;; "*ashlsq3_const" "*ashlusq3_const"
;; "*ashlsa3_const" "*ashlusa3_const"
(define_insn_and_split "*ashl<mode>3_const_split"
[(set (match_operand:ALL4 0 "register_operand" "=r,r,r ,r")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r ,0")
(match_operand:QI 2 "const_int_operand" "L,P,O C31,n")))
(clobber (match_scratch:QI 3 "=X,X,X ,&d"))]
"reload_completed"
"#"
"&& reload_completed"
[(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 1)
(match_dup 2)))
(clobber (match_dup 3))
(clobber (reg:CC REG_CC))])])
(define_insn "*ashl<mode>3_const"
[(set (match_operand:ALL4 0 "register_operand" "=r,r,r ,r")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r ,0")
(match_operand:QI 2 "const_int_operand" "L,P,O C31,n")))
(clobber (match_scratch:QI 3 "=X,X,X ,&d"))
[(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r ,r")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,0 ,r ,0")
(match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4l,C4l,n")))
(clobber (match_operand:QI 3 "scratch_or_d_register_operand" "=X ,X ,&d ,&d ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
return ashlsi3_out (insn, operands, NULL);
}
[(set_attr "length" "0,4,5,10")
(set_attr "adjust_len" "ashlsi")])
[(set_attr "length" "10")
(set_attr "adjust_len" "ashlsi")
(set_attr "isa" "*,*,2op,3op,*")])
(define_expand "ashlpsi3"
[(parallel [(set (match_operand:PSI 0 "register_operand" "")

config/avr/avr.opt

@@ -94,6 +94,10 @@ maccumulate-args
Target Mask(ACCUMULATE_OUTGOING_ARGS) Optimization
Optimization. Accumulate outgoing function arguments and acquire/release the needed stack space for outgoing function arguments in function prologue/epilogue. Without this option, outgoing arguments are pushed before calling a function and popped afterwards. This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf.
msplit-bit-shift
Target Var(avr_split_bit_shift) Init(0) Optimization
Optimization. Split shifts of 4-byte values into a byte shift and a residual bit shift.
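
A hypothetical invocation (the MCU and file name are assumptions):

    avr-gcc -mmcu=atmega328p -O2 -S shift.c                        (splitting is on by default)
    avr-gcc -mmcu=atmega328p -O2 -mno-split-bit-shift -S shift.c   (splitting is off)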
mstrict-X
Target Var(avr_strict_X) Init(0) Optimization
Optimization. When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.

config/avr/constraints.md

@@ -263,6 +263,22 @@
(and (match_code "const_int,symbol_ref,const")
(match_test "const_0mod256_operand (op, HImode)")))
(define_constraint "C4a"
"A constant integer shift offset for a 4-byte ASHIFTRT that's opt to being split."
(and (match_code "const_int")
(match_test "avr_split_shift_p (4, ival, ASHIFTRT)")))
(define_constraint "C4r"
"A constant integer shift offset for a 4-byte LSHIFTRT that's opt to being split."
(and (match_code "const_int")
(match_test "avr_split_shift_p (4, ival, LSHIFTRT)")))
(define_constraint "C4l"
"A constant integer shift offset for a 4-byte ASHIFT that's opt to being split."
(and (match_code "const_int")
(match_test "avr_split_shift_p (4, ival, ASHIFT)")))
;; CONST_FIXED is no element of 'n' so cook our own.
;; "i" or "s" would match but because the insn uses iterators that cover
;; INT_MODE, "i" or "s" is not always possible.

pass_manager.h

@@ -65,7 +65,7 @@ public:
void execute_early_local_passes ();
unsigned int execute_pass_mode_switching ();
/* Various passes are manually cloned by epiphany. */
/* Various passes are manually cloned by avr and epiphany. */
opt_pass *get_pass_split_all_insns () const {
return pass_split_all_insns_1;
}