aarch64: Add masked-load else operands.
This adds zero else operands to masked loads and their intrinsics.
I needed to adjust more than initially thought because we rely on
combine for several instructions, so a change in a "base" pattern
needs to propagate to all of them.

gcc/ChangeLog:

	* config/aarch64/aarch64-sve-builtins-base.cc: Add else handling.
	* config/aarch64/aarch64-sve-builtins.cc
	(function_expander::use_contiguous_load_insn): Ditto.
	* config/aarch64/aarch64-sve-builtins.h: Add else operand to
	contiguous load.
	* config/aarch64/aarch64-sve.md
	(@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>):
	Split and add else operand.
	(@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>): Ditto.
	(*aarch64_load_<ANY_EXTEND:optab>_mov<SVE_HSDI:mode><SVE_PARTIAL_I:mode>): Ditto.
	* config/aarch64/aarch64-sve2.md: Ditto.
	* config/aarch64/iterators.md: Remove unused iterators.
	* config/aarch64/predicates.md (aarch64_maskload_else_operand):
	Add zero else operand.
Parent: 634ae740f5
Commit: a166a6ccdc
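Background note (illustration only, not part of the patch): at the C level the
SVE ACLE already defines predicated loads to zero the inactive elements, which
is exactly the value the new else operand makes explicit further down the
pipeline. A minimal sketch, assuming an SVE-enabled compile (e.g.
-march=armv8-a+sve):

    #include <stdint.h>
    #include <arm_sve.h>

    /* Elements where PG is false come back as zero; with this patch the
       builtin folder expresses that as a fourth, zero "else" argument to
       the internal .MASK_LOAD call instead of leaving it implicit.  */
    svint32_t
    load_active (svbool_t pg, const int32_t *ptr)
    {
      return svld1_s32 (pg, ptr);   /* typically: ld1w z0.s, p0/z, [x0] */
    }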
gcc/config/aarch64/aarch64-sve-builtins-base.cc

@@ -1524,11 +1524,12 @@ public:
  gimple_seq stmts = NULL;
  tree pred = f.convert_pred (stmts, vectype, 0);
  tree base = f.fold_contiguous_base (stmts, vectype);
+ tree els = build_zero_cst (vectype);
  gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

  tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
- gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
- base, cookie, pred);
+ gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
+ base, cookie, pred, els);
  gimple_call_set_lhs (new_call, f.lhs);
  return new_call;
  }

@@ -1542,7 +1543,7 @@ public:
  e.vector_mode (0), e.gp_mode (0));
  else
  icode = code_for_aarch64 (UNSPEC_LD1_COUNT, e.tuple_mode (0));
- return e.use_contiguous_load_insn (icode);
+ return e.use_contiguous_load_insn (icode, true);
  }
  };

@@ -1555,10 +1556,10 @@ public:
  rtx
  expand (function_expander &e) const override
  {
- insn_code icode = code_for_aarch64_load (UNSPEC_LD1_SVE, extend_rtx_code (),
+ insn_code icode = code_for_aarch64_load (extend_rtx_code (),
  e.vector_mode (0),
  e.memory_vector_mode ());
- return e.use_contiguous_load_insn (icode);
+ return e.use_contiguous_load_insn (icode, true);
  }
  };

@@ -1577,6 +1578,8 @@ public:
  e.prepare_gather_address_operands (1);
  /* Put the predicate last, as required by mask_gather_load_optab. */
  e.rotate_inputs_left (0, 5);
+ /* Add the else operand. */
+ e.args.quick_push (CONST0_RTX (e.vector_mode (0)));
  machine_mode mem_mode = e.memory_vector_mode ();
  machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
  insn_code icode = convert_optab_handler (mask_gather_load_optab,

@@ -1600,6 +1603,8 @@ public:
  e.rotate_inputs_left (0, 5);
  /* Add a constant predicate for the extension rtx. */
  e.args.quick_push (CONSTM1_RTX (VNx16BImode));
+ /* Add the else operand. */
+ e.args.quick_push (CONST0_RTX (e.vector_mode (1)));
  insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
  e.vector_mode (0),
  e.memory_vector_mode ());

@@ -1742,6 +1747,7 @@ public:
  /* Get the predicate and base pointer. */
  gimple_seq stmts = NULL;
  tree pred = f.convert_pred (stmts, vectype, 0);
+ tree els = build_zero_cst (vectype);
  tree base = f.fold_contiguous_base (stmts, vectype);
  gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

@@ -1760,8 +1766,8 @@ public:

  /* Emit the load itself. */
  tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
- gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
- base, cookie, pred);
+ gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
+ base, cookie, pred, els);
  gimple_call_set_lhs (new_call, lhs_array);
  gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT);

@@ -1774,7 +1780,7 @@ public:
  machine_mode tuple_mode = e.result_mode ();
  insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
  tuple_mode, e.vector_mode (0));
- return e.use_contiguous_load_insn (icode);
+ return e.use_contiguous_load_insn (icode, true);
  }
  };

@@ -1845,7 +1851,7 @@ public:
  ? code_for_aarch64_ldnt1 (e.vector_mode (0))
  : code_for_aarch64 (UNSPEC_LDNT1_COUNT,
  e.tuple_mode (0)));
- return e.use_contiguous_load_insn (icode);
+ return e.use_contiguous_load_insn (icode, true);
  }
  };
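Side note (a sketch, not part of the patch; assumes GCC's internal headers
such as gimple.h and tree.h): after the folders above, IFN_MASK_LOAD calls
carry the else value as their fourth argument, so code inspecting them could
check it with the existing gimple accessors along these lines:

    /* Hypothetical helper (name is illustrative): true if CALL is an
       IFN_MASK_LOAD whose else value -- argument 3, after the base,
       alignment cookie and mask -- is the zero the AArch64 folders emit.  */
    static bool
    mask_load_with_zero_else_p (gcall *call)
    {
      if (!gimple_call_internal_p (call, IFN_MASK_LOAD)
          || gimple_call_num_args (call) != 4)
        return false;
      return integer_zerop (gimple_call_arg (call, 3));
    }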
gcc/config/aarch64/aarch64-sve-builtins.cc

@@ -4284,9 +4284,12 @@ function_expander::use_vcond_mask_insn (insn_code icode,
  /* Implement the call using instruction ICODE, which loads memory operand 1
  into register operand 0 under the control of predicate operand 2.
  Extending loads have a further predicate (operand 3) that nominally
- controls the extension. */
+ controls the extension.
+ HAS_ELSE is true if the pattern has an additional operand that specifies
+ the values of inactive lanes. This exists to match the general maskload
+ interface and is always zero for AArch64. */
  rtx
- function_expander::use_contiguous_load_insn (insn_code icode)
+ function_expander::use_contiguous_load_insn (insn_code icode, bool has_else)
  {
  machine_mode mem_mode = memory_vector_mode ();

@@ -4295,6 +4298,11 @@ function_expander::use_contiguous_load_insn (insn_code icode)
  add_input_operand (icode, args[0]);
  if (GET_MODE_UNIT_BITSIZE (mem_mode) < type_suffix (0).element_bits)
  add_input_operand (icode, CONSTM1_RTX (VNx16BImode));
+
+ /* If we have an else operand, add it. */
+ if (has_else)
+ add_input_operand (icode, CONST0_RTX (mem_mode));
+
  return generate_insn (icode);
  }

gcc/config/aarch64/aarch64-sve-builtins.h

@@ -696,7 +696,7 @@ public:
  rtx use_pred_x_insn (insn_code);
  rtx use_cond_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO);
  rtx use_vcond_mask_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO);
- rtx use_contiguous_load_insn (insn_code);
+ rtx use_contiguous_load_insn (insn_code, bool = false);
  rtx use_contiguous_prefetch_insn (insn_code);
  rtx use_contiguous_store_insn (insn_code);

gcc/config/aarch64/aarch64-sve.md

@@ -1291,7 +1291,8 @@
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
  (unspec:SVE_ALL
  [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_ALL 1 "memory_operand" "m")]
+ (match_operand:SVE_ALL 1 "memory_operand" "m")
+ (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")]
  UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"

@@ -1302,11 +1303,13 @@
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
  (unspec:SVE_STRUCT
  [(match_dup 2)
- (match_operand:SVE_STRUCT 1 "memory_operand")]
+ (match_operand:SVE_STRUCT 1 "memory_operand")
+ (match_dup 3)]
  UNSPEC_LDN))]
  "TARGET_SVE"
  {
  operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[3] = CONST0_RTX (<MODE>mode);
  }
  )

@@ -1315,7 +1318,8 @@
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
  (unspec:SVE_STRUCT
  [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
+ (match_operand:SVE_STRUCT 1 "memory_operand" "m")
+ (match_operand 3 "aarch64_maskload_else_operand")]
  UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"

@@ -1334,7 +1338,28 @@
  ;; -------------------------------------------------------------------------

  ;; Predicated load and extend, with 8 elements per 128-bit block.
- (define_insn_and_rewrite "@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
+ (define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
+ [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_HSDI
+ [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
+ (ANY_EXTEND:SVE_HSDI
+ (unspec:SVE_PARTIAL_I
+ [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")
+ (match_operand:SVE_PARTIAL_I 4 "aarch64_maskload_else_operand")]
+ UNSPEC_LD1_SVE))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
+ "&& !CONSTANT_P (operands[3])"
+ {
+ operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
+ }
+ )
+
+ ;; Same as above without the maskload_else_operand to still allow combine to
+ ;; match a sign-extended pred_mov pattern.
+ (define_insn_and_rewrite "*aarch64_load_<ANY_EXTEND:optab>_mov<SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
  (unspec:SVE_HSDI
  [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")

@@ -1342,8 +1367,8 @@
  (unspec:SVE_PARTIAL_I
  [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
  (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
- SVE_PRED_LOAD))]
- UNSPEC_PRED_X))]
+ UNSPEC_PRED_X))]
+ UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"

@@ -1433,7 +1458,8 @@
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
  (unspec:SVE_FULL
  [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_FULL 1 "memory_operand" "m")]
+ (match_operand:SVE_FULL 1 "memory_operand" "m")
+ (match_operand:SVE_FULL 3 "aarch64_maskload_else_operand")]
  UNSPEC_LDNT1_SVE))]
  "TARGET_SVE"
  "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"

@@ -1456,11 +1482,13 @@
  (match_operand:<V_INT_CONTAINER> 2 "register_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ (match_dup 6)
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {
  operands[5] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[6] = CONST0_RTX (<MODE>mode);
  }
  )

@@ -1474,6 +1502,7 @@
  (match_operand:VNx4SI 2 "register_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ (match_operand:SVE_4 6 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"

@@ -1503,6 +1532,7 @@
  (match_operand:VNx2DI 2 "register_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ (match_operand:SVE_2 6 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"

@@ -1531,6 +1561,7 @@
  UNSPEC_PRED_X)
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"

@@ -1561,6 +1592,7 @@
  UNSPEC_PRED_X)
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"

@@ -1588,6 +1620,7 @@
  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"

@@ -1624,6 +1657,7 @@
  (match_operand:VNx4SI 2 "register_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>")
+ (match_operand:SVE_4BHI 7 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  UNSPEC_PRED_X))]

@@ -1663,6 +1697,7 @@
  (match_operand:VNx2DI 2 "register_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
+ (match_operand:SVE_2BHSI 7 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  UNSPEC_PRED_X))]

@@ -1701,6 +1736,7 @@
  UNSPEC_PRED_X)
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
+ (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  UNSPEC_PRED_X))]

@@ -1738,6 +1774,7 @@
  UNSPEC_PRED_X)
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
+ (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  UNSPEC_PRED_X))]

@@ -1772,6 +1809,7 @@
  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
+ (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
  (mem:BLK (scratch))]
  UNSPEC_LD1_GATHER))]
  UNSPEC_PRED_X))]
gcc/config/aarch64/aarch64-sve2.md

@@ -264,7 +264,8 @@
  [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
  (unspec:SVE_FULLx24
  [(match_operand:VNx16BI 2 "register_operand" "Uph")
- (match_operand:SVE_FULLx24 1 "memory_operand" "m")]
+ (match_operand:SVE_FULLx24 1 "memory_operand" "m")
+ (match_operand:SVE_FULLx24 3 "aarch64_maskload_else_operand")]
  LD1_COUNT))]
  "TARGET_SVE2p1_OR_SME2"
  "<optab><Vesize>\t%0, %K2/z, %1"
gcc/config/aarch64/iterators.md

@@ -3331,10 +3331,6 @@

  (define_int_iterator SVE_LDFF1_LDNF1 [UNSPEC_LDFF1 UNSPEC_LDNF1])

- (define_int_iterator SVE_PRED_LOAD [UNSPEC_PRED_X UNSPEC_LD1_SVE])
-
- (define_int_attr pred_load [(UNSPEC_PRED_X "_x") (UNSPEC_LD1_SVE "")])
-
  (define_int_iterator LD1_COUNT [UNSPEC_LD1_COUNT UNSPEC_LDNT1_COUNT])

  (define_int_iterator ST1_COUNT [UNSPEC_ST1_COUNT UNSPEC_STNT1_COUNT])
gcc/config/aarch64/predicates.md

@@ -1067,3 +1067,7 @@
  (and (match_code "const_int")
  (match_test "IN_RANGE (INTVAL (op), -4096, 4080)
  && !(INTVAL (op) & 0xf)")))
+
+ (define_predicate "aarch64_maskload_else_operand"
+ (and (match_code "const_int,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
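For readers unfamiliar with define_predicate (again an illustration, not part
of the patch): the new aarch64_maskload_else_operand accepts only the zero
constant of the operand's mode. A rough C++ equivalent of the match, using
GCC's existing RTL accessors:

    /* Hypothetical rewrite of the predicate body in plain C++: OP must be
       a const_int or const_vector equal to zero in its own mode.  */
    static bool
    maskload_else_is_zero_p (rtx op)
    {
      return ((CONST_INT_P (op) || GET_CODE (op) == CONST_VECTOR)
              && op == CONST0_RTX (GET_MODE (op)));
    }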