mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-27 04:04:23 +08:00
aco: don't consider sa_sdst=0 before SALU write to fix VALUMaskWriteHazard
LLVM does but that's probably a bug.
fossil-db (navi31):
Totals from 311 (0.39% of 79395) affected shaders:
Instrs: 380453 -> 381075 (+0.16%)
CodeSize: 1961012 -> 1964744 (+0.19%)
Latency: 4799095 -> 4800313 (+0.03%)
InvThroughput: 958358 -> 958904 (+0.06%)
VALU: 242322 -> 242633 (+0.13%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Backport-to: 24.1
Backport-to: 24.2
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30818>
(cherry picked from commit b1ba7d1b99
)
This commit is contained in:
parent
6beff74e00
commit
a42e144c32
@ -674,7 +674,7 @@
|
||||
"description": "aco: don't consider sa_sdst=0 before SALU write to fix VALUMaskWriteHazard",
|
||||
"nominated": true,
|
||||
"nomination_type": 4,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
@ -375,5 +375,5 @@ Triggered by:
|
||||
SALU writing then SALU or VALU reading a SGPR that was previously used as a lane mask for a VALU.
|
||||
|
||||
Mitigated by:
|
||||
A VALU instruction reading a non-exec SGPR before the SALU write, or a sa_sdst=0 wait:
|
||||
`s_waitcnt_depctr 0xfffe`
|
||||
A VALU instruction reading a non-exec SGPR before the SALU write, or a sa_sdst=0 wait after the
|
||||
SALU write: `s_waitcnt_depctr 0xfffe`
|
||||
|
@ -1579,6 +1579,7 @@ resolve_all_gfx11(State& state, NOP_ctx_gfx11& ctx,
|
||||
Builder bld(state.program, &new_instructions);
|
||||
|
||||
unsigned waitcnt_depctr = 0xffff;
|
||||
bool valu_read_sgpr = false;
|
||||
|
||||
/* LdsDirectVALUHazard/VALUPartialForwardingHazard/VALUTransUseHazard */
|
||||
bool has_vdst0_since_valu = true;
|
||||
@ -1598,12 +1599,15 @@ resolve_all_gfx11(State& state, NOP_ctx_gfx11& ctx,
|
||||
}
|
||||
|
||||
/* VALUMaskWriteHazard */
|
||||
if (state.program->wave_size == 64 &&
|
||||
(ctx.sgpr_read_by_valu_as_lanemask.any() ||
|
||||
ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.any())) {
|
||||
waitcnt_depctr &= 0xfffe;
|
||||
ctx.sgpr_read_by_valu_as_lanemask.reset();
|
||||
ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.reset();
|
||||
if (state.program->wave_size == 64) {
|
||||
if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.any()) {
|
||||
waitcnt_depctr &= 0xfffe;
|
||||
ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.reset();
|
||||
}
|
||||
if (ctx.sgpr_read_by_valu_as_lanemask.any()) {
|
||||
valu_read_sgpr = true;
|
||||
ctx.sgpr_read_by_valu_as_lanemask.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/* LdsDirectVMEMHazard */
|
||||
@ -1617,6 +1621,16 @@ resolve_all_gfx11(State& state, NOP_ctx_gfx11& ctx,
|
||||
|
||||
if (waitcnt_depctr != 0xffff)
|
||||
bld.sopp(aco_opcode::s_waitcnt_depctr, waitcnt_depctr);
|
||||
|
||||
if (valu_read_sgpr) {
|
||||
/* This has to be after the s_waitcnt_depctr so that the instruction is not involved in any
|
||||
* other hazards. */
|
||||
bld.vop3(aco_opcode::v_xor3_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
|
||||
Operand(PhysReg(0), s1), Operand(PhysReg(0), s1));
|
||||
|
||||
/* workaround possible LdsDirectVALUHazard/VALUPartialForwardingHazard */
|
||||
bld.sopp(aco_opcode::s_waitcnt_depctr, 0x0fff);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ctx>
|
||||
|
@ -1282,9 +1282,22 @@ BEGIN_TEST(insert_nops.setpc_gfx11)
|
||||
/* VALUMaskWriteHazard */
|
||||
//! p_unit_test 4
|
||||
//! v1: %0:v[0] = v_cndmask_b32 0, 0, %0:vcc
|
||||
//! s1: %0:vcc_hi = s_mov_b32 0
|
||||
//! s_waitcnt_depctr va_vdst(0) sa_sdst(0)
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
|
||||
Operand::zero(), Operand(vcc, s2));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(vcc_hi, s1), Operand::c32(0));
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
||||
//! p_unit_test 8
|
||||
//! v1: %0:v[0] = v_cndmask_b32 0, 0, %0:vcc
|
||||
//! s_waitcnt_depctr va_vdst(0)
|
||||
//! v1: %0:v[0] = v_xor3_b32 %0:v[0], %0:s[0], %0:s[0]
|
||||
//! s_waitcnt_depctr va_vdst(0)
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
|
||||
Operand::zero(), Operand(vcc, s2));
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
Loading…
Reference in New Issue
Block a user