mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-23 18:24:13 +08:00
aco/gfx11: fix VMEM/DS->VALU WaW/RaW hazard
Previously, we could safely read/write unused lanes of VMEM/DS
destination VGPRs without waiting for the load to finish. That doesn't
seem to be the case on GFX11.
fossil-db (gfx1100):
Totals from 6698 (4.94% of 135636) affected shaders:
Instrs: 11184274 -> 11199420 (+0.14%); split: -0.00%, +0.14%
CodeSize: 57578344 -> 57638928 (+0.11%); split: -0.00%, +0.11%
Latency: 198348808 -> 198382472 (+0.02%); split: -0.00%, +0.02%
InvThroughput: 24376324 -> 24378439 (+0.01%); split: -0.00%, +0.01%
VClause: 192420 -> 192559 (+0.07%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8722
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8239
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22965>
(cherry picked from commit 88f6d7f4bd)
This commit is contained in:
parent
b9f4bd9538
commit
1d348d54ed
@ -2554,7 +2554,7 @@
|
||||
"description": "aco/gfx11: fix VMEM/DS->VALU WaW/RaW hazard",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
@ -194,6 +194,11 @@ Currently, we don't do this.
|
||||
|
||||
This leads to wrong bounds checking, using a VGPR offset fixes it.
|
||||
|
||||
## unused VMEM/DS destination lanes can't be used without waiting
|
||||
|
||||
On GFX11, we can't safely read/write unused lanes of VMEM/DS destination
|
||||
VGPRs without waiting for the load to finish.
|
||||
|
||||
## GCN / GFX6 hazards
|
||||
|
||||
### VINTRP followed by a read with `v_readfirstlane` or `v_readlane`
|
||||
|
@ -224,14 +224,15 @@ struct wait_entry {
|
||||
bool join(const wait_entry& other)
|
||||
{
|
||||
bool changed = (other.events & ~events) || (other.counters & ~counters) ||
|
||||
(other.wait_on_read && !wait_on_read) || (other.vmem_types & !vmem_types);
|
||||
(other.wait_on_read && !wait_on_read) || (other.vmem_types & !vmem_types) ||
|
||||
(!other.logical && logical);
|
||||
events |= other.events;
|
||||
counters |= other.counters;
|
||||
changed |= imm.combine(other.imm);
|
||||
changed |= delay.combine(other.delay);
|
||||
wait_on_read |= other.wait_on_read;
|
||||
vmem_types |= other.vmem_types;
|
||||
assert(logical == other.logical);
|
||||
logical &= other.logical;
|
||||
return changed;
|
||||
}
|
||||
|
||||
@ -751,7 +752,7 @@ update_counters_for_flat_load(wait_ctx& ctx, memory_sync_info sync = memory_sync
|
||||
|
||||
void
|
||||
insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, bool wait_on_read,
|
||||
uint8_t vmem_types = 0, unsigned cycles = 0)
|
||||
uint8_t vmem_types = 0, unsigned cycles = 0, bool force_linear = false)
|
||||
{
|
||||
uint16_t counters = get_counters_for_event(event);
|
||||
wait_imm imm;
|
||||
@ -775,7 +776,7 @@ insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, boo
|
||||
delay.salu_cycles = cycles;
|
||||
}
|
||||
|
||||
wait_entry new_entry(event, imm, delay, !rc.is_linear(), wait_on_read);
|
||||
wait_entry new_entry(event, imm, delay, !rc.is_linear() && !force_linear, wait_on_read);
|
||||
new_entry.vmem_types |= vmem_types;
|
||||
|
||||
for (unsigned i = 0; i < rc.size(); i++) {
|
||||
@ -796,7 +797,14 @@ void
|
||||
insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_types = 0,
|
||||
unsigned cycles = 0)
|
||||
{
|
||||
insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles);
|
||||
/* We can't safely write to unwritten destination VGPR lanes on GFX11 without waiting for
|
||||
* the load to finish.
|
||||
*/
|
||||
bool force_linear =
|
||||
ctx.gfx_level >= GFX11 && (event & (event_lds | event_gds | event_vmem | event_flat));
|
||||
|
||||
insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles,
|
||||
force_linear);
|
||||
}
|
||||
|
||||
void
|
||||
|
Loading…
Reference in New Issue
Block a user