mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-23 18:24:13 +08:00
intel/fs: fix scheduling of HALT instructions
With the following test :
dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_out_of_bounds_load
There is a :
shader_start:
... <- no control flow
g0 = some_alu
g1 = fbl
g2 = broadcast g3, g1
g4 = get_buffer_size g2
... <- no control flow
halt <- on some lanes
g5 = send <surface>, g4
eliminate_find_live_channel will remove the fbl/broadcast because it
assumes lane0 is active at get_buffer_size :
shader_start:
... <- no control flow
g0 = some_alu
g4 = get_buffer_size g0
... <- no control flow
halt <- on some lanes
g5 = send <surface>, g4
But then the instruction scheduler will move the get_buffer_size after
the halt :
shader_start:
... <- no control flow
halt <- on some lanes
g0 = some_alu
g4 = get_buffer_size g0
g5 = send <surface>, g4
get_buffer_size pulls the surface index from lane0 in g0 which could
have been turned off by the halt and we end up accessing an invalid
surface handle.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20765>
(cherry picked from commit 9471ffa70a
)
This commit is contained in:
parent
61b376097c
commit
479a915d88
@ -5974,7 +5974,7 @@
|
||||
"description": "intel/fs: fix scheduling of HALT instructions",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
@ -651,6 +651,7 @@ public:
|
||||
ralloc_free(this->mem_ctx);
|
||||
}
|
||||
void add_barrier_deps(schedule_node *n);
|
||||
void add_cross_lane_deps(schedule_node *n);
|
||||
void add_dep(schedule_node *before, schedule_node *after, int latency);
|
||||
void add_dep(schedule_node *before, schedule_node *after);
|
||||
|
||||
@ -1098,6 +1099,33 @@ is_scheduling_barrier(const backend_instruction *inst)
|
||||
inst->has_side_effects();
|
||||
}
|
||||
|
||||
static bool
|
||||
has_cross_lane_access(const fs_inst *inst)
|
||||
{
|
||||
/* FINISHME:
|
||||
*
|
||||
* This function is likely incomplete in terms of identify cross lane
|
||||
* accesses.
|
||||
*/
|
||||
if (inst->opcode == SHADER_OPCODE_BROADCAST ||
|
||||
inst->opcode == SHADER_OPCODE_READ_SR_REG ||
|
||||
inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST ||
|
||||
inst->opcode == SHADER_OPCODE_SHUFFLE ||
|
||||
inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS ||
|
||||
inst->opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
|
||||
inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL)
|
||||
return true;
|
||||
|
||||
for (unsigned s = 0; s < inst->sources; s++) {
|
||||
if (inst->src[s].file == VGRF) {
|
||||
if (inst->src[s].stride == 0)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sometimes we really want this node to execute after everything that
|
||||
* was before it and before everything that followed it. This adds
|
||||
@ -1128,6 +1156,25 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Because some instructions like HALT can disable lanes, scheduling prior to
|
||||
* a cross lane access should not be allowed, otherwise we could end up with
|
||||
* later instructions accessing uninitialized data.
|
||||
*/
|
||||
void
|
||||
instruction_scheduler::add_cross_lane_deps(schedule_node *n)
|
||||
{
|
||||
schedule_node *prev = (schedule_node *)n->prev;
|
||||
|
||||
if (prev) {
|
||||
while (!prev->is_head_sentinel()) {
|
||||
if (has_cross_lane_access((fs_inst *)prev->inst))
|
||||
add_dep(prev, n, 0);
|
||||
prev = (schedule_node *)prev->prev;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* instruction scheduling needs to be aware of when an MRF write
|
||||
* actually writes 2 MRFs.
|
||||
*/
|
||||
@ -1165,6 +1212,10 @@ fs_instruction_scheduler::calculate_deps()
|
||||
if (is_scheduling_barrier(inst))
|
||||
add_barrier_deps(n);
|
||||
|
||||
if (inst->opcode == BRW_OPCODE_HALT ||
|
||||
inst->opcode == SHADER_OPCODE_HALT_TARGET)
|
||||
add_cross_lane_deps(n);
|
||||
|
||||
/* read-after-write deps. */
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
|
Loading…
Reference in New Issue
Block a user