intel/fs: fix scheduling of HALT instructions

With the following test :

dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_out_of_bounds_load

There is a :

shader_start:
   ...                                 <- no control flow
   g0 = some_alu
   g1 = fbl
   g2 = broadcast g3, g1
   g4 = get_buffer_size g2
   ...                                 <- no control flow
   halt                                <- on some lanes
   g5 = send <surface>, g4

eliminate_find_live_channel will remove the fbl/broadcast because it
assumes lane0 is active at get_buffer_size :

shader_start:
   ...                                 <- no control flow
   g0 = some_alu
   g4 = get_buffer_size g0
   ...                                 <- no control flow
   halt                                <- on some lanes
   g5 = send <surface>, g4

But then the instruction scheduler will move the get_buffer_size after
the halt :

shader_start:
   ...                                 <- no control flow
   halt                                <- on some lanes
   g0 = some_alu
   g4 = get_buffer_size g0
   g5 = send <surface>, g4

get_buffer_size pulls the surface index from lane0 in g0 which could
have been turned off by the halt and we end up accessing an invalid
surface handle.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20765>
This commit is contained in:
Lionel Landwerlin 2023-04-03 14:52:59 +03:00
parent 9b6945bb65
commit 9471ffa70a

View File

@ -659,6 +659,7 @@ public:
ralloc_free(this->mem_ctx);
}
void add_barrier_deps(schedule_node *n);
void add_cross_lane_deps(schedule_node *n);
void add_dep(schedule_node *before, schedule_node *after, int latency);
void add_dep(schedule_node *before, schedule_node *after);
@ -1106,6 +1107,33 @@ is_scheduling_barrier(const backend_instruction *inst)
inst->has_side_effects();
}
static bool
has_cross_lane_access(const fs_inst *inst)
{
/* FINISHME:
*
* This function is likely incomplete in terms of identify cross lane
* accesses.
*/
if (inst->opcode == SHADER_OPCODE_BROADCAST ||
inst->opcode == SHADER_OPCODE_READ_SR_REG ||
inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST ||
inst->opcode == SHADER_OPCODE_SHUFFLE ||
inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS ||
inst->opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL)
return true;
for (unsigned s = 0; s < inst->sources; s++) {
if (inst->src[s].file == VGRF) {
if (inst->src[s].stride == 0)
return true;
}
}
return false;
}
/**
* Sometimes we really want this node to execute after everything that
* was before it and before everything that followed it. This adds
@ -1136,6 +1164,25 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
}
}
/**
* Because some instructions like HALT can disable lanes, scheduling prior to
* a cross lane access should not be allowed, otherwise we could end up with
* later instructions accessing uninitialized data.
*/
void
instruction_scheduler::add_cross_lane_deps(schedule_node *n)
{
schedule_node *prev = (schedule_node *)n->prev;
if (prev) {
while (!prev->is_head_sentinel()) {
if (has_cross_lane_access((fs_inst *)prev->inst))
add_dep(prev, n, 0);
prev = (schedule_node *)prev->prev;
}
}
}
/* instruction scheduling needs to be aware of when an MRF write
* actually writes 2 MRFs.
*/
@ -1173,6 +1220,10 @@ fs_instruction_scheduler::calculate_deps()
if (is_scheduling_barrier(inst))
add_barrier_deps(n);
if (inst->opcode == BRW_OPCODE_HALT ||
inst->opcode == SHADER_OPCODE_HALT_TARGET)
add_cross_lane_deps(n);
/* read-after-write deps. */
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {