From 479a915d88c080b42260cfb282b6eab5310353b5 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 3 Apr 2023 14:52:59 +0300 Subject: [PATCH] intel/fs: fix scheduling of HALT instructions With the following test : dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_out_of_bounds_load There is a : shader_start: ... <- no control flow g0 = some_alu g1 = fbl g2 = broadcast g3, g1 g4 = get_buffer_size g2 ... <- no control flow halt <- on some lanes g5 = send , g4 eliminate_find_live_channel will remove the fbl/broadcast because it assumes lane0 is active at get_buffer_size : shader_start: ... <- no control flow g0 = some_alu g4 = get_buffer_size g0 ... <- no control flow halt <- on some lanes g5 = send , g4 But then the instruction scheduler will move the get_buffer_size after the halt : shader_start: ... <- no control flow halt <- on some lanes g0 = some_alu g4 = get_buffer_size g0 g5 = send , g4 get_buffer_size pulls the surface index from lane0 in g0 which could have been turned off by the halt and we end up accessing an invalid surface handle. Signed-off-by: Lionel Landwerlin Cc: mesa-stable Reviewed-by: Francisco Jerez Part-of: (cherry picked from commit 9471ffa70a03265d966594d3e290ce646f5c2f9a) --- .pick_status.json | 2 +- .../compiler/brw_schedule_instructions.cpp | 51 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index fbe6337f84f..446dd274359 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -5974,7 +5974,7 @@ "description": "intel/fs: fix scheduling of HALT instructions", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 3286e3f83b9..e861cab7f3f 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -651,6 +651,7 @@ public: ralloc_free(this->mem_ctx); } void add_barrier_deps(schedule_node *n); + void add_cross_lane_deps(schedule_node *n); void add_dep(schedule_node *before, schedule_node *after, int latency); void add_dep(schedule_node *before, schedule_node *after); @@ -1098,6 +1099,33 @@ is_scheduling_barrier(const backend_instruction *inst) inst->has_side_effects(); } +static bool +has_cross_lane_access(const fs_inst *inst) +{ + /* FINISHME: + * + * This function is likely incomplete in terms of identify cross lane + * accesses. + */ + if (inst->opcode == SHADER_OPCODE_BROADCAST || + inst->opcode == SHADER_OPCODE_READ_SR_REG || + inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST || + inst->opcode == SHADER_OPCODE_SHUFFLE || + inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS || + inst->opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL || + inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL) + return true; + + for (unsigned s = 0; s < inst->sources; s++) { + if (inst->src[s].file == VGRF) { + if (inst->src[s].stride == 0) + return true; + } + } + + return false; +} + /** * Sometimes we really want this node to execute after everything that * was before it and before everything that followed it. This adds @@ -1128,6 +1156,25 @@ instruction_scheduler::add_barrier_deps(schedule_node *n) } } +/** + * Because some instructions like HALT can disable lanes, scheduling prior to + * a cross lane access should not be allowed, otherwise we could end up with + * later instructions accessing uninitialized data. + */ +void +instruction_scheduler::add_cross_lane_deps(schedule_node *n) +{ + schedule_node *prev = (schedule_node *)n->prev; + + if (prev) { + while (!prev->is_head_sentinel()) { + if (has_cross_lane_access((fs_inst *)prev->inst)) + add_dep(prev, n, 0); + prev = (schedule_node *)prev->prev; + } + } +} + /* instruction scheduling needs to be aware of when an MRF write * actually writes 2 MRFs. */ @@ -1165,6 +1212,10 @@ fs_instruction_scheduler::calculate_deps() if (is_scheduling_barrier(inst)) add_barrier_deps(n); + if (inst->opcode == BRW_OPCODE_HALT || + inst->opcode == SHADER_OPCODE_HALT_TARGET) + add_cross_lane_deps(n); + /* read-after-write deps. */ for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) {