mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-24 10:44:15 +08:00
ir3: set UL flag before ir3_lower_subgroups
ir3_legalize_relative, extracted from ir3_legalize, assumes that a0 is loaded first in each block if the block contains any user of a0. ir3_lower_subgroups breaks that assumption, so ir3_legalize_relative needs to run before it.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6902
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17720>
This commit is contained in:
parent
7023cab093
commit
8001c78d49
@ -1865,6 +1865,7 @@ bool ir3_lower_subgroups(struct ir3 *ir);
|
||||
|
||||
/* legalize: */
|
||||
bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary);
|
||||
bool ir3_legalize_relative(struct ir3 *ir);
|
||||
|
||||
static inline bool
|
||||
ir3_has_latency_to_hide(struct ir3 *ir)
|
||||
|
@ -4892,6 +4892,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
||||
|
||||
IR3_PASS(ir, ir3_postsched, so);
|
||||
|
||||
IR3_PASS(ir, ir3_legalize_relative);
|
||||
IR3_PASS(ir, ir3_lower_subgroups);
|
||||
|
||||
if (so->type == MESA_SHADER_FRAGMENT)
|
||||
|
@ -39,7 +39,7 @@
|
||||
* 1) Iteratively determine where sync ((sy)/(ss)) flags are needed,
|
||||
* based on state flowing out of predecessor blocks until there is
|
||||
* no further change. In some cases this requires inserting nops.
|
||||
* 2) Mark (ei) on last varying input, and (ul) on last use of a0.x
|
||||
* 2) Mark (ei) on last varying input
|
||||
* 3) Final nop scheduling for instruction latency
|
||||
* 4) Resolve jumps and schedule blocks, marking potential convergence
|
||||
* points with (jp)
|
||||
@ -88,7 +88,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
||||
if (bd->valid)
|
||||
return false;
|
||||
|
||||
struct ir3_instruction *last_rel = NULL;
|
||||
struct ir3_instruction *last_n = NULL;
|
||||
struct list_head instr_list;
|
||||
struct ir3_legalize_state prev_state = bd->state;
|
||||
@ -207,13 +206,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
||||
regmask_init(&state->needs_sy, mergedregs);
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: is it valid to have address reg loaded from a
|
||||
* relative src (ie. mova a0, c<a0.x+4>)? If so, the
|
||||
* last_rel check below should be moved ahead of this:
|
||||
*/
|
||||
if (reg->flags & IR3_REG_RELATIV)
|
||||
last_rel = n;
|
||||
}
|
||||
|
||||
foreach_dst (reg, n) {
|
||||
@ -223,11 +215,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
||||
regmask_init(&state->needs_ss_war, mergedregs);
|
||||
regmask_init(&state->needs_ss, mergedregs);
|
||||
}
|
||||
|
||||
if (last_rel && (reg->num == regid(REG_A0, 0))) {
|
||||
last_rel->flags |= IR3_INSTR_UL;
|
||||
last_rel = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* cat5+ does not have an (ss) bit, if needed we need to
|
||||
@ -367,9 +354,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
||||
list_add(&baryf->node, &block->instr_list);
|
||||
}
|
||||
|
||||
if (last_rel)
|
||||
last_rel->flags |= IR3_INSTR_UL;
|
||||
|
||||
bd->valid = true;
|
||||
|
||||
if (memcmp(&prev_state, state, sizeof(*state))) {
|
||||
|
69
src/freedreno/ir3/ir3_legalize_relative.c
Normal file
69
src/freedreno/ir3/ir3_legalize_relative.c
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "ir3.h"
|
||||
|
||||
/*
|
||||
* Mark (ul) on the last user of a0 before a0 is loaded again. emit_block
|
||||
* makes sure a0 is loaded first if there is any user in the block. This
|
||||
* allows us to process each block independently.
|
||||
*
|
||||
* Note that this must be called before passes that break the assumption, such
|
||||
* as ir3_lower_subgroups.
|
||||
*/
|
||||
|
||||
static bool
|
||||
is_reg_relative(const struct ir3_instruction *instr)
|
||||
{
|
||||
foreach_dst (reg, instr) {
|
||||
if (reg->flags & IR3_REG_RELATIV)
|
||||
return true;
|
||||
}
|
||||
|
||||
foreach_src (reg, instr) {
|
||||
if (reg->flags & IR3_REG_RELATIV)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_dst_a0(const struct ir3_instruction *instr)
|
||||
{
|
||||
foreach_dst (reg, instr) {
|
||||
if (reg->num == regid(REG_A0, 0))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
ir3_legalize_relative(struct ir3 *ir)
|
||||
{
|
||||
foreach_block (block, &ir->block_list) {
|
||||
struct ir3_instruction *last_user = NULL;
|
||||
|
||||
foreach_instr (instr, &block->instr_list) {
|
||||
if (is_reg_relative(instr))
|
||||
last_user = instr;
|
||||
|
||||
/* Is it valid to have address reg loaded from a relative src (ie.
|
||||
* mova a0, c<a0.x+4>)? This marks the load (ul), which may or may
|
||||
* not be valid.
|
||||
*/
|
||||
if (last_user && is_dst_a0(instr)) {
|
||||
last_user->flags |= IR3_INSTR_UL;
|
||||
last_user = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_user)
|
||||
last_user->flags |= IR3_INSTR_UL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
@ -85,6 +85,7 @@ libfreedreno_ir3_files = files(
|
||||
'ir3_image.h',
|
||||
'ir3.h',
|
||||
'ir3_legalize.c',
|
||||
'ir3_legalize_relative.c',
|
||||
'ir3_liveness.c',
|
||||
'ir3_lower_parallelcopy.c',
|
||||
'ir3_lower_spill.c',
|
||||
|
Loading…
Reference in New Issue
Block a user