radeonsi: Add support to clear LDS at the end of a shader.

No hash updates as I didn't find a facility to do it in radeonsi
(even though there are flags like forcing fma32).

Note that we do this very late to avoid any optimizations that
might remove the dead stores. (I checked that LLVM doesn't remove
them, but this is admittedly potentially brittle.)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679>
This commit is contained in:
Bas Nieuwenhuizen 2023-12-04 01:39:35 +01:00 committed by Marge Bot
parent eaf61adea5
commit d04ee07712
5 changed files with 18 additions and 1 deletions

View File

@ -20,6 +20,7 @@ OPT_INT(max_vram_map_size, 8196, "Maximum size of a buffer in VRAM to map direct
OPT_BOOL(force_use_fma32, false, "Force use fma32 instruction for GPU family newer than gfx9")
OPT_BOOL(dcc_msaa, false, "Enable DCC for MSAA")
OPT_BOOL(zerovram, false, "Zero all VRAM allocations")
OPT_BOOL(clear_lds, false, "Clear LDS at the end of shaders. Might decrease performance.")
#undef OPT_BOOL
#undef OPT_INT

View File

@ -2255,6 +2255,11 @@ static void si_nir_emit_polygon_stipple(nir_shader *nir, struct si_shader_args *
nir_discard_if(b, nir_inot(b, pass));
}
/* Decide whether the given shader should have its LDS cleared.
 *
 * Returns true only when all three conditions hold, checked in this order:
 *   1. the shader stage is MESA_SHADER_COMPUTE,
 *   2. the shader reports a shared memory size greater than zero,
 *   3. the screen's clear_lds option is set.
 */
bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *shader)
{
   /* Only compute shaders are candidates. */
   if (shader->info.stage != MESA_SHADER_COMPUTE) {
      return false;
   }

   /* Nothing to clear when the shader declares no shared memory. */
   if (shader->info.shared_size <= 0) {
      return false;
   }

   /* Finally, the behaviour is opt-in through the screen option. */
   return sscreen->options.clear_lds;
}
struct nir_shader *si_get_nir_shader(struct si_shader *shader,
struct si_shader_args *args,
bool *free_nir,
@ -2512,6 +2517,12 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
.allow_fp16 = sel->screen->info.gfx_level >= GFX9,
});
if (si_should_clear_lds(sel->screen, nir)) {
const unsigned chunk_size = 16; /* max single store size */
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
NIR_PASS_V(nir, nir_clear_shared_memory, shared_size, chunk_size);
}
NIR_PASS(progress, nir, ac_nir_lower_intrinsics_to_args, sel->screen->info.gfx_level,
si_select_hw_stage(nir->info.stage, key, sel->screen->info.gfx_level),
&args->ac);

View File

@ -1046,6 +1046,8 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
struct gfx9_gs_info *out);
bool gfx10_is_ngg_passthrough(struct si_shader *shader);
bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *shader);
/* Inline helpers. */
/* Return the pointer to the main shader part's pointer. */

View File

@ -652,7 +652,8 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID);
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
si_should_clear_lds(sscreen, nir);
info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||

View File

@ -167,6 +167,8 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
shader_variant_flags |= 1 << 10;
if (sel->screen->options.inline_uniforms)
shader_variant_flags |= 1 << 11;
if (sel->screen->options.clear_lds)
shader_variant_flags |= 1 << 12;
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);