intel/fs: fix total_scratch computation

We only have a single prog_data::total_scratch for all shader variants
(SIMD 8, 16, 32). Therefore we should always max the total_scratch on
top of existing variant.

We probably haven't run into that issue before because we compile by
increasing SIMD size and higher SIMD size is more likely to spill. But
for bindless shaders with return shaders, if the last return part
doesn't spill, we completely ignore the previous parts' scratch
computation.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15193>
This commit is contained in:
Lionel Landwerlin 2022-02-28 15:13:07 +02:00 committed by Marge Bot
parent 5b43075888
commit 96c8880900
6 changed files with 16 additions and 2 deletions

View File

@ -8864,7 +8864,12 @@ fs_visitor::allocate_registers(bool allow_spilling)
if (last_scratch > 0) {
ASSERTED unsigned max_scratch_size = 2 * 1024 * 1024;
prog_data->total_scratch = brw_get_scratch_size(last_scratch);
/* Take the max of any previously compiled variant of the shader. In the
* case of bindless shaders with return parts, this will also take the
* max of all parts.
*/
prog_data->total_scratch = MAX2(brw_get_scratch_size(last_scratch),
prog_data->total_scratch);
if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL) {
if (devinfo->platform == INTEL_PLATFORM_HSW) {
@ -9701,6 +9706,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
prog_data->base.stage = MESA_SHADER_FRAGMENT;
prog_data->base.ray_queries = nir->info.ray_queries;
prog_data->base.total_scratch = 0;
const struct intel_device_info *devinfo = compiler->devinfo;
const unsigned max_subgroup_size = compiler->devinfo->ver >= 6 ? 32 : 16;
@ -10074,6 +10080,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
prog_data->base.stage = MESA_SHADER_COMPUTE;
prog_data->base.total_shared = nir->info.shared_size;
prog_data->base.ray_queries = nir->info.ray_queries;
prog_data->base.total_scratch = 0;
if (!nir->info.workgroup_size_variable) {
prog_data->local_size[0] = nir->info.workgroup_size[0];
@ -10333,6 +10340,8 @@ brw_compile_bs(const struct brw_compiler *compiler,
prog_data->base.stage = shader->info.stage;
prog_data->base.ray_queries = shader->info.ray_queries;
prog_data->base.total_scratch = 0;
prog_data->max_stack_size = 0;
fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base,

View File

@ -167,6 +167,7 @@ brw_compile_task(const struct brw_compiler *compiler,
prog_data->base.base.stage = MESA_SHADER_TASK;
prog_data->base.base.total_shared = nir->info.shared_size;
prog_data->base.base.total_scratch = 0;
prog_data->base.local_size[0] = nir->info.workgroup_size[0];
prog_data->base.local_size[1] = nir->info.workgroup_size[1];
@ -531,6 +532,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
prog_data->base.base.stage = MESA_SHADER_MESH;
prog_data->base.base.total_shared = nir->info.shared_size;
prog_data->base.base.total_scratch = 0;
prog_data->base.local_size[0] = nir->info.workgroup_size[0];
prog_data->base.local_size[1] = nir->info.workgroup_size[1];

View File

@ -122,7 +122,7 @@ extern const char *const conditional_modifier[16];
extern const char *const pred_ctrl_align16[16];
/* Per-thread scratch space is a power-of-two multiple of 1KB. */
static inline int
static inline unsigned
brw_get_scratch_size(int size)
{
return MAX2(1024, util_next_power_of_two(size));

View File

@ -2543,6 +2543,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
prog_data->base.base.stage = MESA_SHADER_VERTEX;
prog_data->base.base.ray_queries = nir->info.ray_queries;
prog_data->base.base.total_scratch = 0;
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX];
brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);

View File

@ -597,6 +597,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
prog_data->base.base.stage = MESA_SHADER_GEOMETRY;
prog_data->base.base.ray_queries = nir->info.ray_queries;
prog_data->base.base.total_scratch = 0;
/* The GLSL linker will have already matched up GS inputs and the outputs
* of prior stages. The driver does extend VS outputs in some cases, but

View File

@ -367,6 +367,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL;
prog_data->base.base.ray_queries = nir->info.ray_queries;
prog_data->base.base.total_scratch = 0;
nir->info.outputs_written = key->outputs_written;
nir->info.patch_outputs_written = key->patch_outputs_written;