mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-28 04:34:25 +08:00
radv: fix NGG streamout with VS and GPL on GFX11
With GPL (graphics pipeline libraries), it's not possible to know the primitive topology when compiling the pre-rasterization stages. For NGG, we use the maximum number of vertices per primitive and rely on the hardware to ignore the extra bits for points/lines. However, this can't work for NGG streamout, because the number of vertices per primitive is used to compute a streamout offset. The only way to solve this is to pass the number of vertices per primitive through a new user SGPR. This fixes a bunch of streamout tests with Zink/RADV on GFX11.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21833>
This commit is contained in:
parent
491887c9f2
commit
d750ad19fd
@ -2174,7 +2174,12 @@ radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer)
|
||||
static void
|
||||
radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
|
||||
const struct radv_userdata_info *loc =
|
||||
&pipeline->last_vgt_api_stage_locs[AC_UD_NUM_VERTS_PER_PRIM];
|
||||
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
||||
const unsigned stage = pipeline->last_vgt_api_stage;
|
||||
uint32_t base_reg;
|
||||
|
||||
assert(!cmd_buffer->state.mesh_shading);
|
||||
|
||||
@ -2185,6 +2190,13 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
|
||||
radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE,
|
||||
d->vk.ia.primitive_topology);
|
||||
}
|
||||
|
||||
if (loc->sgpr_idx == -1)
|
||||
return;
|
||||
|
||||
base_reg = pipeline->base.user_data_0[stage];
|
||||
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
|
||||
si_conv_prim_to_gs_out(d->vk.ia.primitive_topology) + 1);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -6342,6 +6354,11 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES;
|
||||
}
|
||||
|
||||
/* Re-emit the primitive topology because the SGPR idx can be different. */
|
||||
if (graphics_pipeline->has_num_verts_per_prim) {
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
|
||||
}
|
||||
|
||||
radv_bind_dynamic_state(cmd_buffer, &graphics_pipeline->dynamic_state);
|
||||
|
||||
radv_bind_vs_input_state(cmd_buffer, graphics_pipeline);
|
||||
|
@ -406,7 +406,12 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
||||
unsigned num_vertices;
|
||||
|
||||
if (stage == MESA_SHADER_VERTEX) {
|
||||
num_vertices = radv_get_num_vertices_per_prim(s->pl_key);
|
||||
/* For dynamic primitive topology with streamout. */
|
||||
if (s->info->vs.dynamic_num_verts_per_prim) {
|
||||
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->num_verts_per_prim);
|
||||
} else {
|
||||
replacement = nir_imm_int(b, radv_get_num_vertices_per_prim(s->pl_key));
|
||||
}
|
||||
} else if (stage == MESA_SHADER_TESS_EVAL) {
|
||||
if (s->info->tes.point_mode) {
|
||||
num_vertices = 1;
|
||||
@ -415,6 +420,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
||||
} else {
|
||||
num_vertices = 3;
|
||||
}
|
||||
replacement = nir_imm_int(b, num_vertices);
|
||||
} else {
|
||||
assert(stage == MESA_SHADER_GEOMETRY);
|
||||
switch (s->info->gs.output_prim) {
|
||||
@ -431,8 +437,8 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
||||
unreachable("invalid GS output primitive");
|
||||
break;
|
||||
}
|
||||
replacement = nir_imm_int(b, num_vertices);
|
||||
}
|
||||
replacement = nir_imm_int(b, num_vertices);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ordered_id_amd:
|
||||
|
@ -4980,6 +4980,8 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
|
||||
pipeline->has_streamout = pipeline->last_vgt_api_stage_locs[AC_UD_STREAMOUT_BUFFERS].sgpr_idx != -1;
|
||||
pipeline->has_dynamic_samples = ps->info.user_sgprs_locs.shader_data[AC_UD_PS_NUM_SAMPLES].sgpr_idx != -1;
|
||||
pipeline->has_sample_positions = ps->info.ps.needs_sample_positions;
|
||||
pipeline->has_num_verts_per_prim =
|
||||
pipeline->last_vgt_api_stage_locs[AC_UD_NUM_VERTS_PER_PRIM].sgpr_idx != -1;
|
||||
|
||||
pipeline->base.push_constant_size = pipeline_layout.push_constant_size;
|
||||
pipeline->base.dynamic_offset_count = pipeline_layout.dynamic_offset_count;
|
||||
|
@ -2202,6 +2202,7 @@ struct radv_graphics_pipeline {
|
||||
bool has_streamout;
|
||||
bool has_dynamic_samples;
|
||||
bool has_sample_positions;
|
||||
bool has_num_verts_per_prim;
|
||||
|
||||
uint8_t vtx_emit_num;
|
||||
|
||||
|
@ -159,7 +159,8 @@ enum radv_ud_index {
|
||||
AC_UD_NGG_VIEWPORT = 9,
|
||||
AC_UD_FORCE_VRS_RATES = 10,
|
||||
AC_UD_TASK_RING_ENTRY = 11,
|
||||
AC_UD_SHADER_START = 12,
|
||||
AC_UD_NUM_VERTS_PER_PRIM = 12,
|
||||
AC_UD_SHADER_START = 13,
|
||||
AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
|
||||
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
|
||||
AC_UD_VS_PROLOG_INPUTS,
|
||||
@ -277,6 +278,7 @@ struct radv_shader_info {
|
||||
uint32_t input_slot_usage_mask;
|
||||
bool has_prolog;
|
||||
bool dynamic_inputs;
|
||||
bool dynamic_num_verts_per_prim;
|
||||
} vs;
|
||||
struct {
|
||||
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
|
||||
|
@ -88,6 +88,8 @@ count_vs_user_sgprs(const struct radv_shader_info *info)
|
||||
count++;
|
||||
if (info->vs.needs_base_instance)
|
||||
count++;
|
||||
if (info->vs.dynamic_num_verts_per_prim)
|
||||
count++;
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -828,6 +830,9 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip
|
||||
if (previous_stage == MESA_SHADER_TESS_EVAL && key->dynamic_patch_control_points)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_num_patches);
|
||||
|
||||
if (previous_stage == MESA_SHADER_VERTEX && info->vs.dynamic_num_verts_per_prim)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->num_verts_per_prim);
|
||||
|
||||
/* Legacy GS force vrs is handled by GS copy shader. */
|
||||
if (info->force_vrs_per_vertex && info->is_ngg) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
|
||||
@ -974,6 +979,9 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip
|
||||
if (args->tes_num_patches.used)
|
||||
set_loc_shader(args, AC_UD_TES_NUM_PATCHES, &user_sgpr_idx, 1);
|
||||
|
||||
if (args->num_verts_per_prim.used)
|
||||
set_loc_shader(args, AC_UD_NUM_VERTS_PER_PRIM, &user_sgpr_idx, 1);
|
||||
|
||||
if (args->ac.force_vrs_rates.used)
|
||||
set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1);
|
||||
|
||||
|
@ -74,6 +74,9 @@ struct radv_shader_args {
|
||||
/* TES */
|
||||
struct ac_arg tes_num_patches;
|
||||
|
||||
/* NGG VS streamout */
|
||||
struct ac_arg num_verts_per_prim;
|
||||
|
||||
struct radv_userdata_locations user_sgprs_locs;
|
||||
unsigned num_user_sgprs;
|
||||
|
||||
|
@ -421,6 +421,13 @@ gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
|
||||
nir_foreach_shader_in_variable(var, nir)
|
||||
gather_info_input_decl_vs(nir, var->data.location - VERT_ATTRIB_GENERIC0, var->type,
|
||||
pipeline_key, info);
|
||||
|
||||
/* When the topology is unknown (with GPL), the number of vertices per primitive needs to be passed
|
||||
* through a user SGPR for NGG streamout with VS. Otherwise, the XFB offset is incorrectly
|
||||
* computed because using the maximum number of vertices can't work.
|
||||
*/
|
||||
info->vs.dynamic_num_verts_per_prim =
|
||||
pipeline_key->vs.topology == V_008958_DI_PT_NONE && info->is_ngg && nir->xfb_info;
|
||||
}
|
||||
|
||||
static void
|
||||
|
Loading…
Reference in New Issue
Block a user