mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-12-12 03:34:34 +08:00
radeonsi: emulate VGT_ESGS_RING_ITEMSIZE in the shader on gfx9-11
The hardware uses the register to premultiply GS vertex indices in input VGPRs.

This changes the behavior as follows:
- VGT_ESGS_RING_ITEMSIZE is always 1 on gfx9-11, set in the preamble.
- The value is passed to the shader via current_gs_state (vs_state_bits).
- The shader does the multiplication.

The reason is that VGT_ESGS_RING_ITEMSIZE will be removed in the future.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21403>
This commit is contained in:
parent
fb819fdb13
commit
ddded6fbb5
@ -246,7 +246,10 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
||||
unreachable("no nir_load_lshs_vertex_stride_amd");
|
||||
break;
|
||||
case nir_intrinsic_load_esgs_vertex_stride_amd:
|
||||
replacement = nir_imm_int(b, 1);
|
||||
assert(sel->screen->info.gfx_level >= GFX9);
|
||||
replacement = shader->is_monolithic ?
|
||||
nir_imm_int(b, key->ge.part.gs.es->info.esgs_vertex_stride / 4) :
|
||||
GET_FIELD_NIR(GS_STATE_ESGS_VERTEX_STRIDE);
|
||||
break;
|
||||
case nir_intrinsic_load_tcs_num_patches_amd: {
|
||||
nir_ssa_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 6);
|
||||
|
@ -266,6 +266,8 @@ enum
|
||||
* in the shader via vs_state_bits in legacy GS, the GS copy shader, and any NGG shader.
|
||||
*/
|
||||
/* bit gap */
|
||||
/* ES->GS vertex stride field of the GS state bits. The value stored here is
 * esgs_vertex_stride / 4; shaders read it back with
 * GET_FIELD_NIR(GS_STATE_ESGS_VERTEX_STRIDE) to implement
 * nir_intrinsic_load_esgs_vertex_stride_amd.
 */
#define GS_STATE_ESGS_VERTEX_STRIDE__SHIFT 10
|
||||
#define GS_STATE_ESGS_VERTEX_STRIDE__MASK 0xff /* unshifted 8-bit mask; max value is 32 * 4 + 1 = 129 */
|
||||
/* Small prim filter precision = num_samples / quant_mode, which can only be equal to 1/2^n
|
||||
* where n is between 4 and 12. Knowing that, we only need to store 4 bits of the FP32 exponent.
|
||||
* Set it like this: value = (fui(num_samples / quant_mode) >> 23) & 0xf;
|
||||
@ -917,7 +919,7 @@ struct si_shader {
|
||||
unsigned vgt_primitiveid_en;
|
||||
unsigned vgt_gs_onchip_cntl;
|
||||
unsigned vgt_gs_instance_cnt;
|
||||
unsigned vgt_esgs_ring_itemsize;
|
||||
unsigned esgs_vertex_stride;
|
||||
unsigned spi_vs_out_config;
|
||||
unsigned spi_shader_idx_format;
|
||||
unsigned spi_shader_pos_format;
|
||||
|
@ -791,8 +791,8 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
|
||||
*/
|
||||
if (sscreen->info.gfx_level >= GFX9)
|
||||
info->esgs_vertex_stride += 4;
|
||||
|
||||
assert(((info->esgs_vertex_stride / 4) & C_028AAC_ITEMSIZE) == 0);
|
||||
else
|
||||
assert(((info->esgs_vertex_stride / 4) & C_028AAC_ITEMSIZE) == 0);
|
||||
|
||||
info->tcs_vgpr_only_inputs = ~info->base.tess.tcs_cross_invocation_inputs_read &
|
||||
~info->base.inputs_read_indirectly &
|
||||
|
@ -65,8 +65,8 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args->bindless_samplers_and_images,
|
||||
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
|
||||
if (ctx->screen->use_ngg) {
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args->small_prim_cull_info, 8 + GFX9_SGPR_SMALL_PRIM_CULL_INFO);
|
||||
if (ctx->screen->info.gfx_level >= GFX11)
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args->gs_attr_address, 8 + GFX9_SGPR_ATTRIBUTE_RING_ADDR);
|
||||
|
@ -5804,6 +5804,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
||||
si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
|
||||
S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
|
||||
|
||||
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1);
|
||||
si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
|
||||
|
||||
if (sctx->gfx_level < GFX11) {
|
||||
|
@ -1214,10 +1214,11 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
|
||||
if (HAS_GS) {
|
||||
radeon_set_sh_reg(vs_base + SI_SGPR_VS_STATE_BITS * 4, vs_state);
|
||||
|
||||
/* NGG always uses the state bits. Legacy GS uses the state bits only for the emulation
|
||||
* of GS pipeline statistics on gfx10.x.
|
||||
/* GS always uses the state bits for emulating VGT_ESGS_RING_ITEMSIZE on Gfx9
|
||||
* (via nir_load_esgs_vertex_stride_amd) and for emulating GS pipeline statistics
|
||||
* on gfx10.x. NGG GS also has lots of states in there.
|
||||
*/
|
||||
if (NGG || (GFX_VERSION >= GFX10 && GFX_VERSION <= GFX10_3))
|
||||
if (GFX_VERSION >= GFX9)
|
||||
radeon_set_sh_reg(gs_base + SI_SGPR_VS_STATE_BITS * 4, gs_state);
|
||||
|
||||
/* The GS copy shader (for legacy GS) always uses the state bits. */
|
||||
|
@ -934,6 +934,11 @@ static void si_emit_shader_gs(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader *shader = sctx->queued.named.gs;
|
||||
|
||||
if (sctx->gfx_level >= GFX9) {
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_ESGS_VERTEX_STRIDE,
|
||||
shader->key.ge.part.gs.es->info.esgs_vertex_stride / 4);
|
||||
}
|
||||
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
|
||||
/* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
|
||||
@ -971,10 +976,6 @@ static void si_emit_shader_gs(struct si_context *sctx)
|
||||
radeon_opt_set_context_reg(sctx, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
|
||||
SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
|
||||
shader->gs.vgt_gs_max_prims_per_subgroup);
|
||||
/* R_028AAC_VGT_ESGS_RING_ITEMSIZE */
|
||||
radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
|
||||
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
|
||||
shader->gs.vgt_esgs_ring_itemsize);
|
||||
|
||||
if (shader->key.ge.part.gs.es->stage == MESA_SHADER_TESS_EVAL)
|
||||
radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
|
||||
@ -1175,6 +1176,9 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader)
|
||||
/* Common tail code for NGG primitive shaders. */
|
||||
static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader *shader)
|
||||
{
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_ESGS_VERTEX_STRIDE,
|
||||
shader->ngg.esgs_vertex_stride);
|
||||
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
radeon_opt_set_context_reg(sctx, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
|
||||
SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
|
||||
@ -1189,9 +1193,6 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
|
||||
}
|
||||
radeon_opt_set_context_reg(sctx, R_028B90_VGT_GS_INSTANCE_CNT, SI_TRACKED_VGT_GS_INSTANCE_CNT,
|
||||
shader->ngg.vgt_gs_instance_cnt);
|
||||
radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
|
||||
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
|
||||
shader->ngg.vgt_esgs_ring_itemsize);
|
||||
radeon_opt_set_context_reg(sctx, R_0286C4_SPI_VS_OUT_CONFIG, SI_TRACKED_SPI_VS_OUT_CONFIG,
|
||||
shader->ngg.spi_vs_out_config);
|
||||
radeon_opt_set_context_reg2(
|
||||
@ -1441,10 +1442,10 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
||||
gs_sel->info.writes_primid);
|
||||
|
||||
if (gs_stage == MESA_SHADER_GEOMETRY) {
|
||||
shader->ngg.vgt_esgs_ring_itemsize = es_sel->info.esgs_vertex_stride / 4;
|
||||
shader->ngg.esgs_vertex_stride = es_sel->info.esgs_vertex_stride / 4;
|
||||
shader->ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out;
|
||||
} else {
|
||||
shader->ngg.vgt_esgs_ring_itemsize = 1;
|
||||
shader->ngg.esgs_vertex_stride = 1;
|
||||
}
|
||||
|
||||
if (es_stage == MESA_SHADER_TESS_EVAL)
|
||||
|
Loading…
Reference in New Issue
Block a user