ir3,tu: Add support for Fragment Shading Rate and plumb it into Turnip

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30905>
This commit is contained in:
Danylo Piliaiev 2024-11-01 13:46:12 +01:00 committed by Marge Bot
parent 2038d363e7
commit 117379a77a
12 changed files with 152 additions and 12 deletions

View File

@ -2918,6 +2918,15 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
}
}
break;
case nir_intrinsic_load_frag_shading_rate: {
if (!ctx->frag_shading_rate) {
ctx->so->reads_shading_rate = true;
ctx->frag_shading_rate =
create_sysval_input(ctx, SYSTEM_VALUE_FRAG_SHADING_RATE, 0x1);
}
dst[0] = ctx->frag_shading_rate;
break;
}
case nir_intrinsic_load_base_workgroup_id:
for (int i = 0; i < dest_components; i++) {
dst[i] = create_driver_param(ctx, IR3_DP_CS(base_group_x) + i);
@ -5025,6 +5034,9 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr)
case VARYING_SLOT_VIEWPORT:
so->writes_viewport = true;
break;
case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
so->writes_shading_rate = true;
break;
case VARYING_SLOT_PRIMITIVE_ID:
case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:
assert(ctx->so->type == MESA_SHADER_GEOMETRY);

View File

@ -84,6 +84,8 @@ struct ir3_context {
/* Compute shader inputs: */
struct ir3_instruction *local_invocation_id, *work_group_id;
struct ir3_instruction *frag_shading_rate;
/* mapping from nir_register to defining instruction: */
struct hash_table *def_ht;

View File

@ -764,6 +764,11 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
NIR_PASS_V(s, ir3_nir_move_varying_inputs);
NIR_PASS_V(s, nir_lower_fb_read);
NIR_PASS_V(s, ir3_nir_lower_layer_id);
NIR_PASS_V(s, ir3_nir_lower_frag_shading_rate);
}
if (s->info.stage == MESA_SHADER_VERTEX || s->info.stage == MESA_SHADER_GEOMETRY) {
NIR_PASS_V(s, ir3_nir_lower_primitive_shading_rate);
}
if (compiler->gen >= 6 && s->info.stage == MESA_SHADER_FRAGMENT &&

View File

@ -30,6 +30,8 @@ bool ir3_nir_move_varying_inputs(nir_shader *shader);
int ir3_nir_coord_offset(nir_def *ssa);
bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
bool ir3_nir_lower_layer_id(nir_shader *shader);
bool ir3_nir_lower_frag_shading_rate(nir_shader *shader);
bool ir3_nir_lower_primitive_shading_rate(nir_shader *shader);
void ir3_nir_lower_to_explicit_output(nir_shader *shader,
struct ir3_shader_variant *v,

View File

@ -0,0 +1,96 @@
/*
* Copyright 2024 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir_builder.h"
#include "ir3_nir.h"
/* Translation table applied to the values a VS/GS writes to
 * gl_PrimitiveShadingRateEXT, producing the HW internal representation.
 *
 * The table has 32 entries and is indexed directly by the written value.
 * Reading the low four index bits as two 2-bit fields: entries 0..15 swap
 * the two fields, entries 16..31 leave them in place, and every index with
 * a field equal to 3 yields 11.
 * NOTE(review): what index bit 4 selects is not visible here - confirm.
 */
static const uint32_t vk_to_hw_shading_rate_lut[] = {
   /*  0.. 7 */ 0,  4,  8,  11, 1,  5,  9,  11,
   /*  8..15 */ 2,  6,  10, 11, 11, 11, 11, 11,
   /* 16..23 */ 0,  1,  2,  11, 4,  5,  6,  11,
   /* 24..31 */ 8,  9,  10, 11, 11, 11, 11, 11,
};
/* Translation table applied to the HW value backing gl_ShadingRateEXT in
 * the FS, producing the value the shader is expected to observe.
 *
 * The table has 32 entries and is indexed directly by the HW value.
 * Reading the low four index bits as two 2-bit fields (each 0..2): entries
 * 0..15 swap the two fields and entries 16..31 leave them in place.
 * Indices whose low four bits are 3 or 7 yield 0, and those whose low four
 * bits are 11..15 yield 11.
 * NOTE(review): what index bit 4 selects is not visible here - confirm.
 */
static const uint32_t hw_to_vk_shading_rate_lut[] = {
   /*  0.. 7 */ 0,  4,  8,  0,  1,  5,  9,  0,
   /*  8..15 */ 2,  6,  10, 11, 11, 11, 11, 11,
   /* 16..23 */ 0,  1,  2,  0,  4,  5,  6,  0,
   /* 24..31 */ 8,  9,  10, 11, 11, 11, 11, 11,
};
/* Emit a function-local uint array holding a copy of a constant table.
 *
 * A new local variable named `lut_name` with `lut_size` uint elements is
 * created in b->impl, and one store per element is emitted at the
 * builder's current cursor, writing lut[i] into element i.
 *
 * Returns the variable deref of the new array, so the caller can index it
 * with a dynamic value.
 */
static nir_deref_instr *
create_lut(nir_builder *b, const uint32_t *lut, uint32_t lut_size,
           const char *lut_name)
{
   nir_variable *table = nir_local_variable_create(
      b->impl, glsl_array_type(glsl_uint_type(), lut_size, 0), lut_name);
   nir_deref_instr *table_deref = nir_build_deref_var(b, table);

   for (uint32_t idx = 0; idx != lut_size; ++idx) {
      /* Emission order matters: index constant, element deref, value
       * constant, then the store itself.
       */
      nir_def *index = nir_imm_int(b, idx);
      nir_deref_instr *slot = nir_build_deref_array(b, table_deref, index);
      nir_def *value = nir_imm_int(b, lut[idx]);

      nir_build_store_deref(b, &slot->def, value, 0x1);
   }

   return table_deref;
}
/* Per-intrinsic callback: remap the result of load_frag_shading_rate
 * through hw_to_vk_shading_rate_lut.
 *
 * Directly after the load, a local copy of the table is built, indexed by
 * the loaded value, and every use of the load that comes after the lookup
 * is redirected to the looked-up value. The table index itself keeps
 * reading the original load.
 *
 * Returns true when the intrinsic was rewritten, false when it is any
 * other intrinsic.
 */
static bool
nir_lower_frag_shading_rate(nir_builder *b, nir_intrinsic_instr *intr,
                            UNUSED void *cb_data)
{
   const bool is_rate_load =
      intr->intrinsic == nir_intrinsic_load_frag_shading_rate;
   if (!is_rate_load)
      return false;

   nir_def *hw_rate = &intr->def;

   b->cursor = nir_after_instr(&intr->instr);

   nir_deref_instr *table = create_lut(b, hw_to_vk_shading_rate_lut,
                                       ARRAY_SIZE(hw_to_vk_shading_rate_lut),
                                       "hw_to_vk_shading_rate_lut");
   nir_deref_instr *entry = nir_build_deref_array(b, table, hw_rate);
   nir_def *vk_rate = nir_build_load_deref(b, 1, 32, &entry->def, 0);

   /* Only uses after the table load are replaced, so the array index
    * built above still refers to the raw value.
    */
   nir_def_rewrite_uses_after(hw_rate, vk_rate, vk_rate->parent_instr);

   return true;
}
/* Run the fragment-shading-rate remapping over every intrinsic of a
 * fragment shader.
 *
 * Asserts that `shader` is a fragment shader. Returns whatever
 * nir_shader_intrinsics_pass() reports for the per-intrinsic callback,
 * with nir_metadata_control_flow passed as the metadata argument.
 */
bool
ir3_nir_lower_frag_shading_rate(nir_shader *shader)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   bool progress = nir_shader_intrinsics_pass(
      shader, nir_lower_frag_shading_rate, nir_metadata_control_flow, NULL);

   return progress;
}
/* Per-intrinsic callback: remap the value stored to
 * VARYING_SLOT_PRIMITIVE_SHADING_RATE through vk_to_hw_shading_rate_lut.
 *
 * Just before the matching store_output, a local copy of the table is
 * built and indexed by the value being stored; the store's first source
 * is then replaced with the looked-up value.
 *
 * Returns true when the store was rewritten, false for any other
 * intrinsic or any other output location.
 */
static bool
nir_lower_primitive_shading_rate(nir_builder *b, nir_intrinsic_instr *intr,
                                 UNUSED void *cb_data)
{
   /* The IO semantics are only queried once we know this is a
    * store_output.
    */
   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   const unsigned slot = nir_intrinsic_io_semantics(intr).location;
   if (slot != VARYING_SLOT_PRIMITIVE_SHADING_RATE)
      return false;

   b->cursor = nir_before_instr(&intr->instr);

   nir_deref_instr *table = create_lut(b, vk_to_hw_shading_rate_lut,
                                       ARRAY_SIZE(vk_to_hw_shading_rate_lut),
                                       "vk_to_hw_shading_rate_lut");
   nir_deref_instr *entry =
      nir_build_deref_array(b, table, intr->src[0].ssa);
   nir_def *hw_rate = nir_build_load_deref(b, 1, 32, &entry->def, 0);

   nir_src_rewrite(&intr->src[0], hw_rate);

   return true;
}
/* Run the primitive-shading-rate remapping over every intrinsic of a
 * non-fragment shader.
 *
 * Asserts that `shader` is not a fragment shader. Returns whatever
 * nir_shader_intrinsics_pass() reports for the per-intrinsic callback,
 * with nir_metadata_control_flow passed as the metadata argument.
 */
bool
ir3_nir_lower_primitive_shading_rate(nir_shader *shader)
{
   assert(shader->info.stage != MESA_SHADER_FRAGMENT);

   bool progress = nir_shader_intrinsics_pass(
      shader, nir_lower_primitive_shading_rate, nir_metadata_control_flow,
      NULL);

   return progress;
}

View File

@ -12,7 +12,7 @@ struct state {
struct primitive_map {
/* +POSITION, +PSIZE, ... - see shader_io_get_unique_index */
unsigned loc[12 + 32];
unsigned loc[13 + 32];
unsigned stride;
} map;
@ -93,14 +93,15 @@ shader_io_get_unique_index(gl_varying_slot slot)
case VARYING_SLOT_CLIP_VERTEX: return 9;
case VARYING_SLOT_LAYER: return 10;
case VARYING_SLOT_VIEWPORT: return 11;
case VARYING_SLOT_PRIMITIVE_SHADING_RATE: return 12;
case VARYING_SLOT_VAR0 ... VARYING_SLOT_VAR31: {
struct state state = {};
STATIC_ASSERT(ARRAY_SIZE(state.map.loc) - 1 ==
(12 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
(13 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
struct ir3_shader_variant v = {};
STATIC_ASSERT(ARRAY_SIZE(v.output_loc) - 1 ==
(12 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
return 12 + (slot - VARYING_SLOT_VAR0);
(13 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
return 13 + (slot - VARYING_SLOT_VAR0);
}
default:
unreachable("illegal slot in get unique index\n");

View File

@ -948,6 +948,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
case MESA_SHADER_VERTEX:
dump_output(out, so, VARYING_SLOT_POS, "pos");
dump_output(out, so, VARYING_SLOT_PSIZ, "psize");
dump_output(out, so, VARYING_SLOT_PRIMITIVE_SHADING_RATE, "shading_rate");
break;
case MESA_SHADER_FRAGMENT:
dump_reg(out, "pos (ij_pixel)",

View File

@ -703,6 +703,7 @@ struct ir3_shader_variant {
bool half : 1;
} outputs[32 + 2]; /* +POSITION +PSIZE */
bool writes_pos, writes_smask, writes_psize, writes_viewport, writes_stencilref;
bool writes_shading_rate;
/* Size in dwords of all outputs for VS, size of entire patch for HS. */
uint32_t output_size;
@ -715,7 +716,7 @@ struct ir3_shader_variant {
* offset, and in bytes for all other stages.
* +POSITION, +PSIZE, ... - see shader_io_get_unique_index
*/
unsigned output_loc[12 + 32];
unsigned output_loc[13 + 32];
/* attributes (VS) / varyings (FS):
* Note that sysvals should come *after* normal inputs.
@ -740,6 +741,7 @@ struct ir3_shader_variant {
bool flat : 1;
} inputs[32 + 2]; /* +POSITION +FACE */
bool reads_primid;
bool reads_shading_rate;
/* sum of input components (scalar). For frag shaders, it only counts
* the varying inputs:

View File

@ -92,6 +92,7 @@ libfreedreno_ir3_files = files(
'ir3_nir_lower_load_barycentric_at_sample.c',
'ir3_nir_lower_load_barycentric_at_offset.c',
'ir3_nir_lower_push_consts_to_preamble.c',
'ir3_nir_lower_shading_rate.c',
'ir3_nir_lower_io_offsets.c',
'ir3_nir_lower_tess.c',
'ir3_nir_lower_tex_prefetch.c',

View File

@ -902,8 +902,11 @@ tu6_emit_vpc(struct tu_cs *cs,
ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
uint32_t flags_regid = gs ?
ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;
const uint32_t shading_rate_regid =
ir3_find_output_regid(last_shader, VARYING_SLOT_PRIMITIVE_SHADING_RATE);
uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff, view_loc = 0xff;
uint32_t shading_rate_loc = 0xff;
if (layer_regid != regid(63, 0)) {
layer_loc = linkage.max_loc;
@ -915,6 +918,12 @@ tu6_emit_vpc(struct tu_cs *cs,
ir3_link_add(&linkage, VARYING_SLOT_VIEWPORT, view_regid, 0x1, linkage.max_loc);
}
if (shading_rate_regid != regid(63, 0)) {
shading_rate_loc = linkage.max_loc;
ir3_link_add(&linkage, VARYING_SLOT_PRIMITIVE_SHADING_RATE,
shading_rate_regid, 0x1, linkage.max_loc);
}
unsigned extra_pos = 0;
for (unsigned i = 0; i < last_shader->outputs_count; i++) {
@ -1026,7 +1035,8 @@ tu6_emit_vpc(struct tu_cs *cs,
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask) |
CONDREG(shading_rate_regid, A6XX_PC_VS_OUT_CNTL_SHADINGRATE));
} else {
tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
}
@ -1043,11 +1053,11 @@ tu6_emit_vpc(struct tu_cs *cs,
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1);
tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) |
A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc) |
0xff0000);
A6XX_VPC_VS_LAYER_CNTL_SHADINGRATELOC(shading_rate_loc));
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl_v2, 1);
tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) |
A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc) |
0xff0000);
A6XX_VPC_VS_LAYER_CNTL_SHADINGRATELOC(shading_rate_loc));
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1);
tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) |
@ -2293,6 +2303,8 @@ tu_emit_program_state(struct tu_cs *sub_cs,
!last_shader->writes_viewport &&
shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm &&
dev->physical_device->info->a6xx.has_per_view_viewport;
prog->writes_shading_rate = last_shader->writes_shading_rate;
prog->reads_shading_rate = fs->reads_shading_rate;
}
static const enum mesa_vk_dynamic_graphics_state tu_vertex_input_state[] = {

View File

@ -101,6 +101,8 @@ struct tu_program_state
unsigned dynamic_descriptor_offsets[MAX_SETS];
bool per_view_viewport;
bool writes_shading_rate;
bool reads_shading_rate;
};
struct tu_pipeline_executable {

View File

@ -1583,7 +1583,7 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
{
uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
uint32_t ij_regid[IJ_COUNT];
uint32_t smask_in_regid;
uint32_t smask_in_regid, shading_rate_regid;
bool sample_shading = fs->per_samp | fs->key.sample_shading;
bool enable_varyings = fs->total_in > 0;
@ -1593,6 +1593,7 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
face_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE);
coord_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD);
zwcoord_regid = VALIDREG(coord_regid) ? coord_regid + 2 : regid(63, 0);
shading_rate_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_SHADING_RATE);
for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
@ -1647,7 +1648,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
.ij_linear_sample = ij_regid[IJ_LINEAR_SAMPLE],
.xycoordregid = coord_regid,
.zwcoordregid = zwcoord_regid),
HLSQ_CONTROL_5_REG(CHIP, .dword = 0xfcfc), );
HLSQ_CONTROL_5_REG(CHIP, .linelengthregid = 0xfc,
.foveationqualityregid = shading_rate_regid), );
if (CHIP >= A7XX) {
uint32_t sysval_regs = 0;
@ -1660,7 +1662,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
}
}
for (uint32_t sysval : { face_regid, samp_id_regid, smask_in_regid }) {
for (uint32_t sysval : { face_regid, samp_id_regid, smask_in_regid,
shading_rate_regid }) {
if (VALIDREG(sysval))
sysval_regs += 1;
}
@ -1719,7 +1722,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_RENDER_CONTROL1_CENTERRHW) |
COND(fs->post_depth_coverage, A6XX_RB_RENDER_CONTROL1_POSTDEPTHCOVERAGE) |
COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));
COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS) |
CONDREG(shading_rate_regid, A6XX_RB_RENDER_CONTROL1_FOVEATION));
tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CNTL, 1);
tu_cs_emit(cs, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));