mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-23 10:14:13 +08:00
ir3,tu: Add support for Fragment Shading Rate and plumb it into Turnip
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30905>
This commit is contained in:
parent
2038d363e7
commit
117379a77a
@ -2918,6 +2918,15 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
}
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_frag_shading_rate: {
|
||||
if (!ctx->frag_shading_rate) {
|
||||
ctx->so->reads_shading_rate = true;
|
||||
ctx->frag_shading_rate =
|
||||
create_sysval_input(ctx, SYSTEM_VALUE_FRAG_SHADING_RATE, 0x1);
|
||||
}
|
||||
dst[0] = ctx->frag_shading_rate;
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_base_workgroup_id:
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_CS(base_group_x) + i);
|
||||
@ -5025,6 +5034,9 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
case VARYING_SLOT_VIEWPORT:
|
||||
so->writes_viewport = true;
|
||||
break;
|
||||
case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
|
||||
so->writes_shading_rate = true;
|
||||
break;
|
||||
case VARYING_SLOT_PRIMITIVE_ID:
|
||||
case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:
|
||||
assert(ctx->so->type == MESA_SHADER_GEOMETRY);
|
||||
|
@ -84,6 +84,8 @@ struct ir3_context {
|
||||
/* Compute shader inputs: */
|
||||
struct ir3_instruction *local_invocation_id, *work_group_id;
|
||||
|
||||
struct ir3_instruction *frag_shading_rate;
|
||||
|
||||
/* mapping from nir_register to defining instruction: */
|
||||
struct hash_table *def_ht;
|
||||
|
||||
|
@ -764,6 +764,11 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
|
||||
NIR_PASS_V(s, ir3_nir_move_varying_inputs);
|
||||
NIR_PASS_V(s, nir_lower_fb_read);
|
||||
NIR_PASS_V(s, ir3_nir_lower_layer_id);
|
||||
NIR_PASS_V(s, ir3_nir_lower_frag_shading_rate);
|
||||
}
|
||||
|
||||
if (s->info.stage == MESA_SHADER_VERTEX || s->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
NIR_PASS_V(s, ir3_nir_lower_primitive_shading_rate);
|
||||
}
|
||||
|
||||
if (compiler->gen >= 6 && s->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
|
@ -30,6 +30,8 @@ bool ir3_nir_move_varying_inputs(nir_shader *shader);
|
||||
int ir3_nir_coord_offset(nir_def *ssa);
|
||||
bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
|
||||
bool ir3_nir_lower_layer_id(nir_shader *shader);
|
||||
bool ir3_nir_lower_frag_shading_rate(nir_shader *shader);
|
||||
bool ir3_nir_lower_primitive_shading_rate(nir_shader *shader);
|
||||
|
||||
void ir3_nir_lower_to_explicit_output(nir_shader *shader,
|
||||
struct ir3_shader_variant *v,
|
||||
|
96
src/freedreno/ir3/ir3_nir_lower_shading_rate.c
Normal file
96
src/freedreno/ir3/ir3_nir_lower_shading_rate.c
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright 2024 Igalia S.L.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "ir3_nir.h"
|
||||
|
||||
/* Translation table applied to the value a VS/GS stores to
 * gl_PrimitiveShadingRateEXT: the stored value is used as the index and the
 * table entry (the HW internal representation) is written instead.
 *
 * 32 entries, laid out eight per row; the trailing comment on each row gives
 * the index range it covers.
 */
static const uint32_t vk_to_hw_shading_rate_lut[] = {
   0,  4,  8,  11, 1,  5,  9,  11, /* [ 0.. 7] */
   2,  6,  10, 11, 11, 11, 11, 11, /* [ 8..15] */
   0,  1,  2,  11, 4,  5,  6,  11, /* [16..23] */
   8,  9,  10, 11, 11, 11, 11, 11, /* [24..31] */
};
|
||||
|
||||
/* Translation table applied to the value a FS reads from gl_ShadingRateEXT:
 * the value delivered by the hardware is used as the index and the table
 * entry is what the shader ends up seeing.
 *
 * 32 entries, laid out eight per row; the trailing comment on each row gives
 * the index range it covers.
 */
static const uint32_t hw_to_vk_shading_rate_lut[] = {
   0,  4,  8,  0,  1,  5,  9,  0,  /* [ 0.. 7] */
   2,  6,  10, 11, 11, 11, 11, 11, /* [ 8..15] */
   0,  1,  2,  0,  4,  5,  6,  0,  /* [16..23] */
   8,  9,  10, 11, 11, 11, 11, 11, /* [24..31] */
};
|
||||
|
||||
static nir_deref_instr *
|
||||
create_lut(nir_builder *b, const uint32_t *lut, uint32_t lut_size,
|
||||
const char *lut_name)
|
||||
{
|
||||
nir_variable *lut_var = nir_local_variable_create(
|
||||
b->impl, glsl_array_type(glsl_uint_type(), lut_size, 0), lut_name);
|
||||
nir_deref_instr *deref = nir_build_deref_var(b, lut_var);
|
||||
|
||||
for (uint32_t i = 0; i < lut_size; i++) {
|
||||
nir_deref_instr *element =
|
||||
nir_build_deref_array(b, deref, nir_imm_int(b, i));
|
||||
nir_build_store_deref(b, &element->def, nir_imm_int(b, lut[i]), 0x1);
|
||||
}
|
||||
|
||||
return deref;
|
||||
}
|
||||
|
||||
static bool
|
||||
nir_lower_frag_shading_rate(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
UNUSED void *cb_data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_load_frag_shading_rate)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
nir_deref_instr *lut = create_lut(b, hw_to_vk_shading_rate_lut,
|
||||
ARRAY_SIZE(hw_to_vk_shading_rate_lut),
|
||||
"hw_to_vk_shading_rate_lut");
|
||||
nir_deref_instr *result = nir_build_deref_array(b, lut, &intr->def);
|
||||
nir_def *r = nir_build_load_deref(b, 1, 32, &result->def, 0);
|
||||
|
||||
nir_def_rewrite_uses_after(&intr->def, r, r->parent_instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ir3_nir_lower_frag_shading_rate(nir_shader *shader)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
return nir_shader_intrinsics_pass(shader, nir_lower_frag_shading_rate,
|
||||
nir_metadata_control_flow, NULL);
|
||||
}
|
||||
|
||||
static bool
|
||||
nir_lower_primitive_shading_rate(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
UNUSED void *cb_data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
unsigned loc = nir_intrinsic_io_semantics(intr).location;
|
||||
if (loc != VARYING_SLOT_PRIMITIVE_SHADING_RATE)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
nir_deref_instr *lut = create_lut(b, vk_to_hw_shading_rate_lut,
|
||||
ARRAY_SIZE(vk_to_hw_shading_rate_lut),
|
||||
"vk_to_hw_shading_rate_lut");
|
||||
nir_deref_instr *result = nir_build_deref_array(b, lut, intr->src[0].ssa);
|
||||
nir_def *r = nir_build_load_deref(b, 1, 32, &result->def, 0);
|
||||
|
||||
nir_src_rewrite(&intr->src[0], r);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ir3_nir_lower_primitive_shading_rate(nir_shader *shader)
|
||||
{
|
||||
assert(shader->info.stage != MESA_SHADER_FRAGMENT);
|
||||
return nir_shader_intrinsics_pass(shader, nir_lower_primitive_shading_rate,
|
||||
nir_metadata_control_flow, NULL);
|
||||
}
|
@ -12,7 +12,7 @@ struct state {
|
||||
|
||||
struct primitive_map {
|
||||
/* +POSITION, +PSIZE, ... - see shader_io_get_unique_index */
|
||||
unsigned loc[12 + 32];
|
||||
unsigned loc[13 + 32];
|
||||
unsigned stride;
|
||||
} map;
|
||||
|
||||
@ -93,14 +93,15 @@ shader_io_get_unique_index(gl_varying_slot slot)
|
||||
case VARYING_SLOT_CLIP_VERTEX: return 9;
|
||||
case VARYING_SLOT_LAYER: return 10;
|
||||
case VARYING_SLOT_VIEWPORT: return 11;
|
||||
case VARYING_SLOT_PRIMITIVE_SHADING_RATE: return 12;
|
||||
case VARYING_SLOT_VAR0 ... VARYING_SLOT_VAR31: {
|
||||
struct state state = {};
|
||||
STATIC_ASSERT(ARRAY_SIZE(state.map.loc) - 1 ==
|
||||
(12 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
|
||||
(13 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
|
||||
struct ir3_shader_variant v = {};
|
||||
STATIC_ASSERT(ARRAY_SIZE(v.output_loc) - 1 ==
|
||||
(12 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
|
||||
return 12 + (slot - VARYING_SLOT_VAR0);
|
||||
(13 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
|
||||
return 13 + (slot - VARYING_SLOT_VAR0);
|
||||
}
|
||||
default:
|
||||
unreachable("illegal slot in get unique index\n");
|
||||
|
@ -948,6 +948,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
|
||||
case MESA_SHADER_VERTEX:
|
||||
dump_output(out, so, VARYING_SLOT_POS, "pos");
|
||||
dump_output(out, so, VARYING_SLOT_PSIZ, "psize");
|
||||
dump_output(out, so, VARYING_SLOT_PRIMITIVE_SHADING_RATE, "shading_rate");
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
dump_reg(out, "pos (ij_pixel)",
|
||||
|
@ -703,6 +703,7 @@ struct ir3_shader_variant {
|
||||
bool half : 1;
|
||||
} outputs[32 + 2]; /* +POSITION +PSIZE */
|
||||
bool writes_pos, writes_smask, writes_psize, writes_viewport, writes_stencilref;
|
||||
bool writes_shading_rate;
|
||||
|
||||
/* Size in dwords of all outputs for VS, size of entire patch for HS. */
|
||||
uint32_t output_size;
|
||||
@ -715,7 +716,7 @@ struct ir3_shader_variant {
|
||||
* offset, and in bytes for all other stages.
|
||||
* +POSITION, +PSIZE, ... - see shader_io_get_unique_index
|
||||
*/
|
||||
unsigned output_loc[12 + 32];
|
||||
unsigned output_loc[13 + 32];
|
||||
|
||||
/* attributes (VS) / varyings (FS):
|
||||
* Note that sysval's should come *after* normal inputs.
|
||||
@ -740,6 +741,7 @@ struct ir3_shader_variant {
|
||||
bool flat : 1;
|
||||
} inputs[32 + 2]; /* +POSITION +FACE */
|
||||
bool reads_primid;
|
||||
bool reads_shading_rate;
|
||||
|
||||
/* sum of input components (scalar). For frag shaders, it only counts
|
||||
* the varying inputs:
|
||||
|
@ -92,6 +92,7 @@ libfreedreno_ir3_files = files(
|
||||
'ir3_nir_lower_load_barycentric_at_sample.c',
|
||||
'ir3_nir_lower_load_barycentric_at_offset.c',
|
||||
'ir3_nir_lower_push_consts_to_preamble.c',
|
||||
'ir3_nir_lower_shading_rate.c',
|
||||
'ir3_nir_lower_io_offsets.c',
|
||||
'ir3_nir_lower_tess.c',
|
||||
'ir3_nir_lower_tex_prefetch.c',
|
||||
|
@ -902,8 +902,11 @@ tu6_emit_vpc(struct tu_cs *cs,
|
||||
ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
|
||||
uint32_t flags_regid = gs ?
|
||||
ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;
|
||||
const uint32_t shading_rate_regid =
|
||||
ir3_find_output_regid(last_shader, VARYING_SLOT_PRIMITIVE_SHADING_RATE);
|
||||
|
||||
uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff, view_loc = 0xff;
|
||||
uint32_t shading_rate_loc = 0xff;
|
||||
|
||||
if (layer_regid != regid(63, 0)) {
|
||||
layer_loc = linkage.max_loc;
|
||||
@ -915,6 +918,12 @@ tu6_emit_vpc(struct tu_cs *cs,
|
||||
ir3_link_add(&linkage, VARYING_SLOT_VIEWPORT, view_regid, 0x1, linkage.max_loc);
|
||||
}
|
||||
|
||||
if (shading_rate_regid != regid(63, 0)) {
|
||||
shading_rate_loc = linkage.max_loc;
|
||||
ir3_link_add(&linkage, VARYING_SLOT_PRIMITIVE_SHADING_RATE,
|
||||
shading_rate_regid, 0x1, linkage.max_loc);
|
||||
}
|
||||
|
||||
unsigned extra_pos = 0;
|
||||
|
||||
for (unsigned i = 0; i < last_shader->outputs_count; i++) {
|
||||
@ -1026,7 +1035,8 @@ tu6_emit_vpc(struct tu_cs *cs,
|
||||
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
|
||||
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
|
||||
COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
|
||||
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
|
||||
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask) |
|
||||
CONDREG(shading_rate_regid, A6XX_PC_VS_OUT_CNTL_SHADINGRATE));
|
||||
} else {
|
||||
tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
|
||||
}
|
||||
@ -1043,11 +1053,11 @@ tu6_emit_vpc(struct tu_cs *cs,
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1);
|
||||
tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) |
|
||||
A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc) |
|
||||
0xff0000);
|
||||
A6XX_VPC_VS_LAYER_CNTL_SHADINGRATELOC(shading_rate_loc));
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl_v2, 1);
|
||||
tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) |
|
||||
A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc) |
|
||||
0xff0000);
|
||||
A6XX_VPC_VS_LAYER_CNTL_SHADINGRATELOC(shading_rate_loc));
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1);
|
||||
tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) |
|
||||
@ -2293,6 +2303,8 @@ tu_emit_program_state(struct tu_cs *sub_cs,
|
||||
!last_shader->writes_viewport &&
|
||||
shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm &&
|
||||
dev->physical_device->info->a6xx.has_per_view_viewport;
|
||||
prog->writes_shading_rate = last_shader->writes_shading_rate;
|
||||
prog->reads_shading_rate = fs->reads_shading_rate;
|
||||
}
|
||||
|
||||
static const enum mesa_vk_dynamic_graphics_state tu_vertex_input_state[] = {
|
||||
|
@ -101,6 +101,8 @@ struct tu_program_state
|
||||
unsigned dynamic_descriptor_offsets[MAX_SETS];
|
||||
|
||||
bool per_view_viewport;
|
||||
bool writes_shading_rate;
|
||||
bool reads_shading_rate;
|
||||
};
|
||||
|
||||
struct tu_pipeline_executable {
|
||||
|
@ -1583,7 +1583,7 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
|
||||
{
|
||||
uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
|
||||
uint32_t ij_regid[IJ_COUNT];
|
||||
uint32_t smask_in_regid;
|
||||
uint32_t smask_in_regid, shading_rate_regid;
|
||||
|
||||
bool sample_shading = fs->per_samp | fs->key.sample_shading;
|
||||
bool enable_varyings = fs->total_in > 0;
|
||||
@ -1593,6 +1593,7 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
|
||||
face_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE);
|
||||
coord_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD);
|
||||
zwcoord_regid = VALIDREG(coord_regid) ? coord_regid + 2 : regid(63, 0);
|
||||
shading_rate_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_SHADING_RATE);
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
|
||||
ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
|
||||
|
||||
@ -1647,7 +1648,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
|
||||
.ij_linear_sample = ij_regid[IJ_LINEAR_SAMPLE],
|
||||
.xycoordregid = coord_regid,
|
||||
.zwcoordregid = zwcoord_regid),
|
||||
HLSQ_CONTROL_5_REG(CHIP, .dword = 0xfcfc), );
|
||||
HLSQ_CONTROL_5_REG(CHIP, .linelengthregid = 0xfc,
|
||||
.foveationqualityregid = shading_rate_regid), );
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
uint32_t sysval_regs = 0;
|
||||
@ -1660,7 +1662,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t sysval : { face_regid, samp_id_regid, smask_in_regid }) {
|
||||
for (uint32_t sysval : { face_regid, samp_id_regid, smask_in_regid,
|
||||
shading_rate_regid }) {
|
||||
if (VALIDREG(sysval))
|
||||
sysval_regs += 1;
|
||||
}
|
||||
@ -1719,7 +1722,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
|
||||
CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
|
||||
CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_RENDER_CONTROL1_CENTERRHW) |
|
||||
COND(fs->post_depth_coverage, A6XX_RB_RENDER_CONTROL1_POSTDEPTHCOVERAGE) |
|
||||
COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));
|
||||
COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS) |
|
||||
CONDREG(shading_rate_regid, A6XX_RB_RENDER_CONTROL1_FOVEATION));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CNTL, 1);
|
||||
tu_cs_emit(cs, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));
|
||||
|
Loading…
Reference in New Issue
Block a user