diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index b4e90fbdd65..f8fb634e5b7 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2918,6 +2918,15 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) } } break; + case nir_intrinsic_load_frag_shading_rate: { /* the sysval input is created on the first load and cached in ctx->frag_shading_rate, so later loads reuse the same instruction */ + if (!ctx->frag_shading_rate) { + ctx->so->reads_shading_rate = true; + ctx->frag_shading_rate = + create_sysval_input(ctx, SYSTEM_VALUE_FRAG_SHADING_RATE, 0x1); + } + dst[0] = ctx->frag_shading_rate; + break; + } case nir_intrinsic_load_base_workgroup_id: for (int i = 0; i < dest_components; i++) { dst[i] = create_driver_param(ctx, IR3_DP_CS(base_group_x) + i); @@ -5025,6 +5034,9 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr) case VARYING_SLOT_VIEWPORT: so->writes_viewport = true; break; + case VARYING_SLOT_PRIMITIVE_SHADING_RATE: + so->writes_shading_rate = true; + break; case VARYING_SLOT_PRIMITIVE_ID: case VARYING_SLOT_GS_VERTEX_FLAGS_IR3: assert(ctx->so->type == MESA_SHADER_GEOMETRY); diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index 6ef3b4ff5db..b1d32a85dec 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -84,6 +84,8 @@ struct ir3_context { /* Compute shader inputs: */ struct ir3_instruction *local_invocation_id, *work_group_id; + struct ir3_instruction *frag_shading_rate; /* cached SYSTEM_VALUE_FRAG_SHADING_RATE input, created lazily by emit_intrinsic() */ + /* mapping from nir_register to defining instruction: */ struct hash_table *def_ht; diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index f14872343bd..0c2b51d5b77 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -764,6 +764,11 @@ ir3_nir_post_finalize(struct ir3_shader *shader) NIR_PASS_V(s, ir3_nir_move_varying_inputs); NIR_PASS_V(s, nir_lower_fb_read); NIR_PASS_V(s, ir3_nir_lower_layer_id); + NIR_PASS_V(s, ir3_nir_lower_frag_shading_rate); + } + + if (s->info.stage == MESA_SHADER_VERTEX || 
s->info.stage == MESA_SHADER_GEOMETRY) { + NIR_PASS_V(s, ir3_nir_lower_primitive_shading_rate); } if (compiler->gen >= 6 && s->info.stage == MESA_SHADER_FRAGMENT && diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 7f0fe6db1d6..ce4ca922e24 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -30,6 +30,8 @@ bool ir3_nir_move_varying_inputs(nir_shader *shader); int ir3_nir_coord_offset(nir_def *ssa); bool ir3_nir_lower_tex_prefetch(nir_shader *shader); bool ir3_nir_lower_layer_id(nir_shader *shader); +bool ir3_nir_lower_frag_shading_rate(nir_shader *shader); +bool ir3_nir_lower_primitive_shading_rate(nir_shader *shader); void ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v, diff --git a/src/freedreno/ir3/ir3_nir_lower_shading_rate.c b/src/freedreno/ir3/ir3_nir_lower_shading_rate.c new file mode 100644 index 00000000000..33b20eaa5c2 --- /dev/null +++ b/src/freedreno/ir3/ir3_nir_lower_shading_rate.c @@ -0,0 +1,96 @@ +/* + * Copyright 2024 Igalia S.L. + * SPDX-License-Identifier: MIT + */ + +#include "compiler/nir/nir_builder.h" +#include "ir3_nir.h" + +/* Values written in VS/GS to gl_PrimitiveShadingRateEXT have to + * be translated into HW internal representation. The table has 32 entries and is indexed directly by the value being stored (see nir_lower_primitive_shading_rate). + */ +static const uint32_t vk_to_hw_shading_rate_lut[] = { + 0, 4, 8, 11, 1, 5, 9, 11, 2, 6, 10, 11, 11, 11, 11, 11, + 0, 1, 2, 11, 4, 5, 6, 11, 8, 9, 10, 11, 11, 11, 11, 11}; + +/* Values read from gl_ShadingRateEXT in FS have to be translated from + * HW representation. The table has 32 entries and is indexed directly by the raw load_frag_shading_rate result (see nir_lower_frag_shading_rate). 
+ */ +static const uint32_t hw_to_vk_shading_rate_lut[] = { + 0, 4, 8, 0, 1, 5, 9, 0, 2, 6, 10, 11, 11, 11, 11, 11, + 0, 1, 2, 0, 4, 5, 6, 0, 8, 9, 10, 11, 11, 11, 11, 11}; +/* Emit a function-local uint[lut_size] variable named lut_name, store lut[i] into each element, and return the variable deref. */ +static nir_deref_instr * +create_lut(nir_builder *b, const uint32_t *lut, uint32_t lut_size, + const char *lut_name) +{ + nir_variable *lut_var = nir_local_variable_create( + b->impl, glsl_array_type(glsl_uint_type(), lut_size, 0), lut_name); + nir_deref_instr *deref = nir_build_deref_var(b, lut_var); + + for (uint32_t i = 0; i < lut_size; i++) { + nir_deref_instr *element = + nir_build_deref_array(b, deref, nir_imm_int(b, i)); + nir_build_store_deref(b, &element->def, nir_imm_int(b, lut[i]), 0x1); + } + + return deref; +} +/* Per-intrinsic callback: translate each load_frag_shading_rate result through hw_to_vk_shading_rate_lut; only uses after the LUT load are rewritten, so the LUT index still reads the raw value. Returns true when the intrinsic was lowered. */ +static bool +nir_lower_frag_shading_rate(nir_builder *b, nir_intrinsic_instr *intr, + UNUSED void *cb_data) +{ + if (intr->intrinsic != nir_intrinsic_load_frag_shading_rate) + return false; + + b->cursor = nir_after_instr(&intr->instr); + + nir_deref_instr *lut = create_lut(b, hw_to_vk_shading_rate_lut, + ARRAY_SIZE(hw_to_vk_shading_rate_lut), + "hw_to_vk_shading_rate_lut"); + nir_deref_instr *result = nir_build_deref_array(b, lut, &intr->def); + nir_def *r = nir_build_load_deref(b, 1, 32, &result->def, 0); + + nir_def_rewrite_uses_after(&intr->def, r, r->parent_instr); + return true; +} +/* Apply nir_lower_frag_shading_rate to a fragment shader (asserted); returns the result of nir_shader_intrinsics_pass(). 
*/ +bool +ir3_nir_lower_frag_shading_rate(nir_shader *shader) +{ + assert(shader->info.stage == MESA_SHADER_FRAGMENT); + return nir_shader_intrinsics_pass(shader, nir_lower_frag_shading_rate, + nir_metadata_control_flow, NULL); +} +/* Per-intrinsic callback: for store_output to VARYING_SLOT_PRIMITIVE_SHADING_RATE, replace the stored value with vk_to_hw_shading_rate_lut[value], emitted just before the store. Returns true when the store was rewritten. */ +static bool +nir_lower_primitive_shading_rate(nir_builder *b, nir_intrinsic_instr *intr, + UNUSED void *cb_data) +{ + if (intr->intrinsic != nir_intrinsic_store_output) + return false; + + unsigned loc = nir_intrinsic_io_semantics(intr).location; + if (loc != VARYING_SLOT_PRIMITIVE_SHADING_RATE) + return false; + + b->cursor = nir_before_instr(&intr->instr); + + nir_deref_instr *lut = create_lut(b, vk_to_hw_shading_rate_lut, 
+ ARRAY_SIZE(vk_to_hw_shading_rate_lut), + "vk_to_hw_shading_rate_lut"); + nir_deref_instr *result = nir_build_deref_array(b, lut, intr->src[0].ssa); + nir_def *r = nir_build_load_deref(b, 1, 32, &result->def, 0); + + nir_src_rewrite(&intr->src[0], r); + return true; +} +/* Apply nir_lower_primitive_shading_rate to a non-fragment shader (asserted); returns the result of nir_shader_intrinsics_pass(). */ +bool +ir3_nir_lower_primitive_shading_rate(nir_shader *shader) +{ + assert(shader->info.stage != MESA_SHADER_FRAGMENT); + return nir_shader_intrinsics_pass(shader, nir_lower_primitive_shading_rate, + nir_metadata_control_flow, NULL); +} diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c index e8d2f1a12eb..8f17bd3f6f5 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tess.c +++ b/src/freedreno/ir3/ir3_nir_lower_tess.c @@ -12,7 +12,7 @@ struct state { struct primitive_map { /* +POSITION, +PSIZE, ... - see shader_io_get_unique_index */ - unsigned loc[12 + 32]; + unsigned loc[13 + 32]; unsigned stride; } map; @@ -93,14 +93,15 @@ shader_io_get_unique_index(gl_varying_slot slot) case VARYING_SLOT_CLIP_VERTEX: return 9; case VARYING_SLOT_LAYER: return 10; case VARYING_SLOT_VIEWPORT: return 11; + case VARYING_SLOT_PRIMITIVE_SHADING_RATE: return 12; /* new fixed index; VAR0..VAR31 now start at 13 */ case VARYING_SLOT_VAR0 ... 
VARYING_SLOT_VAR31: { struct state state = {}; STATIC_ASSERT(ARRAY_SIZE(state.map.loc) - 1 == - (12 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0)); + (13 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0)); struct ir3_shader_variant v = {}; STATIC_ASSERT(ARRAY_SIZE(v.output_loc) - 1 == - (12 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0)); - return 12 + (slot - VARYING_SLOT_VAR0); + (13 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0)); + return 13 + (slot - VARYING_SLOT_VAR0); } default: unreachable("illegal slot in get unique index\n"); diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index b5f8dcb5d1d..b2f94b9daee 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -948,6 +948,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) case MESA_SHADER_VERTEX: dump_output(out, so, VARYING_SLOT_POS, "pos"); dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); + dump_output(out, so, VARYING_SLOT_PRIMITIVE_SHADING_RATE, "shading_rate"); break; case MESA_SHADER_FRAGMENT: dump_reg(out, "pos (ij_pixel)", diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 66c23af4666..26641f61083 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -703,6 +703,7 @@ struct ir3_shader_variant { bool half : 1; } outputs[32 + 2]; /* +POSITION +PSIZE */ bool writes_pos, writes_smask, writes_psize, writes_viewport, writes_stencilref; + bool writes_shading_rate; /* Size in dwords of all outputs for VS, size of entire patch for HS. */ uint32_t output_size; @@ -715,7 +716,7 @@ struct ir3_shader_variant { * offset, and in bytes for all other stages. * +POSITION, +PSIZE, ... - see shader_io_get_unique_index */ - unsigned output_loc[12 + 32]; + unsigned output_loc[13 + 32]; /* attributes (VS) / varyings (FS): * Note that sysval's should come *after* normal inputs. 
@@ -740,6 +741,7 @@ struct ir3_shader_variant { bool flat : 1; } inputs[32 + 2]; /* +POSITION +FACE */ bool reads_primid; + bool reads_shading_rate; /* sum of input components (scalar). For frag shaders, it only counts * the varying inputs: diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index 599d45f99dd..dd49be2aa0f 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -92,6 +92,7 @@ libfreedreno_ir3_files = files( 'ir3_nir_lower_load_barycentric_at_sample.c', 'ir3_nir_lower_load_barycentric_at_offset.c', 'ir3_nir_lower_push_consts_to_preamble.c', + 'ir3_nir_lower_shading_rate.c', 'ir3_nir_lower_io_offsets.c', 'ir3_nir_lower_tess.c', 'ir3_nir_lower_tex_prefetch.c', diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index ed765bf3075..8ac187212d1 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -902,8 +902,11 @@ tu6_emit_vpc(struct tu_cs *cs, ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1); uint32_t flags_regid = gs ? 
ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0; + const uint32_t shading_rate_regid = + ir3_find_output_regid(last_shader, VARYING_SLOT_PRIMITIVE_SHADING_RATE); uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff, view_loc = 0xff; + uint32_t shading_rate_loc = 0xff; if (layer_regid != regid(63, 0)) { layer_loc = linkage.max_loc; @@ -915,6 +918,12 @@ tu6_emit_vpc(struct tu_cs *cs, ir3_link_add(&linkage, VARYING_SLOT_VIEWPORT, view_regid, 0x1, linkage.max_loc); } + if (shading_rate_regid != regid(63, 0)) { + shading_rate_loc = linkage.max_loc; + ir3_link_add(&linkage, VARYING_SLOT_PRIMITIVE_SHADING_RATE, + shading_rate_regid, 0x1, linkage.max_loc); + } + unsigned extra_pos = 0; for (unsigned i = 0; i < last_shader->outputs_count; i++) { @@ -1026,7 +1035,8 @@ tu6_emit_vpc(struct tu_cs *cs, CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) | CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) | COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) | - A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); + A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask) | + CONDREG(shading_rate_regid, A6XX_PC_VS_OUT_CNTL_SHADINGRATE)); } else { tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID)); } @@ -1043,11 +1053,11 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1); tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) | A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc) | - 0xff0000); + A6XX_VPC_VS_LAYER_CNTL_SHADINGRATELOC(shading_rate_loc)); tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl_v2, 1); tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) | A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc) | - 0xff0000); + A6XX_VPC_VS_LAYER_CNTL_SHADINGRATELOC(shading_rate_loc)); tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1); tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) | @@ -2293,6 +2303,8 @@ tu_emit_program_state(struct tu_cs *sub_cs, !last_shader->writes_viewport && 
shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm && dev->physical_device->info->a6xx.has_per_view_viewport; + prog->writes_shading_rate = last_shader->writes_shading_rate; + prog->reads_shading_rate = fs->reads_shading_rate; } static const enum mesa_vk_dynamic_graphics_state tu_vertex_input_state[] = { diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 2a79e82e744..56a2440da1c 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -101,6 +101,8 @@ struct tu_program_state unsigned dynamic_descriptor_offsets[MAX_SETS]; bool per_view_viewport; + bool writes_shading_rate; + bool reads_shading_rate; }; struct tu_pipeline_executable { diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index d33169ca089..037ba321480 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -1583,7 +1583,7 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) { uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid; uint32_t ij_regid[IJ_COUNT]; - uint32_t smask_in_regid; + uint32_t smask_in_regid, shading_rate_regid; bool sample_shading = fs->per_samp | fs->key.sample_shading; bool enable_varyings = fs->total_in > 0; @@ -1593,6 +1593,7 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) face_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE); coord_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD); zwcoord_regid = VALIDREG(coord_regid) ? 
coord_regid + 2 : regid(63, 0); + shading_rate_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_SHADING_RATE); for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++) ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i); @@ -1647,7 +1648,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) .ij_linear_sample = ij_regid[IJ_LINEAR_SAMPLE], .xycoordregid = coord_regid, .zwcoordregid = zwcoord_regid), - HLSQ_CONTROL_5_REG(CHIP, .dword = 0xfcfc), ); + HLSQ_CONTROL_5_REG(CHIP, .linelengthregid = 0xfc, + .foveationqualityregid = shading_rate_regid), ); if (CHIP >= A7XX) { uint32_t sysval_regs = 0; @@ -1660,7 +1662,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) } } - for (uint32_t sysval : { face_regid, samp_id_regid, smask_in_regid }) { + for (uint32_t sysval : { face_regid, samp_id_regid, smask_in_regid, + shading_rate_regid }) { if (VALIDREG(sysval)) sysval_regs += 1; } @@ -1719,7 +1722,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) | CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_RENDER_CONTROL1_CENTERRHW) | COND(fs->post_depth_coverage, A6XX_RB_RENDER_CONTROL1_POSTDEPTHCOVERAGE) | - COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS)); + COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS) | + CONDREG(shading_rate_regid, A6XX_RB_RENDER_CONTROL1_FOVEATION)); tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CNTL, 1); tu_cs_emit(cs, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));