From 2ab8eff511d640e50682965eb5d4cd9e28f78487 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Fri, 1 Nov 2024 13:53:47 +0100 Subject: [PATCH] tu/a7xx: Implement VK_KHR_fragment_shading_rate - A650+ - should be able to support pipelineFragmentShadingRate, but in a different way than A7XX. Not implemented here. - A7XX - supports pipelineFragmentShadingRate and attachmentFragmentShadingRate - A740+ - supports primitiveFragmentShadingRate layeredShadingRateAttachments is unsupported at the moment due to test failures, although the proprietary driver supports it. Passes: dEQP-VK.fragment_shading_rate.* on A750/A740 Signed-off-by: Danylo Piliaiev Part-of: --- docs/features.txt | 2 +- src/freedreno/common/freedreno_dev_info.h | 6 +- src/freedreno/common/freedreno_devices.py | 30 +---- src/freedreno/vulkan/tu_clear_blit.cc | 7 +- src/freedreno/vulkan/tu_cmd_buffer.cc | 87 +++++++++++++- src/freedreno/vulkan/tu_cmd_buffer.h | 11 +- src/freedreno/vulkan/tu_device.cc | 69 ++++++++++- src/freedreno/vulkan/tu_formats.cc | 11 ++ src/freedreno/vulkan/tu_image.cc | 7 ++ src/freedreno/vulkan/tu_pass.cc | 33 ++++++ src/freedreno/vulkan/tu_pass.h | 4 + src/freedreno/vulkan/tu_pipeline.cc | 108 +++++++++++++++++- src/freedreno/vulkan/tu_pipeline.h | 1 + .../drivers/freedreno/a6xx/fd6_rasterizer.cc | 2 +- 14 files changed, 337 insertions(+), 41 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index fca943127f1..dc3797a4da1 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -523,7 +523,7 @@ Khronos extensions that are not part of any Vulkan version: VK_KHR_external_semaphore_fd DONE (anv, dzn, hasvk, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_KHR_external_semaphore_win32 DONE (dzn) VK_KHR_fragment_shader_barycentric DONE (nvk/Turing+, radv/gfx10.3+) - VK_KHR_fragment_shading_rate DONE (anv/gen11+, nvk/Turing+, radv/gfx10.3+, vn) + VK_KHR_fragment_shading_rate DONE (anv/gen11+, nvk/Turing+, radv/gfx10.3+, tu/a7xx+, vn) VK_KHR_get_display_properties2 DONE (anv, nvk, pvr, radv, tu, 
v3dv) VK_KHR_get_surface_capabilities2 DONE (anv, lvp, nvk, pvr, radv, tu, v3dv, vn) VK_KHR_global_priority DONE (anv, panvk, radv, tu) diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index 1710c12de2e..ca9939b3e77 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -79,7 +79,7 @@ struct fd_dev_info { bool tess_use_shared; /* Does the hw support GL_QCOM_shading_rate? */ - bool has_shading_rate; + bool has_legacy_pipeline_shading_rate; /* Whether a 16-bit descriptor can be used */ bool storage_16bit; @@ -186,6 +186,8 @@ struct fd_dev_info { */ bool has_coherent_ubwc_flag_caches; + bool has_attachment_shading_rate; + struct { uint32_t PC_POWER_CNTL; uint32_t TPL1_DBG_ECO_CNTL; @@ -302,6 +304,8 @@ struct fd_dev_info { /* Whether only 256 vec4 constants are available for compute */ bool compute_constlen_quirk; + + bool has_primitive_shading_rate; } a7xx; }; diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 1763dade15c..2a1ae806764 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -416,7 +416,7 @@ a6xx_gen4 = A6XXProps( has_cp_reg_write = False, has_8bpp_ubwc = False, has_lpac = True, - has_shading_rate = True, + has_legacy_pipeline_shading_rate = True, has_getfiberid = True, has_dp2acc = True, has_dp4acc = True, @@ -842,7 +842,6 @@ a7xx_base = A6XXProps( has_separate_chroma_filter = True, has_sample_locations = True, has_lpac = True, - has_shading_rate = True, has_getfiberid = True, has_dp2acc = True, has_dp4acc = True, @@ -857,6 +856,7 @@ a7xx_base = A6XXProps( has_isam_v = True, has_ssbo_imm_offsets = True, has_early_preamble = True, + has_attachment_shading_rate = True, ) a7xx_gen1 = A7XXProps( @@ -875,6 +875,7 @@ a7xx_gen2 = A7XXProps( # this hint set. Match them for better compatibility by default. 
enable_tp_ubwc_flag_hint = False, has_64b_ssbo_atomics = True, + has_primitive_shading_rate = True, ) a7xx_gen3 = A7XXProps( @@ -895,6 +896,7 @@ a7xx_gen3 = A7XXProps( ubwc_coherency_quirk = True, has_persistent_counter = True, has_64b_ssbo_atomics = True, + has_primitive_shading_rate = True, ) a730_magic_regs = dict( @@ -950,11 +952,6 @@ a730_raw_magic_regs = [ [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], - - # Shading rate group - [A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000], ] a740_magic_regs = dict( @@ -1021,12 +1018,7 @@ a740_raw_magic_regs = [ [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], - # Shading rate group - [A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000], [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_FSR_BUFFER_DESC, 0x00000000], ] add_gpus([ @@ -1137,11 +1129,7 @@ add_gpus([ [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], - # Shading rate group - [A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000], [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000], ], )) @@ -1238,12 +1226,6 @@ add_gpus([ [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], - - # Shading rate group - [A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_FSR_BUFFER_DESC, 0x00000000], ], )) @@ -1349,11 +1331,7 @@ add_gpus([ [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 
0x00000000], [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], - # Shading rate group - [A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000], - [A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000], [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], - [A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000], [0x930a, 0], [0x960a, 1], diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 1bb323e4530..a07cbe471e6 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -1610,9 +1610,14 @@ r3d_setup(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0)); tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0)); - if (CHIP >= A7XX) + if (CHIP >= A7XX) { tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO()); + tu_cs_emit_regs(cs, A6XX_RB_FSR_CONFIG()); + tu_cs_emit_regs(cs, A7XX_SP_FSR_CONFIG()); + tu_cs_emit_regs(cs, A7XX_GRAS_FSR_CONFIG()); + } + tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL, A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2)); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 1dd69b2e44f..56f2978b484 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -803,11 +803,20 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state) enable_mask = CP_SET_DRAW_STATE__0_SYSMEM; break; case TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM: - /* By also applying the state during binning we ensure that there - * is no rotation applied, by previous A6XX_GRAS_SC_CNTL::rotation. - */ - enable_mask = - CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING; + if (!cs->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches) { + /* By also applying the state during binning we ensure that there + * is no rotation applied, by previous A6XX_GRAS_SC_CNTL::rotation. 
+ */ + enable_mask = + CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING; + } else { + static_assert(TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM == + TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE); + enable_mask = CP_SET_DRAW_STATE__0_GMEM | + CP_SET_DRAW_STATE__0_SYSMEM | + CP_SET_DRAW_STATE__0_BINNING; + } + break; default: enable_mask = CP_SET_DRAW_STATE__0_GMEM | @@ -2542,6 +2551,7 @@ tu_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, cmd_buffer->state.max_vbs_bound = 0; cmd_buffer->vsc_initialized = false; + cmd_buffer->prev_fsr_is_null = false; ralloc_free(cmd_buffer->patchpoints_ctx); cmd_buffer->patchpoints_ctx = NULL; @@ -3709,6 +3719,17 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, cmd->state.dirty |= TU_CMD_DIRTY_FEEDBACK_LOOPS | TU_CMD_DIRTY_LRZ; } + if (pipeline->program.writes_shading_rate != + cmd->state.pipeline_writes_shading_rate || + pipeline->program.reads_shading_rate != + cmd->state.pipeline_reads_shading_rate) { + cmd->state.pipeline_writes_shading_rate = + pipeline->program.writes_shading_rate; + cmd->state.pipeline_reads_shading_rate = + pipeline->program.reads_shading_rate; + cmd->state.dirty |= TU_CMD_DIRTY_SHADING_RATE; + } + bool raster_order_attachment_access = pipeline->output.raster_order_attachment_access || pipeline->ds.raster_order_attachment_access; @@ -4586,6 +4607,49 @@ tu7_emit_subpass_clear(struct tu_cmd_buffer *cmd, struct tu_resolve_group *resol } } +static void +tu7_emit_subpass_shading_rate(struct tu_cmd_buffer *cmd, + const struct tu_subpass *subpass, + struct tu_cs *cs) +{ /* Writes the GRAS_FSR_BUFFER_* registers from the subpass's FSR attachment view, or with no fields set when the subpass has none. */ + if (subpass->fsr_attachment == VK_ATTACHMENT_UNUSED) { + tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_DESC(), + A7XX_GRAS_FSR_BUFFER_SIZE()); + tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_PITCH()); + tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_BASE()); + /* We need to invalidate the cache when changing to a NULL FSR attachment, + * but only once (tracked by cmd->prev_fsr_is_null). 
+ */ + if (!cmd->prev_fsr_is_null) { + tu_emit_raw_event_write(cmd, cs, LRZ_Q_CACHE_INVALIDATE, + false); + cmd->prev_fsr_is_null = true; + } + return; + } + + const struct tu_image_view *iview = + cmd->state.attachments[subpass->fsr_attachment]; + assert(iview->vk.format == VK_FORMAT_R8_UINT); + + tu_cs_emit_regs( + cs, + A7XX_GRAS_FSR_BUFFER_DESC(.layered = true, + .tile_mode = + (a6xx_tile_mode) iview->image->layout[0] + .tile_mode, ), + A7XX_GRAS_FSR_BUFFER_SIZE(.width = iview->view.width, + .height = iview->view.height)); + tu_cs_emit_regs( + cs, A7XX_GRAS_FSR_BUFFER_PITCH(.pitch = iview->view.pitch, + .array_pitch = iview->view.layer_size)); + tu_cs_emit_regs(cs, + A7XX_GRAS_FSR_BUFFER_BASE(.qword = iview->view.base_addr)); + + tu_emit_raw_event_write(cmd, cs, LRZ_Q_CACHE_INVALIDATE, false); /* unconditional whenever a real attachment is bound */ + cmd->prev_fsr_is_null = false; /* re-arms the one-time invalidate on the NULL path above */ +} + /* emit loads, clears, and mrt/zs/msaa/ubwc state for the subpass that is * starting (either at vkCmdBeginRenderPass2() or vkCmdNextSubpass2()) * @@ -4613,6 +4677,10 @@ tu_emit_subpass_begin(struct tu_cmd_buffer *cmd) tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs); tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false); + if (CHIP >= A7XX) { + tu7_emit_subpass_shading_rate(cmd, cmd->state.subpass, &cmd->draw_cs); + } + tu_set_input_attachments(cmd, cmd->state.subpass); vk_cmd_set_cb_attachment_count(&cmd->vk, cmd->state.subpass->color_count); @@ -4787,6 +4855,15 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, if (cmd->dynamic_pass.has_fdm) cmd->patchpoints_ctx = ralloc_context(NULL); + a = cmd->dynamic_subpass.fsr_attachment; + if (a != VK_ATTACHMENT_UNUSED) { + const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_info = + vk_find_struct_const(pRenderingInfo->pNext, + RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + VK_FROM_HANDLE(tu_image_view, view, fsr_info->imageView); + cmd->state.attachments[a] = view; + } + tu_choose_gmem_layout(cmd); cmd->state.renderpass_cache.pending_flush_bits = diff 
--git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index 0be7f06e7e3..f44201c93a2 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -73,8 +73,9 @@ enum tu_cmd_dirty_bits TU_CMD_DIRTY_RAST_ORDER = BIT(12), TU_CMD_DIRTY_FEEDBACK_LOOPS = BIT(13), TU_CMD_DIRTY_FS = BIT(14), + TU_CMD_DIRTY_SHADING_RATE = BIT(15), /* all draw states were disabled and need to be re-enabled: */ - TU_CMD_DIRTY_DRAW_STATE = BIT(15) + TU_CMD_DIRTY_DRAW_STATE = BIT(16) }; /* There are only three cache domains we have to care about: the CCU, or @@ -514,6 +515,8 @@ struct tu_cmd_state bool raster_order_attachment_access_valid; bool blit_cache_cleaned; VkImageAspectFlags pipeline_feedback_loops; + bool pipeline_writes_shading_rate; + bool pipeline_reads_shading_rate; bool pipeline_blend_lrz, pipeline_bandwidth; uint32_t pipeline_draw_states; @@ -571,11 +574,11 @@ struct tu_cmd_buffer struct tu_descriptor_state descriptors[MAX_BIND_POINTS]; - struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 1]; + struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 2]; struct tu_subpass_attachment dynamic_color_attachments[MAX_RTS]; struct tu_subpass_attachment dynamic_input_attachments[MAX_RTS + 1]; struct tu_subpass_attachment dynamic_resolve_attachments[MAX_RTS + 1]; - const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 1]; + const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 2]; VkClearValue dynamic_clear_values[2 * (MAX_RTS + 1)]; struct tu_render_pass dynamic_pass; @@ -613,6 +616,8 @@ struct tu_cmd_buffer uint32_t vsc_prim_strm_pitch; uint64_t vsc_draw_strm_va, vsc_draw_strm_size_va, vsc_prim_strm_va; bool vsc_initialized; + + bool prev_fsr_is_null; }; VK_DEFINE_HANDLE_CASTS(tu_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER) diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 
a6e284f11c0..ce891793009 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -166,6 +166,7 @@ get_device_extensions(const struct tu_physical_device *device, .KHR_external_semaphore = true, .KHR_external_semaphore_fd = true, .KHR_format_feature_flags2 = true, + .KHR_fragment_shading_rate = device->info->a6xx.has_attachment_shading_rate, .KHR_get_memory_requirements2 = true, .KHR_global_priority = true, .KHR_image_format_list = true, @@ -468,6 +469,11 @@ tu_get_features(struct tu_physical_device *pdevice, /* VK_KHR_dynamic_rendering_local_read */ features->dynamicRenderingLocalRead = true; + /* VK_KHR_fragment_shading_rate */ + features->pipelineFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate; + features->primitiveFragmentShadingRate = pdevice->info->a7xx.has_primitive_shading_rate; + features->attachmentFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate; + /* VK_KHR_index_type_uint8 */ features->indexTypeUint8 = true; @@ -1045,6 +1051,34 @@ tu_get_properties(struct tu_physical_device *pdevice, /* VK_KHR_compute_shader_derivatives */ props->meshAndTaskShaderDerivatives = false; + /* VK_KHR_fragment_shading_rate */ + if (pdevice->info->a6xx.has_attachment_shading_rate) { + props->minFragmentShadingRateAttachmentTexelSize = {8, 8}; + props->maxFragmentShadingRateAttachmentTexelSize = {8, 8}; + } else { + props->minFragmentShadingRateAttachmentTexelSize = {0, 0}; + props->maxFragmentShadingRateAttachmentTexelSize = {0, 0}; + } + props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1; + props->primitiveFragmentShadingRateWithMultipleViewports = + pdevice->info->a7xx.has_primitive_shading_rate; + /* A7XX TODO: dEQP-VK.fragment_shading_rate.*.srlayered.* are failing + * for some reason. 
+ */ + props->layeredShadingRateAttachments = false; + props->fragmentShadingRateNonTrivialCombinerOps = true; + props->maxFragmentSize = {4, 4}; + props->maxFragmentSizeAspectRatio = 4; + props->maxFragmentShadingRateCoverageSamples = 16; + props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_4_BIT; + props->fragmentShadingRateWithShaderDepthStencilWrites = true; + props->fragmentShadingRateWithSampleMask = true; + props->fragmentShadingRateWithShaderSampleMask = true; + props->fragmentShadingRateWithConservativeRasterization = false; + props->fragmentShadingRateWithFragmentShaderInterlock = false; + props->fragmentShadingRateWithCustomSampleLocations = true; + props->fragmentShadingRateStrictMultiplyCombiner = true; + /* VK_KHR_push_descriptor */ props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; @@ -1189,7 +1223,7 @@ tu_get_properties(struct tu_physical_device *pdevice, /* VK_KHR_maintenance6 */ props->blockTexelViewCompatibleMultipleLayers = true; props->maxCombinedImageSamplerDescriptorCount = 1; - props->fragmentShadingRateClampCombinerInputs = false; /* TODO */ + props->fragmentShadingRateClampCombinerInputs = true; /* VK_EXT_host_image_copy */ @@ -1770,6 +1804,39 @@ tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev, } } +VKAPI_ATTR VkResult VKAPI_CALL +tu_GetPhysicalDeviceFragmentShadingRatesKHR( + VkPhysicalDevice physicalDevice, + uint32_t *pFragmentShadingRateCount, + VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates) +{ /* Reports the supported fragment sizes, largest first, each with the sample counts it may be used with; returns the vk_outarray status. */ + VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, + pFragmentShadingRates, pFragmentShadingRateCount); + +#define append_rate(w, h, s) \ + { \ + VkPhysicalDeviceFragmentShadingRateKHR rate = { \ + .sType = \ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR, \ + .sampleCounts = s, \ + .fragmentSize = { .width = w, .height = h }, \ + }; \ + vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, \ + r) *r = rate; \ + } + + append_rate(4, 4, 
VK_SAMPLE_COUNT_1_BIT); + append_rate(4, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT); + append_rate(2, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT); + append_rate(2, 1, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT); + append_rate(1, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT); + append_rate(1, 1, ~0); + +#undef append_rate + + return vk_outarray_status(&out); +} + static VkResult tu_queue_init(struct tu_device *device, struct tu_queue *queue, diff --git a/src/freedreno/vulkan/tu_formats.cc b/src/freedreno/vulkan/tu_formats.cc index 281512a8bb3..8b710dd7d8c 100644 --- a/src/freedreno/vulkan/tu_formats.cc +++ b/src/freedreno/vulkan/tu_formats.cc @@ -265,6 +265,9 @@ tu_physical_device_get_format_properties( if (vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) linear = 0; + if (vk_format == VK_FORMAT_R8_UINT) + optimal |= VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; + end: out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = optimal; @@ -513,6 +516,14 @@ tu_get_image_format_properties( } } + if (image_usage & + VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) { + if (!(format_feature_flags & + VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)) { + return tu_image_unsupported_format(pImageFormatProperties); + } + } + *pImageFormatProperties = (VkImageFormatProperties) { .maxExtent = maxExtent, .maxMipLevels = maxMipLevels, diff --git a/src/freedreno/vulkan/tu_image.cc b/src/freedreno/vulkan/tu_image.cc index f6cb2311324..fe759d949b4 100644 --- a/src/freedreno/vulkan/tu_image.cc +++ b/src/freedreno/vulkan/tu_image.cc @@ -662,6 +662,13 @@ tu_image_init(struct tu_device *device, struct tu_image *image, TILE6_LINEAR) != WZYX) image->force_linear_tile = true; + /* Some kind of HW limitation. 
*/ + if (pCreateInfo->usage & + VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR && + image->vk.extent.width < 16) { + image->force_linear_tile = true; + } + if (image->force_linear_tile || !ubwc_possible(device, image->vk.format, pCreateInfo->imageType, pCreateInfo->usage, image->vk.stencil_usage, diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc index 04a413681b2..dce4edc7964 100644 --- a/src/freedreno/vulkan/tu_pass.cc +++ b/src/freedreno/vulkan/tu_pass.cc @@ -999,6 +999,20 @@ tu_CreateRenderPass2(VkDevice _device, if (a != VK_ATTACHMENT_UNUSED) { tu_subpass_use_attachment(pass, i, a, pCreateInfo); } + + const VkFragmentShadingRateAttachmentInfoKHR *fsr_att_info = + vk_find_struct_const(desc->pNext, + FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + if (fsr_att_info && fsr_att_info->pFragmentShadingRateAttachment && + fsr_att_info->pFragmentShadingRateAttachment->attachment != + VK_ATTACHMENT_UNUSED) { + subpass->fsr_attachment = + fsr_att_info->pFragmentShadingRateAttachment->attachment; + subpass->fsr_attachment_texel_size = + fsr_att_info->shadingRateAttachmentTexelSize; + } else { + subpass->fsr_attachment = VK_ATTACHMENT_UNUSED; + } } tu_render_pass_patch_input_gmem(pass); @@ -1235,6 +1249,25 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, pass->has_fdm = false; } + const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_info = + vk_find_struct_const(info->pNext, + RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + if (fsr_info && fsr_info->imageView != VK_NULL_HANDLE) { + VK_FROM_HANDLE(tu_image_view, view, fsr_info->imageView); + + struct tu_render_pass_attachment *att = &pass->attachments[a]; + tu_setup_dynamic_attachment(att, view); + subpass->fsr_attachment = a++; + attachment_set_ops(device, att, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE); + subpass->fsr_attachment_texel_size = 
fsr_info->shadingRateAttachmentTexelSize; + } else { + subpass->fsr_attachment = VK_ATTACHMENT_UNUSED; + } + if (TU_DEBUG(FDM) && !tu_render_pass_disable_fdm(pass)) pass->has_fdm = true; diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h index 8cc15b51943..a01eea8c150 100644 --- a/src/freedreno/vulkan/tu_pass.h +++ b/src/freedreno/vulkan/tu_pass.h @@ -63,6 +63,10 @@ struct tu_subpass struct tu_subpass_attachment *color_attachments; struct tu_subpass_attachment *resolve_attachments; struct tu_subpass_attachment depth_stencil_attachment; + + uint32_t fsr_attachment; + VkExtent2D fsr_attachment_texel_size; + /* When using dynamic rendering depth and stencil attachments may be * set to unused independently, so we need to track this bit of * information separately for each of them. diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 8ac187212d1..49b833d2336 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -3098,7 +3098,7 @@ tu6_rast_size(struct tu_device *dev, bool per_view_viewport) { if (CHIP == A6XX) { - return 15 + (dev->physical_device->info->a6xx.has_shading_rate ? 8 : 0); + return 15 + (dev->physical_device->info->a6xx.has_legacy_pipeline_shading_rate ? 
8 : 0); } else { return 17; } @@ -3168,7 +3168,7 @@ tu6_emit_rast(struct tu_cs *cs, A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f), A6XX_GRAS_SU_POINT_SIZE(1.0f)); - if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_shading_rate) { + if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_legacy_pipeline_shading_rate) { tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A00()); tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A10()); tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A20()); @@ -3342,6 +3342,81 @@ tu6_emit_prim_mode_sysmem(struct tu_cs *cs, .single_prim_mode = sysmem_prim_mode)); } +static const enum mesa_vk_dynamic_graphics_state tu_fragment_shading_rate_state[] = { + MESA_VK_DYNAMIC_FSR, +}; + +template +static unsigned +tu6_fragment_shading_rate_size(struct tu_device *dev, + const vk_fragment_shading_rate_state *fsr, + bool enable_att_fsr, + bool enable_prim_fsr, + bool fs_reads_fsr) +{ + return 6; /* presumably 2 dwords for each of the three single-register writes in tu6_emit_fragment_shading_rate - TODO confirm */ +} + +template +static void +tu6_emit_fragment_shading_rate(struct tu_cs *cs, + const vk_fragment_shading_rate_state *fsr, + bool enable_att_fsr, + bool enable_prim_fsr, + bool fs_reads_fsr) +{ + /* gl_ShadingRateEXT doesn't read back as 1x1 with a null config, so + * if the FS reads it we have to emit the config. 
+ */ + if (!fsr || (!fs_reads_fsr && vk_fragment_shading_rate_is_disabled(fsr))) { + tu_cs_emit_regs(cs, A6XX_RB_FSR_CONFIG()); + tu_cs_emit_regs(cs, A7XX_SP_FSR_CONFIG()); + tu_cs_emit_regs(cs, A7XX_GRAS_FSR_CONFIG()); + return; + } + + bool enable_draw_fsr = true; /* cleared below when a REPLACE combiner discards the pipeline rate */ + if (enable_att_fsr) { /* combiner_ops[1]: REPLACE leaves only the attachment rate enabled, KEEP disables it */ + if (fsr->combiner_ops[1] == + VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR) { + enable_draw_fsr = false; + enable_prim_fsr = false; + } else if (fsr->combiner_ops[1] == + VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) { + enable_att_fsr = false; + } + } + if (enable_prim_fsr) { /* combiner_ops[0]: REPLACE disables the pipeline rate, KEEP disables the primitive rate */ + if (fsr->combiner_ops[0] == + VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR) { + enable_draw_fsr = false; + } else if (fsr->combiner_ops[0] == + VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) { + enable_prim_fsr = false; + } + } + + tu_cs_emit_regs( + cs, + A6XX_RB_FSR_CONFIG(.unk2 = true, .pipeline_fsr_enable = enable_draw_fsr, + .attachment_fsr_enable = enable_att_fsr, + .primitive_fsr_enable = enable_prim_fsr)); + tu_cs_emit_regs( + cs, A7XX_SP_FSR_CONFIG(.pipeline_fsr_enable = enable_draw_fsr, + .attachment_fsr_enable = enable_att_fsr, + .primitive_fsr_enable = enable_prim_fsr)); + tu_cs_emit_regs( + cs, A7XX_GRAS_FSR_CONFIG( + .pipeline_fsr_enable = enable_draw_fsr, + .frag_size_x = util_logbase2(fsr->fragment_size.width), + .frag_size_y = util_logbase2(fsr->fragment_size.height), + .combiner_op_1 = (a6xx_fsr_combiner) fsr->combiner_ops[0], + .combiner_op_2 = (a6xx_fsr_combiner) fsr->combiner_ops[1], + .attachment_fsr_enable = enable_att_fsr, + .primitive_fsr_enable = enable_prim_fsr)); +} + + static inline bool emit_pipeline_state(BITSET_WORD *keep, BITSET_WORD *remove, BITSET_WORD *pipeline_set, @@ -3467,6 +3542,7 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder, tu_calc_bandwidth(&pipeline->bandwidth, cb, builder->graphics_state.rp); DRAW_STATE(blend_constants, TU_DYNAMIC_STATE_BLEND_CONSTANTS, cb); + if (attachments_valid && !(builder->graphics_state.rp->attachments 
& MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS)) { @@ -3526,6 +3602,19 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder, vk_pipeline_flags_feedback_loops(builder->graphics_state.pipeline_flags), &pipeline->prim_order.sysmem_single_prim_mode); } + + if (builder->device->physical_device->info->a6xx.has_attachment_shading_rate) { + bool has_fsr_att = + builder->graphics_state.pipeline_flags & + VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; + DRAW_STATE_COND(fragment_shading_rate, + TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE, + attachments_valid && pipeline_contains_all_shader_state(pipeline), + builder->graphics_state.fsr, + has_fsr_att, + pipeline->program.writes_shading_rate, + pipeline->program.reads_shading_rate); + } #undef DRAW_STATE #undef DRAW_STATE_COND #undef EMIT_STATE @@ -3692,6 +3781,16 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd) &cmd->state.vk_rp); DRAW_STATE(blend_constants, VK_DYNAMIC_STATE_BLEND_CONSTANTS, &cmd->vk.dynamic_graphics_state.cb); + + if (cmd->device->physical_device->info->a6xx.has_attachment_shading_rate) { + DRAW_STATE_COND(fragment_shading_rate, + TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE, + cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_SHADING_RATE), + &cmd->vk.dynamic_graphics_state.fsr, + cmd->state.subpass->fsr_attachment != VK_ATTACHMENT_UNUSED, + cmd->state.program.writes_shading_rate, + cmd->state.program.reads_shading_rate); + } DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST, cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_PER_VIEW_VIEWPORT), @@ -4170,6 +4269,11 @@ tu_pipeline_builder_init_graphics( VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT; } + if (subpass->fsr_attachment != VK_ATTACHMENT_UNUSED) { + rp_flags |= + VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; + } + builder->unscaled_input_fragcoord = 0; for (unsigned i = 0; i < subpass->input_count; i++) { /* Input attachments stored in GMEM must be loaded 
with unscaled diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 56a2440da1c..f2ef54a4b87 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -32,6 +32,7 @@ enum tu_dynamic_state TU_DYNAMIC_STATE_VERTEX_INPUT, TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS, TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM, + TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE = TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM, TU_DYNAMIC_STATE_COUNT, }; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc index 7bfd9618519..1da50349f65 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc @@ -126,7 +126,7 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, ); } - if (CHIP == A6XX && ctx->screen->info->a6xx.has_shading_rate) { + if (CHIP == A6XX && ctx->screen->info->a6xx.has_legacy_pipeline_shading_rate) { OUT_REG(ring, A6XX_RB_UNKNOWN_8A00()); OUT_REG(ring, A6XX_RB_UNKNOWN_8A10()); OUT_REG(ring, A6XX_RB_UNKNOWN_8A20());