tu/a7xx: Implement VK_KHR_fragment_shading_rate

- A650+ - should be able to support pipelineFragmentShadingRate,
          but through a different mechanism than A7XX. Not implemented here.
- A7XX  - support pipelineFragmentShadingRate and attachmentFragmentShadingRate
- A740+ - support primitiveFragmentShadingRate

layeredShadingRateAttachments is left unsupported for now because the
corresponding tests fail, although the proprietary driver supports it.

Passes:
  dEQP-VK.fragment_shading_rate.*
On A750/A740

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30905>
This commit is contained in:
Danylo Piliaiev 2024-11-01 13:53:47 +01:00 committed by Marge Bot
parent 117379a77a
commit 2ab8eff511
14 changed files with 337 additions and 41 deletions

View File

@ -523,7 +523,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_KHR_external_semaphore_fd DONE (anv, dzn, hasvk, nvk, panvk, pvr, radv, tu, v3dv, vn)
VK_KHR_external_semaphore_win32 DONE (dzn)
VK_KHR_fragment_shader_barycentric DONE (nvk/Turing+, radv/gfx10.3+)
VK_KHR_fragment_shading_rate DONE (anv/gen11+, nvk/Turing+, radv/gfx10.3+, vn)
VK_KHR_fragment_shading_rate DONE (anv/gen11+, nvk/Turing+, radv/gfx10.3+, tu/a7xx+, vn)
VK_KHR_get_display_properties2 DONE (anv, nvk, pvr, radv, tu, v3dv)
VK_KHR_get_surface_capabilities2 DONE (anv, lvp, nvk, pvr, radv, tu, v3dv, vn)
VK_KHR_global_priority DONE (anv, panvk, radv, tu)

View File

@ -79,7 +79,7 @@ struct fd_dev_info {
bool tess_use_shared;
/* Does the hw support GL_QCOM_shading_rate? */
bool has_shading_rate;
bool has_legacy_pipeline_shading_rate;
/* Whether a 16-bit descriptor can be used */
bool storage_16bit;
@ -186,6 +186,8 @@ struct fd_dev_info {
*/
bool has_coherent_ubwc_flag_caches;
bool has_attachment_shading_rate;
struct {
uint32_t PC_POWER_CNTL;
uint32_t TPL1_DBG_ECO_CNTL;
@ -302,6 +304,8 @@ struct fd_dev_info {
/* Whether only 256 vec4 constants are available for compute */
bool compute_constlen_quirk;
bool has_primitive_shading_rate;
} a7xx;
};

View File

@ -416,7 +416,7 @@ a6xx_gen4 = A6XXProps(
has_cp_reg_write = False,
has_8bpp_ubwc = False,
has_lpac = True,
has_shading_rate = True,
has_legacy_pipeline_shading_rate = True,
has_getfiberid = True,
has_dp2acc = True,
has_dp4acc = True,
@ -842,7 +842,6 @@ a7xx_base = A6XXProps(
has_separate_chroma_filter = True,
has_sample_locations = True,
has_lpac = True,
has_shading_rate = True,
has_getfiberid = True,
has_dp2acc = True,
has_dp4acc = True,
@ -857,6 +856,7 @@ a7xx_base = A6XXProps(
has_isam_v = True,
has_ssbo_imm_offsets = True,
has_early_preamble = True,
has_attachment_shading_rate = True,
)
a7xx_gen1 = A7XXProps(
@ -875,6 +875,7 @@ a7xx_gen2 = A7XXProps(
# this hint set. Match them for better compatibility by default.
enable_tp_ubwc_flag_hint = False,
has_64b_ssbo_atomics = True,
has_primitive_shading_rate = True,
)
a7xx_gen3 = A7XXProps(
@ -895,6 +896,7 @@ a7xx_gen3 = A7XXProps(
ubwc_coherency_quirk = True,
has_persistent_counter = True,
has_64b_ssbo_atomics = True,
has_primitive_shading_rate = True,
)
a730_magic_regs = dict(
@ -950,11 +952,6 @@ a730_raw_magic_regs = [
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
# Shading rate group
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
]
a740_magic_regs = dict(
@ -1021,12 +1018,7 @@ a740_raw_magic_regs = [
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
# Shading rate group
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_FSR_BUFFER_DESC, 0x00000000],
]
add_gpus([
@ -1137,11 +1129,7 @@ add_gpus([
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
# Shading rate group
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
],
))
@ -1238,12 +1226,6 @@ add_gpus([
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
# Shading rate group
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_FSR_BUFFER_DESC, 0x00000000],
],
))
@ -1349,11 +1331,7 @@ add_gpus([
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
# Shading rate group
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
[0x930a, 0],
[0x960a, 1],

View File

@ -1610,9 +1610,14 @@ r3d_setup(struct tu_cmd_buffer *cmd,
tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0));
tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0));
if (CHIP >= A7XX)
if (CHIP >= A7XX) {
tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO());
tu_cs_emit_regs(cs, A6XX_RB_FSR_CONFIG());
tu_cs_emit_regs(cs, A7XX_SP_FSR_CONFIG());
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_CONFIG());
}
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2));

View File

@ -803,11 +803,20 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
enable_mask = CP_SET_DRAW_STATE__0_SYSMEM;
break;
case TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM:
/* By also applying the state during binning we ensure that there
* is no rotation applied, by previous A6XX_GRAS_SC_CNTL::rotation.
*/
enable_mask =
CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING;
if (!cs->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches) {
/* By also applying the state during binning we ensure that there
* is no rotation applied, by previous A6XX_GRAS_SC_CNTL::rotation.
*/
enable_mask =
CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING;
} else {
static_assert(TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM ==
TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE);
enable_mask = CP_SET_DRAW_STATE__0_GMEM |
CP_SET_DRAW_STATE__0_SYSMEM |
CP_SET_DRAW_STATE__0_BINNING;
}
break;
default:
enable_mask = CP_SET_DRAW_STATE__0_GMEM |
@ -2542,6 +2551,7 @@ tu_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
cmd_buffer->state.max_vbs_bound = 0;
cmd_buffer->vsc_initialized = false;
cmd_buffer->prev_fsr_is_null = false;
ralloc_free(cmd_buffer->patchpoints_ctx);
cmd_buffer->patchpoints_ctx = NULL;
@ -3709,6 +3719,17 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
cmd->state.dirty |= TU_CMD_DIRTY_FEEDBACK_LOOPS | TU_CMD_DIRTY_LRZ;
}
if (pipeline->program.writes_shading_rate !=
cmd->state.pipeline_writes_shading_rate ||
pipeline->program.reads_shading_rate !=
cmd->state.pipeline_reads_shading_rate) {
cmd->state.pipeline_writes_shading_rate =
pipeline->program.writes_shading_rate;
cmd->state.pipeline_reads_shading_rate =
pipeline->program.reads_shading_rate;
cmd->state.dirty |= TU_CMD_DIRTY_SHADING_RATE;
}
bool raster_order_attachment_access =
pipeline->output.raster_order_attachment_access ||
pipeline->ds.raster_order_attachment_access;
@ -4586,6 +4607,49 @@ tu7_emit_subpass_clear(struct tu_cmd_buffer *cmd, struct tu_resolve_group *resol
}
}
static void
tu7_emit_subpass_shading_rate(struct tu_cmd_buffer *cmd,
const struct tu_subpass *subpass,
struct tu_cs *cs)
{
if (subpass->fsr_attachment == VK_ATTACHMENT_UNUSED) {
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_DESC(),
A7XX_GRAS_FSR_BUFFER_SIZE());
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_PITCH());
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_BASE());
/* We need to invalidate cache when changing to NULL FSR attachment, but
* only once.
*/
if (!cmd->prev_fsr_is_null) {
tu_emit_raw_event_write<A7XX>(cmd, cs, LRZ_Q_CACHE_INVALIDATE,
false);
cmd->prev_fsr_is_null = true;
}
return;
}
const struct tu_image_view *iview =
cmd->state.attachments[subpass->fsr_attachment];
assert(iview->vk.format == VK_FORMAT_R8_UINT);
tu_cs_emit_regs(
cs,
A7XX_GRAS_FSR_BUFFER_DESC(.layered = true,
.tile_mode =
(a6xx_tile_mode) iview->image->layout[0]
.tile_mode, ),
A7XX_GRAS_FSR_BUFFER_SIZE(.width = iview->view.width,
.height = iview->view.height));
tu_cs_emit_regs(
cs, A7XX_GRAS_FSR_BUFFER_PITCH(.pitch = iview->view.pitch,
.array_pitch = iview->view.layer_size));
tu_cs_emit_regs(cs,
A7XX_GRAS_FSR_BUFFER_BASE(.qword = iview->view.base_addr));
tu_emit_raw_event_write<A7XX>(cmd, cs, LRZ_Q_CACHE_INVALIDATE, false);
cmd->prev_fsr_is_null = false;
}
/* emit loads, clears, and mrt/zs/msaa/ubwc state for the subpass that is
* starting (either at vkCmdBeginRenderPass2() or vkCmdNextSubpass2())
*
@ -4613,6 +4677,10 @@ tu_emit_subpass_begin(struct tu_cmd_buffer *cmd)
tu6_emit_mrt<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs);
tu6_emit_render_cntl<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs, false);
if (CHIP >= A7XX) {
tu7_emit_subpass_shading_rate(cmd, cmd->state.subpass, &cmd->draw_cs);
}
tu_set_input_attachments(cmd, cmd->state.subpass);
vk_cmd_set_cb_attachment_count(&cmd->vk, cmd->state.subpass->color_count);
@ -4787,6 +4855,15 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
if (cmd->dynamic_pass.has_fdm)
cmd->patchpoints_ctx = ralloc_context(NULL);
a = cmd->dynamic_subpass.fsr_attachment;
if (a != VK_ATTACHMENT_UNUSED) {
const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_info =
vk_find_struct_const(pRenderingInfo->pNext,
RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
VK_FROM_HANDLE(tu_image_view, view, fsr_info->imageView);
cmd->state.attachments[a] = view;
}
tu_choose_gmem_layout(cmd);
cmd->state.renderpass_cache.pending_flush_bits =

View File

@ -73,8 +73,9 @@ enum tu_cmd_dirty_bits
TU_CMD_DIRTY_RAST_ORDER = BIT(12),
TU_CMD_DIRTY_FEEDBACK_LOOPS = BIT(13),
TU_CMD_DIRTY_FS = BIT(14),
TU_CMD_DIRTY_SHADING_RATE = BIT(15),
/* all draw states were disabled and need to be re-enabled: */
TU_CMD_DIRTY_DRAW_STATE = BIT(15)
TU_CMD_DIRTY_DRAW_STATE = BIT(16)
};
/* There are only three cache domains we have to care about: the CCU, or
@ -514,6 +515,8 @@ struct tu_cmd_state
bool raster_order_attachment_access_valid;
bool blit_cache_cleaned;
VkImageAspectFlags pipeline_feedback_loops;
bool pipeline_writes_shading_rate;
bool pipeline_reads_shading_rate;
bool pipeline_blend_lrz, pipeline_bandwidth;
uint32_t pipeline_draw_states;
@ -571,11 +574,11 @@ struct tu_cmd_buffer
struct tu_descriptor_state descriptors[MAX_BIND_POINTS];
struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 1];
struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 2];
struct tu_subpass_attachment dynamic_color_attachments[MAX_RTS];
struct tu_subpass_attachment dynamic_input_attachments[MAX_RTS + 1];
struct tu_subpass_attachment dynamic_resolve_attachments[MAX_RTS + 1];
const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 1];
const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 2];
VkClearValue dynamic_clear_values[2 * (MAX_RTS + 1)];
struct tu_render_pass dynamic_pass;
@ -613,6 +616,8 @@ struct tu_cmd_buffer
uint32_t vsc_prim_strm_pitch;
uint64_t vsc_draw_strm_va, vsc_draw_strm_size_va, vsc_prim_strm_va;
bool vsc_initialized;
bool prev_fsr_is_null;
};
VK_DEFINE_HANDLE_CASTS(tu_cmd_buffer, vk.base, VkCommandBuffer,
VK_OBJECT_TYPE_COMMAND_BUFFER)

View File

@ -166,6 +166,7 @@ get_device_extensions(const struct tu_physical_device *device,
.KHR_external_semaphore = true,
.KHR_external_semaphore_fd = true,
.KHR_format_feature_flags2 = true,
.KHR_fragment_shading_rate = device->info->a6xx.has_attachment_shading_rate,
.KHR_get_memory_requirements2 = true,
.KHR_global_priority = true,
.KHR_image_format_list = true,
@ -468,6 +469,11 @@ tu_get_features(struct tu_physical_device *pdevice,
/* VK_KHR_dynamic_rendering_local_read */
features->dynamicRenderingLocalRead = true;
/* VK_KHR_fragment_shading_rate */
features->pipelineFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
features->primitiveFragmentShadingRate = pdevice->info->a7xx.has_primitive_shading_rate;
features->attachmentFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
/* VK_KHR_index_type_uint8 */
features->indexTypeUint8 = true;
@ -1045,6 +1051,34 @@ tu_get_properties(struct tu_physical_device *pdevice,
/* VK_KHR_compute_shader_derivatives */
props->meshAndTaskShaderDerivatives = false;
/* VK_KHR_fragment_shading_rate */
if (pdevice->info->a6xx.has_attachment_shading_rate) {
props->minFragmentShadingRateAttachmentTexelSize = {8, 8};
props->maxFragmentShadingRateAttachmentTexelSize = {8, 8};
} else {
props->minFragmentShadingRateAttachmentTexelSize = {0, 0};
props->maxFragmentShadingRateAttachmentTexelSize = {0, 0};
}
props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
props->primitiveFragmentShadingRateWithMultipleViewports =
pdevice->info->a7xx.has_primitive_shading_rate;
/* A7XX TODO: dEQP-VK.fragment_shading_rate.*.srlayered.* are failing
* for some reason.
*/
props->layeredShadingRateAttachments = false;
props->fragmentShadingRateNonTrivialCombinerOps = true;
props->maxFragmentSize = {4, 4};
props->maxFragmentSizeAspectRatio = 4;
props->maxFragmentShadingRateCoverageSamples = 16;
props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
props->fragmentShadingRateWithShaderDepthStencilWrites = true;
props->fragmentShadingRateWithSampleMask = true;
props->fragmentShadingRateWithShaderSampleMask = true;
props->fragmentShadingRateWithConservativeRasterization = false;
props->fragmentShadingRateWithFragmentShaderInterlock = false;
props->fragmentShadingRateWithCustomSampleLocations = true;
props->fragmentShadingRateStrictMultiplyCombiner = true;
/* VK_KHR_push_descriptor */
props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
@ -1189,7 +1223,7 @@ tu_get_properties(struct tu_physical_device *pdevice,
/* VK_KHR_maintenance6 */
props->blockTexelViewCompatibleMultipleLayers = true;
props->maxCombinedImageSamplerDescriptorCount = 1;
props->fragmentShadingRateClampCombinerInputs = false; /* TODO */
props->fragmentShadingRateClampCombinerInputs = true;
/* VK_EXT_host_image_copy */
@ -1770,6 +1804,39 @@ tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
}
}
/**
 * vkGetPhysicalDeviceFragmentShadingRatesKHR entry point.
 *
 * Appends the fixed list of supported fragment sizes, each with the sample
 * counts it may be combined with, to the caller's array through the
 * vk_outarray helpers. Entries are listed from the largest fragment size
 * (4x4) down to 1x1; the 1x1 entry advertises every sample count (~0).
 *
 * \param physicalDevice             unused; the list does not depend on it
 * \param pFragmentShadingRateCount  in/out element count
 * \param pFragmentShadingRates      output array, may be NULL
 * \return the status reported by vk_outarray_status() for the output array
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPhysicalDeviceFragmentShadingRatesKHR(
   VkPhysicalDevice physicalDevice,
   uint32_t *pFragmentShadingRateCount,
   VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

   /* Each element is a VkPhysicalDeviceFragmentShadingRateKHR, so it must
    * carry ..._FRAGMENT_SHADING_RATE_KHR as its sType. The similarly named
    * ..._FRAGMENT_SHADING_RATE_PROPERTIES_KHR value identifies the separate
    * VkPhysicalDeviceFragmentShadingRatePropertiesKHR structure.
    */
#define append_rate(w, h, s)                                                 \
   {                                                                         \
      VkPhysicalDeviceFragmentShadingRateKHR rate = {                        \
         .sType =                                                            \
            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR,     \
         .sampleCounts = s,                                                  \
         .fragmentSize = { .width = w, .height = h },                        \
      };                                                                     \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, \
                               r) *r = rate;                                 \
   }

   append_rate(4, 4, VK_SAMPLE_COUNT_1_BIT);
   append_rate(4, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT);
   append_rate(2, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   append_rate(2, 1, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   append_rate(1, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   append_rate(1, 1, ~0);

#undef append_rate

   return vk_outarray_status(&out);
}
static VkResult
tu_queue_init(struct tu_device *device,
struct tu_queue *queue,

View File

@ -265,6 +265,9 @@ tu_physical_device_get_format_properties(
if (vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
linear = 0;
if (vk_format == VK_FORMAT_R8_UINT)
optimal |= VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
end:
out_properties->linearTilingFeatures = linear;
out_properties->optimalTilingFeatures = optimal;
@ -513,6 +516,14 @@ tu_get_image_format_properties(
}
}
if (image_usage &
VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) {
if (!(format_feature_flags &
VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)) {
return tu_image_unsupported_format(pImageFormatProperties);
}
}
*pImageFormatProperties = (VkImageFormatProperties) {
.maxExtent = maxExtent,
.maxMipLevels = maxMipLevels,

View File

@ -662,6 +662,13 @@ tu_image_init(struct tu_device *device, struct tu_image *image,
TILE6_LINEAR) != WZYX)
image->force_linear_tile = true;
/* Some kind of HW limitation. */
if (pCreateInfo->usage &
VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR &&
image->vk.extent.width < 16) {
image->force_linear_tile = true;
}
if (image->force_linear_tile ||
!ubwc_possible(device, image->vk.format, pCreateInfo->imageType,
pCreateInfo->usage, image->vk.stencil_usage,

View File

@ -999,6 +999,20 @@ tu_CreateRenderPass2(VkDevice _device,
if (a != VK_ATTACHMENT_UNUSED) {
tu_subpass_use_attachment(pass, i, a, pCreateInfo);
}
const VkFragmentShadingRateAttachmentInfoKHR *fsr_att_info =
vk_find_struct_const(desc->pNext,
FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
if (fsr_att_info && fsr_att_info->pFragmentShadingRateAttachment &&
fsr_att_info->pFragmentShadingRateAttachment->attachment !=
VK_ATTACHMENT_UNUSED) {
subpass->fsr_attachment =
fsr_att_info->pFragmentShadingRateAttachment->attachment;
subpass->fsr_attachment_texel_size =
fsr_att_info->shadingRateAttachmentTexelSize;
} else {
subpass->fsr_attachment = VK_ATTACHMENT_UNUSED;
}
}
tu_render_pass_patch_input_gmem(pass);
@ -1235,6 +1249,25 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
pass->has_fdm = false;
}
const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_info =
vk_find_struct_const(info->pNext,
RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
if (fsr_info && fsr_info->imageView != VK_NULL_HANDLE) {
VK_FROM_HANDLE(tu_image_view, view, fsr_info->imageView);
struct tu_render_pass_attachment *att = &pass->attachments[a];
tu_setup_dynamic_attachment(att, view);
subpass->fsr_attachment = a++;
attachment_set_ops(device, att,
VK_ATTACHMENT_LOAD_OP_DONT_CARE,
VK_ATTACHMENT_LOAD_OP_DONT_CARE,
VK_ATTACHMENT_STORE_OP_DONT_CARE,
VK_ATTACHMENT_STORE_OP_DONT_CARE);
subpass->fsr_attachment_texel_size = fsr_info->shadingRateAttachmentTexelSize;
} else {
subpass->fsr_attachment = VK_ATTACHMENT_UNUSED;
}
if (TU_DEBUG(FDM) && !tu_render_pass_disable_fdm(pass))
pass->has_fdm = true;

View File

@ -63,6 +63,10 @@ struct tu_subpass
struct tu_subpass_attachment *color_attachments;
struct tu_subpass_attachment *resolve_attachments;
struct tu_subpass_attachment depth_stencil_attachment;
uint32_t fsr_attachment;
VkExtent2D fsr_attachment_texel_size;
/* When using dynamic rendering depth and stencil attachments may be
* set to unused independently, so we need to track this bit of
* information separately for each of them.

View File

@ -3098,7 +3098,7 @@ tu6_rast_size(struct tu_device *dev,
bool per_view_viewport)
{
if (CHIP == A6XX) {
return 15 + (dev->physical_device->info->a6xx.has_shading_rate ? 8 : 0);
return 15 + (dev->physical_device->info->a6xx.has_legacy_pipeline_shading_rate ? 8 : 0);
} else {
return 17;
}
@ -3168,7 +3168,7 @@ tu6_emit_rast(struct tu_cs *cs,
A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
A6XX_GRAS_SU_POINT_SIZE(1.0f));
if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_shading_rate) {
if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_legacy_pipeline_shading_rate) {
tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A00());
tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A10());
tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A20());
@ -3342,6 +3342,81 @@ tu6_emit_prim_mode_sysmem(struct tu_cs *cs,
.single_prim_mode = sysmem_prim_mode));
}
/* Dynamic graphics state associated with the fragment_shading_rate draw
 * state: only MESA_VK_DYNAMIC_FSR.
 * NOTE(review): presumably looked up by the DRAW_STATE*() macros through the
 * tu_<name>_state naming convention — confirm against the macro definitions.
 */
static const enum mesa_vk_dynamic_graphics_state tu_fragment_shading_rate_state[] = {
MESA_VK_DYNAMIC_FSR,
};
/* Size to reserve for the fragment shading rate draw state.
 *
 * None of the arguments are consulted: the result is a constant.
 */
template <chip CHIP>
static unsigned
tu6_fragment_shading_rate_size(struct tu_device *dev,
                               const vk_fragment_shading_rate_state *fsr,
                               bool enable_att_fsr,
                               bool enable_prim_fsr,
                               bool fs_reads_fsr)
{
   /* tu6_emit_fragment_shading_rate() writes the RB, SP and GRAS FSR_CONFIG
    * registers on every path.
    * NOTE(review): assumes each single-register write takes two dwords —
    * confirm against tu_cs_emit_regs().
    */
   const unsigned fsr_config_reg_writes = 3;
   const unsigned dwords_per_reg_write = 2;

   return fsr_config_reg_writes * dwords_per_reg_write;
}
template <chip CHIP>
static void
tu6_emit_fragment_shading_rate(struct tu_cs *cs,
const vk_fragment_shading_rate_state *fsr,
bool enable_att_fsr,
bool enable_prim_fsr,
bool fs_reads_fsr)
{
/* gl_ShadingRateEXT don't read 1x1 value with null config, so
* if it is read - we have to emit the config.
*/
if (!fsr || (!fs_reads_fsr && vk_fragment_shading_rate_is_disabled(fsr))) {
tu_cs_emit_regs(cs, A6XX_RB_FSR_CONFIG());
tu_cs_emit_regs(cs, A7XX_SP_FSR_CONFIG());
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_CONFIG());
return;
}
bool enable_draw_fsr = true;
if (enable_att_fsr) {
if (fsr->combiner_ops[1] ==
VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR) {
enable_draw_fsr = false;
enable_prim_fsr = false;
} else if (fsr->combiner_ops[1] ==
VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) {
enable_att_fsr = false;
}
}
if (enable_prim_fsr) {
if (fsr->combiner_ops[0] ==
VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR) {
enable_draw_fsr = false;
} else if (fsr->combiner_ops[0] ==
VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) {
enable_prim_fsr = false;
}
}
tu_cs_emit_regs(
cs,
A6XX_RB_FSR_CONFIG(.unk2 = true, .pipeline_fsr_enable = enable_draw_fsr,
.attachment_fsr_enable = enable_att_fsr,
.primitive_fsr_enable = enable_prim_fsr));
tu_cs_emit_regs(
cs, A7XX_SP_FSR_CONFIG(.pipeline_fsr_enable = enable_draw_fsr,
.attachment_fsr_enable = enable_att_fsr,
.primitive_fsr_enable = enable_prim_fsr));
tu_cs_emit_regs(
cs, A7XX_GRAS_FSR_CONFIG(
.pipeline_fsr_enable = enable_draw_fsr,
.frag_size_x = util_logbase2(fsr->fragment_size.width),
.frag_size_y = util_logbase2(fsr->fragment_size.height),
.combiner_op_1 = (a6xx_fsr_combiner) fsr->combiner_ops[0],
.combiner_op_2 = (a6xx_fsr_combiner) fsr->combiner_ops[1],
.attachment_fsr_enable = enable_att_fsr,
.primitive_fsr_enable = enable_prim_fsr));
}
static inline bool
emit_pipeline_state(BITSET_WORD *keep, BITSET_WORD *remove,
BITSET_WORD *pipeline_set,
@ -3467,6 +3542,7 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
tu_calc_bandwidth(&pipeline->bandwidth, cb,
builder->graphics_state.rp);
DRAW_STATE(blend_constants, TU_DYNAMIC_STATE_BLEND_CONSTANTS, cb);
if (attachments_valid &&
!(builder->graphics_state.rp->attachments &
MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS)) {
@ -3526,6 +3602,19 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
vk_pipeline_flags_feedback_loops(builder->graphics_state.pipeline_flags),
&pipeline->prim_order.sysmem_single_prim_mode);
}
if (builder->device->physical_device->info->a6xx.has_attachment_shading_rate) {
bool has_fsr_att =
builder->graphics_state.pipeline_flags &
VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
DRAW_STATE_COND(fragment_shading_rate,
TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE,
attachments_valid && pipeline_contains_all_shader_state(pipeline),
builder->graphics_state.fsr,
has_fsr_att,
pipeline->program.writes_shading_rate,
pipeline->program.reads_shading_rate);
}
#undef DRAW_STATE
#undef DRAW_STATE_COND
#undef EMIT_STATE
@ -3692,6 +3781,16 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
&cmd->state.vk_rp);
DRAW_STATE(blend_constants, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
&cmd->vk.dynamic_graphics_state.cb);
if (cmd->device->physical_device->info->a6xx.has_attachment_shading_rate) {
DRAW_STATE_COND(fragment_shading_rate,
TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE,
cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_SHADING_RATE),
&cmd->vk.dynamic_graphics_state.fsr,
cmd->state.subpass->fsr_attachment != VK_ATTACHMENT_UNUSED,
cmd->state.program.writes_shading_rate,
cmd->state.program.reads_shading_rate);
}
DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST,
cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS |
TU_CMD_DIRTY_PER_VIEW_VIEWPORT),
@ -4170,6 +4269,11 @@ tu_pipeline_builder_init_graphics(
VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
}
if (subpass->fsr_attachment != VK_ATTACHMENT_UNUSED) {
rp_flags |=
VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
}
builder->unscaled_input_fragcoord = 0;
for (unsigned i = 0; i < subpass->input_count; i++) {
/* Input attachments stored in GMEM must be loaded with unscaled

View File

@ -32,6 +32,7 @@ enum tu_dynamic_state
TU_DYNAMIC_STATE_VERTEX_INPUT,
TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS,
TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM,
TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE = TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM,
TU_DYNAMIC_STATE_COUNT,
};

View File

@ -126,7 +126,7 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
);
}
if (CHIP == A6XX && ctx->screen->info->a6xx.has_shading_rate) {
if (CHIP == A6XX && ctx->screen->info->a6xx.has_legacy_pipeline_shading_rate) {
OUT_REG(ring, A6XX_RB_UNKNOWN_8A00());
OUT_REG(ring, A6XX_RB_UNKNOWN_8A10());
OUT_REG(ring, A6XX_RB_UNKNOWN_8A20());