mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-23 02:04:41 +08:00
tu/a7xx: Implement VK_KHR_fragment_shading_rate
- A650+ - should be able to support pipelineFragmentShadingRate, but in a different way than A7XX. Not implemented here. - A7XX - supports pipelineFragmentShadingRate and attachmentFragmentShadingRate. - A740+ - supports primitiveFragmentShadingRate. layeredShadingRateAttachments is unsupported at the moment due to test failures, although the proprietary driver supports it. Passes: dEQP-VK.fragment_shading_rate.* on A750/A740. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30905>
This commit is contained in:
parent
117379a77a
commit
2ab8eff511
@ -523,7 +523,7 @@ Khronos extensions that are not part of any Vulkan version:
|
||||
VK_KHR_external_semaphore_fd DONE (anv, dzn, hasvk, nvk, panvk, pvr, radv, tu, v3dv, vn)
|
||||
VK_KHR_external_semaphore_win32 DONE (dzn)
|
||||
VK_KHR_fragment_shader_barycentric DONE (nvk/Turing+, radv/gfx10.3+)
|
||||
VK_KHR_fragment_shading_rate DONE (anv/gen11+, nvk/Turing+, radv/gfx10.3+, vn)
|
||||
VK_KHR_fragment_shading_rate DONE (anv/gen11+, nvk/Turing+, radv/gfx10.3+, tu/a7xx+, vn)
|
||||
VK_KHR_get_display_properties2 DONE (anv, nvk, pvr, radv, tu, v3dv)
|
||||
VK_KHR_get_surface_capabilities2 DONE (anv, lvp, nvk, pvr, radv, tu, v3dv, vn)
|
||||
VK_KHR_global_priority DONE (anv, panvk, radv, tu)
|
||||
|
@ -79,7 +79,7 @@ struct fd_dev_info {
|
||||
bool tess_use_shared;
|
||||
|
||||
/* Does the hw support GL_QCOM_shading_rate? */
|
||||
bool has_shading_rate;
|
||||
bool has_legacy_pipeline_shading_rate;
|
||||
|
||||
/* Whether a 16-bit descriptor can be used */
|
||||
bool storage_16bit;
|
||||
@ -186,6 +186,8 @@ struct fd_dev_info {
|
||||
*/
|
||||
bool has_coherent_ubwc_flag_caches;
|
||||
|
||||
bool has_attachment_shading_rate;
|
||||
|
||||
struct {
|
||||
uint32_t PC_POWER_CNTL;
|
||||
uint32_t TPL1_DBG_ECO_CNTL;
|
||||
@ -302,6 +304,8 @@ struct fd_dev_info {
|
||||
|
||||
/* Whether only 256 vec4 constants are available for compute */
|
||||
bool compute_constlen_quirk;
|
||||
|
||||
bool has_primitive_shading_rate;
|
||||
} a7xx;
|
||||
};
|
||||
|
||||
|
@ -416,7 +416,7 @@ a6xx_gen4 = A6XXProps(
|
||||
has_cp_reg_write = False,
|
||||
has_8bpp_ubwc = False,
|
||||
has_lpac = True,
|
||||
has_shading_rate = True,
|
||||
has_legacy_pipeline_shading_rate = True,
|
||||
has_getfiberid = True,
|
||||
has_dp2acc = True,
|
||||
has_dp4acc = True,
|
||||
@ -842,7 +842,6 @@ a7xx_base = A6XXProps(
|
||||
has_separate_chroma_filter = True,
|
||||
has_sample_locations = True,
|
||||
has_lpac = True,
|
||||
has_shading_rate = True,
|
||||
has_getfiberid = True,
|
||||
has_dp2acc = True,
|
||||
has_dp4acc = True,
|
||||
@ -857,6 +856,7 @@ a7xx_base = A6XXProps(
|
||||
has_isam_v = True,
|
||||
has_ssbo_imm_offsets = True,
|
||||
has_early_preamble = True,
|
||||
has_attachment_shading_rate = True,
|
||||
)
|
||||
|
||||
a7xx_gen1 = A7XXProps(
|
||||
@ -875,6 +875,7 @@ a7xx_gen2 = A7XXProps(
|
||||
# this hint set. Match them for better compatibility by default.
|
||||
enable_tp_ubwc_flag_hint = False,
|
||||
has_64b_ssbo_atomics = True,
|
||||
has_primitive_shading_rate = True,
|
||||
)
|
||||
|
||||
a7xx_gen3 = A7XXProps(
|
||||
@ -895,6 +896,7 @@ a7xx_gen3 = A7XXProps(
|
||||
ubwc_coherency_quirk = True,
|
||||
has_persistent_counter = True,
|
||||
has_64b_ssbo_atomics = True,
|
||||
has_primitive_shading_rate = True,
|
||||
)
|
||||
|
||||
a730_magic_regs = dict(
|
||||
@ -950,11 +952,6 @@ a730_raw_magic_regs = [
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
|
||||
|
||||
# Shading rate group
|
||||
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
|
||||
]
|
||||
|
||||
a740_magic_regs = dict(
|
||||
@ -1021,12 +1018,7 @@ a740_raw_magic_regs = [
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
|
||||
|
||||
# Shading rate group
|
||||
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_FSR_BUFFER_DESC, 0x00000000],
|
||||
]
|
||||
|
||||
add_gpus([
|
||||
@ -1137,11 +1129,7 @@ add_gpus([
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
|
||||
|
||||
# Shading rate group
|
||||
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
|
||||
],
|
||||
))
|
||||
|
||||
@ -1238,12 +1226,6 @@ add_gpus([
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
|
||||
|
||||
# Shading rate group
|
||||
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_FSR_BUFFER_DESC, 0x00000000],
|
||||
],
|
||||
))
|
||||
|
||||
@ -1349,11 +1331,7 @@ add_gpus([
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],
|
||||
|
||||
# Shading rate group
|
||||
[A6XXRegs.REG_A6XX_RB_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_SP_FSR_CONFIG, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
|
||||
[A6XXRegs.REG_A7XX_GRAS_FSR_CONFIG, 0x00000000],
|
||||
|
||||
[0x930a, 0],
|
||||
[0x960a, 1],
|
||||
|
@ -1610,9 +1610,14 @@ r3d_setup(struct tu_cmd_buffer *cmd,
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0));
|
||||
tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0));
|
||||
|
||||
if (CHIP >= A7XX)
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO());
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_RB_FSR_CONFIG());
|
||||
tu_cs_emit_regs(cs, A7XX_SP_FSR_CONFIG());
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_CONFIG());
|
||||
}
|
||||
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
|
||||
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2));
|
||||
|
||||
|
@ -803,11 +803,20 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
|
||||
enable_mask = CP_SET_DRAW_STATE__0_SYSMEM;
|
||||
break;
|
||||
case TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM:
|
||||
/* By also applying the state during binning we ensure that there
|
||||
* is no rotation applied, by previous A6XX_GRAS_SC_CNTL::rotation.
|
||||
*/
|
||||
enable_mask =
|
||||
CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING;
|
||||
if (!cs->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches) {
|
||||
/* By also applying the state during binning we ensure that there
|
||||
* is no rotation applied, by previous A6XX_GRAS_SC_CNTL::rotation.
|
||||
*/
|
||||
enable_mask =
|
||||
CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING;
|
||||
} else {
|
||||
static_assert(TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM ==
|
||||
TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE);
|
||||
enable_mask = CP_SET_DRAW_STATE__0_GMEM |
|
||||
CP_SET_DRAW_STATE__0_SYSMEM |
|
||||
CP_SET_DRAW_STATE__0_BINNING;
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
enable_mask = CP_SET_DRAW_STATE__0_GMEM |
|
||||
@ -2542,6 +2551,7 @@ tu_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
|
||||
cmd_buffer->state.max_vbs_bound = 0;
|
||||
|
||||
cmd_buffer->vsc_initialized = false;
|
||||
cmd_buffer->prev_fsr_is_null = false;
|
||||
|
||||
ralloc_free(cmd_buffer->patchpoints_ctx);
|
||||
cmd_buffer->patchpoints_ctx = NULL;
|
||||
@ -3709,6 +3719,17 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_FEEDBACK_LOOPS | TU_CMD_DIRTY_LRZ;
|
||||
}
|
||||
|
||||
if (pipeline->program.writes_shading_rate !=
|
||||
cmd->state.pipeline_writes_shading_rate ||
|
||||
pipeline->program.reads_shading_rate !=
|
||||
cmd->state.pipeline_reads_shading_rate) {
|
||||
cmd->state.pipeline_writes_shading_rate =
|
||||
pipeline->program.writes_shading_rate;
|
||||
cmd->state.pipeline_reads_shading_rate =
|
||||
pipeline->program.reads_shading_rate;
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_SHADING_RATE;
|
||||
}
|
||||
|
||||
bool raster_order_attachment_access =
|
||||
pipeline->output.raster_order_attachment_access ||
|
||||
pipeline->ds.raster_order_attachment_access;
|
||||
@ -4586,6 +4607,49 @@ tu7_emit_subpass_clear(struct tu_cmd_buffer *cmd, struct tu_resolve_group *resol
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu7_emit_subpass_shading_rate(struct tu_cmd_buffer *cmd,
|
||||
const struct tu_subpass *subpass,
|
||||
struct tu_cs *cs)
|
||||
{
|
||||
if (subpass->fsr_attachment == VK_ATTACHMENT_UNUSED) {
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_DESC(),
|
||||
A7XX_GRAS_FSR_BUFFER_SIZE());
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_PITCH());
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_BUFFER_BASE());
|
||||
/* We need to invalidate cache when changing to NULL FSR attachment, but
|
||||
* only once.
|
||||
*/
|
||||
if (!cmd->prev_fsr_is_null) {
|
||||
tu_emit_raw_event_write<A7XX>(cmd, cs, LRZ_Q_CACHE_INVALIDATE,
|
||||
false);
|
||||
cmd->prev_fsr_is_null = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const struct tu_image_view *iview =
|
||||
cmd->state.attachments[subpass->fsr_attachment];
|
||||
assert(iview->vk.format == VK_FORMAT_R8_UINT);
|
||||
|
||||
tu_cs_emit_regs(
|
||||
cs,
|
||||
A7XX_GRAS_FSR_BUFFER_DESC(.layered = true,
|
||||
.tile_mode =
|
||||
(a6xx_tile_mode) iview->image->layout[0]
|
||||
.tile_mode, ),
|
||||
A7XX_GRAS_FSR_BUFFER_SIZE(.width = iview->view.width,
|
||||
.height = iview->view.height));
|
||||
tu_cs_emit_regs(
|
||||
cs, A7XX_GRAS_FSR_BUFFER_PITCH(.pitch = iview->view.pitch,
|
||||
.array_pitch = iview->view.layer_size));
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_GRAS_FSR_BUFFER_BASE(.qword = iview->view.base_addr));
|
||||
|
||||
tu_emit_raw_event_write<A7XX>(cmd, cs, LRZ_Q_CACHE_INVALIDATE, false);
|
||||
cmd->prev_fsr_is_null = false;
|
||||
}
|
||||
|
||||
/* emit loads, clears, and mrt/zs/msaa/ubwc state for the subpass that is
|
||||
* starting (either at vkCmdBeginRenderPass2() or vkCmdNextSubpass2())
|
||||
*
|
||||
@ -4613,6 +4677,10 @@ tu_emit_subpass_begin(struct tu_cmd_buffer *cmd)
|
||||
tu6_emit_mrt<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
tu6_emit_render_cntl<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs, false);
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu7_emit_subpass_shading_rate(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
}
|
||||
|
||||
tu_set_input_attachments(cmd, cmd->state.subpass);
|
||||
|
||||
vk_cmd_set_cb_attachment_count(&cmd->vk, cmd->state.subpass->color_count);
|
||||
@ -4787,6 +4855,15 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
||||
if (cmd->dynamic_pass.has_fdm)
|
||||
cmd->patchpoints_ctx = ralloc_context(NULL);
|
||||
|
||||
a = cmd->dynamic_subpass.fsr_attachment;
|
||||
if (a != VK_ATTACHMENT_UNUSED) {
|
||||
const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_info =
|
||||
vk_find_struct_const(pRenderingInfo->pNext,
|
||||
RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
|
||||
VK_FROM_HANDLE(tu_image_view, view, fsr_info->imageView);
|
||||
cmd->state.attachments[a] = view;
|
||||
}
|
||||
|
||||
tu_choose_gmem_layout(cmd);
|
||||
|
||||
cmd->state.renderpass_cache.pending_flush_bits =
|
||||
|
@ -73,8 +73,9 @@ enum tu_cmd_dirty_bits
|
||||
TU_CMD_DIRTY_RAST_ORDER = BIT(12),
|
||||
TU_CMD_DIRTY_FEEDBACK_LOOPS = BIT(13),
|
||||
TU_CMD_DIRTY_FS = BIT(14),
|
||||
TU_CMD_DIRTY_SHADING_RATE = BIT(15),
|
||||
/* all draw states were disabled and need to be re-enabled: */
|
||||
TU_CMD_DIRTY_DRAW_STATE = BIT(15)
|
||||
TU_CMD_DIRTY_DRAW_STATE = BIT(16)
|
||||
};
|
||||
|
||||
/* There are only three cache domains we have to care about: the CCU, or
|
||||
@ -514,6 +515,8 @@ struct tu_cmd_state
|
||||
bool raster_order_attachment_access_valid;
|
||||
bool blit_cache_cleaned;
|
||||
VkImageAspectFlags pipeline_feedback_loops;
|
||||
bool pipeline_writes_shading_rate;
|
||||
bool pipeline_reads_shading_rate;
|
||||
|
||||
bool pipeline_blend_lrz, pipeline_bandwidth;
|
||||
uint32_t pipeline_draw_states;
|
||||
@ -571,11 +574,11 @@ struct tu_cmd_buffer
|
||||
|
||||
struct tu_descriptor_state descriptors[MAX_BIND_POINTS];
|
||||
|
||||
struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 1];
|
||||
struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 2];
|
||||
struct tu_subpass_attachment dynamic_color_attachments[MAX_RTS];
|
||||
struct tu_subpass_attachment dynamic_input_attachments[MAX_RTS + 1];
|
||||
struct tu_subpass_attachment dynamic_resolve_attachments[MAX_RTS + 1];
|
||||
const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 1];
|
||||
const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 2];
|
||||
VkClearValue dynamic_clear_values[2 * (MAX_RTS + 1)];
|
||||
|
||||
struct tu_render_pass dynamic_pass;
|
||||
@ -613,6 +616,8 @@ struct tu_cmd_buffer
|
||||
uint32_t vsc_prim_strm_pitch;
|
||||
uint64_t vsc_draw_strm_va, vsc_draw_strm_size_va, vsc_prim_strm_va;
|
||||
bool vsc_initialized;
|
||||
|
||||
bool prev_fsr_is_null;
|
||||
};
|
||||
VK_DEFINE_HANDLE_CASTS(tu_cmd_buffer, vk.base, VkCommandBuffer,
|
||||
VK_OBJECT_TYPE_COMMAND_BUFFER)
|
||||
|
@ -166,6 +166,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
||||
.KHR_external_semaphore = true,
|
||||
.KHR_external_semaphore_fd = true,
|
||||
.KHR_format_feature_flags2 = true,
|
||||
.KHR_fragment_shading_rate = device->info->a6xx.has_attachment_shading_rate,
|
||||
.KHR_get_memory_requirements2 = true,
|
||||
.KHR_global_priority = true,
|
||||
.KHR_image_format_list = true,
|
||||
@ -468,6 +469,11 @@ tu_get_features(struct tu_physical_device *pdevice,
|
||||
/* VK_KHR_dynamic_rendering_local_read */
|
||||
features->dynamicRenderingLocalRead = true;
|
||||
|
||||
/* VK_KHR_fragment_shading_rate */
|
||||
features->pipelineFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
|
||||
features->primitiveFragmentShadingRate = pdevice->info->a7xx.has_primitive_shading_rate;
|
||||
features->attachmentFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
|
||||
|
||||
/* VK_KHR_index_type_uint8 */
|
||||
features->indexTypeUint8 = true;
|
||||
|
||||
@ -1045,6 +1051,34 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
||||
/* VK_KHR_compute_shader_derivatives */
|
||||
props->meshAndTaskShaderDerivatives = false;
|
||||
|
||||
/* VK_KHR_fragment_shading_rate */
|
||||
if (pdevice->info->a6xx.has_attachment_shading_rate) {
|
||||
props->minFragmentShadingRateAttachmentTexelSize = {8, 8};
|
||||
props->maxFragmentShadingRateAttachmentTexelSize = {8, 8};
|
||||
} else {
|
||||
props->minFragmentShadingRateAttachmentTexelSize = {0, 0};
|
||||
props->maxFragmentShadingRateAttachmentTexelSize = {0, 0};
|
||||
}
|
||||
props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
|
||||
props->primitiveFragmentShadingRateWithMultipleViewports =
|
||||
pdevice->info->a7xx.has_primitive_shading_rate;
|
||||
/* A7XX TODO: dEQP-VK.fragment_shading_rate.*.srlayered.* are failing
|
||||
* for some reason.
|
||||
*/
|
||||
props->layeredShadingRateAttachments = false;
|
||||
props->fragmentShadingRateNonTrivialCombinerOps = true;
|
||||
props->maxFragmentSize = {4, 4};
|
||||
props->maxFragmentSizeAspectRatio = 4;
|
||||
props->maxFragmentShadingRateCoverageSamples = 16;
|
||||
props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
|
||||
props->fragmentShadingRateWithShaderDepthStencilWrites = true;
|
||||
props->fragmentShadingRateWithSampleMask = true;
|
||||
props->fragmentShadingRateWithShaderSampleMask = true;
|
||||
props->fragmentShadingRateWithConservativeRasterization = false;
|
||||
props->fragmentShadingRateWithFragmentShaderInterlock = false;
|
||||
props->fragmentShadingRateWithCustomSampleLocations = true;
|
||||
props->fragmentShadingRateStrictMultiplyCombiner = true;
|
||||
|
||||
/* VK_KHR_push_descriptor */
|
||||
props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
|
||||
|
||||
@ -1189,7 +1223,7 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
||||
/* VK_KHR_maintenance6 */
|
||||
props->blockTexelViewCompatibleMultipleLayers = true;
|
||||
props->maxCombinedImageSamplerDescriptorCount = 1;
|
||||
props->fragmentShadingRateClampCombinerInputs = false; /* TODO */
|
||||
props->fragmentShadingRateClampCombinerInputs = true;
|
||||
|
||||
/* VK_EXT_host_image_copy */
|
||||
|
||||
@ -1770,6 +1804,39 @@ tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
|
||||
}
|
||||
}
|
||||
|
||||
/* Implements vkGetPhysicalDeviceFragmentShadingRatesKHR.
 *
 * Fills the caller's output array with the supported fragment sizes and the
 * sample counts each one may be used with, using the usual Vulkan two-call
 * (count query, then fill) idiom via the vk_outarray helpers.
 *
 * physicalDevice:            unused here; the list is the same for every
 *                            device that reaches this entry point.
 * pFragmentShadingRateCount: in/out element count.
 * pFragmentShadingRates:     output array, or NULL to only query the count.
 *
 * Returns the status computed by vk_outarray_status() for the output array.
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPhysicalDeviceFragmentShadingRatesKHR(
   VkPhysicalDevice physicalDevice,
   uint32_t *pFragmentShadingRateCount,
   VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

   /* Each element is a VkPhysicalDeviceFragmentShadingRateKHR, so its sType
    * must be ..._FRAGMENT_SHADING_RATE_KHR; the ..._PROPERTIES_KHR value
    * belongs to VkPhysicalDeviceFragmentShadingRatePropertiesKHR.
    */
#define append_rate(w, h, s)                                                 \
   do {                                                                      \
      VkPhysicalDeviceFragmentShadingRateKHR rate = {                        \
         .sType =                                                            \
            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR,     \
         .sampleCounts = (s),                                                \
         .fragmentSize = { .width = (w), .height = (h) },                    \
      };                                                                     \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, \
                               r) *r = rate;                                 \
   } while (0)

   /* Listed from largest width to smallest, and for equal widths from
    * largest height to smallest.
    */
   append_rate(4, 4, VK_SAMPLE_COUNT_1_BIT);
   append_rate(4, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT);
   append_rate(2, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   append_rate(2, 1, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   append_rate(1, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   /* The 1x1 entry advertises every sample-count bit. */
   append_rate(1, 1, ~0);

#undef append_rate

   return vk_outarray_status(&out);
}
|
||||
|
||||
static VkResult
|
||||
tu_queue_init(struct tu_device *device,
|
||||
struct tu_queue *queue,
|
||||
|
@ -265,6 +265,9 @@ tu_physical_device_get_format_properties(
|
||||
if (vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
||||
linear = 0;
|
||||
|
||||
if (vk_format == VK_FORMAT_R8_UINT)
|
||||
optimal |= VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
|
||||
|
||||
end:
|
||||
out_properties->linearTilingFeatures = linear;
|
||||
out_properties->optimalTilingFeatures = optimal;
|
||||
@ -513,6 +516,14 @@ tu_get_image_format_properties(
|
||||
}
|
||||
}
|
||||
|
||||
if (image_usage &
|
||||
VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) {
|
||||
if (!(format_feature_flags &
|
||||
VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)) {
|
||||
return tu_image_unsupported_format(pImageFormatProperties);
|
||||
}
|
||||
}
|
||||
|
||||
*pImageFormatProperties = (VkImageFormatProperties) {
|
||||
.maxExtent = maxExtent,
|
||||
.maxMipLevels = maxMipLevels,
|
||||
|
@ -662,6 +662,13 @@ tu_image_init(struct tu_device *device, struct tu_image *image,
|
||||
TILE6_LINEAR) != WZYX)
|
||||
image->force_linear_tile = true;
|
||||
|
||||
/* Some kind of HW limitation. */
|
||||
if (pCreateInfo->usage &
|
||||
VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR &&
|
||||
image->vk.extent.width < 16) {
|
||||
image->force_linear_tile = true;
|
||||
}
|
||||
|
||||
if (image->force_linear_tile ||
|
||||
!ubwc_possible(device, image->vk.format, pCreateInfo->imageType,
|
||||
pCreateInfo->usage, image->vk.stencil_usage,
|
||||
|
@ -999,6 +999,20 @@ tu_CreateRenderPass2(VkDevice _device,
|
||||
if (a != VK_ATTACHMENT_UNUSED) {
|
||||
tu_subpass_use_attachment(pass, i, a, pCreateInfo);
|
||||
}
|
||||
|
||||
const VkFragmentShadingRateAttachmentInfoKHR *fsr_att_info =
|
||||
vk_find_struct_const(desc->pNext,
|
||||
FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
|
||||
if (fsr_att_info && fsr_att_info->pFragmentShadingRateAttachment &&
|
||||
fsr_att_info->pFragmentShadingRateAttachment->attachment !=
|
||||
VK_ATTACHMENT_UNUSED) {
|
||||
subpass->fsr_attachment =
|
||||
fsr_att_info->pFragmentShadingRateAttachment->attachment;
|
||||
subpass->fsr_attachment_texel_size =
|
||||
fsr_att_info->shadingRateAttachmentTexelSize;
|
||||
} else {
|
||||
subpass->fsr_attachment = VK_ATTACHMENT_UNUSED;
|
||||
}
|
||||
}
|
||||
|
||||
tu_render_pass_patch_input_gmem(pass);
|
||||
@ -1235,6 +1249,25 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
|
||||
pass->has_fdm = false;
|
||||
}
|
||||
|
||||
const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_info =
|
||||
vk_find_struct_const(info->pNext,
|
||||
RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
|
||||
if (fsr_info && fsr_info->imageView != VK_NULL_HANDLE) {
|
||||
VK_FROM_HANDLE(tu_image_view, view, fsr_info->imageView);
|
||||
|
||||
struct tu_render_pass_attachment *att = &pass->attachments[a];
|
||||
tu_setup_dynamic_attachment(att, view);
|
||||
subpass->fsr_attachment = a++;
|
||||
attachment_set_ops(device, att,
|
||||
VK_ATTACHMENT_LOAD_OP_DONT_CARE,
|
||||
VK_ATTACHMENT_LOAD_OP_DONT_CARE,
|
||||
VK_ATTACHMENT_STORE_OP_DONT_CARE,
|
||||
VK_ATTACHMENT_STORE_OP_DONT_CARE);
|
||||
subpass->fsr_attachment_texel_size = fsr_info->shadingRateAttachmentTexelSize;
|
||||
} else {
|
||||
subpass->fsr_attachment = VK_ATTACHMENT_UNUSED;
|
||||
}
|
||||
|
||||
if (TU_DEBUG(FDM) && !tu_render_pass_disable_fdm(pass))
|
||||
pass->has_fdm = true;
|
||||
|
||||
|
@ -63,6 +63,10 @@ struct tu_subpass
|
||||
struct tu_subpass_attachment *color_attachments;
|
||||
struct tu_subpass_attachment *resolve_attachments;
|
||||
struct tu_subpass_attachment depth_stencil_attachment;
|
||||
|
||||
uint32_t fsr_attachment;
|
||||
VkExtent2D fsr_attachment_texel_size;
|
||||
|
||||
/* When using dynamic rendering depth and stencil attachments may be
|
||||
* set to unused independently, so we need to track this bit of
|
||||
* information separately for each of them.
|
||||
|
@ -3098,7 +3098,7 @@ tu6_rast_size(struct tu_device *dev,
|
||||
bool per_view_viewport)
|
||||
{
|
||||
if (CHIP == A6XX) {
|
||||
return 15 + (dev->physical_device->info->a6xx.has_shading_rate ? 8 : 0);
|
||||
return 15 + (dev->physical_device->info->a6xx.has_legacy_pipeline_shading_rate ? 8 : 0);
|
||||
} else {
|
||||
return 17;
|
||||
}
|
||||
@ -3168,7 +3168,7 @@ tu6_emit_rast(struct tu_cs *cs,
|
||||
A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
|
||||
A6XX_GRAS_SU_POINT_SIZE(1.0f));
|
||||
|
||||
if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_shading_rate) {
|
||||
if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_legacy_pipeline_shading_rate) {
|
||||
tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A00());
|
||||
tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A10());
|
||||
tu_cs_emit_regs(cs, A6XX_RB_UNKNOWN_8A20());
|
||||
@ -3342,6 +3342,81 @@ tu6_emit_prim_mode_sysmem(struct tu_cs *cs,
|
||||
.single_prim_mode = sysmem_prim_mode));
|
||||
}
|
||||
|
||||
/* Dynamic graphics state entries associated with the fragment_shading_rate
 * draw state; the only one is MESA_VK_DYNAMIC_FSR.
 * NOTE(review): presumably picked up by the DRAW_STATE/DRAW_STATE_COND macros
 * through the tu_<name>_state naming convention - confirm against the macro
 * definitions, which are not visible here.
 */
static const enum mesa_vk_dynamic_graphics_state tu_fragment_shading_rate_state[] = {
|
||||
MESA_VK_DYNAMIC_FSR,
|
||||
};
|
||||
|
||||
template <chip CHIP>
|
||||
static unsigned
|
||||
tu6_fragment_shading_rate_size(struct tu_device *dev,
|
||||
const vk_fragment_shading_rate_state *fsr,
|
||||
bool enable_att_fsr,
|
||||
bool enable_prim_fsr,
|
||||
bool fs_reads_fsr)
|
||||
{
|
||||
return 6;
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_fragment_shading_rate(struct tu_cs *cs,
|
||||
const vk_fragment_shading_rate_state *fsr,
|
||||
bool enable_att_fsr,
|
||||
bool enable_prim_fsr,
|
||||
bool fs_reads_fsr)
|
||||
{
|
||||
/* gl_ShadingRateEXT don't read 1x1 value with null config, so
|
||||
* if it is read - we have to emit the config.
|
||||
*/
|
||||
if (!fsr || (!fs_reads_fsr && vk_fragment_shading_rate_is_disabled(fsr))) {
|
||||
tu_cs_emit_regs(cs, A6XX_RB_FSR_CONFIG());
|
||||
tu_cs_emit_regs(cs, A7XX_SP_FSR_CONFIG());
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_FSR_CONFIG());
|
||||
return;
|
||||
}
|
||||
|
||||
bool enable_draw_fsr = true;
|
||||
if (enable_att_fsr) {
|
||||
if (fsr->combiner_ops[1] ==
|
||||
VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR) {
|
||||
enable_draw_fsr = false;
|
||||
enable_prim_fsr = false;
|
||||
} else if (fsr->combiner_ops[1] ==
|
||||
VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) {
|
||||
enable_att_fsr = false;
|
||||
}
|
||||
}
|
||||
if (enable_prim_fsr) {
|
||||
if (fsr->combiner_ops[0] ==
|
||||
VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR) {
|
||||
enable_draw_fsr = false;
|
||||
} else if (fsr->combiner_ops[0] ==
|
||||
VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) {
|
||||
enable_prim_fsr = false;
|
||||
}
|
||||
}
|
||||
|
||||
tu_cs_emit_regs(
|
||||
cs,
|
||||
A6XX_RB_FSR_CONFIG(.unk2 = true, .pipeline_fsr_enable = enable_draw_fsr,
|
||||
.attachment_fsr_enable = enable_att_fsr,
|
||||
.primitive_fsr_enable = enable_prim_fsr));
|
||||
tu_cs_emit_regs(
|
||||
cs, A7XX_SP_FSR_CONFIG(.pipeline_fsr_enable = enable_draw_fsr,
|
||||
.attachment_fsr_enable = enable_att_fsr,
|
||||
.primitive_fsr_enable = enable_prim_fsr));
|
||||
tu_cs_emit_regs(
|
||||
cs, A7XX_GRAS_FSR_CONFIG(
|
||||
.pipeline_fsr_enable = enable_draw_fsr,
|
||||
.frag_size_x = util_logbase2(fsr->fragment_size.width),
|
||||
.frag_size_y = util_logbase2(fsr->fragment_size.height),
|
||||
.combiner_op_1 = (a6xx_fsr_combiner) fsr->combiner_ops[0],
|
||||
.combiner_op_2 = (a6xx_fsr_combiner) fsr->combiner_ops[1],
|
||||
.attachment_fsr_enable = enable_att_fsr,
|
||||
.primitive_fsr_enable = enable_prim_fsr));
|
||||
}
|
||||
|
||||
|
||||
static inline bool
|
||||
emit_pipeline_state(BITSET_WORD *keep, BITSET_WORD *remove,
|
||||
BITSET_WORD *pipeline_set,
|
||||
@ -3467,6 +3542,7 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
|
||||
tu_calc_bandwidth(&pipeline->bandwidth, cb,
|
||||
builder->graphics_state.rp);
|
||||
DRAW_STATE(blend_constants, TU_DYNAMIC_STATE_BLEND_CONSTANTS, cb);
|
||||
|
||||
if (attachments_valid &&
|
||||
!(builder->graphics_state.rp->attachments &
|
||||
MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS)) {
|
||||
@ -3526,6 +3602,19 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
|
||||
vk_pipeline_flags_feedback_loops(builder->graphics_state.pipeline_flags),
|
||||
&pipeline->prim_order.sysmem_single_prim_mode);
|
||||
}
|
||||
|
||||
if (builder->device->physical_device->info->a6xx.has_attachment_shading_rate) {
|
||||
bool has_fsr_att =
|
||||
builder->graphics_state.pipeline_flags &
|
||||
VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
|
||||
DRAW_STATE_COND(fragment_shading_rate,
|
||||
TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE,
|
||||
attachments_valid && pipeline_contains_all_shader_state(pipeline),
|
||||
builder->graphics_state.fsr,
|
||||
has_fsr_att,
|
||||
pipeline->program.writes_shading_rate,
|
||||
pipeline->program.reads_shading_rate);
|
||||
}
|
||||
#undef DRAW_STATE
|
||||
#undef DRAW_STATE_COND
|
||||
#undef EMIT_STATE
|
||||
@ -3692,6 +3781,16 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
|
||||
&cmd->state.vk_rp);
|
||||
DRAW_STATE(blend_constants, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||||
&cmd->vk.dynamic_graphics_state.cb);
|
||||
|
||||
if (cmd->device->physical_device->info->a6xx.has_attachment_shading_rate) {
|
||||
DRAW_STATE_COND(fragment_shading_rate,
|
||||
TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE,
|
||||
cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_SHADING_RATE),
|
||||
&cmd->vk.dynamic_graphics_state.fsr,
|
||||
cmd->state.subpass->fsr_attachment != VK_ATTACHMENT_UNUSED,
|
||||
cmd->state.program.writes_shading_rate,
|
||||
cmd->state.program.reads_shading_rate);
|
||||
}
|
||||
DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST,
|
||||
cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS |
|
||||
TU_CMD_DIRTY_PER_VIEW_VIEWPORT),
|
||||
@ -4170,6 +4269,11 @@ tu_pipeline_builder_init_graphics(
|
||||
VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
|
||||
}
|
||||
|
||||
if (subpass->fsr_attachment != VK_ATTACHMENT_UNUSED) {
|
||||
rp_flags |=
|
||||
VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
|
||||
}
|
||||
|
||||
builder->unscaled_input_fragcoord = 0;
|
||||
for (unsigned i = 0; i < subpass->input_count; i++) {
|
||||
/* Input attachments stored in GMEM must be loaded with unscaled
|
||||
|
@ -32,6 +32,7 @@ enum tu_dynamic_state
|
||||
TU_DYNAMIC_STATE_VERTEX_INPUT,
|
||||
TU_DYNAMIC_STATE_PATCH_CONTROL_POINTS,
|
||||
TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM,
|
||||
TU_DYNAMIC_STATE_A7XX_FRAGMENT_SHADING_RATE = TU_DYNAMIC_STATE_PRIM_MODE_SYSMEM,
|
||||
TU_DYNAMIC_STATE_COUNT,
|
||||
};
|
||||
|
||||
|
@ -126,7 +126,7 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
|
||||
);
|
||||
}
|
||||
|
||||
if (CHIP == A6XX && ctx->screen->info->a6xx.has_shading_rate) {
|
||||
if (CHIP == A6XX && ctx->screen->info->a6xx.has_legacy_pipeline_shading_rate) {
|
||||
OUT_REG(ring, A6XX_RB_UNKNOWN_8A00());
|
||||
OUT_REG(ring, A6XX_RB_UNKNOWN_8A10());
|
||||
OUT_REG(ring, A6XX_RB_UNKNOWN_8A20());
|
||||
|
Loading…
Reference in New Issue
Block a user