radv: Add a flush postamble on GFX6.

Create a CS which contains just a cache flush,
that can be used as a postamble in command submissions.

According to RadeonSI code, the kernel flushes L2
before shaders are finished on GFX6.

Previously, RADV always added a flush at the end of
each command buffer. The flush postamble should be
a less wasteful alternative to that.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31695>
This commit is contained in:
Timur Kristóf 2024-10-16 23:11:13 +02:00 committed by Marge Bot
parent b3adf02b22
commit 030a7510ce
2 changed files with 54 additions and 1 deletions

View File

@ -1336,6 +1336,45 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
return radv_update_preamble_cs(queue, device, &needs);
}
/* Creates a postamble CS that executes cache flush commands
* that we can use at the end of each submission.
*
* GFX6: The kernel flushes L2 before shaders are finished.
* Therefore we need to wait for idle at the end of each submission.
*/
static VkResult
radv_create_flush_postamble(struct radv_queue *queue)
{
const struct radv_device *device = radv_queue_device(queue);
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ip = radv_queue_family_to_ring(pdev, queue->state.qf);
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs = ws->cs_create(ws, ip, false);
if (!cs)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
radeon_check_space(ws, cs, 256);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
if (ip == AMD_IP_GFX)
flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
enum rgp_flush_bits sqtt_flush_bits = 0;
radv_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, queue->state.qf, flush_bits, &sqtt_flush_bits, 0);
VkResult r = ws->cs_finalize(cs);
if (r != VK_SUCCESS) {
ws->cs_destroy(cs);
return r;
}
queue->state.flush_postamble_cs = cs;
return VK_SUCCESS;
}
static VkResult
radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
{
@ -1649,7 +1688,7 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
unsigned num_postambles = 0;
struct radeon_cmdbuf *initial_preambles[5] = {0};
struct radeon_cmdbuf *continue_preambles[5] = {0};
struct radeon_cmdbuf *postambles[3] = {0};
struct radeon_cmdbuf *postambles[4] = {0};
if (queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE) {
initial_preambles[num_initial_preambles++] =
@ -1678,6 +1717,10 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
}
}
if (queue->state.flush_postamble_cs) {
postambles[num_postambles++] = queue->state.flush_postamble_cs;
}
const unsigned num_1q_initial_preambles = num_initial_preambles;
const unsigned num_1q_continue_preambles = num_continue_preambles;
const unsigned num_1q_postambles = num_postambles;
@ -1930,6 +1973,13 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
goto fail;
}
if (pdev->info.gfx_level == GFX6 &&
(queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE)) {
result = radv_create_flush_postamble(queue);
if (result != VK_SUCCESS)
goto fail;
}
if (queue->state.qf == RADV_QUEUE_SPARSE) {
queue->vk.driver_submit = radv_queue_sparse_submit;
vk_queue_enable_submit_thread(&queue->vk);
@ -1956,6 +2006,8 @@ radv_queue_state_finish(struct radv_queue_state *queue, struct radv_device *devi
device->ws->cs_destroy(queue->gang_wait_preamble_cs);
if (queue->gang_wait_postamble_cs)
device->ws->cs_destroy(queue->gang_wait_postamble_cs);
if (queue->flush_postamble_cs)
device->ws->cs_destroy(queue->flush_postamble_cs);
if (queue->descriptor_bo)
radv_bo_destroy(device, NULL, queue->descriptor_bo);
if (queue->scratch_bo) {

View File

@ -67,6 +67,7 @@ struct radv_queue_state {
struct radeon_cmdbuf *continue_preamble_cs;
struct radeon_cmdbuf *gang_wait_preamble_cs;
struct radeon_cmdbuf *gang_wait_postamble_cs;
struct radeon_cmdbuf *flush_postamble_cs; /* GFX6 only */
/* the uses_shadow_regs here will be set only for general queue */
bool uses_shadow_regs;