anv: add new low level emission & dirty state tracking

A single Vulkan state can map to multiple fields in different GPU
instructions. This change introduces the bottom half of a simplified
emission mechanism where we do the following:
          Vulkan runtime state
                   |
                   V
        Intermediate driver state
                   |
                   V
         Instruction programming

This way we can detect that the intermediate state didn't change and
avoid HW instruction emission.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24536>
This commit is contained in:
Lionel Landwerlin 2023-08-02 11:36:39 +03:00 committed by Marge Bot
parent 8d66ff01b1
commit 50f6903bd9
10 changed files with 2252 additions and 247 deletions

View File

@ -52,6 +52,10 @@ anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
state->gfx.restart_index = UINT32_MAX;
state->gfx.object_preemption = true;
state->gfx.dirty = 0;
memcpy(state->gfx.dyn_state.dirty,
cmd_buffer->device->gfx_dirty_state,
sizeof(state->gfx.dyn_state.dirty));
}
static void
@ -422,6 +426,125 @@ anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->state.push_constants_dirty |= stages;
}
/**
 * This function computes the changes between 2 pipelines and flags the dirty
 * HW state appropriately.
 *
 * old_pipeline may be NULL (no previous pipeline bound), in which case every
 * compared state is flagged dirty.
 */
static void
anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
struct anv_graphics_pipeline *old_pipeline,
struct anv_graphics_pipeline *new_pipeline)
{
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
/* Compare a fixed-length packed instruction between the 2 pipelines and
 * flag the corresponding HW state dirty when the programming differs
 * (lengths are in dwords, hence the 4x factor in the memcmp size).
 */
#define diff_fix_state(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_pipeline == NULL || \
old_pipeline->name.len == new_pipeline->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) && \
(old_pipeline == NULL || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) != 0)) \
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
} while (0)
/* Same as diff_fix_state() but for packed instructions whose length can
 * differ between the 2 pipelines.
 */
#define diff_var_state(bit, name) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) && \
new_pipeline->name.len != 0 && \
(old_pipeline == NULL || \
old_pipeline->name.len != new_pipeline->name.len || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) != 0)) \
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
} while (0)
/* Debug check that a packed instruction is bit-identical between the 2
 * pipelines, for states we never expect to differ.
 */
#define assert_identical(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_pipeline == NULL || \
old_pipeline->name.len == new_pipeline->name.len); \
assert(old_pipeline == NULL || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) == 0); \
} while (0)
/* Debug check that the new pipeline does not program this state at all. */
#define assert_empty(name) assert(new_pipeline->name.len == 0)
/* Compare all states, including partial packed ones, the dynamic part is
 * left at 0 but the static part could still change.
 */
diff_fix_state(URB, final.urb);
diff_fix_state(VF_SGVS, final.vf_sgvs);
if (cmd_buffer->device->info->ver >= 11)
diff_fix_state(VF_SGVS_2, final.vf_sgvs_2);
if (cmd_buffer->device->info->ver >= 12)
diff_fix_state(PRIMITIVE_REPLICATION, final.primitive_replication);
diff_fix_state(SBE, final.sbe);
diff_fix_state(SBE_SWIZ, final.sbe_swiz);
diff_fix_state(MULTISAMPLE, final.ms);
diff_fix_state(VS, final.vs);
diff_fix_state(HS, final.hs);
diff_fix_state(DS, final.ds);
diff_fix_state(PS, final.ps);
diff_fix_state(PS_EXTRA, final.ps_extra);
diff_fix_state(CLIP, partial.clip);
diff_fix_state(SF, partial.sf);
diff_fix_state(RASTER, partial.raster);
diff_fix_state(WM, partial.wm);
diff_fix_state(STREAMOUT, partial.so);
diff_fix_state(GS, partial.gs);
diff_fix_state(TE, partial.te);
diff_fix_state(VFG, partial.vfg);
if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
diff_fix_state(TASK_CONTROL, final.task_control);
diff_fix_state(TASK_SHADER, final.task_shader);
diff_fix_state(TASK_REDISTRIB, final.task_redistrib);
diff_fix_state(MESH_CONTROL, final.mesh_control);
diff_fix_state(MESH_SHADER, final.mesh_shader);
diff_fix_state(MESH_DISTRIB, final.mesh_distrib);
diff_fix_state(CLIP_MESH, final.clip_mesh);
diff_fix_state(SBE_MESH, final.sbe_mesh);
} else {
assert_empty(final.task_control);
assert_empty(final.task_shader);
assert_empty(final.task_redistrib);
assert_empty(final.mesh_control);
assert_empty(final.mesh_shader);
assert_empty(final.mesh_distrib);
assert_empty(final.clip_mesh);
assert_empty(final.sbe_mesh);
}
/* States that should never vary between pipelines, but can be affected by
 * blorp etc...
 */
assert_identical(VF_STATISTICS, final.vf_statistics);
/* States that can vary in length */
diff_var_state(VF_SGVS_INSTANCING, final.vf_sgvs_instancing);
diff_var_state(SO_DECL_LIST, final.so_decl_list);
#undef diff_fix_state
#undef diff_var_state
#undef assert_identical
#undef assert_empty
/* We're not diffing the following:
 * - anv_graphics_pipeline::vertex_input_data
 * - anv_graphics_pipeline::final::vf_instancing
 *
 * since they are tracked by the runtime.
 */
}
void anv_CmdBindPipeline(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
@ -451,26 +574,28 @@ void anv_CmdBindPipeline(
}
case VK_PIPELINE_BIND_POINT_GRAPHICS: {
struct anv_graphics_pipeline *gfx_pipeline =
struct anv_graphics_pipeline *old_pipeline =
cmd_buffer->state.gfx.pipeline;
struct anv_graphics_pipeline *new_pipeline =
anv_pipeline_to_graphics(pipeline);
if (cmd_buffer->state.gfx.pipeline == gfx_pipeline)
if (old_pipeline == new_pipeline)
return;
cmd_buffer->state.gfx.base.pipeline = pipeline;
cmd_buffer->state.gfx.pipeline = gfx_pipeline;
cmd_buffer->state.gfx.pipeline = new_pipeline;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
anv_foreach_stage(stage, gfx_pipeline->base.base.active_stages) {
anv_foreach_stage(stage, new_pipeline->base.base.active_stages) {
set_dirty_for_bind_map(cmd_buffer, stage,
&gfx_pipeline->base.shaders[stage]->bind_map);
&new_pipeline->base.shaders[stage]->bind_map);
}
/* Apply the non dynamic state from the pipeline */
vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
&gfx_pipeline->dynamic_state);
&new_pipeline->dynamic_state);
state = &cmd_buffer->state.gfx.base;
stages = gfx_pipeline->base.base.active_stages;
stages = new_pipeline->base.base.active_stages;
/* When the pipeline is using independent states and dynamic buffers,
@ -479,7 +604,7 @@ void anv_CmdBindPipeline(
*/
struct anv_push_constants *push =
&cmd_buffer->state.gfx.base.push_constants;
struct anv_pipeline_sets_layout *layout = &gfx_pipeline->base.base.layout;
struct anv_pipeline_sets_layout *layout = &new_pipeline->base.base.layout;
if (layout->independent_sets && layout->num_dynamic_buffers > 0) {
bool modified = false;
for (uint32_t s = 0; s < layout->num_sets; s++) {
@ -499,15 +624,17 @@ void anv_CmdBindPipeline(
cmd_buffer->state.push_constants_dirty |= stages;
}
if ((gfx_pipeline->fs_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) &&
push->gfx.fs_msaa_flags != gfx_pipeline->fs_msaa_flags) {
push->gfx.fs_msaa_flags = gfx_pipeline->fs_msaa_flags;
if ((new_pipeline->fs_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) &&
push->gfx.fs_msaa_flags != new_pipeline->fs_msaa_flags) {
push->gfx.fs_msaa_flags = new_pipeline->fs_msaa_flags;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
}
if (gfx_pipeline->dynamic_patch_control_points) {
if (new_pipeline->dynamic_patch_control_points) {
cmd_buffer->state.push_constants_dirty |=
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
}
anv_cmd_buffer_flush_pipeline_state(cmd_buffer, old_pipeline, new_pipeline);
break;
}

View File

@ -3482,6 +3482,34 @@ VkResult anv_CreateDevice(
anv_device_utrace_init(device);
BITSET_ONES(device->gfx_dirty_state);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
if (device->info->ver < 11)
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_VF_SGVS_2);
if (device->info->ver < 12) {
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_DEPTH_BOUNDS);
}
if (!device->vk.enabled_extensions.EXT_sample_locations)
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SAMPLE_PATTERN);
if (!device->vk.enabled_extensions.KHR_fragment_shading_rate)
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CPS);
if (!device->vk.enabled_extensions.EXT_mesh_shader) {
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SBE_MESH);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CLIP_MESH);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_CONTROL);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_SHADER);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_DISTRIB);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_CONTROL);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_SHADER);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_REDISTRIB);
}
if (!intel_needs_workaround(device->info, 18019816803))
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_18019816803);
if (device->info->ver > 9)
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PMA_FIX);
*pDevice = anv_device_to_handle(device);
return VK_SUCCESS;

View File

@ -128,6 +128,10 @@ void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
void genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
bool enable);

View File

@ -1124,6 +1124,304 @@ anv_device_upload_nir(struct anv_device *device,
void
anv_load_fp64_shader(struct anv_device *device);
/**
 * This enum tracks the various HW instructions that hold graphics state
 * needing to be reprogrammed. Some instructions are grouped together as they
 * pretty much need to be emitted together (like 3DSTATE_URB_*).
 *
 * Not all bits apply to all platforms. We build a dirty state based on
 * enabled extensions & generation on anv_device (see
 * anv_device::gfx_dirty_state).
 */
enum anv_gfx_state_bits {
/* Pipeline states */
ANV_GFX_STATE_URB, /* All legacy stages, including mesh */
ANV_GFX_STATE_VF_STATISTICS,
ANV_GFX_STATE_VF_SGVS,
ANV_GFX_STATE_VF_SGVS_2, /* Gfx11+ only */
ANV_GFX_STATE_VF_SGVS_VI, /* 3DSTATE_VERTEX_ELEMENTS for sgvs elements */
ANV_GFX_STATE_VF_SGVS_INSTANCING, /* 3DSTATE_VF_INSTANCING for sgvs elements */
ANV_GFX_STATE_PRIMITIVE_REPLICATION, /* Gfx12+ only */
ANV_GFX_STATE_MULTISAMPLE,
ANV_GFX_STATE_SBE,
ANV_GFX_STATE_SBE_SWIZ,
ANV_GFX_STATE_SO_DECL_LIST,
ANV_GFX_STATE_VS,
ANV_GFX_STATE_HS,
ANV_GFX_STATE_DS,
ANV_GFX_STATE_GS,
ANV_GFX_STATE_PS,
ANV_GFX_STATE_PS_EXTRA,
/* Mesh/task pipeline states, only used with EXT_mesh_shader */
ANV_GFX_STATE_SBE_MESH,
ANV_GFX_STATE_CLIP_MESH,
ANV_GFX_STATE_MESH_CONTROL,
ANV_GFX_STATE_MESH_SHADER,
ANV_GFX_STATE_MESH_DISTRIB,
ANV_GFX_STATE_TASK_CONTROL,
ANV_GFX_STATE_TASK_SHADER,
ANV_GFX_STATE_TASK_REDISTRIB,
/* Dynamic states */
ANV_GFX_STATE_BLEND_STATE_POINTERS,
ANV_GFX_STATE_CLIP,
ANV_GFX_STATE_CC_STATE,
ANV_GFX_STATE_CPS, /* Only with KHR_fragment_shading_rate */
ANV_GFX_STATE_DEPTH_BOUNDS,
ANV_GFX_STATE_INDEX_BUFFER,
ANV_GFX_STATE_LINE_STIPPLE,
ANV_GFX_STATE_PS_BLEND,
ANV_GFX_STATE_RASTER,
ANV_GFX_STATE_SAMPLE_MASK,
ANV_GFX_STATE_SAMPLE_PATTERN, /* Only with EXT_sample_locations */
ANV_GFX_STATE_SCISSOR,
ANV_GFX_STATE_SF,
ANV_GFX_STATE_STREAMOUT,
ANV_GFX_STATE_TE,
ANV_GFX_STATE_VERTEX_INPUT,
ANV_GFX_STATE_VF,
ANV_GFX_STATE_VF_TOPOLOGY,
ANV_GFX_STATE_VFG,
ANV_GFX_STATE_VIEWPORT_CC,
ANV_GFX_STATE_VIEWPORT_SF_CLIP,
ANV_GFX_STATE_WM,
ANV_GFX_STATE_WM_DEPTH_STENCIL,
ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */
ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */
ANV_GFX_STATE_MAX,
};
/* Returns the string name of an anv_gfx_state_bits value (debug helper). */
const char *anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state);
/* This structure tracks the values programmed in the HW instructions
 * corresponding to the dynamic states of the Vulkan API. Only fields that
 * need to be reemitted outside of the VkPipeline object are tracked here.
 */
struct anv_gfx_dynamic_state {
/* 3DSTATE_BLEND_STATE_POINTERS */
struct {
bool AlphaToCoverageEnable;
bool AlphaToOneEnable;
bool IndependentAlphaBlendEnable;
/* Per render target blend programming */
struct {
bool WriteDisableAlpha;
bool WriteDisableRed;
bool WriteDisableGreen;
bool WriteDisableBlue;
uint32_t LogicOpFunction;
bool LogicOpEnable;
bool ColorBufferBlendEnable;
uint32_t ColorClampRange;
bool PreBlendColorClampEnable;
bool PostBlendColorClampEnable;
uint32_t SourceBlendFactor;
uint32_t DestinationBlendFactor;
uint32_t ColorBlendFunction;
uint32_t SourceAlphaBlendFactor;
uint32_t DestinationAlphaBlendFactor;
uint32_t AlphaBlendFunction;
} rts[MAX_RTS];
} blend;
/* 3DSTATE_CC_STATE_POINTERS */
struct {
float BlendConstantColorRed;
float BlendConstantColorGreen;
float BlendConstantColorBlue;
float BlendConstantColorAlpha;
} cc;
/* 3DSTATE_CLIP */
struct {
uint32_t APIMode;
uint32_t ViewportXYClipTestEnable;
uint32_t MaximumVPIndex;
uint32_t TriangleStripListProvokingVertexSelect;
uint32_t LineStripListProvokingVertexSelect;
uint32_t TriangleFanProvokingVertexSelect;
} clip;
/* 3DSTATE_CPS/3DSTATE_CPS_POINTERS */
struct {
/* Gfx11 */
uint32_t CoarsePixelShadingMode;
float MinCPSizeX;
float MinCPSizeY;
/* Gfx12+ */
uint32_t CoarsePixelShadingStateArrayPointer;
} cps;
/* 3DSTATE_DEPTH_BOUNDS */
struct {
bool DepthBoundsTestEnable;
float DepthBoundsTestMinValue;
float DepthBoundsTestMaxValue;
} db;
/* 3DSTATE_GS */
struct {
uint32_t ReorderMode;
} gs;
/* 3DSTATE_LINE_STIPPLE */
struct {
uint32_t LineStipplePattern;
float LineStippleInverseRepeatCount;
uint32_t LineStippleRepeatCount;
} ls;
/* 3DSTATE_PS_BLEND */
struct {
bool HasWriteableRT;
bool ColorBufferBlendEnable;
uint32_t SourceAlphaBlendFactor;
uint32_t DestinationAlphaBlendFactor;
uint32_t SourceBlendFactor;
uint32_t DestinationBlendFactor;
bool AlphaTestEnable;
bool IndependentAlphaBlendEnable;
bool AlphaToCoverageEnable;
} ps_blend;
/* 3DSTATE_RASTER */
struct {
uint32_t APIMode;
bool DXMultisampleRasterizationEnable;
bool AntialiasingEnable;
uint32_t CullMode;
uint32_t FrontWinding;
bool GlobalDepthOffsetEnableSolid;
bool GlobalDepthOffsetEnableWireframe;
bool GlobalDepthOffsetEnablePoint;
float GlobalDepthOffsetConstant;
float GlobalDepthOffsetScale;
float GlobalDepthOffsetClamp;
uint32_t FrontFaceFillMode;
uint32_t BackFaceFillMode;
bool ViewportZFarClipTestEnable;
bool ViewportZNearClipTestEnable;
bool ConservativeRasterizationEnable;
} raster;
/* 3DSTATE_SCISSOR_STATE_POINTERS */
struct {
uint32_t count;
struct {
uint32_t ScissorRectangleYMin;
uint32_t ScissorRectangleXMin;
uint32_t ScissorRectangleYMax;
uint32_t ScissorRectangleXMax;
} elem[MAX_SCISSORS];
} scissor;
/* 3DSTATE_SF */
struct {
float LineWidth;
uint32_t TriangleStripListProvokingVertexSelect;
uint32_t LineStripListProvokingVertexSelect;
uint32_t TriangleFanProvokingVertexSelect;
bool LegacyGlobalDepthBiasEnable;
} sf;
/* 3DSTATE_STREAMOUT */
struct {
bool RenderingDisable;
uint32_t RenderStreamSelect;
uint32_t ReorderMode;
} so;
/* 3DSTATE_SAMPLE_MASK */
struct {
uint32_t SampleMask;
} sm;
/* 3DSTATE_TE */
struct {
uint32_t OutputTopology;
} te;
/* 3DSTATE_VF */
struct {
bool IndexedDrawCutIndexEnable;
uint32_t CutIndex;
} vf;
/* 3DSTATE_VFG */
struct {
uint32_t DistributionMode;
bool ListCutIndexEnable;
} vfg;
/* 3DSTATE_VF_TOPOLOGY */
struct {
uint32_t PrimitiveTopologyType;
} vft;
/* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */
struct {
uint32_t count;
struct {
float MinimumDepth;
float MaximumDepth;
} elem[MAX_VIEWPORTS];
} vp_cc;
/* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP */
struct {
uint32_t count;
struct {
float ViewportMatrixElementm00;
float ViewportMatrixElementm11;
float ViewportMatrixElementm22;
float ViewportMatrixElementm30;
float ViewportMatrixElementm31;
float ViewportMatrixElementm32;
float XMinClipGuardband;
float XMaxClipGuardband;
float YMinClipGuardband;
float YMaxClipGuardband;
float XMinViewPort;
float XMaxViewPort;
float YMinViewPort;
float YMaxViewPort;
} elem[MAX_VIEWPORTS];
} vp_sf_clip;
/* 3DSTATE_WM */
struct {
uint32_t ForceThreadDispatchEnable;
bool LineStippleEnable;
} wm;
/* 3DSTATE_WM_DEPTH_STENCIL */
struct {
bool DoubleSidedStencilEnable;
uint32_t StencilTestMask;
uint32_t StencilWriteMask;
uint32_t BackfaceStencilTestMask;
uint32_t BackfaceStencilWriteMask;
uint32_t StencilReferenceValue;
uint32_t BackfaceStencilReferenceValue;
bool DepthTestEnable;
bool DepthBufferWriteEnable;
uint32_t DepthTestFunction;
bool StencilTestEnable;
bool StencilBufferWriteEnable;
uint32_t StencilFailOp;
uint32_t StencilPassDepthPassOp;
uint32_t StencilPassDepthFailOp;
uint32_t StencilTestFunction;
uint32_t BackfaceStencilFailOp;
uint32_t BackfaceStencilPassDepthPassOp;
uint32_t BackfaceStencilPassDepthFailOp;
uint32_t BackfaceStencilTestFunction;
} ds;
/* Current PMA fix programming (ANV_GFX_STATE_PMA_FIX) */
bool pma_fix;
/* Bitset of ANV_GFX_STATE_* instructions requiring reemission */
BITSET_DECLARE(dirty, ANV_GFX_STATE_MAX);
};
enum anv_internal_kernel_name {
ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
@ -1304,6 +1602,16 @@ struct anv_device {
#ifdef ANDROID
struct u_gralloc *u_gralloc;
#endif
/** Precompute all dirty graphics bits
*
* Depending on platforms, some of the dirty bits don't apply (for example
* 3DSTATE_PRIMITIVE_REPLICATION is only Gfx12.0+). Disabling some
* extensions like Mesh shaders also allow us to avoid emitting any
* mesh/task related instructions (we only initialize them once at device
* initialization).
*/
BITSET_DECLARE(gfx_dirty_state, ANV_GFX_STATE_MAX);
};
static inline struct anv_state
@ -2860,6 +3168,8 @@ struct anv_cmd_graphics_state {
bool ds_write_state;
uint32_t n_occlusion_queries;
struct anv_gfx_dynamic_state dyn_state;
};
enum anv_depth_reg_mode {
@ -4568,23 +4878,6 @@ anv_line_rasterization_mode(VkLineRasterizationModeEXT line_mode,
return line_mode;
}
/* Fill provoking vertex mode to packet. */
#define ANV_SETUP_PROVOKING_VERTEX(cmd, mode) \
switch (mode) { \
case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: \
cmd.TriangleStripListProvokingVertexSelect = 0; \
cmd.LineStripListProvokingVertexSelect = 0; \
cmd.TriangleFanProvokingVertexSelect = 1; \
break; \
case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: \
cmd.TriangleStripListProvokingVertexSelect = 2; \
cmd.LineStripListProvokingVertexSelect = 1; \
cmd.TriangleFanProvokingVertexSelect = 2; \
break; \
default: \
unreachable("Invalid provoking vertex mode"); \
} \
static inline bool
anv_is_dual_src_blend_factor(VkBlendFactor factor)
{

View File

@ -93,3 +93,60 @@ anv_dump_pipe_bits(enum anv_pipe_bits bits, FILE *f)
if (bits & ANV_PIPE_CCS_CACHE_FLUSH_BIT)
fputs("+ccs_flush ", f);
}
const char *
anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state)
{
#define NAME(name) case ANV_GFX_STATE_##name: return #name;
switch (state) {
NAME(URB);
NAME(VF_STATISTICS);
NAME(VF_SGVS);
NAME(VF_SGVS_2);
NAME(VF_SGVS_INSTANCING);
NAME(PRIMITIVE_REPLICATION);
NAME(MULTISAMPLE);
NAME(SBE);
NAME(SBE_SWIZ);
NAME(SO_DECL_LIST);
NAME(VS);
NAME(HS);
NAME(DS);
NAME(GS);
NAME(PS);
NAME(PS_EXTRA);
NAME(SBE_MESH);
NAME(CLIP_MESH);
NAME(MESH_CONTROL);
NAME(MESH_SHADER);
NAME(MESH_DISTRIB);
NAME(TASK_CONTROL);
NAME(TASK_SHADER);
NAME(TASK_REDISTRIB);
NAME(BLEND_STATE_POINTERS);
NAME(CLIP);
NAME(CC_STATE);
NAME(CPS);
NAME(DEPTH_BOUNDS);
NAME(INDEX_BUFFER);
NAME(LINE_STIPPLE);
NAME(PS_BLEND);
NAME(RASTER);
NAME(SAMPLE_MASK);
NAME(SAMPLE_PATTERN);
NAME(SCISSOR);
NAME(SF);
NAME(STREAMOUT);
NAME(TE);
NAME(VERTEX_INPUT);
NAME(VF);
NAME(VF_TOPOLOGY);
NAME(VFG);
NAME(VIEWPORT_CC);
NAME(VIEWPORT_SF_CLIP);
NAME(WM);
NAME(WM_DEPTH_STENCIL);
NAME(PMA_FIX);
default: unreachable("invalid state");
}
}

View File

@ -273,6 +273,9 @@ blorp_exec_on_render(struct blorp_batch *batch,
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);
struct anv_gfx_dynamic_state *hw_state =
&cmd_buffer->state.gfx.dyn_state;
const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0,
params->y1 - params->y0, scale);
@ -294,14 +297,17 @@ blorp_exec_on_render(struct blorp_batch *batch,
}
#endif
#if INTEL_NEEDS_WA_18019816803
/* Check if blorp ds state matches ours. */
if (intel_needs_workaround(cmd_buffer->device->info, 18019816803)) {
bool blorp_ds_state = params->depth.enabled || params->stencil.enabled;
if (cmd_buffer->state.gfx.ds_write_state != blorp_ds_state) {
batch->flags |= BLORP_BATCH_NEED_PSS_STALL_SYNC;
cmd_buffer->state.gfx.ds_write_state = blorp_ds_state;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WA_18019816803);
}
}
#endif
if (params->depth.enabled &&
!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
@ -339,29 +345,53 @@ blorp_exec_on_render(struct blorp_batch *batch,
}
#endif
/* Calculate state that does not get touched by blorp.
* Flush everything else.
*/
anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER |
ANV_CMD_DIRTY_XFB_ENABLE);
BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
BITSET_ONES(dyn_dirty);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
if (!params->wm_prog_data) {
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP);
/* Flag all the instructions emitted by BLORP. */
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_URB);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
#if GFX_VER >= 11
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
#endif
#if GFX_VER >= 12
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
#endif
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SF);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SBE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WM);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_POINTERS);
if (batch->blorp->config.use_mesh_shading) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL);
}
if (params->wm_prog_data) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_BLEND);
}
anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_INDEX_BUFFER |
ANV_CMD_DIRTY_XFB_ENABLE);
cmd_buffer->state.gfx.vb_dirty = ~0;
cmd_buffer->state.gfx.dirty |= dirty;
BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty,
cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty);
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}

View File

@ -3218,7 +3218,12 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
}
}
if (any_dynamic_state_dirty || cmd_buffer->state.gfx.dirty)
/* Flush the runtime state into the HW state tracking */
if (cmd_buffer->state.gfx.dirty || any_dynamic_state_dirty)
genX(cmd_buffer_flush_gfx_runtime_state)(cmd_buffer);
/* Flush the HW state into the command buffer */
if (!BITSET_IS_EMPTY(cmd_buffer->state.gfx.dyn_state.dirty))
genX(cmd_buffer_flush_gfx_hw_state)(cmd_buffer);
/* If the pipeline changed, we may need to re-allocate push constant space
@ -3278,7 +3283,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
}
/* When we're done, there is no more dirty gfx state. */
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
cmd_buffer->state.gfx.dirty = 0;
}
@ -3693,7 +3697,9 @@ genX(CmdExecuteCommands)(
primary->state.current_hash_scale = 0;
primary->state.gfx.push_constant_stages = 0;
primary->state.gfx.ds_write_state = false;
vk_dynamic_graphics_state_dirty_all(&primary->vk.dynamic_graphics_state);
memcpy(primary->state.gfx.dyn_state.dirty,
primary->device->gfx_dirty_state,
sizeof(primary->state.gfx.dyn_state.dirty));
/* Each of the secondary command buffers will use its own state base
* address. We need to re-emit state base address for the primary after

File diff suppressed because it is too large Load Diff

View File

@ -334,11 +334,39 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
1ull << 32);
#endif
/* Invalidate pipeline, xfb (for 3DSTATE_SO_BUFFER) & raster discard (for
* 3DSTATE_STREAMOUT).
*/
cmd_buffer->state.gfx.dirty |= (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_XFB_ENABLE);
BITSET_SET(cmd_buffer->vk.dynamic_graphics_state.dirty,
MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
/* Flag all the instructions emitted by the memcpy. */
struct anv_gfx_dynamic_state *hw_state =
&cmd_buffer->state.gfx.dyn_state;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_URB);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
#if GFX_VER >= 11
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
#endif
#if GFX_VER >= 12
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
#endif
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SF);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SBE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS);
if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL);
}
cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_INDEX_BUFFER);
}

View File

@ -293,12 +293,49 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
state->cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
#endif
/* Flag all the instructions emitted by the simple shader. */
struct anv_gfx_dynamic_state *hw_state =
&state->cmd_buffer->state.gfx.dyn_state;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_URB);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
#if GFX_VER >= 11
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
#endif
#if GFX_VER >= 12
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
#endif
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WM);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SF);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SBE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_BLEND);
if (device->vk.enabled_extensions.EXT_mesh_shader) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL);
}
state->cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0);
state->cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |
ANV_CMD_DIRTY_XFB_ENABLE);
state->cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
state->cmd_buffer->state.gfx.push_constant_stages = VK_SHADER_STAGE_FRAGMENT_BIT;
vk_dynamic_graphics_state_dirty_all(&state->cmd_buffer->vk.dynamic_graphics_state);
}
static void