iris: enable generated indirect draws

This mirrors the ring buffer mode we have in Anv.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26797>
This commit is contained in:
Lionel Landwerlin 2023-12-20 12:15:43 +02:00 committed by Marge Bot
parent d754ed5330
commit 5438b19104
15 changed files with 1143 additions and 7 deletions

View File

@ -299,7 +299,7 @@ if ['x86_64'].contains(host_machine.cpu_family()) and \
get_option('intel-clc') != 'system'
# Require intel-clc with Anv & Iris (for internal shaders)
with_intel_clc = get_option('intel-clc') == 'enabled' or \
with_intel_vk
with_intel_vk or with_gallium_iris
else
with_intel_clc = false
endif

View File

@ -13,6 +13,7 @@ DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_ADAPTIVE_SYNC(true)
DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, "Buffer object reuse",)
DRI_CONF_OPT_B(intel_tbimr, true, "Enable TBIMR tiled rendering")
DRI_CONF_OPT_I(generated_indirect_threshold, 100, 0, INT32_MAX, "Generated indirect draw threshold")
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY

View File

@ -243,6 +243,12 @@ iris_batch_bytes_used(struct iris_batch *batch)
return batch->map_next - batch->map;
}
/* Return the GPU virtual address of the current write position
 * (map_next) within the batch buffer.
 */
static inline uint64_t
iris_batch_current_address_u64(struct iris_batch *batch)
{
   const uint64_t bytes_written = batch->map_next - batch->map;
   return batch->bo->address + bytes_written;
}
/**
* Ensure the current command buffer has \param size bytes of space
* remaining. If not, this creates a secondary batch buffer and emits

View File

@ -119,6 +119,23 @@ iris_binder_reserve(struct iris_context *ice,
return binder_insert(binder, size);
}
/**
 * Reserve and record binder space for the indirect draw generation
 * shader (FS stage only): a single 32-bit binding table entry.
 */
void
iris_binder_reserve_gen(struct iris_context *ice)
{
   struct iris_binder *binder = &ice->state.binder;
   const unsigned bt_size = sizeof(uint32_t);

   binder->bt_offset[MESA_SHADER_FRAGMENT] = iris_binder_reserve(ice, bt_size);

   iris_record_state_size(ice->state.sizes,
                          binder->bo->address +
                          binder->bt_offset[MESA_SHADER_FRAGMENT],
                          bt_size);
}
/**
* Reserve and record binder space for 3D pipeline shader stages.
*

View File

@ -59,6 +59,7 @@ void iris_init_binder(struct iris_context *ice);
void iris_destroy_binder(struct iris_binder *binder);
uint32_t iris_binder_reserve(struct iris_context *ice, unsigned size);
void iris_binder_reserve_3d(struct iris_context *ice);
void iris_binder_reserve_gen(struct iris_context *ice);
void iris_binder_reserve_compute(struct iris_context *ice);
#endif

View File

@ -714,6 +714,28 @@ struct iris_context {
* drawid and is_indexed_draw. They will go in their own vertex element.
*/
struct iris_state_ref derived_draw_params;
struct {
/**
* Generation fragment shader
*/
struct iris_compiled_shader *shader;
/**
* Ring buffer where to generate indirect draw commands
*/
struct iris_bo *ring_bo;
/**
* Allocated iris_gen_indirect_params
*/
struct iris_state_ref params;
/**
* Vertices used to dispatch the generated fragment shaders
*/
struct iris_state_ref vertices;
} generation;
} draw;
struct {
@ -930,6 +952,60 @@ struct iris_context {
} state;
};
/**
 * Push constant data handed over to the indirect draw generation shader.
 *
 * The layout must match what the generation shader library
 * (libiris_write_draw) expects; see iris_call_generation_shader().
 */
struct iris_gen_indirect_params {
   /**
    * Address of iris_context:draw:generation:ring_bo (where the generated
    * draw commands are written)
    */
   uint64_t generated_cmds_addr;
   /**
    * Address of the indirect data to draw with
    */
   uint64_t indirect_data_addr;
   /**
    * Address inside iris_context:draw:generation:ring_bo where the draw ids
    * are written
    */
   uint64_t draw_id_addr;
   /**
    * Address of the indirect count (can be null, in which case max_draw_count
    * is used)
    */
   uint64_t draw_count_addr;
   /**
    * Address to jump to in order to generate more draws
    */
   uint64_t gen_addr;
   /**
    * Address to jump to in order to end the generated draws
    */
   uint64_t end_addr;
   /**
    * Stride between the indirect draw data entries
    */
   uint32_t indirect_data_stride;
   /**
    * Base index of the current generated draws in the ring buffer (increments
    * by ring_count)
    */
   uint32_t draw_base;
   /**
    * Maximum number of generated draws if draw_count_addr is null
    */
   uint32_t max_draw_count;
   /**
    * Packed field:
    *   bits 0-7:   ANV_GENERATED_FLAG_*
    *   bits 8-15:  vertex buffer mocs
    *   bits 16-23: stride between generated commands
    */
   uint32_t flags;
   /**
    * Number of items to generate in the ring buffer
    */
   uint32_t ring_count;
};
#define perf_debug(dbg, ...) do { \
if (INTEL_DEBUG(DEBUG_PERF)) \
dbg_printf(__VA_ARGS__); \
@ -1134,6 +1210,9 @@ bool iris_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
uint32_t *kernel_out,
void *prog_data_out);
void iris_ensure_indirect_generation_shader(struct iris_batch *batch);
/* iris_resolve.c */
void iris_predraw_resolve_inputs(struct iris_context *ice,

View File

@ -196,6 +196,14 @@ iris_simple_draw_vbo(struct iris_context *ice,
batch->screen->vtbl.upload_render_state(ice, batch, draw, drawid_offset, indirect, sc);
}
/* Whether this indirect draw should go through the generation-shader
 * path: only when indirect info is present and the draw count reaches
 * the driconf "generated_indirect_threshold" value.
 */
static inline bool
iris_use_draw_indirect_generation(const struct iris_screen *screen,
                                  const struct pipe_draw_indirect_info *dindirect)
{
   if (dindirect == NULL)
      return false;

   return dindirect->draw_count >= screen->driconf.generated_indirect_threshold;
}
static void
iris_indirect_draw_vbo(struct iris_context *ice,
const struct pipe_draw_info *dinfo,
@ -204,6 +212,7 @@ iris_indirect_draw_vbo(struct iris_context *ice,
const struct pipe_draw_start_count_bias *draw)
{
struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
struct iris_screen *screen = batch->screen;
struct pipe_draw_info info = *dinfo;
struct pipe_draw_indirect_info indirect = *dindirect;
const bool use_predicate =
@ -217,7 +226,14 @@ iris_indirect_draw_vbo(struct iris_context *ice,
iris_update_draw_parameters(ice, &info, drawid_offset, &indirect, draw);
batch->screen->vtbl.upload_indirect_render_state(ice, &info, &indirect, draw);
screen->vtbl.upload_indirect_render_state(ice, &info, &indirect, draw);
} else if (iris_use_draw_indirect_generation(screen, &indirect)) {
iris_batch_maybe_flush(batch, 1500);
iris_update_draw_parameters(ice, &info, drawid_offset, &indirect, draw);
screen->vtbl.upload_indirect_shader_render_state(
ice, &info, &indirect, draw);
} else {
iris_emit_buffer_barrier_for(batch, iris_resource_bo(indirect.buffer),
IRIS_DOMAIN_VF_READ);
@ -231,7 +247,7 @@ iris_indirect_draw_vbo(struct iris_context *ice,
if (use_predicate) {
/* Upload MI_PREDICATE_RESULT to GPR15.*/
batch->screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
}
for (int i = 0; i < indirect.draw_count; i++) {
@ -245,7 +261,7 @@ iris_indirect_draw_vbo(struct iris_context *ice,
if (use_predicate) {
/* Restore MI_PREDICATE_RESULT. */
batch->screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
}
}
@ -307,7 +323,19 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info,
iris_predraw_flush_buffers(ice, batch, stage);
}
iris_binder_reserve_3d(ice);
/* If we're going to use the generation shader, we need to allocate a
* binding table entry for it on <= Gfx9 because that platform does not
* have a null-rendertarget bit in the send message to the render cache,
* the EOT message might pollute later writes to the actual RT of the
* draws.
*
* The generation will call iris_binder_reserve_3d() after the generation
* draw call.
*/
if (iris_use_draw_indirect_generation(screen, indirect) && devinfo->ver <= 9)
iris_binder_reserve_gen(ice);
else
iris_binder_reserve_3d(ice);
batch->screen->vtbl.update_binder_address(batch, &ice->state.binder);

View File

@ -162,3 +162,10 @@ rw_bo(struct iris_bo *bo, uint64_t offset, enum iris_domain access)
return (struct iris_address) { .bo = bo, .offset = offset,
.access = access };
}
UNUSED static struct iris_address
iris_address_add(struct iris_address addr, uint64_t offset)
{
addr.offset += offset;
return addr;
}

View File

@ -74,3 +74,12 @@ void genX(math_add32_gpr0)(struct iris_context *ice,
void genX(math_div32_gpr0)(struct iris_context *ice,
struct iris_batch *batch,
uint32_t D);
/* iris_indirect_gen.c */
void genX(init_screen_gen_state)(struct iris_screen *screen);
struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
const struct pipe_draw_info *draw,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *sc,
struct iris_address *out_params_addr);

View File

@ -0,0 +1,650 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include <stdio.h>
#include <errno.h>
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_aux_map.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_sample_positions.h"
#include "intel/ds/intel_tracepoints.h"
#include "iris_batch.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_pipe.h"
#include "iris_resource.h"
#include "iris_utrace.h"
#include "iris_genx_macros.h"
#include "intel/common/intel_genX_state.h"
#include "drm-uapi/i915_drm.h"
#include "libintel_shaders.h"
#if GFX_VERx10 == 80
# include "intel_gfx8_shaders_code.h"
#elif GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
#else
# error "Unsupported generation"
#endif
#define load_param(b, bit_size, struct_name, field_name) \
nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0), \
.base = offsetof(struct_name, field_name), \
.range = bit_size / 8)
/* Build NIR computing a linear fragment index from gl_FragCoord,
 * assuming an 8192-pixel-wide layout: index = y * 8192 + x.
 */
static nir_def *
load_fragment_index(nir_builder *b)
{
   nir_def *frag_coord = nir_load_frag_coord(b);
   nir_def *coord = nir_f2i32(b, nir_trim_vector(b, frag_coord, 2));
   nir_def *x = nir_channel(b, coord, 0);
   nir_def *y = nir_channel(b, coord, 1);

   return nir_iadd(b, nir_imul_imm(b, y, 8192), x);
}
static nir_shader *
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
{
const nir_shader_compiler_options *nir_options =
screen->compiler->nir_options[MESA_SHADER_KERNEL];
struct blob_reader blob;
blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
sizeof(genX(intel_shaders_nir)));
return nir_deserialize(mem_ctx, nir_options, &blob);
}
/* Emit the call to the shader-library draw-generation function, loading
 * each field of struct iris_gen_indirect_params as a push-constant
 * uniform (the argument order must match the library entry point).
 *
 * Returns the push constant size consumed, used as the shader's uniform
 * size by the caller.
 */
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}
/* Install the genX-specific generation-shader hooks into the screen
 * vtable.
 */
void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
   screen->vtbl.load_shader_lib = load_shader_lib;
   screen->vtbl.call_generation_shader = iris_call_generation_shader;
}
/**
* Stream out temporary/short-lived state.
*
* This allocates space, pins the BO, and includes the BO address in the
* returned offset (which works because all state lives in 32-bit memory
* zones).
*/
/* Allocate @size bytes of temporary state through @uploader, storing the
 * resulting resource/offset in @ref and pinning the backing BO into
 * @batch.  Returns the CPU mapping of the allocation.
 */
static void *
upload_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *map = NULL;

   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &map);
   iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);

   return map;
}
/* Allocate temporary state through @uploader, pin its BO into @batch,
 * record its size for debug, and return the CPU mapping.  On return,
 * *out_offset includes the BO's offset from the state base address so it
 * can be used directly as a state pointer.
 */
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct pipe_resource **out_res,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   void *map = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &map);

   struct iris_bo *state_bo = iris_resource_bo(*out_res);
   iris_use_pinned_bo(batch, state_bo, false, IRIS_DOMAIN_NONE);

   /* Record against the raw BO address before rebasing the offset below. */
   iris_record_state_size(batch->state_sizes,
                          state_bo->address + *out_offset, size);

   *out_offset += iris_bo_offset_from_base_address(state_bo);

   return map;
}
/**
 * Emit the full 3D pipeline state and the RECTLIST draw that runs the
 * generation fragment shader (one fragment per generated draw, up to
 * @ring_count), reading push constants from @params_addr/@params_size.
 *
 * This smashes most of the tracked 3D state, so the relevant dirty bits
 * are flagged at the end for the application draws that follow.
 */
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission */

   /* Two vertex elements: VE0 pulls XYZ from VB0 (the rect vertices),
    * VE1 references VB1.
    */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                       GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex = 0;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_SRC;
      ve.Component2Control = VFCOMP_STORE_SRC;
      ve.Component3Control = VFCOMP_STORE_1_FP;
   }
   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

   /* Minimal URB allocation: VS-only pipeline, all stages get size 1. */
   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);

   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   /* Disable every programmable stage other than the fragment shader. */
   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);
   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);
   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }

   const struct brw_wm_prog_data *wm_prog_data = (void *)
      ice->draw.generation.shader->prog_data;

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((wm_prog_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable = wm_prog_data->uses_vmask;

      /* Gfx9 needs a (null-RT) binding table entry; see the <= Gfx9 block
       * below.
       */
      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                              wm_prog_data->base.ubo_ranges[0].length;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
      psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!wm_prog_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   /* Stream out a trivial [0, 1] depth viewport. */
   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming push constant alloc command immediately
    * program push constant command(ZERO length) without any commit between
    * them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

   /* A rectangle covering ring_count fragments, laid out 8192 pixels wide
    * (matching load_fragment_index()).
    */
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */

   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex = 0;
      vb.AddressModifyEnable = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch = 3 * sizeof(float);
      vb.BufferSize = 9 * sizeof(float);
      vb.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

   /* Bind the push constant data (params_addr) to the fragment stage. */
#if GFX_VERx10 > 120
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
                        GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *    flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
    *    equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
    *    0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif

#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the next
    * RT writes, so we choose to reemit the binding table to a null RT on Gfx8
    * too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

   /* The actual generation dispatch: one RECTLIST covering ring_count
    * fragments.
    */
   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType = SEQUENTIAL;
      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance = 3;
      prim.InstanceCount = 1;
   }

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */
   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but it was already off
       * anyway
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   /* Invalidate the URB configuration so the next draw reprograms it. */
   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}
#define RING_SIZE (128 * 1024)
static void
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
{
struct iris_bufmgr *bufmgr = screen->bufmgr;
if (ice->draw.generation.ring_bo != NULL)
return;
ice->draw.generation.ring_bo =
iris_bo_alloc(bufmgr, "gen ring",
RING_SIZE, 8, IRIS_MEMZONE_OTHER,
BO_ALLOC_NO_SUBALLOC);
iris_get_backing_bo(ice->draw.generation.ring_bo)->real.kflags |= EXEC_OBJECT_CAPTURE;
}
/**
 * Set up and dispatch the indirect draw generation shader for this draw.
 *
 * Ensures the generation shader and ring BO exist, uploads a
 * struct iris_gen_indirect_params push constant block, pins every BO the
 * generated commands will reference, and emits the generation draw.
 *
 * Returns the CPU mapping of the uploaded params and writes their GPU
 * address to @out_params_addr.
 */
struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

   /* Default indirect record size: 5 dwords for indexed draws, 4 for
    * non-indexed (used when the app passes stride == 0).
    */
   const size_t struct_stride = draw->index_size > 0 ?
      sizeof(uint32_t) * 5 :
      sizeof(uint32_t) * 4;

   /* Worst-case size in bytes of the commands generated for one draw. */
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

   /* Dwords reserved at the end of the ring for the jump commands. */
   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);
   /* How many draws fit in one pass through the ring buffer. */
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   /* Pin everything the generated commands will read. */
   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr = ice->draw.generation.ring_bo->address,
      .ring_count = ring_count,
      /* Draw ids live right after the generated commands in the ring. */
      .draw_id_addr = ice->draw.generation.ring_bo->address +
                      ring_count * cmd_stride +
                      4 * GENX(MI_BATCH_BUFFER_START_length),
      .draw_count_addr = indirect->indirect_draw_count ?
         (iris_resource_bo(indirect->indirect_draw_count)->address +
          indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr = iris_resource_bo(indirect->buffer)->address +
                            indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count = indirect->draw_count,
      /* See the flags field layout in struct iris_gen_indirect_params. */
      .flags = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
               (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                ANV_GENERATED_FLAG_PREDICATED : 0) |
               (ice->state.vs_uses_draw_params ?
                ANV_GENERATED_FLAG_BASE : 0) |
               (ice->state.vs_uses_derived_draw_params ?
                ANV_GENERATED_FLAG_DRAWID : 0) |
               (iris_mocs(NULL, &screen->isl_dev,
                          ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
               ((cmd_stride / 4) << 16) |
               util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);

   return params;
}

View File

@ -39,6 +39,7 @@
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"
#include "iris_resource.h"
@ -290,3 +291,140 @@ iris_destroy_program_cache(struct iris_context *ice)
ralloc_free(ice->shaders.cache);
}
/* Link the deserialized libintel shader library into @nir: resolve the
 * library function calls, inline them, drop the now-unused library
 * entrypoints, then lower variables and memory accesses to explicit I/O
 * so the resulting shader is self-contained.
 */
static void
link_libintel_shaders(nir_shader *nir, const nir_shader *libintel)
{
   nir_link_shader_functions(nir, libintel);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_remove_non_entrypoints);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
              glsl_get_cl_type_size_align);
   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
              nir_var_mem_global,
              nir_address_format_62bit_generic);
}
/**
 * Make sure ice->draw.generation.shader holds the compiled indirect draw
 * generation fragment shader, pinned into @batch.
 *
 * Fast paths: the context already has it, or it is found in the shader
 * cache.  Otherwise the shader is built by calling the genX
 * call_generation_shader hook into a fresh NIR builder, linking in the
 * intel shader library, running the NIR pipeline, and compiling with
 * brw_compile_fs.
 */
void
iris_ensure_indirect_generation_shader(struct iris_batch *batch)
{
   struct iris_context *ice = batch->ice;
   if (ice->draw.generation.shader)
      return;

   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   /* Fixed-name cache key: there is a single generation shader variant. */
   const struct {
      char name[40];
   } key = {
      .name = "iris-generation-shader",
   };
   ice->draw.generation.shader =
      iris_find_cached_shader(ice, IRIS_CACHE_BLORP, sizeof(key), &key);
   if (ice->draw.generation.shader != NULL)
      return;

   struct brw_compiler *compiler = screen->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_FRAGMENT];
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                                  nir_options,
                                                  "iris-indirect-generate");

   /* Emits the call into the shader library and reports how many bytes of
    * push constants it consumes.
    */
   uint32_t uniform_size =
      screen->vtbl.call_generation_shader(screen, &b);

   nir_shader *nir = b.shader;

   void *mem_ctx = ralloc_context(NULL);

   link_libintel_shaders(nir, screen->vtbl.load_shader_lib(screen, mem_ctx));

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);

   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   NIR_PASS_V(nir, nir_propagate_invariant, false);

   NIR_PASS_V(nir, nir_lower_input_attachments,
              &(nir_input_attachment_options) {
                 .use_fragcoord_sysval = true,
                 .use_layer_id_sysval = true,
              });

   /* Reset sizes before gathering information */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);
   NIR_PASS_V(nir, nir_opt_dce);

   /* Do vectorizing here. For some reason when trying to do it in the back
    * this just isn't working.
    */
   nir_load_store_vectorize_options options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
      .callback = brw_nir_should_vectorize_mem,
      .robust_modes = (nir_variable_mode)0,
   };
   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);

   nir->num_uniforms = uniform_size;

   union brw_any_prog_key prog_key;
   memset(&prog_key, 0, sizeof(prog_key));

   struct brw_wm_prog_data *prog_data = ralloc_size(NULL, sizeof(*prog_data));
   memset(prog_data, 0, sizeof(*prog_data));

   prog_data->base.nr_params = nir->num_uniforms / 4;

   brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->base.ubo_ranges);

   struct brw_compile_stats stats[3];
   struct brw_compile_fs_params params = {
      .base = {
         .nir = nir,
         .log_data = &ice->dbg,
         .debug_flag = DEBUG_WM,
         .stats = stats,
         .mem_ctx = mem_ctx,
      },
      .key = &prog_key.wm,
      .prog_data = prog_data,
   };
   const unsigned *program = brw_compile_fs(compiler, &params);

   struct iris_binding_table bt;
   memset(&bt, 0, sizeof(bt));

   struct iris_compiled_shader *shader =
      iris_create_shader_variant(screen, ice->shaders.cache,
                                 IRIS_CACHE_BLORP,
                                 sizeof(key), &key);

   iris_finalize_program(shader, &prog_data->base, NULL, NULL, 0, 0, 0, &bt);

   /* Store the result in the cache under the fixed key for reuse. */
   iris_upload_shader(screen, NULL, shader, ice->shaders.cache,
                      ice->shaders.uploader_driver,
                      IRIS_CACHE_BLORP, sizeof(key), &key, program);
   ralloc_free(mem_ctx);

   struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   ice->draw.generation.shader = shader;
}

View File

@ -891,6 +891,8 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
driQueryOptionb(config->options, "intel_enable_wa_14018912822");
screen->driconf.enable_tbimr =
driQueryOptionb(config->options, "intel_tbimr");
screen->driconf.generated_indirect_threshold =
driQueryOptioni(config->options, "generated_indirect_threshold");
screen->precompile = debug_get_bool_option("shader_precompile", true);
@ -941,6 +943,7 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
iris_init_screen_program_functions(pscreen);
genX_call(screen->devinfo, init_screen_state, screen);
genX_call(screen->devinfo, init_screen_gen_state, screen);
glsl_type_singleton_init_or_ref();

View File

@ -45,6 +45,9 @@ struct iris_fs_prog_key;
struct iris_cs_prog_key;
enum iris_program_cache_id;
typedef struct nir_builder nir_builder;
typedef struct nir_shader nir_shader;
struct u_trace;
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
@ -74,6 +77,10 @@ struct iris_vtable {
const struct pipe_draw_info *draw,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *sc);
void (*upload_indirect_shader_render_state)(struct iris_context *ice,
const struct pipe_draw_info *draw,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *sc);
void (*update_binder_address)(struct iris_batch *batch,
struct iris_binder *binder);
void (*upload_compute_state)(struct iris_context *ice,
@ -151,6 +158,9 @@ struct iris_vtable {
struct iris_cs_prog_key *key);
void (*lost_genx_state)(struct iris_context *ice, struct iris_batch *batch);
void (*disable_rhwo_optimization)(struct iris_batch *batch, bool disable);
nir_shader *(*load_shader_lib)(struct iris_screen *screen, void *mem_ctx);
unsigned (*call_generation_shader)(struct iris_screen *screen, nir_builder *b);
};
struct iris_address {
@ -195,6 +205,7 @@ struct iris_screen {
float lower_depth_range_rate;
bool intel_enable_wa_14018912822;
bool enable_tbimr;
unsigned generated_indirect_threshold;
} driconf;
/** Does the kernel support various features (KERNEL_HAS_* bitfield)? */

View File

@ -8536,6 +8536,189 @@ iris_upload_indirect_render_state(struct iris_context *ice,
#endif /* GFX_VERx10 >= 125 */
}
/**
 * Upload render state for an indirect draw whose 3DPRIMITIVE commands are
 * produced by a generation shader into a ring buffer
 * (ice->draw.generation.ring_bo), rather than being executed through the
 * HW indirect-draw registers.
 *
 * Rough command layout emitted here:
 *
 *   gen_addr:  run the generation shader (genX(emit_indirect_generate)),
 *              then MI_BATCH_BUFFER_START into the ring buffer
 *   inc_addr:  bump params->draw_base by params->ring_count and jump
 *              back to gen_addr to generate the next chunk of draws
 *              (presumably reached from the ring when it fills up —
 *              the generated commands are not visible here)
 *   end_addr:  exit point, back to normal batch processing
 *
 * gen_addr/end_addr are patched into *params at the end because their
 * values are only known once the surrounding commands have been emitted.
 */
static void
iris_upload_indirect_shader_render_state(struct iris_context *ice,
                                         const struct pipe_draw_info *draw,
                                         const struct pipe_draw_indirect_info *indirect,
                                         const struct pipe_draw_start_count_bias *sc)
{
   assert(indirect);

   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   UNUSED struct iris_screen *screen = batch->screen;
   UNUSED const struct intel_device_info *devinfo = screen->devinfo;

   if (ice->state.dirty & IRIS_DIRTY_VERTEX_BUFFER_FLUSHES)
      flush_vbos(ice, batch);

   iris_batch_sync_region_start(batch);

   /* Always pin the binder.  If we're emitting new binding table pointers,
    * we need it.  If not, we're probably inheriting old tables via the
    * context, and need it anyway.  Since true zero-bindings cases are
    * practically non-existent, just pin it and avoid last_res tracking.
    */
   iris_use_pinned_bo(batch, ice->state.binder.bo, false,
                      IRIS_DOMAIN_NONE);

   if (!batch->contains_draw) {
      if (GFX_VER == 12) {
         /* Re-emit constants when starting a new batch buffer in order to
          * work around push constant corruption on context switch.
          *
          * XXX - Provide hardware spec quotation when available.
          */
         ice->state.stage_dirty |= (IRIS_STAGE_DIRTY_CONSTANTS_VS  |
                                    IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                                    IRIS_STAGE_DIRTY_CONSTANTS_TES |
                                    IRIS_STAGE_DIRTY_CONSTANTS_GS  |
                                    IRIS_STAGE_DIRTY_CONSTANTS_FS);
      }
      batch->contains_draw = true;
   }

   if (!batch->contains_draw_with_next_seqno) {
      iris_restore_render_saved_bos(ice, batch, draw);
      batch->contains_draw_with_next_seqno = true;
   }

   if (draw->index_size > 0)
      iris_emit_index_buffer(ice, batch, draw, sc);

   /* Make sure we have enough space to keep all the commands in the single
    * BO (because of the jumps): the gen_addr/inc_addr/end_addr targets
    * recorded below are absolute addresses, so a batch BO switch in the
    * middle would break them.  Verified by the command_bo assert below.
    */
   iris_require_command_space(batch, 2000);

#ifndef NDEBUG
   struct iris_bo *command_bo = batch->bo;
#endif

   /* Jump point to generate more draws if we run out of space in the ring
    * buffer.
    */
   uint64_t gen_addr = iris_batch_current_address_u64(batch);

   iris_handle_always_flush_cache(batch);

#if GFX_VER == 9
   iris_emit_pipe_control_flush(batch, "before generation",
                                PIPE_CONTROL_VF_CACHE_INVALIDATE);
#endif

   /* Dispatch the generation shader; it writes 3DPRIMITIVEs into the ring
    * buffer and returns its parameter structure (both CPU map and GPU
    * address).
    */
   struct iris_address params_addr;
   struct iris_gen_indirect_params *params =
      genX(emit_indirect_generate)(batch, draw, indirect, sc,
                                   &params_addr);

   /* Make the generated commands visible to the command streamer before
    * jumping into the ring buffer.
    */
   iris_emit_pipe_control_flush(batch, "after generation flush",
                                ((ice->state.vs_uses_draw_params ||
                                  ice->state.vs_uses_derived_draw_params) ?
                                 PIPE_CONTROL_VF_CACHE_INVALIDATE : 0) |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                PIPE_CONTROL_DATA_CACHE_FLUSH |
                                PIPE_CONTROL_CS_STALL);

   trace_intel_begin_draw(&batch->trace);

   /* Always pin the binder.  If we're emitting new binding table pointers,
    * we need it.  If not, we're probably inheriting old tables via the
    * context, and need it anyway.  Since true zero-bindings cases are
    * practically non-existent, just pin it and avoid last_res tracking.
    */
   iris_use_pinned_bo(batch, ice->state.binder.bo, false,
                      IRIS_DOMAIN_NONE);

   /* Wa_1306463417 - Send HS state for every primitive on gfx11.
    * Wa_16011107343 (same for gfx12)
    * We implement this by setting TCS dirty on each draw.
    */
   if ((INTEL_NEEDS_WA_1306463417 || INTEL_NEEDS_WA_16011107343) &&
       ice->shaders.prog[MESA_SHADER_TESS_CTRL]) {
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS;
   }

   iris_upload_dirty_render_state(ice, batch, draw, true);

   iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_DRAW, draw, indirect, sc);

   genX(maybe_emit_breakpoint)(batch, true);

#if GFX_VER >= 12
   /* Prevent the command streamer from pre-parsing past the jump into the
    * ring buffer (the generated commands may not be coherent yet).
    */
   iris_emit_cmd(batch, GENX(MI_ARB_CHECK), arb) {
      arb.PreParserDisableMask = true;
      arb.PreParserDisable = true;
   }
#endif

   /* Jump into the ring buffer to execute the generated draws. */
   iris_emit_cmd(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
      bbs.AddressSpaceIndicator = ASI_PPGTT;
      bbs.BatchBufferStartAddress = (struct iris_address) {
         .bo = ice->draw.generation.ring_bo,
      };
   }

   /* Run the ring buffer one more time with the next set of commands */
   uint64_t inc_addr = iris_batch_current_address_u64(batch);
   {
      iris_emit_pipe_control_flush(batch,
                                   "post generated draws wait",
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                   PIPE_CONTROL_CS_STALL);

      /* Advance the draw window: draw_base += ring_count, so the next
       * generation pass emits the following chunk of draws.
       */
      struct mi_builder b;
      mi_builder_init(&b, batch->screen->devinfo, batch);

      struct iris_address draw_base_addr = iris_address_add(
         params_addr,
         offsetof(struct iris_gen_indirect_params, draw_base));

      const uint32_t mocs =
         iris_mocs(draw_base_addr.bo, &screen->isl_dev, 0);
      mi_builder_set_mocs(&b, mocs);

      mi_store(&b, mi_mem32(draw_base_addr),
                   mi_iadd(&b, mi_mem32(draw_base_addr),
                               mi_imm(params->ring_count)));

      iris_emit_pipe_control_flush(batch,
                                   "post generation base increment",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_CONST_CACHE_INVALIDATE);

      /* Loop back to the generation point for the next chunk. */
      iris_emit_cmd(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = (struct iris_address) {
            .offset = gen_addr,
         };
      }
   }

   /* Exit of the ring buffer */
   uint64_t end_addr = iris_batch_current_address_u64(batch);

#ifndef NDEBUG
   /* All the absolute jump targets above assume no batch BO switch. */
   assert(command_bo == batch->bo);
#endif

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);

   iris_emit_pipe_control_flush(batch,
                                "post generated draws wait",
                                PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                PIPE_CONTROL_CS_STALL);

   /* Patch the jump addresses into the parameter structure now that they
    * are known.
    */
   params->gen_addr = inc_addr;
   params->end_addr = end_addr;

   iris_batch_sync_region_end(batch);

   uint32_t count = (sc) ? sc->count : 0;
   count *= draw->instance_count ? draw->instance_count : 1;
   trace_intel_end_draw(&batch->trace, count);
}
static void
iris_load_indirect_location(struct iris_context *ice,
struct iris_batch *batch,
@ -8916,6 +9099,8 @@ iris_destroy_state(struct iris_context *ice)
pipe_resource_reference(&ice->draw.draw_params.res, NULL);
pipe_resource_reference(&ice->draw.derived_draw_params.res, NULL);
pipe_resource_reference(&ice->draw.generation.params.res, NULL);
pipe_resource_reference(&ice->draw.generation.vertices.res, NULL);
/* Loop over all VBOs, including ones for draw parameters */
for (unsigned i = 0; i < ARRAY_SIZE(genx->vertex_buffers); i++) {
@ -9974,6 +10159,7 @@ genX(init_screen_state)(struct iris_screen *screen)
screen->vtbl.init_copy_context = iris_init_copy_context;
screen->vtbl.upload_render_state = iris_upload_render_state;
screen->vtbl.upload_indirect_render_state = iris_upload_indirect_render_state;
screen->vtbl.upload_indirect_shader_render_state = iris_upload_indirect_shader_render_state;
screen->vtbl.update_binder_address = iris_update_binder_address;
screen->vtbl.upload_compute_state = iris_upload_compute_state;
screen->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;

View File

@ -74,7 +74,7 @@ iris_per_hw_ver_libs = []
foreach v : ['80', '90', '110', '120', '125', '200']
iris_per_hw_ver_libs += static_library(
'iris_per_hw_ver@0@'.format(v),
['iris_blorp.c', 'iris_query.c', 'iris_state.c', gen_xml_pack],
['iris_blorp.c', 'iris_query.c', 'iris_state.c', 'iris_indirect_gen.c', gen_xml_pack],
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_intel],
c_args : [
no_override_init_args, sse2_args,
@ -82,7 +82,7 @@ foreach v : ['80', '90', '110', '120', '125', '200']
],
gnu_symbol_visibility : 'hidden',
dependencies : [dep_libdrm, dep_valgrind, idep_genxml, idep_nir_headers,
idep_intel_driver_ds_headers, ],
idep_intel_driver_ds_headers, idep_intel_shaders, ],
)
endforeach