mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-01-22 05:33:55 +08:00
iris: enable generated indirect draws
This mirrors the ring-buffer mode we have in Anv. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26797>
This commit is contained in:
parent
d754ed5330
commit
5438b19104
@ -299,7 +299,7 @@ if ['x86_64'].contains(host_machine.cpu_family()) and \
|
||||
get_option('intel-clc') != 'system'
|
||||
# Require intel-clc with Anv & Iris (for internal shaders)
|
||||
with_intel_clc = get_option('intel-clc') == 'enabled' or \
|
||||
with_intel_vk
|
||||
with_intel_vk or with_gallium_iris
|
||||
else
|
||||
with_intel_clc = false
|
||||
endif
|
||||
|
@ -13,6 +13,7 @@ DRI_CONF_SECTION_PERFORMANCE
|
||||
DRI_CONF_ADAPTIVE_SYNC(true)
|
||||
DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, "Buffer object reuse",)
|
||||
DRI_CONF_OPT_B(intel_tbimr, true, "Enable TBIMR tiled rendering")
|
||||
DRI_CONF_OPT_I(generated_indirect_threshold, 100, 0, INT32_MAX, "Generated indirect draw threshold")
|
||||
DRI_CONF_SECTION_END
|
||||
|
||||
DRI_CONF_SECTION_QUALITY
|
||||
|
@ -243,6 +243,12 @@ iris_batch_bytes_used(struct iris_batch *batch)
|
||||
return batch->map_next - batch->map;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
iris_batch_current_address_u64(struct iris_batch *batch)
|
||||
{
|
||||
return batch->bo->address + (batch->map_next - batch->map);
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure the current command buffer has \param size bytes of space
|
||||
* remaining. If not, this creates a secondary batch buffer and emits
|
||||
|
@ -119,6 +119,23 @@ iris_binder_reserve(struct iris_context *ice,
|
||||
return binder_insert(binder, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve and record binder space for generation shader (FS stage only).
|
||||
*/
|
||||
void
|
||||
iris_binder_reserve_gen(struct iris_context *ice)
|
||||
{
|
||||
struct iris_binder *binder = &ice->state.binder;
|
||||
|
||||
binder->bt_offset[MESA_SHADER_FRAGMENT] =
|
||||
iris_binder_reserve(ice, sizeof(uint32_t));
|
||||
|
||||
iris_record_state_size(ice->state.sizes,
|
||||
binder->bo->address +
|
||||
binder->bt_offset[MESA_SHADER_FRAGMENT],
|
||||
sizeof(uint32_t));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve and record binder space for 3D pipeline shader stages.
|
||||
*
|
||||
|
@ -59,6 +59,7 @@ void iris_init_binder(struct iris_context *ice);
|
||||
void iris_destroy_binder(struct iris_binder *binder);
|
||||
uint32_t iris_binder_reserve(struct iris_context *ice, unsigned size);
|
||||
void iris_binder_reserve_3d(struct iris_context *ice);
|
||||
void iris_binder_reserve_gen(struct iris_context *ice);
|
||||
void iris_binder_reserve_compute(struct iris_context *ice);
|
||||
|
||||
#endif
|
||||
|
@ -714,6 +714,28 @@ struct iris_context {
|
||||
* drawid and is_indexed_draw. They will go in their own vertex element.
|
||||
*/
|
||||
struct iris_state_ref derived_draw_params;
|
||||
|
||||
struct {
|
||||
/**
|
||||
* Generation fragment shader
|
||||
*/
|
||||
struct iris_compiled_shader *shader;
|
||||
|
||||
/**
|
||||
* Ring buffer where to generate indirect draw commands
|
||||
*/
|
||||
struct iris_bo *ring_bo;
|
||||
|
||||
/**
|
||||
* Allocated iris_gen_indirect_params
|
||||
*/
|
||||
struct iris_state_ref params;
|
||||
|
||||
/**
|
||||
* Vertices used to dispatch the generated fragment shaders
|
||||
*/
|
||||
struct iris_state_ref vertices;
|
||||
} generation;
|
||||
} draw;
|
||||
|
||||
struct {
|
||||
@ -930,6 +952,60 @@ struct iris_context {
|
||||
} state;
|
||||
};
|
||||
|
||||
/**
 * Push constant data handed over to the indirect draw generation shader.
 *
 * The layout must match what the generation shader library reads (see the
 * load_param() calls in iris_indirect_gen.c).
 */
struct iris_gen_indirect_params {
   /**
    * Address of iris_context:draw:generation:ring_bo, where the generated
    * draw commands are written
    */
   uint64_t generated_cmds_addr;
   /**
    * Address of indirect data to draw with
    */
   uint64_t indirect_data_addr;
   /**
    * Address inside iris_context:draw:generation:ring_bo where to draw ids
    */
   uint64_t draw_id_addr;
   /**
    * Address of the indirect count (can be null, in which case max_draw_count
    * is used)
    */
   uint64_t draw_count_addr;
   /**
    * Address to jump to in order to generate more draws
    */
   uint64_t gen_addr;
   /**
    * Address to jump to to end generated draws
    */
   uint64_t end_addr;
   /**
    * Stride between the indirect draw data
    */
   uint32_t indirect_data_stride;
   /**
    * Base index of the current generated draws in the ring buffer (increments
    * by ring_count)
    */
   uint32_t draw_base;
   /**
    * Maximum number of generated draws if draw_count_addr is null
    */
   uint32_t max_draw_count;
   /**
    * bits 0-7: ANV_GENERATED_FLAG_*
    * bits 8-15: vertex buffer mocs
    * bits 16-23: stride between generated commands
    */
   uint32_t flags;
   /**
    * Number of items to generate in the ring buffer
    */
   uint32_t ring_count;
};
|
||||
|
||||
#define perf_debug(dbg, ...) do { \
|
||||
if (INTEL_DEBUG(DEBUG_PERF)) \
|
||||
dbg_printf(__VA_ARGS__); \
|
||||
@ -1134,6 +1210,9 @@ bool iris_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
|
||||
uint32_t *kernel_out,
|
||||
void *prog_data_out);
|
||||
|
||||
void iris_ensure_indirect_generation_shader(struct iris_batch *batch);
|
||||
|
||||
|
||||
/* iris_resolve.c */
|
||||
|
||||
void iris_predraw_resolve_inputs(struct iris_context *ice,
|
||||
|
@ -196,6 +196,14 @@ iris_simple_draw_vbo(struct iris_context *ice,
|
||||
batch->screen->vtbl.upload_render_state(ice, batch, draw, drawid_offset, indirect, sc);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
iris_use_draw_indirect_generation(const struct iris_screen *screen,
|
||||
const struct pipe_draw_indirect_info *dindirect)
|
||||
{
|
||||
return dindirect != NULL &&
|
||||
dindirect->draw_count >= screen->driconf.generated_indirect_threshold;
|
||||
}
|
||||
|
||||
static void
|
||||
iris_indirect_draw_vbo(struct iris_context *ice,
|
||||
const struct pipe_draw_info *dinfo,
|
||||
@ -204,6 +212,7 @@ iris_indirect_draw_vbo(struct iris_context *ice,
|
||||
const struct pipe_draw_start_count_bias *draw)
|
||||
{
|
||||
struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
|
||||
struct iris_screen *screen = batch->screen;
|
||||
struct pipe_draw_info info = *dinfo;
|
||||
struct pipe_draw_indirect_info indirect = *dindirect;
|
||||
const bool use_predicate =
|
||||
@ -217,7 +226,14 @@ iris_indirect_draw_vbo(struct iris_context *ice,
|
||||
|
||||
iris_update_draw_parameters(ice, &info, drawid_offset, &indirect, draw);
|
||||
|
||||
batch->screen->vtbl.upload_indirect_render_state(ice, &info, &indirect, draw);
|
||||
screen->vtbl.upload_indirect_render_state(ice, &info, &indirect, draw);
|
||||
} else if (iris_use_draw_indirect_generation(screen, &indirect)) {
|
||||
iris_batch_maybe_flush(batch, 1500);
|
||||
|
||||
iris_update_draw_parameters(ice, &info, drawid_offset, &indirect, draw);
|
||||
|
||||
screen->vtbl.upload_indirect_shader_render_state(
|
||||
ice, &info, &indirect, draw);
|
||||
} else {
|
||||
iris_emit_buffer_barrier_for(batch, iris_resource_bo(indirect.buffer),
|
||||
IRIS_DOMAIN_VF_READ);
|
||||
@ -231,7 +247,7 @@ iris_indirect_draw_vbo(struct iris_context *ice,
|
||||
|
||||
if (use_predicate) {
|
||||
/* Upload MI_PREDICATE_RESULT to GPR15.*/
|
||||
batch->screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
|
||||
screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
|
||||
}
|
||||
|
||||
for (int i = 0; i < indirect.draw_count; i++) {
|
||||
@ -245,7 +261,7 @@ iris_indirect_draw_vbo(struct iris_context *ice,
|
||||
|
||||
if (use_predicate) {
|
||||
/* Restore MI_PREDICATE_RESULT. */
|
||||
batch->screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
|
||||
screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
|
||||
}
|
||||
}
|
||||
|
||||
@ -307,7 +323,19 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info,
|
||||
iris_predraw_flush_buffers(ice, batch, stage);
|
||||
}
|
||||
|
||||
iris_binder_reserve_3d(ice);
|
||||
/* If we're going to use the generation shader, we need to allocate a
|
||||
* binding table entry for it on <= Gfx9 because that platform does not
|
||||
* have a null-rendertarget bit in the send message to the render cache,
|
||||
* the EOT message might pollute later writes to the actual RT of the
|
||||
* draws.
|
||||
*
|
||||
* The generation will call iris_binder_reserve_3d() after the generation
|
||||
* draw call.
|
||||
*/
|
||||
if (iris_use_draw_indirect_generation(screen, indirect) && devinfo->ver <= 9)
|
||||
iris_binder_reserve_gen(ice);
|
||||
else
|
||||
iris_binder_reserve_3d(ice);
|
||||
|
||||
batch->screen->vtbl.update_binder_address(batch, &ice->state.binder);
|
||||
|
||||
|
@ -162,3 +162,10 @@ rw_bo(struct iris_bo *bo, uint64_t offset, enum iris_domain access)
|
||||
return (struct iris_address) { .bo = bo, .offset = offset,
|
||||
.access = access };
|
||||
}
|
||||
|
||||
UNUSED static struct iris_address
|
||||
iris_address_add(struct iris_address addr, uint64_t offset)
|
||||
{
|
||||
addr.offset += offset;
|
||||
return addr;
|
||||
}
|
||||
|
@ -74,3 +74,12 @@ void genX(math_add32_gpr0)(struct iris_context *ice,
|
||||
void genX(math_div32_gpr0)(struct iris_context *ice,
|
||||
struct iris_batch *batch,
|
||||
uint32_t D);
|
||||
|
||||
/* iris_indirect_gen.c */
|
||||
void genX(init_screen_gen_state)(struct iris_screen *screen);
|
||||
struct iris_gen_indirect_params *
|
||||
genX(emit_indirect_generate)(struct iris_batch *batch,
|
||||
const struct pipe_draw_info *draw,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *sc,
|
||||
struct iris_address *out_params_addr);
|
||||
|
650
src/gallium/drivers/iris/iris_indirect_gen.c
Normal file
650
src/gallium/drivers/iris/iris_indirect_gen.c
Normal file
@ -0,0 +1,650 @@
|
||||
/* Copyright © 2023 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
|
||||
#ifdef HAVE_VALGRIND
|
||||
#include <valgrind.h>
|
||||
#include <memcheck.h>
|
||||
#define VG(x) x
|
||||
#else
|
||||
#define VG(x)
|
||||
#endif
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/nir/nir_serialize.h"
|
||||
#include "intel/compiler/brw_compiler.h"
|
||||
#include "intel/common/intel_aux_map.h"
|
||||
#include "intel/common/intel_l3_config.h"
|
||||
#include "intel/common/intel_sample_positions.h"
|
||||
#include "intel/ds/intel_tracepoints.h"
|
||||
#include "iris_batch.h"
|
||||
#include "iris_context.h"
|
||||
#include "iris_defines.h"
|
||||
#include "iris_pipe.h"
|
||||
#include "iris_resource.h"
|
||||
#include "iris_utrace.h"
|
||||
|
||||
#include "iris_genx_macros.h"
|
||||
#include "intel/common/intel_genX_state.h"
|
||||
|
||||
#include "drm-uapi/i915_drm.h"
|
||||
|
||||
#include "libintel_shaders.h"
|
||||
|
||||
#if GFX_VERx10 == 80
|
||||
# include "intel_gfx8_shaders_code.h"
|
||||
#elif GFX_VERx10 == 90
|
||||
# include "intel_gfx9_shaders_code.h"
|
||||
#elif GFX_VERx10 == 110
|
||||
# include "intel_gfx11_shaders_code.h"
|
||||
#elif GFX_VERx10 == 120
|
||||
# include "intel_gfx12_shaders_code.h"
|
||||
#elif GFX_VERx10 == 125
|
||||
# include "intel_gfx125_shaders_code.h"
|
||||
#elif GFX_VERx10 == 200
|
||||
# include "intel_gfx20_shaders_code.h"
|
||||
#else
|
||||
# error "Unsupported generation"
|
||||
#endif
|
||||
|
||||
/* Load one field of `struct_name` from the push-constant (uniform) buffer,
 * using the field's byte offset within the struct as the uniform base and
 * sizing the load to the requested bit width.
 */
#define load_param(b, bit_size, struct_name, field_name) \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0), \
                    .base = offsetof(struct_name, field_name), \
                    .range = bit_size / 8)
|
||||
|
||||
static nir_def *
|
||||
load_fragment_index(nir_builder *b)
|
||||
{
|
||||
nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
|
||||
return nir_iadd(b,
|
||||
nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
|
||||
nir_channel(b, pos_in, 0));
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
|
||||
{
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
screen->compiler->nir_options[MESA_SHADER_KERNEL];
|
||||
|
||||
struct blob_reader blob;
|
||||
blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
|
||||
sizeof(genX(intel_shaders_nir)));
|
||||
return nir_deserialize(mem_ctx, nir_options, &blob);
|
||||
}
|
||||
|
||||
/* Emit the call into the shader library routine that writes generated draw
 * commands.  Every push-constant field of struct iris_gen_indirect_params is
 * loaded as a uniform (argument order must match the libiris_write_draw
 * signature) and the fragment's linear index selects which draw to generate.
 * Returns the push-constant data size the caller must upload.
 */
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}
|
||||
|
||||
void
|
||||
genX(init_screen_gen_state)(struct iris_screen *screen)
|
||||
{
|
||||
screen->vtbl.load_shader_lib = load_shader_lib;
|
||||
screen->vtbl.call_generation_shader = iris_call_generation_shader;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream out temporary/short-lived state.
|
||||
*
|
||||
* This allocates space, pins the BO, and includes the BO address in the
|
||||
* returned offset (which works because all state lives in 32-bit memory
|
||||
* zones).
|
||||
*/
|
||||
static void *
|
||||
upload_state(struct iris_batch *batch,
|
||||
struct u_upload_mgr *uploader,
|
||||
struct iris_state_ref *ref,
|
||||
unsigned size,
|
||||
unsigned alignment)
|
||||
{
|
||||
void *p = NULL;
|
||||
u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
|
||||
iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
|
||||
return p;
|
||||
}
|
||||
|
||||
static uint32_t *
|
||||
stream_state(struct iris_batch *batch,
|
||||
struct u_upload_mgr *uploader,
|
||||
struct pipe_resource **out_res,
|
||||
unsigned size,
|
||||
unsigned alignment,
|
||||
uint32_t *out_offset)
|
||||
{
|
||||
void *ptr = NULL;
|
||||
|
||||
u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);
|
||||
|
||||
struct iris_bo *bo = iris_resource_bo(*out_res);
|
||||
iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
|
||||
|
||||
iris_record_state_size(batch->state_sizes,
|
||||
bo->address + *out_offset, size);
|
||||
|
||||
*out_offset += iris_bo_offset_from_base_address(bo);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* Emit the RECTLIST draw that runs the generation fragment shader.
 *
 * This smashes nearly all 3D pipeline state: a two-element VF setup (vertex
 * position + InstanceID SGV), all geometry stages disabled, a fragment
 * shader with side effects only, and push constants pointing at the
 * iris_gen_indirect_params block at @params_addr (@params_size bytes).
 * @ring_count fragments are covered by the rectangle (8192 wide), one per
 * generated draw.  All dirtied state is flagged for re-emission afterwards.
 */
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                       GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   /* Element 0: InstanceID SGV placeholder sourced from buffer 1. */
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   /* Element 1: the rectangle's vec3 position from buffer 0, w forced to 1. */
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex = 0;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_SRC;
      ve.Component2Control = VFCOMP_STORE_SRC;
      ve.Component3Control = VFCOMP_STORE_1_FP;
   }

   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

   /* Minimal URB allocation: VS-only pipeline, no tess/GS. */
   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);

   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   /* Disable every programmable stage but the fragment shader. */
   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);

   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);

   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }

   const struct brw_wm_prog_data *wm_prog_data = (void *)
      ice->draw.generation.shader->prog_data;

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((wm_prog_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      /* The generation shader only has side effects (no RT output), so force
       * thread dispatch so it is not optimized away.
       */
      if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable = wm_prog_data->uses_vmask;

      /* Gfx9 needs the null-RT binding table entry (see iris_draw_vbo). */
      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                              wm_prog_data->base.ubo_ranges[0].length;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
      psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!wm_prog_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming push constant alloc command immediately
    * program push constant command(ZERO length) without any commit between
    * them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

   /* One ring entry per fragment: the rectangle is 8192 wide and as many
    * rows tall as needed to cover ring_count.
    */
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */


   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex = 0;
      vb.AddressModifyEnable = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch = 3 * sizeof(float);
      vb.BufferSize = 9 * sizeof(float);
      vb.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

   /* Bind the params buffer as FS push constants. */
#if GFX_VERx10 > 120
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
                        GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *    flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
    *    equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
    *    0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif

#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the next
    * RT writes, so we choose to reemit the binding table to a null RT on Gfx8
    * too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   /* Point the single binding table entry at the null framebuffer surface. */
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType = SEQUENTIAL;
      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance = 3;
      prim.InstanceCount = 1;
   }


   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but it was already off
       * anyway
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   /* Force the next draw to re-emit the URB configuration. */
   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}
|
||||
|
||||
#define RING_SIZE (128 * 1024)
|
||||
|
||||
static void
|
||||
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
|
||||
{
|
||||
struct iris_bufmgr *bufmgr = screen->bufmgr;
|
||||
|
||||
if (ice->draw.generation.ring_bo != NULL)
|
||||
return;
|
||||
|
||||
ice->draw.generation.ring_bo =
|
||||
iris_bo_alloc(bufmgr, "gen ring",
|
||||
RING_SIZE, 8, IRIS_MEMZONE_OTHER,
|
||||
BO_ALLOC_NO_SUBALLOC);
|
||||
iris_get_backing_bo(ice->draw.generation.ring_bo)->real.kflags |= EXEC_OBJECT_CAPTURE;
|
||||
|
||||
}
|
||||
|
||||
/**
 * Emit a dispatch of the draw-generation shader for an indirect draw.
 *
 * The generation shader reads the application's indirect draw data and
 * writes actual 3DPRIMITIVE (plus optional 3DSTATE_VERTEX_BUFFERS) commands
 * into the generation ring BO.  Returns the CPU-mapped parameter block
 * consumed by the shader; *out_params_addr receives its GPU address.
 *
 * NOTE(review): the caller is expected to fill params->gen_addr /
 * params->end_addr afterwards (see iris_upload_indirect_shader_render_state),
 * since those jump addresses are only known once the batch is emitted.
 */
struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

   /* Size in bytes of one application indirect-draw record: 5 dwords for
    * indexed draws, 4 for non-indexed (used when indirect->stride == 0).
    */
   const size_t struct_stride = draw->index_size > 0 ?
      sizeof(uint32_t) * 5 :
      sizeof(uint32_t) * 4;

   /* Number of command bytes the generation shader writes per draw. */
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

   /* Dwords reserved at the end of the ring for the jump back into the
    * batch (preceded by an MI_ARB_CHECK on Gfx12+).
    */
   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);
   /* How many draws fit in one pass over the ring; each draw also needs
    * 2 extra dwords of side-band data.
    */
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   /* Upload the parameter block read by the generation shader. */
   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   /* Pin every BO the generation shader reads or writes. */
   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr = ice->draw.generation.ring_bo->address,
      .ring_count = ring_count,
      /* Side-band draw-id data lives after the generated commands in the
       * ring.  NOTE(review): only MI_BATCH_BUFFER_START_length is added
       * here while setup_dws above also counts MI_ARB_CHECK on Gfx12+ —
       * presumably intentional (the ARB_CHECK precedes the jump), verify
       * against the generation shader's layout.
       */
      .draw_id_addr = ice->draw.generation.ring_bo->address +
                      ring_count * cmd_stride +
                      4 * GENX(MI_BATCH_BUFFER_START_length),
      /* 0 means "no GPU-side draw count; use max_draw_count". */
      .draw_count_addr = indirect->indirect_draw_count ?
                         (iris_resource_bo(indirect->indirect_draw_count)->address +
                          indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr = iris_resource_bo(indirect->buffer)->address +
                            indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count = indirect->draw_count,
      /* Packed field shared with Anv's generated-draw shader:
       * bits 0..7 = ANV_GENERATED_FLAG_*, 8..15 = vertex buffer MOCS,
       * 16..23 = command stride in dwords, 24..31 = bound VB count.
       */
      .flags = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
               (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                ANV_GENERATED_FLAG_PREDICATED : 0) |
               (ice->state.vs_uses_draw_params ?
                ANV_GENERATED_FLAG_BASE : 0) |
               (ice->state.vs_uses_derived_draw_params ?
                ANV_GENERATED_FLAG_DRAWID : 0) |
               (iris_mocs(NULL, &screen->isl_dev,
                          ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
               ((cmd_stride / 4) << 16) |
               util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   /* Dispatch the generation shader; one dispatch generates at most a
    * ring's worth of draws (the ring is re-run for the remainder).
    */
   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);


   return params;
}
|
@ -39,6 +39,7 @@
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "intel/compiler/brw_compiler.h"
|
||||
#include "intel/compiler/brw_nir.h"
|
||||
#include "iris_context.h"
|
||||
#include "iris_resource.h"
|
||||
|
||||
@ -290,3 +291,140 @@ iris_destroy_program_cache(struct iris_context *ice)
|
||||
|
||||
ralloc_free(ice->shaders.cache);
|
||||
}
|
||||
|
||||
/* Link the Intel CL shader library into \p nir and lower the result to
 * explicit I/O so it can be compiled as a regular shader.
 *
 * Pass order matters: functions must be inlined and non-entrypoints
 * removed before variables can be lowered to explicit types/IO.
 */
static void
link_libintel_shaders(nir_shader *nir, const nir_shader *libintel)
{
   nir_link_shader_functions(nir, libintel);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_remove_non_entrypoints);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
              glsl_get_cl_type_size_align);
   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   /* The library is OpenCL-style code, so use the 62-bit generic address
    * format for all memory modes it can touch.
    */
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
              nir_var_mem_global,
              nir_address_format_62bit_generic);
}
|
||||
|
||||
/**
 * Build (or fetch from the program cache) the fragment shader that
 * generates indirect draw commands, storing it in
 * ice->draw.generation.shader.
 *
 * The shader body comes from the per-gen backend via
 * screen->vtbl.call_generation_shader(), gets the Intel shader library
 * linked in, is optimized with a hand-picked NIR pass pipeline, and is
 * finally compiled with brw_compile_fs() and uploaded.  The pass order
 * below is deliberate — do not reorder casually.
 */
void
iris_ensure_indirect_generation_shader(struct iris_batch *batch)
{
   struct iris_context *ice = batch->ice;
   if (ice->draw.generation.shader)
      return;

   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   /* Fixed-size cache key; the shader has no real key inputs, so a name
    * string is enough to identify it in the BLORP cache.
    */
   const struct {
      char name[40];
   } key = {
      .name = "iris-generation-shader",
   };
   ice->draw.generation.shader =
      iris_find_cached_shader(ice, IRIS_CACHE_BLORP, sizeof(key), &key);
   if (ice->draw.generation.shader != NULL)
      return;

   struct brw_compiler *compiler = screen->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_FRAGMENT];

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                                  nir_options,
                                                  "iris-indirect-generate");

   /* Backend emits the shader body and reports its push-constant size. */
   uint32_t uniform_size =
      screen->vtbl.call_generation_shader(screen, &b);

   nir_shader *nir = b.shader;

   void *mem_ctx = ralloc_context(NULL);
   link_libintel_shaders(nir, screen->vtbl.load_shader_lib(screen, mem_ctx));

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);

   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   NIR_PASS_V(nir, nir_propagate_invariant, false);

   NIR_PASS_V(nir, nir_lower_input_attachments,
              &(nir_input_attachment_options) {
                 .use_fragcoord_sysval = true,
                 .use_layer_id_sysval = true,
              });

   /* Reset sizes before gathering information */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);
   NIR_PASS_V(nir, nir_opt_dce);

   /* Do vectorizing here. For some reason when trying to do it in the back
    * this just isn't working.
    */
   nir_load_store_vectorize_options options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
      .callback = brw_nir_should_vectorize_mem,
      .robust_modes = (nir_variable_mode)0,
   };
   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);

   nir->num_uniforms = uniform_size;

   /* Empty key/prog_data: the shader is fully determined by its NIR. */
   union brw_any_prog_key prog_key;
   memset(&prog_key, 0, sizeof(prog_key));

   struct brw_wm_prog_data *prog_data = ralloc_size(NULL, sizeof(*prog_data));
   memset(prog_data, 0, sizeof(*prog_data));
   prog_data->base.nr_params = nir->num_uniforms / 4;

   brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->base.ubo_ranges);

   /* One stats slot per possible SIMD width compiled. */
   struct brw_compile_stats stats[3];
   struct brw_compile_fs_params params = {
      .base = {
         .nir = nir,
         .log_data = &ice->dbg,
         .debug_flag = DEBUG_WM,
         .stats = stats,
         .mem_ctx = mem_ctx,
      },
      .key = &prog_key.wm,
      .prog_data = prog_data,
   };
   const unsigned *program = brw_compile_fs(compiler, &params);

   /* No binding table entries are needed; finalize with an empty one. */
   struct iris_binding_table bt;
   memset(&bt, 0, sizeof(bt));

   struct iris_compiled_shader *shader =
      iris_create_shader_variant(screen, ice->shaders.cache,
                                 IRIS_CACHE_BLORP,
                                 sizeof(key), &key);
   iris_finalize_program(shader, &prog_data->base, NULL, NULL, 0, 0, 0, &bt);

   iris_upload_shader(screen, NULL, shader, ice->shaders.cache,
                      ice->shaders.uploader_driver,
                      IRIS_CACHE_BLORP, sizeof(key), &key, program);

   ralloc_free(mem_ctx);

   /* Pin the assembly so the current batch can execute it. */
   struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   ice->draw.generation.shader = shader;
}
|
||||
|
@ -891,6 +891,8 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
|
||||
driQueryOptionb(config->options, "intel_enable_wa_14018912822");
|
||||
screen->driconf.enable_tbimr =
|
||||
driQueryOptionb(config->options, "intel_tbimr");
|
||||
screen->driconf.generated_indirect_threshold =
|
||||
driQueryOptioni(config->options, "generated_indirect_threshold");
|
||||
|
||||
screen->precompile = debug_get_bool_option("shader_precompile", true);
|
||||
|
||||
@ -941,6 +943,7 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
|
||||
iris_init_screen_program_functions(pscreen);
|
||||
|
||||
genX_call(screen->devinfo, init_screen_state, screen);
|
||||
genX_call(screen->devinfo, init_screen_gen_state, screen);
|
||||
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
|
@ -45,6 +45,9 @@ struct iris_fs_prog_key;
|
||||
struct iris_cs_prog_key;
|
||||
enum iris_program_cache_id;
|
||||
|
||||
typedef struct nir_builder nir_builder;
|
||||
typedef struct nir_shader nir_shader;
|
||||
|
||||
struct u_trace;
|
||||
|
||||
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
|
||||
@ -74,6 +77,10 @@ struct iris_vtable {
|
||||
const struct pipe_draw_info *draw,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *sc);
|
||||
void (*upload_indirect_shader_render_state)(struct iris_context *ice,
|
||||
const struct pipe_draw_info *draw,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *sc);
|
||||
void (*update_binder_address)(struct iris_batch *batch,
|
||||
struct iris_binder *binder);
|
||||
void (*upload_compute_state)(struct iris_context *ice,
|
||||
@ -151,6 +158,9 @@ struct iris_vtable {
|
||||
struct iris_cs_prog_key *key);
|
||||
void (*lost_genx_state)(struct iris_context *ice, struct iris_batch *batch);
|
||||
void (*disable_rhwo_optimization)(struct iris_batch *batch, bool disable);
|
||||
|
||||
nir_shader *(*load_shader_lib)(struct iris_screen *screen, void *mem_ctx);
|
||||
unsigned (*call_generation_shader)(struct iris_screen *screen, nir_builder *b);
|
||||
};
|
||||
|
||||
struct iris_address {
|
||||
@ -195,6 +205,7 @@ struct iris_screen {
|
||||
float lower_depth_range_rate;
|
||||
bool intel_enable_wa_14018912822;
|
||||
bool enable_tbimr;
|
||||
unsigned generated_indirect_threshold;
|
||||
} driconf;
|
||||
|
||||
/** Does the kernel support various features (KERNEL_HAS_* bitfield)? */
|
||||
|
@ -8536,6 +8536,189 @@ iris_upload_indirect_render_state(struct iris_context *ice,
|
||||
#endif /* GFX_VERx10 >= 125 */
|
||||
}
|
||||
|
||||
/**
 * Upload render state for an indirect draw executed via the generated
 * indirect draw path (mirrors Anv's ring-buffer mode).
 *
 * Batch layout emitted here:
 *   gen_addr:  dispatch of the generation shader filling the ring BO,
 *              then MI_BATCH_BUFFER_START jumping into the ring;
 *   inc_addr:  "one more pass" block — bumps params->draw_base by
 *              ring_count and jumps back to gen_addr (the ring's own
 *              terminating jump targets either inc_addr or end_addr);
 *   end_addr:  exit point after all draws are generated.
 * gen_addr/inc_addr/end_addr are written into the CPU-mapped params
 * block at the end, once their batch offsets are known.
 */
static void
iris_upload_indirect_shader_render_state(struct iris_context *ice,
                                         const struct pipe_draw_info *draw,
                                         const struct pipe_draw_indirect_info *indirect,
                                         const struct pipe_draw_start_count_bias *sc)
{
   assert(indirect);

   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   UNUSED struct iris_screen *screen = batch->screen;
   UNUSED const struct intel_device_info *devinfo = screen->devinfo;

   if (ice->state.dirty & IRIS_DIRTY_VERTEX_BUFFER_FLUSHES)
      flush_vbos(ice, batch);

   iris_batch_sync_region_start(batch);

   /* Always pin the binder.  If we're emitting new binding table pointers,
    * we need it.  If not, we're probably inheriting old tables via the
    * context, and need it anyway.  Since true zero-bindings cases are
    * practically non-existent, just pin it and avoid last_res tracking.
    */
   iris_use_pinned_bo(batch, ice->state.binder.bo, false,
                      IRIS_DOMAIN_NONE);

   if (!batch->contains_draw) {
      if (GFX_VER == 12) {
         /* Re-emit constants when starting a new batch buffer in order to
          * work around push constant corruption on context switch.
          *
          * XXX - Provide hardware spec quotation when available.
          */
         ice->state.stage_dirty |= (IRIS_STAGE_DIRTY_CONSTANTS_VS  |
                                    IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                                    IRIS_STAGE_DIRTY_CONSTANTS_TES |
                                    IRIS_STAGE_DIRTY_CONSTANTS_GS  |
                                    IRIS_STAGE_DIRTY_CONSTANTS_FS);
      }
      batch->contains_draw = true;
   }

   if (!batch->contains_draw_with_next_seqno) {
      iris_restore_render_saved_bos(ice, batch, draw);
      batch->contains_draw_with_next_seqno = true;
   }

   if (draw->index_size > 0)
      iris_emit_index_buffer(ice, batch, draw, sc);

   /* Make sure we have enough space to keep all the commands in the single BO
    * (because of the jumps)
    */
   iris_require_command_space(batch, 2000);

#ifndef NDEBUG
   /* The self-referential jump addresses below are only valid if nothing
    * splits the batch; command_bo lets us assert that at the end.
    */
   struct iris_bo *command_bo = batch->bo;
#endif

   /* Jump point to generate more draw if we run out of space in the ring
    * buffer.
    */
   uint64_t gen_addr = iris_batch_current_address_u64(batch);

   iris_handle_always_flush_cache(batch);

#if GFX_VER == 9
   iris_emit_pipe_control_flush(batch, "before generation",
                                PIPE_CONTROL_VF_CACHE_INVALIDATE);
#endif

   struct iris_address params_addr;
   struct iris_gen_indirect_params *params =
      genX(emit_indirect_generate)(batch, draw, indirect, sc,
                                   &params_addr);

   /* Make the generated commands visible before the CS parses them; VF
    * invalidation is only needed if the generated commands include vertex
    * buffer state.
    */
   iris_emit_pipe_control_flush(batch, "after generation flush",
                                ((ice->state.vs_uses_draw_params ||
                                  ice->state.vs_uses_derived_draw_params) ?
                                 PIPE_CONTROL_VF_CACHE_INVALIDATE : 0) |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                PIPE_CONTROL_DATA_CACHE_FLUSH |
                                PIPE_CONTROL_CS_STALL);

   trace_intel_begin_draw(&batch->trace);

   /* Always pin the binder.  If we're emitting new binding table pointers,
    * we need it.  If not, we're probably inheriting old tables via the
    * context, and need it anyway.  Since true zero-bindings cases are
    * practically non-existent, just pin it and avoid last_res tracking.
    */
   iris_use_pinned_bo(batch, ice->state.binder.bo, false,
                      IRIS_DOMAIN_NONE);

   /* Wa_1306463417 - Send HS state for every primitive on gfx11.
    * Wa_16011107343 (same for gfx12)
    * We implement this by setting TCS dirty on each draw.
    */
   if ((INTEL_NEEDS_WA_1306463417 || INTEL_NEEDS_WA_16011107343) &&
       ice->shaders.prog[MESA_SHADER_TESS_CTRL]) {
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS;
   }

   iris_upload_dirty_render_state(ice, batch, draw, true);

   iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_DRAW, draw, indirect, sc);

   genX(maybe_emit_breakpoint)(batch, true);

#if GFX_VER >= 12
   /* Keep the command streamer from prefetching past the jump into the
    * ring (its contents were just written by the GPU).
    */
   iris_emit_cmd(batch, GENX(MI_ARB_CHECK), arb) {
      arb.PreParserDisableMask = true;
      arb.PreParserDisable = true;
   }
#endif

   /* Jump into the ring buffer of generated draw commands. */
   iris_emit_cmd(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
      bbs.AddressSpaceIndicator = ASI_PPGTT;
      bbs.BatchBufferStartAddress = (struct iris_address) {
         .bo = ice->draw.generation.ring_bo,
      };
   }

   /* Run the ring buffer one more time with the next set of commands */
   uint64_t inc_addr = iris_batch_current_address_u64(batch);
   {
      iris_emit_pipe_control_flush(batch,
                                   "post generated draws wait",
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                   PIPE_CONTROL_CS_STALL);

      struct mi_builder b;
      mi_builder_init(&b, batch->screen->devinfo, batch);

      /* Advance draw_base by one ring's worth of draws so the next
       * generation pass produces the following chunk.
       */
      struct iris_address draw_base_addr = iris_address_add(
         params_addr,
         offsetof(struct iris_gen_indirect_params, draw_base));

      const uint32_t mocs =
         iris_mocs(draw_base_addr.bo, &screen->isl_dev, 0);
      mi_builder_set_mocs(&b, mocs);

      mi_store(&b, mi_mem32(draw_base_addr),
                   mi_iadd(&b, mi_mem32(draw_base_addr),
                               mi_imm(params->ring_count)));

      iris_emit_pipe_control_flush(batch,
                                   "post generation base increment",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_CONST_CACHE_INVALIDATE);

      /* Loop back to the generation dispatch. */
      iris_emit_cmd(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = (struct iris_address) {
            .offset = gen_addr,
         };
      }
   }

   /* Exit of the ring buffer */
   uint64_t end_addr = iris_batch_current_address_u64(batch);

#ifndef NDEBUG
   assert(command_bo == batch->bo);
#endif

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);

   iris_emit_pipe_control_flush(batch,
                                "post generated draws wait",
                                PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                PIPE_CONTROL_CS_STALL);

   /* Patch the jump targets into the (CPU-mapped) parameter block now
    * that the batch offsets are known.
    */
   params->gen_addr = inc_addr;
   params->end_addr = end_addr;

   iris_batch_sync_region_end(batch);

   uint32_t count = (sc) ? sc->count : 0;
   count *= draw->instance_count ? draw->instance_count : 1;
   trace_intel_end_draw(&batch->trace, count);
}
|
||||
|
||||
static void
|
||||
iris_load_indirect_location(struct iris_context *ice,
|
||||
struct iris_batch *batch,
|
||||
@ -8916,6 +9099,8 @@ iris_destroy_state(struct iris_context *ice)
|
||||
|
||||
pipe_resource_reference(&ice->draw.draw_params.res, NULL);
|
||||
pipe_resource_reference(&ice->draw.derived_draw_params.res, NULL);
|
||||
pipe_resource_reference(&ice->draw.generation.params.res, NULL);
|
||||
pipe_resource_reference(&ice->draw.generation.vertices.res, NULL);
|
||||
|
||||
/* Loop over all VBOs, including ones for draw parameters */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(genx->vertex_buffers); i++) {
|
||||
@ -9974,6 +10159,7 @@ genX(init_screen_state)(struct iris_screen *screen)
|
||||
screen->vtbl.init_copy_context = iris_init_copy_context;
|
||||
screen->vtbl.upload_render_state = iris_upload_render_state;
|
||||
screen->vtbl.upload_indirect_render_state = iris_upload_indirect_render_state;
|
||||
screen->vtbl.upload_indirect_shader_render_state = iris_upload_indirect_shader_render_state;
|
||||
screen->vtbl.update_binder_address = iris_update_binder_address;
|
||||
screen->vtbl.upload_compute_state = iris_upload_compute_state;
|
||||
screen->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
|
||||
|
@ -74,7 +74,7 @@ iris_per_hw_ver_libs = []
|
||||
foreach v : ['80', '90', '110', '120', '125', '200']
|
||||
iris_per_hw_ver_libs += static_library(
|
||||
'iris_per_hw_ver@0@'.format(v),
|
||||
['iris_blorp.c', 'iris_query.c', 'iris_state.c', gen_xml_pack],
|
||||
['iris_blorp.c', 'iris_query.c', 'iris_state.c', 'iris_indirect_gen.c', gen_xml_pack],
|
||||
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_intel],
|
||||
c_args : [
|
||||
no_override_init_args, sse2_args,
|
||||
@ -82,7 +82,7 @@ foreach v : ['80', '90', '110', '120', '125', '200']
|
||||
],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
dependencies : [dep_libdrm, dep_valgrind, idep_genxml, idep_nir_headers,
|
||||
idep_intel_driver_ds_headers, ],
|
||||
idep_intel_driver_ds_headers, idep_intel_shaders, ],
|
||||
)
|
||||
endforeach
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user