nvk/nir: Add cbuf analysis to nvk_nir_lower_descriptors()

This builds a list of the 15 most commonly used UBOs or descriptor
buffers and places them in a new nvk_cbuf_map data structure.
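
For illustration (this example is not part of the commit), a graphics shader whose hottest buffers are one dynamic UBO and descriptors from set 0 could end up with a map like:

   struct nvk_cbuf_map map = {
      .cbuf_count = 3,
      .cbufs = {
         { .type = NVK_CBUF_TYPE_ROOT_DESC },   /* always cbuf 0 */
         { .type = NVK_CBUF_TYPE_DYNAMIC_UBO, .dynamic_idx = 0 },
         { .type = NVK_CBUF_TYPE_DESC_SET,    .desc_set = 0 },
      },
   };

Cbuf 0 is always the root descriptor table; the remaining slots are filled most-used-first by the analysis below.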

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26615>
Faith Ekstrand 2023-12-08 15:15:48 -06:00 committed by Marge Bot
parent 952e06672f
commit f1c909edd5
4 changed files with 472 additions and 17 deletions


@@ -13,23 +13,82 @@
#include "nir_builder.h"
#include "nir_deref.h"

struct lower_desc_cbuf {
   struct nvk_cbuf key;

   uint32_t use_count;

   uint64_t start;
   uint64_t end;
};

static uint32_t
hash_cbuf(const void *data)
{
   return _mesa_hash_data(data, sizeof(struct nvk_cbuf));
}

static bool
cbufs_equal(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct nvk_cbuf)) == 0;
}

static int
compar_cbufs(const void *_a, const void *_b)
{
   const struct lower_desc_cbuf *a = _a;
   const struct lower_desc_cbuf *b = _b;

#define COMPAR(field, pos) \
   if (a->field < b->field) return -(pos); \
   if (a->field > b->field) return (pos);

   /* Sort by most used first */
   COMPAR(use_count, -1)

   /* Keep the list stable by then sorting by key fields. */
   COMPAR(key.type, 1)
   COMPAR(key.desc_set, 1)
   COMPAR(key.dynamic_idx, 1)
   COMPAR(key.desc_offset, 1)

#undef COMPAR

   return 0;
}

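/* Illustrative example, not part of the original commit: for two recorded
 * cbufs a and b with a->use_count = 5 and b->use_count = 2,
 * COMPAR(use_count, -1) makes compar_cbufs() return -1, so qsort() in
 * build_cbuf_map() below places the more heavily used cbuf first; the key
 * comparisons only break ties between equally used cbufs.
 */
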
struct lower_descriptors_ctx {
   const struct vk_pipeline_layout *layout;

   bool clamp_desc_array_bounds;
   nir_address_format ubo_addr_format;
   nir_address_format ssbo_addr_format;

   struct hash_table *cbufs;
   struct nvk_cbuf_map *cbuf_map;
};

static void
record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
                struct lower_descriptors_ctx *ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
   if (entry != NULL) {
      struct lower_desc_cbuf *cbuf = entry->data;
      cbuf->use_count++;
      cbuf->start = MIN2(cbuf->start, start);
      cbuf->end = MAX2(cbuf->end, end);
   } else {
      struct lower_desc_cbuf *cbuf =
         ralloc(ctx->cbufs, struct lower_desc_cbuf);
      *cbuf = (struct lower_desc_cbuf) {
         .key = *key,
         .use_count = 1,
         .start = start,
         .end = end,
      };
      _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
   }
}

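/* Illustrative example, not part of the original commit: recording the same
 * key twice, first with [start, end) = [0, 16) and then with [64, 80),
 * leaves a single entry with use_count = 2 and the unioned range [0, 80).
 */
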
static const struct nvk_descriptor_set_binding_layout *
@@ -46,6 +105,366 @@ get_binding_layout(uint32_t set, uint32_t binding,
   return &set_layout->binding[binding];
}

static void
record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
                           struct lower_descriptors_ctx *ctx)
{
   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   const struct nvk_cbuf key = {
      .type = NVK_CBUF_TYPE_DESC_SET,
      .desc_set = set,
   };

   uint64_t start, end;
   if (index == NULL) {
      /* When we don't have an index, assume 0 */
      start = binding_layout->offset;
      end = start + binding_layout->stride;
   } else if (nir_src_is_const(*index)) {
      start = binding_layout->offset +
              nir_src_as_uint(*index) * binding_layout->stride;
      end = start + binding_layout->stride;
   } else {
      start = binding_layout->offset;
      end = start + binding_layout->array_size * binding_layout->stride;
   }

   record_cbuf_use(&key, start, end, ctx);
}

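/* Illustrative example, not part of the original commit: for a binding with
 * offset = 32 and stride = 16, a constant index of 2 records [64, 80); a
 * non-constant index conservatively records the whole binding,
 * [32, 32 + array_size * 16).
 */
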
static void
record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
                                struct lower_descriptors_ctx *ctx)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   /* These we'll handle later */
   if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
       desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
      return;

   record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
                              nir_intrinsic_binding(intrin),
                              &intrin->src[0], ctx);
}

static void
record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
                                 struct lower_descriptors_ctx *ctx)
{
   nir_src *index_src = NULL;
   if (deref->deref_type == nir_deref_type_array) {
      index_src = &deref->arr.index;
      deref = nir_deref_instr_parent(deref);
   }

   assert(deref->deref_type == nir_deref_type_var);
   nir_variable *var = deref->var;

   record_descriptor_cbuf_use(var->data.descriptor_set,
                              var->data.binding,
                              index_src, ctx);
}

static void
record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
                               struct lower_descriptors_ctx *ctx)
{
   const int texture_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   const int sampler_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);

   if (texture_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
      record_deref_descriptor_cbuf_use(deref, ctx);
   }

   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      record_deref_descriptor_cbuf_use(deref, ctx);
   }
}

static struct nvk_cbuf
ubo_deref_to_cbuf(nir_deref_instr *deref,
                  nir_intrinsic_instr **resource_index_out,
                  uint64_t *offset_out,
                  uint64_t *start_out, uint64_t *end_out,
                  const struct lower_descriptors_ctx *ctx)
{
   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

   /* In case we early return */
   *offset_out = 0;
   *start_out = 0;
   *end_out = UINT64_MAX;
   *resource_index_out = NULL;

   const struct nvk_cbuf invalid = {
      .type = NVK_CBUF_TYPE_INVALID,
   };

   uint64_t offset = 0;
   uint64_t range = glsl_get_explicit_size(deref->type, false);
   bool offset_valid = true;
   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      if (parent == NULL) {
         assert(deref->deref_type == nir_deref_type_cast);
         break;
      }

      switch (deref->deref_type) {
      case nir_deref_type_var:
         unreachable("Buffers don't use variables in Vulkan");

      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         uint32_t stride = nir_deref_instr_array_stride(deref);
         if (range > stride)
            offset_valid = false;

         if (deref->deref_type == nir_deref_type_array &&
             nir_src_is_const(deref->arr.index)) {
            offset += nir_src_as_uint(deref->arr.index) * stride;
         } else {
            range = glsl_get_length(parent->type) * stride;
         }
         break;
      }

      case nir_deref_type_ptr_as_array:
         /* All bets are off.  We shouldn't see these most of the time
          * anyway, even with variable pointers.
          */
         offset_valid = false;
         break;

      case nir_deref_type_struct:
         offset += glsl_get_struct_field_offset(parent->type,
                                                deref->strct.index);
         break;

      case nir_deref_type_cast:
         /* nir_explicit_io_address_from_deref() can't handle casts */
         offset_valid = false;
         break;

      default:
         unreachable("Unknown deref type");
      }

      deref = parent;
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   if (load_desc == NULL ||
       load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return invalid;

   nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
   if (res_index == NULL ||
       res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
      return invalid;

   /* We try to early return as little as possible prior to this point so we
    * can return the resource index intrinsic in as many cases as possible.
    * After this point, though, early returns are fair game.
    */
   *resource_index_out = res_index;

   if (!offset_valid || !nir_src_is_const(res_index->src[0]))
      return invalid;

   uint32_t set = nir_intrinsic_desc_set(res_index);
   uint32_t binding = nir_intrinsic_binding(res_index);
   uint32_t index = nir_src_as_uint(res_index->src[0]);
   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
      *offset_out = 0;
      *start_out = offset;
      *end_out = offset + range;
      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_UBO_DESC,
         .desc_set = set,
         .desc_offset = binding_layout->offset +
                        index * binding_layout->stride,
      };
   }

   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
      uint8_t dynamic_buffer_index =
         nvk_descriptor_set_layout_dynbuf_start(ctx->layout, set) +
         binding_layout->dynamic_buffer_index + index;
      *offset_out = 0;
      *start_out = offset;
      *end_out = offset + range;
      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
         .dynamic_idx = dynamic_buffer_index,
      };
   }

   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
      *offset_out = binding_layout->offset;
      *start_out = binding_layout->offset + offset;
      *end_out = *start_out + range;
      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_DESC_SET,
         .desc_set = set,
      };
   }

   default:
      return invalid;
   }
}

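/* Illustrative example, not part of the original commit: for an access like
 * ubo.lights[2].color, with a 48-byte array stride and color at struct
 * offset 16, the walk accumulates offset = 2 * 48 + 16 and range =
 * sizeof(color), so the recorded range covers exactly the bytes the load
 * can touch.  A non-constant lights[i] index instead widens the range to
 * the whole array.
 */
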
static void
record_load_ubo_cbuf_uses(nir_deref_instr *deref,
                          struct lower_descriptors_ctx *ctx)
{
   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

   UNUSED uint64_t offset;
   uint64_t start, end;
   nir_intrinsic_instr *res_index;
   struct nvk_cbuf cbuf =
      ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);

   if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
      record_cbuf_use(&cbuf, start, end, ctx);
   } else if (res_index != NULL) {
      record_vulkan_resource_cbuf_use(res_index, ctx);
   }
}

static bool
record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
{
   struct lower_descriptors_ctx *ctx = _ctx;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         record_vulkan_resource_cbuf_use(intrin, ctx);
         return false;

      case nir_intrinsic_load_deref: {
         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
         if (nir_deref_mode_is(deref, nir_var_mem_ubo))
            record_load_ubo_cbuf_uses(deref, ctx);
         return false;
      }

      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_deref_atomic_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_load_param_intel:
      case nir_intrinsic_image_deref_load_raw_intel:
      case nir_intrinsic_image_deref_store_raw_intel: {
         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
         record_deref_descriptor_cbuf_use(deref, ctx);
         return false;
      }

      default:
         return false;
      }
      unreachable("All cases return false");
   }

   case nir_instr_type_tex:
      record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
      return false;

   default:
      return false;
   }
}

static void
build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
{
   ctx->cbufs = _mesa_hash_table_create(NULL, hash_cbuf, cbufs_equal);

   nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
                                nir_metadata_all, (void *)ctx);

   struct lower_desc_cbuf *cbufs =
      ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
                   _mesa_hash_table_num_entries(ctx->cbufs));

   uint32_t num_cbufs = 0;
   hash_table_foreach(ctx->cbufs, entry) {
      struct lower_desc_cbuf *cbuf = entry->data;

      /* We currently only bind cbufs starting at the beginning of the
       * buffer so, if the used range starts after the max cbuf size,
       * there's no point in including it in the list.
       */
      if (cbuf->start > NVK_MAX_CBUF_SIZE)
         continue;

      cbufs[num_cbufs++] = *cbuf;
   }

   qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);

   uint32_t mapped_cbuf_count = 0;

   /* Root descriptors always go in cbuf 0 */
   ctx->cbuf_map->cbufs[mapped_cbuf_count++] = (struct nvk_cbuf) {
      .type = NVK_CBUF_TYPE_ROOT_DESC,
   };

   uint8_t max_cbuf_bindings;
   if (nir->info.stage == MESA_SHADER_COMPUTE ||
       nir->info.stage == MESA_SHADER_KERNEL) {
      max_cbuf_bindings = 8;
   } else {
      max_cbuf_bindings = 16;
   }

   for (uint32_t i = 0; i < num_cbufs; i++) {
      if (mapped_cbuf_count >= max_cbuf_bindings)
         break;
      ctx->cbuf_map->cbufs[mapped_cbuf_count++] = cbufs[i].key;
   }
   ctx->cbuf_map->cbuf_count = mapped_cbuf_count;

   ralloc_free(ctx->cbufs);
   ctx->cbufs = NULL;
}

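/* Illustrative note, not part of the original commit: with cbuf 0 reserved
 * for the root descriptors, this maps at most 15 analyzed cbufs for
 * graphics stages (16 hardware cbuf bindings) and 7 for compute
 * (8 bindings).
 */
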
static nir_def *
load_descriptor_set_addr(nir_builder *b, uint32_t set,
                         UNUSED const struct lower_descriptors_ctx *ctx)
{
   uint32_t set_addr_offset =
      nvk_root_descriptor_offset(sets) + set * sizeof(uint64_t);

   return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
                       nir_imm_int(b, set_addr_offset),
                       .align_mul = 8, .align_offset = 0, .range = ~0);
}

static nir_def *
load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
                uint32_t set, uint32_t binding, nir_def *index,
@@ -651,7 +1070,8 @@ lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
bool
nvk_nir_lower_descriptors(nir_shader *nir,
                          const struct vk_pipeline_robustness_state *rs,
                          const struct vk_pipeline_layout *layout,
                          struct nvk_cbuf_map *cbuf_map_out)
{
   struct lower_descriptors_ctx ctx = {
      .layout = layout,
@@ -663,12 +1083,23 @@ nvk_nir_lower_descriptors(nir_shader *nir,
      .ubo_addr_format = nvk_buffer_addr_format(rs->uniform_buffers),
   };

   /* We run in three passes:
    *
    *  1. Find ranges of UBOs that we can promote to bound UBOs.  Nothing is
    *     actually lowered in this pass.  It's just analysis.
    *
    *  2. Attempt to lower everything with direct descriptors.  This may
    *     fail to lower some SSBO intrinsics when variable pointers are
    *     used.
    *
    *  3. Clean up any SSBO intrinsics which are left and lower them to
    *     slightly less efficient but variable-pointers-correct versions.
    */

   if (cbuf_map_out != NULL) {
      ctx.cbuf_map = cbuf_map_out;
      build_cbuf_map(nir, &ctx);
   }

   bool pass_lower_descriptors =
      nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
                                   nir_metadata_block_index |


@@ -30,6 +30,9 @@
#define NVK_SPARSE_ADDR_SPACE_SIZE (1ull << 39)
#define NVK_MAX_BUFFER_SIZE (1ull << 31)

/* Max size of a bound cbuf */
#define NVK_MAX_CBUF_SIZE (1u << 16)

struct nvk_addr_range {
   uint64_t addr;
   uint64_t range;


@@ -348,7 +348,7 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
    */
   assert(dev->pdev->info.cls_eng3d >= MAXWELL_A || !nir_has_image_var(nir));

   NIR_PASS(_, nir, nvk_nir_lower_descriptors, rs, layout, NULL);
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
            nir_address_format_64bit_global);
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,


@@ -25,6 +25,26 @@ struct vk_shader_module;
#define TU102_SHADER_HEADER_SIZE (32 * 4)
#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE

enum PACKED nvk_cbuf_type {
   NVK_CBUF_TYPE_INVALID = 0,
   NVK_CBUF_TYPE_ROOT_DESC,
   NVK_CBUF_TYPE_DESC_SET,
   NVK_CBUF_TYPE_DYNAMIC_UBO,
   NVK_CBUF_TYPE_UBO_DESC,
};

struct nvk_cbuf {
   enum nvk_cbuf_type type;
   uint8_t desc_set;
   uint8_t dynamic_idx;
   uint32_t desc_offset;
};

struct nvk_cbuf_map {
   uint32_t cbuf_count;
   struct nvk_cbuf cbufs[16];
};

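/* Illustrative note, not part of the original commit: each entry describes
 * where a bound cbuf's contents come from, e.g. NVK_CBUF_TYPE_UBO_DESC
 * points at the UBO descriptor stored at desc_offset within descriptor set
 * desc_set, while NVK_CBUF_TYPE_DYNAMIC_UBO selects one of the pipeline
 * layout's dynamic buffer bindings by dynamic_idx.
 */
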
struct nvk_shader {
   struct nak_shader_info info;
@@ -73,7 +93,8 @@ nvk_physical_device_spirv_options(const struct nvk_physical_device *pdev,
bool
nvk_nir_lower_descriptors(nir_shader *nir,
                          const struct vk_pipeline_robustness_state *rs,
                          const struct vk_pipeline_layout *layout,
                          struct nvk_cbuf_map *cbuf_map_out);

VkResult
nvk_shader_stage_to_nir(struct nvk_device *dev,