diff --git a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
index 52e88ef49c2..9ed3d4d9a1d 100644
--- a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
+++ b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
@@ -13,23 +13,82 @@
 #include "nir_builder.h"
 #include "nir_deref.h"
 
+struct lower_desc_cbuf {
+   struct nvk_cbuf key;
+
+   uint32_t use_count;
+
+   uint64_t start;
+   uint64_t end;
+};
+
+static uint32_t
+hash_cbuf(const void *data)
+{
+   return _mesa_hash_data(data, sizeof(struct nvk_cbuf));
+}
+
+static bool
+cbufs_equal(const void *a, const void *b)
+{
+   return memcmp(a, b, sizeof(struct nvk_cbuf)) == 0;
+}
+
+static int
+compar_cbufs(const void *_a, const void *_b)
+{
+   const struct lower_desc_cbuf *a = _a;
+   const struct lower_desc_cbuf *b = _b;
+
+#define COMPAR(field, pos) \
+   if (a->field < b->field) return -(pos); \
+   if (a->field > b->field) return (pos);
+
+   /* Sort by most used first */
+   COMPAR(use_count, -1)
+
+   /* Keep the list stable by then sorting by key fields. */
+   COMPAR(key.type, 1)
+   COMPAR(key.desc_set, 1)
+   COMPAR(key.dynamic_idx, 1)
+   COMPAR(key.desc_offset, 1)
+
+#undef COMPAR
+
+   return 0;
+}
+
 struct lower_descriptors_ctx {
    const struct vk_pipeline_layout *layout;
    bool clamp_desc_array_bounds;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
+
+   struct hash_table *cbufs;
+   struct nvk_cbuf_map *cbuf_map;
 };
 
-static nir_def *
-load_descriptor_set_addr(nir_builder *b, uint32_t set,
-                         UNUSED const struct lower_descriptors_ctx *ctx)
+static void
+record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
+                struct lower_descriptors_ctx *ctx)
 {
-   uint32_t set_addr_offset =
-      nvk_root_descriptor_offset(sets) + set * sizeof(uint64_t);
-
-   return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
-                       nir_imm_int(b, set_addr_offset),
-                       .align_mul = 8, .align_offset = 0, .range = ~0);
+   struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
+   if (entry != NULL) {
+      struct lower_desc_cbuf *cbuf = entry->data;
+      cbuf->use_count++;
+      cbuf->start = MIN2(cbuf->start, start);
+      cbuf->end = MAX2(cbuf->end, end);
+   } else {
+      struct lower_desc_cbuf *cbuf =
+         ralloc(ctx->cbufs, struct lower_desc_cbuf);
+      *cbuf = (struct lower_desc_cbuf) {
+         .key = *key,
+         .use_count = 1,
+         .start = start,
+         .end = end,
+      };
+      _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
+   }
 }
 
 static const struct nvk_descriptor_set_binding_layout *
@@ -46,6 +105,366 @@ get_binding_layout(uint32_t set, uint32_t binding,
    return &set_layout->binding[binding];
 }
 
+static void
+record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
+                           struct lower_descriptors_ctx *ctx)
+{
+   const struct nvk_descriptor_set_binding_layout *binding_layout =
+      get_binding_layout(set, binding, ctx);
+
+   const struct nvk_cbuf key = {
+      .type = NVK_CBUF_TYPE_DESC_SET,
+      .desc_set = set,
+   };
+
+   uint64_t start, end;
+   if (index == NULL) {
+      /* When we don't have an index, assume 0 */
+      start = binding_layout->offset;
+      end = start + binding_layout->stride;
+   } else if (nir_src_is_const(*index)) {
+      start = binding_layout->offset +
+              nir_src_as_uint(*index) * binding_layout->stride;
+      end = start + binding_layout->stride;
+   } else {
+      start = binding_layout->offset;
+      end = start + binding_layout->array_size * binding_layout->stride;
+   }
+
+   record_cbuf_use(&key, start, end, ctx);
+}
+
+static void
+record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
+                                struct lower_descriptors_ctx *ctx)
+{
+   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+
+   /* These we'll handle later */
+   if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+       desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
+      return;
+
+   record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
+                              nir_intrinsic_binding(intrin),
+                              &intrin->src[0], ctx);
+}
+
+static void
+record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
+                                 struct lower_descriptors_ctx *ctx)
+{
+   nir_src *index_src = NULL;
+   if (deref->deref_type == nir_deref_type_array) {
+      index_src = &deref->arr.index;
+      deref = nir_deref_instr_parent(deref);
+   }
+
+   assert(deref->deref_type == nir_deref_type_var);
+   nir_variable *var = deref->var;
+
+   record_descriptor_cbuf_use(var->data.descriptor_set,
+                              var->data.binding,
+                              index_src, ctx);
+}
+
+static void
+record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
+                               struct lower_descriptors_ctx *ctx)
+{
+   const int texture_src_idx =
+      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+   const int sampler_src_idx =
+      nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+
+   if (texture_src_idx >= 0) {
+      nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
+      record_deref_descriptor_cbuf_use(deref, ctx);
+   }
+
+   if (sampler_src_idx >= 0) {
+      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
+      record_deref_descriptor_cbuf_use(deref, ctx);
+   }
+}
+
+static struct nvk_cbuf
+ubo_deref_to_cbuf(nir_deref_instr *deref,
+                  nir_intrinsic_instr **resource_index_out,
+                  uint64_t *offset_out,
+                  uint64_t *start_out, uint64_t *end_out,
+                  const struct lower_descriptors_ctx *ctx)
+{
+   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
+
+   /* In case we early return */
+   *offset_out = 0;
+   *start_out = 0;
+   *end_out = UINT64_MAX;
+   *resource_index_out = NULL;
+
+   const struct nvk_cbuf invalid = {
+      .type = NVK_CBUF_TYPE_INVALID,
+   };
+
+   uint64_t offset = 0;
+   uint64_t range = glsl_get_explicit_size(deref->type, false);
+   bool offset_valid = true;
+   while (true) {
+      nir_deref_instr *parent = nir_deref_instr_parent(deref);
+      if (parent == NULL) {
+         assert(deref->deref_type == nir_deref_type_cast);
+         break;
+      }
+
+      switch (deref->deref_type) {
+      case nir_deref_type_var:
+         unreachable("Buffers don't use variables in Vulkan");
+
+      case nir_deref_type_array:
+      case nir_deref_type_array_wildcard: {
+         uint32_t stride = nir_deref_instr_array_stride(deref);
+         if (range > stride)
+            offset_valid = false;
+
+         if (deref->deref_type == nir_deref_type_array &&
+             nir_src_is_const(deref->arr.index)) {
+            offset += nir_src_as_uint(deref->arr.index) * stride;
+         } else {
+            range = glsl_get_length(parent->type) * stride;
+         }
+         break;
+      }
+
+      case nir_deref_type_ptr_as_array:
+         /* All bets are off. We shouldn't see these most of the time
+          * anyway, even with variable pointers.
+          */
+         offset_valid = false;
+         break;
+
+      case nir_deref_type_struct:
+         offset += glsl_get_struct_field_offset(parent->type,
+                                                deref->strct.index);
+         break;
+
+      case nir_deref_type_cast:
+         /* nir_explicit_io_address_from_deref() can't handle casts */
+         offset_valid = false;
+         break;
+
+      default:
+         unreachable("Unknown deref type");
+      }
+
+      deref = parent;
+   }
+
+   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
+   if (load_desc == NULL ||
+       load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
+      return invalid;
+
+   nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
+   if (res_index == NULL ||
+       res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
+      return invalid;
+
+   /* We try to early return as little as possible prior to this point so we
+    * can return the resource index intrinsic in as many cases as possible.
+    * After this point, though, early returns are fair game.
+    */
+   *resource_index_out = res_index;
+
+   if (!offset_valid || !nir_src_is_const(res_index->src[0]))
+      return invalid;
+
+   uint32_t set = nir_intrinsic_desc_set(res_index);
+   uint32_t binding = nir_intrinsic_binding(res_index);
+   uint32_t index = nir_src_as_uint(res_index->src[0]);
+
+   const struct nvk_descriptor_set_binding_layout *binding_layout =
+      get_binding_layout(set, binding, ctx);
+
+   switch (binding_layout->type) {
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
+      *offset_out = 0;
+      *start_out = offset;
+      *end_out = offset + range;
+      return (struct nvk_cbuf) {
+         .type = NVK_CBUF_TYPE_UBO_DESC,
+         .desc_set = set,
+         .desc_offset = binding_layout->offset +
+                        index * binding_layout->stride,
+      };
+   }
+
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
+      uint8_t dynamic_buffer_index =
+         nvk_descriptor_set_layout_dynbuf_start(ctx->layout, set) +
+         binding_layout->dynamic_buffer_index + index;
+
+      *offset_out = 0;
+      *start_out = offset;
+      *end_out = offset + range;
+
+      return (struct nvk_cbuf) {
+         .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
+         .dynamic_idx = dynamic_buffer_index,
+      };
+   }
+
+   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
+      *offset_out = binding_layout->offset;
+      *start_out = binding_layout->offset + offset;
+      *end_out = *start_out + range;
+
+      return (struct nvk_cbuf) {
+         .type = NVK_CBUF_TYPE_DESC_SET,
+         .desc_set = set,
+      };
+   }
+
+   default:
+      return invalid;
+   }
+}
+
+static void
+record_load_ubo_cbuf_uses(nir_deref_instr *deref,
+                          struct lower_descriptors_ctx *ctx)
+{
+   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
+
+   UNUSED uint64_t offset;
+   uint64_t start, end;
+   nir_intrinsic_instr *res_index;
+   struct nvk_cbuf cbuf =
+      ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
+
+   if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
+      record_cbuf_use(&cbuf, start, end, ctx);
+   } else if (res_index != NULL) {
+      record_vulkan_resource_cbuf_use(res_index, ctx);
+   }
+}
+
+static bool
+record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
+{
+   struct lower_descriptors_ctx *ctx = _ctx;
+
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_vulkan_resource_index:
+         record_vulkan_resource_cbuf_use(intrin, ctx);
+         return false;
+
+      case nir_intrinsic_load_deref: {
+         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+         if (nir_deref_mode_is(deref, nir_var_mem_ubo))
+            record_load_ubo_cbuf_uses(deref, ctx);
+         return false;
+      }
+
+      case nir_intrinsic_image_deref_load:
+      case nir_intrinsic_image_deref_store:
+      case nir_intrinsic_image_deref_atomic:
+      case nir_intrinsic_image_deref_atomic_swap:
+      case nir_intrinsic_image_deref_size:
+      case nir_intrinsic_image_deref_samples:
+      case nir_intrinsic_image_deref_load_param_intel:
+      case nir_intrinsic_image_deref_load_raw_intel:
+      case nir_intrinsic_image_deref_store_raw_intel: {
+         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+         record_deref_descriptor_cbuf_use(deref, ctx);
+         return false;
+      }
+
+      default:
+         return false;
+      }
+      unreachable("All cases return false");
+   }
+
+   case nir_instr_type_tex:
+      record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
+      return false;
+
+   default:
+      return false;
+   }
+}
+
+static void
+build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
+{
+   ctx->cbufs = _mesa_hash_table_create(NULL, hash_cbuf, cbufs_equal);
+
+   nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
+                                nir_metadata_all, (void *)ctx);
+
+   struct lower_desc_cbuf *cbufs =
+      ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
+                   _mesa_hash_table_num_entries(ctx->cbufs));
+
+   uint32_t num_cbufs = 0;
+   hash_table_foreach(ctx->cbufs, entry) {
+      struct lower_desc_cbuf *cbuf = entry->data;
+
+      /* We currently only start cbufs at the beginning so if it starts after
+       * the max cbuf size, there's no point in including it in the list.
+       */
+      if (cbuf->start > NVK_MAX_CBUF_SIZE)
+         continue;
+
+      cbufs[num_cbufs++] = *cbuf;
+   }
+
+   qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);
+
+   uint32_t mapped_cbuf_count = 0;
+
+   /* Root descriptors always go in cbuf 0 */
+   ctx->cbuf_map->cbufs[mapped_cbuf_count++] = (struct nvk_cbuf) {
+      .type = NVK_CBUF_TYPE_ROOT_DESC,
+   };
+
+   uint8_t max_cbuf_bindings;
+   if (nir->info.stage == MESA_SHADER_COMPUTE ||
+       nir->info.stage == MESA_SHADER_KERNEL) {
+      max_cbuf_bindings = 8;
+   } else {
+      max_cbuf_bindings = 16;
+   }
+
+   for (uint32_t i = 0; i < num_cbufs; i++) {
+      if (mapped_cbuf_count >= max_cbuf_bindings)
+         break;
+
+      ctx->cbuf_map->cbufs[mapped_cbuf_count++] = cbufs[i].key;
+   }
+   ctx->cbuf_map->cbuf_count = mapped_cbuf_count;
+
+   ralloc_free(ctx->cbufs);
+   ctx->cbufs = NULL;
+}
+
+static nir_def *
+load_descriptor_set_addr(nir_builder *b, uint32_t set,
+                         UNUSED const struct lower_descriptors_ctx *ctx)
+{
+   uint32_t set_addr_offset =
+      nvk_root_descriptor_offset(sets) + set * sizeof(uint64_t);
+
+   return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
+                       nir_imm_int(b, set_addr_offset),
+                       .align_mul = 8, .align_offset = 0, .range = ~0);
+}
+
 static nir_def *
 load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
                 uint32_t set, uint32_t binding, nir_def *index,
@@ -651,7 +1070,8 @@ lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
 bool
 nvk_nir_lower_descriptors(nir_shader *nir,
                           const struct vk_pipeline_robustness_state *rs,
-                          const struct vk_pipeline_layout *layout)
+                          const struct vk_pipeline_layout *layout,
+                          struct nvk_cbuf_map *cbuf_map_out)
 {
    struct lower_descriptors_ctx ctx = {
       .layout = layout,
@@ -663,12 +1083,23 @@ nvk_nir_lower_descriptors(nir_shader *nir,
       .ubo_addr_format = nvk_buffer_addr_format(rs->uniform_buffers),
    };
 
-   /* We run in two passes. The first attempts to lower everything it can.
-    * In the variable pointers case, some SSBO intrinsics may fail to lower
-    * but that's okay. The second pass cleans up any SSBO intrinsics which
-    * are left and lowers them to slightly less efficient but variable-
-    * pointers-correct versions.
+   /* We run in three passes:
+    *
+    *  1. Find ranges of UBOs that we can promote to bound UBOs. Nothing is
+    *     actually lowered in this pass. It's just analysis.
+    *
+    *  2. Attempt to lower everything with direct descriptors. This may fail
+    *     to lower some SSBO intrinsics when variable pointers are used.
+    *
+    *  3. Clean up any SSBO intrinsics which are left and lower them to
+    *     slightly less efficient but variable-pointers-correct versions.
     */
+
+   if (cbuf_map_out != NULL) {
+      ctx.cbuf_map = cbuf_map_out;
+      build_cbuf_map(nir, &ctx);
+   }
+
    bool pass_lower_descriptors =
       nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
                                    nir_metadata_block_index |
diff --git a/src/nouveau/vulkan/nvk_private.h b/src/nouveau/vulkan/nvk_private.h
index 06924f081ea..89c464b8eb5 100644
--- a/src/nouveau/vulkan/nvk_private.h
+++ b/src/nouveau/vulkan/nvk_private.h
@@ -30,6 +30,9 @@
 #define NVK_SPARSE_ADDR_SPACE_SIZE (1ull << 39)
 #define NVK_MAX_BUFFER_SIZE (1ull << 31)
 
+/* Max size of a bound cbuf */
+#define NVK_MAX_CBUF_SIZE (1u << 16)
+
 struct nvk_addr_range {
    uint64_t addr;
    uint64_t range;
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index 34245429432..ab6f98fbefd 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -348,7 +348,7 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
     */
    assert(dev->pdev->info.cls_eng3d >= MAXWELL_A || !nir_has_image_var(nir));
 
-   NIR_PASS(_, nir, nvk_nir_lower_descriptors, rs, layout);
+   NIR_PASS(_, nir, nvk_nir_lower_descriptors, rs, layout, NULL);
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
             nir_address_format_64bit_global);
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
diff --git a/src/nouveau/vulkan/nvk_shader.h b/src/nouveau/vulkan/nvk_shader.h
index 62e1879e161..a432a237336 100644
--- a/src/nouveau/vulkan/nvk_shader.h
+++ b/src/nouveau/vulkan/nvk_shader.h
@@ -25,6 +25,26 @@ struct vk_shader_module;
 #define TU102_SHADER_HEADER_SIZE (32 * 4)
 #define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE
 
+enum PACKED nvk_cbuf_type {
+   NVK_CBUF_TYPE_INVALID = 0,
+   NVK_CBUF_TYPE_ROOT_DESC,
+   NVK_CBUF_TYPE_DESC_SET,
+   NVK_CBUF_TYPE_DYNAMIC_UBO,
+   NVK_CBUF_TYPE_UBO_DESC,
+};
+
+struct nvk_cbuf {
+   enum nvk_cbuf_type type;
+   uint8_t desc_set;
+   uint8_t dynamic_idx;
+   uint32_t desc_offset;
+};
+
+struct nvk_cbuf_map {
+   uint32_t cbuf_count;
+   struct nvk_cbuf cbufs[16];
+};
+
 struct nvk_shader {
    struct nak_shader_info info;
 
@@ -73,7 +93,8 @@ nvk_physical_device_spirv_options(const struct nvk_physical_device *pdev,
 bool
 nvk_nir_lower_descriptors(nir_shader *nir,
                           const struct vk_pipeline_robustness_state *rs,
-                          const struct vk_pipeline_layout *layout);
+                          const struct vk_pipeline_layout *layout,
+                          struct nvk_cbuf_map *cbuf_map_out);
 
 VkResult
 nvk_shader_stage_to_nir(struct nvk_device *dev,
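
Illustrative sketch, not part of the patch above: one way a consumer could walk the nvk_cbuf_map that nvk_nir_lower_descriptors() now fills in. The dump_nvk_cbuf_map() helper is hypothetical and exists only to show what each nvk_cbuf field means per type; it relies solely on the declarations this patch adds to nvk_shader.h. Note that nvk_shader.c still passes NULL for cbuf_map_out, so nothing in this patch consumes the map yet.

   #include <stdio.h>
   #include "nvk_shader.h" /* struct nvk_cbuf_map, enum nvk_cbuf_type (added above) */

   /* Hypothetical debug helper: print the bound-cbuf layout chosen by
    * build_cbuf_map().  Slot 0 is always the root descriptor table; the
    * remaining slots are the most-used descriptor sets and UBOs, in the
    * order produced by compar_cbufs().
    */
   static void
   dump_nvk_cbuf_map(const struct nvk_cbuf_map *map)
   {
      for (uint32_t i = 0; i < map->cbuf_count; i++) {
         const struct nvk_cbuf *cbuf = &map->cbufs[i];
         switch (cbuf->type) {
         case NVK_CBUF_TYPE_ROOT_DESC:
            printf("cbuf %u: root descriptor table\n", i);
            break;
         case NVK_CBUF_TYPE_DESC_SET:
            printf("cbuf %u: descriptor set %u\n", i, (unsigned)cbuf->desc_set);
            break;
         case NVK_CBUF_TYPE_DYNAMIC_UBO:
            printf("cbuf %u: dynamic UBO index %u\n", i, (unsigned)cbuf->dynamic_idx);
            break;
         case NVK_CBUF_TYPE_UBO_DESC:
            printf("cbuf %u: UBO descriptor at set %u, offset %u\n",
                   i, (unsigned)cbuf->desc_set, cbuf->desc_offset);
            break;
         default:
            printf("cbuf %u: invalid\n", i);
            break;
         }
      }
   }

A caller that wants the map would declare a struct nvk_cbuf_map, pass its address as cbuf_map_out, and could then hand the result to whatever sets up cbuf bindings at draw or dispatch time.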