radv: Implement NIR caching behind RADV_PERFTEST=nircache.

NIR caching is useful for two use cases:
- Shader permutations involving reused VS or FS.
- GPL-like engines that compile a separate (library) variant and an
  optimized (monolithic) variant, e.g. DXVK.

By caching the result of radv_shader_spirv_to_nir, permutations hitting
the cache can have their compilation time reduced by 50% or more.

There are still open questions about the memory and storage footprint of
NIR caches, which is why this is gated behind a perftest flag. In
particular, Steam doesn't want to ship NIR caches since they are
unnecessary in the presence of a full precompiled shader cache. In this
commit, the cache entries do not reside in memory and are immediately
written to disk. Further design around how the caches are stored and
how to coordinate the cache type with Steam etc. is left as future work.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26696>
This commit is contained in:
Tatsuyuki Ishi 2023-12-15 01:26:46 +09:00 committed by Marge Bot
parent 2ed5f2cace
commit 991ae339f5
6 changed files with 64 additions and 1 deletions

View File

@ -1347,6 +1347,8 @@ RADV driver environment variables
enable local BOs
``nggc``
enable NGG culling on GPUs where it's not enabled by default (GFX10.1 only).
``nircache``
cache per-stage NIR for graphics pipelines
``nosam``
disable optimizations that get enabled when all VRAM is CPU visible.
``pswave32``

View File

@ -91,6 +91,7 @@ enum {
RADV_PERFTEST_GS_FAST_LAUNCH_2 = 1u << 13,
RADV_PERFTEST_TRANSFER_QUEUE = 1u << 14,
RADV_PERFTEST_SHADER_OBJECT = 1u << 15,
RADV_PERFTEST_NIR_CACHE = 1u << 16,
};
bool radv_init_trace(struct radv_device *device);

View File

@ -102,6 +102,7 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P
{"gsfastlaunch2", RADV_PERFTEST_GS_FAST_LAUNCH_2},
{"transfer_queue", RADV_PERFTEST_TRANSFER_QUEUE},
{"shader_object", RADV_PERFTEST_SHADER_OBJECT},
{"nircache", RADV_PERFTEST_NIR_CACHE},
{NULL, 0}};
const char *

View File

@ -73,6 +73,18 @@ radv_hash_shaders(const struct radv_device *device, unsigned char *hash, const s
_mesa_sha1_final(&ctx, hash);
}
/**
 * Compute the NIR cache key for one graphics shader stage.
 *
 * The key is a BLAKE3 digest over, in this order: the stage key, the
 * spirv_to_nir options, and the stage's shader SHA-1. The digest is written
 * to \p hash.
 *
 * NOTE(review): the stage key and the options are hashed as raw bytes, so
 * this presumably relies on both being fully initialized (padding included)
 * by the caller -- confirm.
 */
void
radv_hash_graphics_spirv_to_nir(blake3_hash hash, const struct radv_shader_stage *stage,
                                const struct radv_spirv_to_nir_options *options)
{
   struct mesa_blake3 blake3_ctx;

   _mesa_blake3_init(&blake3_ctx);

   /* The feed order is part of the key; do not reorder. */
   _mesa_blake3_update(&blake3_ctx, &stage->key, sizeof stage->key);
   _mesa_blake3_update(&blake3_ctx, options, sizeof *options);
   _mesa_blake3_update(&blake3_ctx, stage->shader_sha1, sizeof stage->shader_sha1);

   _mesa_blake3_final(&blake3_ctx, hash);
}
void
radv_hash_rt_shaders(const struct radv_device *device, unsigned char *hash, const struct radv_ray_tracing_stage *stages,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_ray_tracing_group *groups)
@ -516,6 +528,33 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip
pipeline->base.base.cache_object = vk_pipeline_cache_add_object(cache, &pipeline_obj->base);
}
/**
 * Look up a NIR shader in the pipeline cache.
 *
 * \param device  Device whose cache settings and NIR options are used.
 * \param cache   Cache to search; when NULL, device->mem_cache is used.
 * \param stage   Shader stage, used to select the per-stage NIR options.
 * \param key     BLAKE3 key identifying the entry.
 *
 * \return The result of vk_pipeline_cache_lookup_nir(), or NULL when caching
 *         is disabled for this device.
 */
nir_shader *
radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, gl_shader_stage stage,
                               const blake3_hash key)
{
   if (radv_is_cache_disabled(device))
      return NULL;

   /* Fall back to the device's cache when the caller did not supply one. */
   struct vk_pipeline_cache *target = cache ? cache : device->mem_cache;

   /* `key` is an array parameter (decays to a pointer), so the size must come
    * from the type rather than from the variable.
    */
   return vk_pipeline_cache_lookup_nir(target, key, sizeof(blake3_hash),
                                       &device->physical_device->nir_options[stage], NULL, NULL);
}
/**
 * Add a NIR shader to the pipeline cache under the given key.
 *
 * \param device  Device whose cache settings are consulted.
 * \param cache   Cache to insert into; when NULL, device->mem_cache is used.
 * \param key     BLAKE3 key identifying the entry.
 * \param nir     Shader to store.
 *
 * Does nothing when caching is disabled for this device.
 */
void
radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key,
                               const nir_shader *nir)
{
   if (radv_is_cache_disabled(device))
      return;

   /* Fall back to the device's cache when the caller did not supply one. */
   struct vk_pipeline_cache *target = cache ? cache : device->mem_cache;

   vk_pipeline_cache_add_nir(target, key, sizeof(blake3_hash), nir);
}
struct vk_pipeline_cache_object *
radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, struct vk_pipeline_cache *cache, const uint8_t *sha1)
{

View File

@ -2460,6 +2460,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
struct radv_shader **shaders, struct radv_shader_binary **binaries,
struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary)
{
const bool nir_cache = device->instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
if (!stages[s].entrypoint)
continue;
@ -2473,7 +2474,17 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
.fix_dual_src_mrt1_export =
gfx_state->ps.epilog.mrt0_is_dual_src && device->instance->drirc.dual_color_blend_by_location,
};
stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], &options, is_internal);
blake3_hash key;
if (nir_cache) {
radv_hash_graphics_spirv_to_nir(key, &stages[s], &options);
stages[s].nir = radv_pipeline_cache_lookup_nir(device, cache, s, key);
}
if (!stages[s].nir) {
stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], &options, is_internal);
if (nir_cache)
radv_pipeline_cache_insert_nir(device, cache, key, stages[s].nir);
}
}
stages[s].feedback.duration += os_time_get_nano() - stage_start;

View File

@ -417,6 +417,12 @@ void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct v
struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
const unsigned char *sha1);
/* Look up a NIR shader by BLAKE3 key. Returns NULL when caching is disabled
 * for the device; a NULL cache selects the device's mem_cache. The stage
 * selects which per-stage NIR options are passed to the lookup.
 */
nir_shader *radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
gl_shader_stage stage, const blake3_hash key);
/* Add a NIR shader under the given BLAKE3 key. Does nothing when caching is
 * disabled for the device; a NULL cache selects the device's mem_cache.
 */
void radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key,
const nir_shader *nir);
struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(struct radv_device *device,
struct vk_pipeline_cache *cache,
const unsigned char *sha1);
@ -2053,6 +2059,9 @@ struct radv_ray_tracing_group;
void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, const struct radv_pipeline_layout *layout,
const struct radv_shader_stage_key *stage_key, struct radv_shader_stage *out_stage);
/* Write to `hash` a BLAKE3 digest of the stage key, the spirv_to_nir options
 * and the stage's shader SHA-1; used as the key for NIR cache entries.
 */
void radv_hash_graphics_spirv_to_nir(blake3_hash hash, const struct radv_shader_stage *stage,
const struct radv_spirv_to_nir_options *options);
void radv_hash_shaders(const struct radv_device *device, unsigned char *hash, const struct radv_shader_stage *stages,
uint32_t stage_count, const struct radv_pipeline_layout *layout,
const struct radv_graphics_state_key *gfx_state);