intel/clc: add ability to output NIR

This will be used to generate serialized NIR for the functions of
internal shaders.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26797>
This commit is contained in:
Lionel Landwerlin 2023-11-06 12:24:32 +02:00 committed by Marge Bot
parent 2bae1b6b66
commit 4fd7495c69
3 changed files with 339 additions and 46 deletions

View File

@ -452,3 +452,200 @@ brw_kernel_from_spirv(struct brw_compiler *compiler,
return kernel->code != NULL;
}
/**
 * Translate a SPIR-V module into a lowered NIR shader.
 *
 * The module is parsed in the OpenCL environment with create_library set,
 * using the fixed name "library".  After parsing, variable initializers are
 * lowered, functions are inlined, several optimization loops are run, and
 * variables and explicit I/O are lowered to the address formats chosen
 * below.
 *
 * \param mem_ctx     ralloc context the returned shader is re-parented to.
 * \param spirv       SPIR-V words.
 * \param spirv_size  Size of \p spirv in bytes; asserted to be a multiple
 *                    of 4.
 *
 * \return the lowered shader, or NULL if spirv_to_nir() produced no shader.
 */
nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
{
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .groups = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .kernel = true,
         .linkage = true, /* We receive linked kernel from clc */
         .float_controls = true,
         .generic_pointers = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

   /* spirv_to_nir() takes a word count, not a byte count. */
   assert(spirv_size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, &brw_scalar_nir_options);

   /* Callers test the result for NULL, so bail out here instead of
    * dereferencing a missing shader below.
    */
   if (nir == NULL)
      return NULL;

   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, "library");

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, implement_intel_builtins);
   /* NOTE(review): .clc_shader is not set in the initializer above, so a
    * zero-initialized pointer is passed here -- confirm this is intended.
    */
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
                                                      nir_var_function_temp));
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);

   /* First optimization loop, run before inlining until no pass reports
    * progress.
    */
   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);

   assert(nir->scratch_size == 0);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);

   /* Second optimization loop, run after inlining; it adds copy lowering,
    * control-flow cleanups and vec3-to-vec4 lowering to the first loop's
    * passes.
    */
   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
         NIR_PASS(progress, nir, nir_opt_memcpy);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_scale_fdiv);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);

   /* Reset the scratch size before function_temp variables are laid out
    * again below.
    */
   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   /* Lower memcpy - needs to wait until types are sized */
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_opt_memcpy);
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
      } while (progress);
   }
   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   /* Allocate the constant data buffer on the shader and fill it from the
    * initializers of nir_var_mem_constant variables.
    */
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   return nir;
}

View File

@ -67,6 +67,9 @@ brw_kernel_from_spirv(struct brw_compiler *compiler,
const char *entrypoint_name,
char **error_str);
/**
 * Build a NIR shader from a SPIR-V module.
 *
 * \param mem_ctx     ralloc context; the resulting shader is re-parented to
 *                    it with ralloc_steal().
 * \param spirv       SPIR-V words.
 * \param spirv_size  Size of \p spirv in bytes; the implementation asserts
 *                    it is a multiple of 4.
 */
nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size);
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@ -26,6 +26,7 @@
#include "common/intel_disasm.h"
#include "compiler/clc/clc.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_serialize.h"
#include "dev/intel_debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
@ -114,6 +115,20 @@ print_u32_data(FILE *fp, const char *prefix, const char *arr_name,
fprintf(fp, "\n};\n");
}
/**
 * Print a byte buffer as a C array definition.
 *
 * Emits "static const uint8_t <prefix>_<arr_name>[] = { ... };" to \p fp,
 * with each byte as a two-digit hex literal and a line break before every
 * 16th byte.
 *
 * \param fp        Stream to write to.
 * \param prefix    First part of the generated array name.
 * \param arr_name  Second part of the generated array name.
 * \param data      Bytes to print; not read when \p len is 0.
 * \param len       Number of bytes in \p data.
 */
static void
print_u8_data(FILE *fp, const char *prefix, const char *arr_name,
              const uint8_t *data, size_t len)
{
   fprintf(fp, "static const uint8_t %s_%s[] = {", prefix, arr_name);
   /* Index with size_t so it has the same range as len. */
   for (size_t i = 0; i < len; i++) {
      if (i % 16 == 0)
         fprintf(fp,"\n ");

      fprintf(fp, " 0x%02" PRIx8 ",", data[i]);
   }
   fprintf(fp, "\n};\n");
}
static const char *
reloc_type_str(enum brw_shader_reloc_type type)
{
@ -268,6 +283,7 @@ print_usage(char *exec_name, FILE *f)
" -o, --out <filename> Specify the output filename.\n"
" -i, --in <filename> Specify one input filename. Accepted multiple times.\n"
" -s, --spv <filename> Specify the output filename for spirv.\n"
" -n, --nir Specify whether to output serialized NIR instead of ISA.\n"
" -v, --verbose Print more information during compilation.\n"
, exec_name);
}
@ -281,6 +297,7 @@ struct intel_clc_params {
char *spv_outfile;
char *prefix;
bool output_nir;
bool print_info;
void *mem_ctx;
@ -288,6 +305,74 @@ struct intel_clc_params {
struct intel_device_info devinfo;
};
#include "compiler/spirv/nir_spirv.h"
/**
 * Write the NIR form of a SPIR-V binary as generated C source.
 *
 * Output goes to params->outfile, or to stdout when no output file is set.
 * Two things are written, in order:
 *  - whatever spirv_library_to_nir_builder() emits for the binary
 *    (presumably nir_builder helper code -- TODO confirm);
 *  - the shader returned by brw_nir_from_spirv(), serialized with
 *    nir_serialize() and printed by print_u8_data() as the byte array
 *    "<prefix>_nir".
 *
 * \param params  Tool parameters; outfile, prefix and mem_ctx are used.
 * \param binary  SPIR-V binary; size is in bytes.
 *
 * \return 0 on success, -1 on failure.
 */
static int
output_nir(const struct intel_clc_params *params, struct clc_binary *binary)
{
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .groups = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .kernel = true,
         .linkage = true, /* We receive linked kernel from clc */
         .float_controls = true,
         .generic_pointers = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

   FILE *fp = params->outfile != NULL ?
      fopen(params->outfile, "w") : stdout;
   if (!fp) {
      fprintf(stderr, "Failed to open %s\n", params->outfile);
      return -1;
   }

   int ret = -1;

   spirv_library_to_nir_builder(fp, binary->data, binary->size / 4,
                                &spirv_options);

   nir_shader *nir = brw_nir_from_spirv(params->mem_ctx,
                                        binary->data, binary->size);
   if (!nir) {
      fprintf(stderr, "Failed to generate NIR out of SPIRV\n");
      goto out;
   }

   struct blob blob;
   blob_init(&blob);
   nir_serialize(&blob, nir, false /* strip */);
   print_u8_data(fp, params->prefix, "nir", blob.data, blob.size);
   blob_finish(&blob);

   ret = 0;

out:
   /* Only close streams we opened ourselves; stdout is left alone.  A
    * failing fclose() on a write stream means buffered output was lost.
    */
   if (params->outfile) {
      if (fclose(fp) != 0) {
         fprintf(stderr, "Failed to write %s\n", params->outfile);
         ret = -1;
      }
   }

   return ret;
}
static int
output_isa(const struct intel_clc_params *params, struct clc_binary *binary)
{
@ -362,6 +447,7 @@ int main(int argc, char **argv)
{"in", required_argument, 0, 'i'},
{"out", required_argument, 0, 'o'},
{"spv", required_argument, 0, 's'},
{"nir", no_argument, 0, 'n'},
{"verbose", no_argument, 0, 'v'},
{0, 0, 0, 0}
};
@ -381,7 +467,7 @@ int main(int argc, char **argv)
util_dynarray_init(&input_files, params.mem_ctx);
int ch;
while ((ch = getopt_long(argc, argv, "he:p:s:i:o:v", long_options, NULL)) != -1)
while ((ch = getopt_long(argc, argv, "he:p:s:i:no:v", long_options, NULL)) != -1)
{
switch (ch)
{
@ -399,6 +485,9 @@ int main(int argc, char **argv)
break;
case 'i':
util_dynarray_append(&input_files, char *, optarg);
break;
case 'n':
params.output_nir = true;
break;
case 's':
params.spv_outfile = optarg;
@ -426,34 +515,6 @@ int main(int argc, char **argv)
goto fail;
}
if (params.platform == NULL) {
fprintf(stderr, "No target platform name specified.\n");
print_usage(argv[0], stderr);
goto fail;
}
int pci_id = intel_device_name_to_pci_device_id(params.platform);
if (pci_id < 0) {
fprintf(stderr, "Invalid target platform name: %s\n", params.platform);
goto fail;
}
if (!intel_get_device_info_from_pci_id(pci_id, &params.devinfo)) {
fprintf(stderr, "Failed to get device information.\n");
goto fail;
}
if (params.devinfo.verx10 < 125) {
fprintf(stderr, "Platform currently not supported.\n");
goto fail;
}
if (params.entry_point == NULL) {
fprintf(stderr, "No entry-point name specified.\n");
print_usage(argv[0], stderr);
goto fail;
}
struct clc_logger logger = {
.error = msg_callback,
.warning = msg_callback,
@ -516,25 +577,57 @@ int main(int argc, char **argv)
fclose(fp);
}
if (!clc_parse_spirv(&spirv_obj, &logger, &parsed_spirv_data)) {
goto fail;
}
const struct clc_kernel_info *kernel_info = NULL;
for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) {
if (strcmp(parsed_spirv_data.kernels[i].name, params.entry_point) == 0) {
kernel_info = &parsed_spirv_data.kernels[i];
break;
}
}
if (kernel_info == NULL) {
fprintf(stderr, "Kernel entrypoint %s not found\n", params.entry_point);
goto fail;
}
glsl_type_singleton_init_or_ref();
exit_code = output_isa(&params, &spirv_obj);
if (params.output_nir) {
exit_code = output_nir(&params, &spirv_obj);
} else {
if (params.platform == NULL) {
fprintf(stderr, "No target platform name specified.\n");
print_usage(argv[0], stderr);
goto fail;
}
int pci_id = intel_device_name_to_pci_device_id(params.platform);
if (pci_id < 0) {
fprintf(stderr, "Invalid target platform name: %s\n", params.platform);
goto fail;
}
if (!intel_get_device_info_from_pci_id(pci_id, &params.devinfo)) {
fprintf(stderr, "Failed to get device information.\n");
goto fail;
}
if (params.devinfo.verx10 < 125) {
fprintf(stderr, "Platform currently not supported.\n");
goto fail;
}
if (params.entry_point == NULL) {
fprintf(stderr, "No entry-point name specified.\n");
print_usage(argv[0], stderr);
goto fail;
}
struct clc_parsed_spirv parsed_spirv_data;
if (!clc_parse_spirv(&spirv_obj, &logger, &parsed_spirv_data))
goto fail;
const struct clc_kernel_info *kernel_info = NULL;
for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) {
if (strcmp(parsed_spirv_data.kernels[i].name, params.entry_point) == 0) {
kernel_info = &parsed_spirv_data.kernels[i];
break;
}
}
if (kernel_info == NULL) {
fprintf(stderr, "Kernel entrypoint %s not found\n", params.entry_point);
goto fail;
}
exit_code = output_isa(&params, &spirv_obj);
}
glsl_type_singleton_decref();