mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-23 18:24:13 +08:00
r600/sfn: rewrite NIR backend
This is a rewrite of the NIR backend. It adds some optimizations and a scheduler. v2: - replace some magic numbers by constants - make sure constructor is always used with new - use default initialization in more places (changes suggested by Filip Gawin) Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Filip Gawin <filip@gawin.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17076>
This commit is contained in:
parent
ab06b00c63
commit
79ca456b48
@ -107,6 +107,10 @@ files_r600 = files(
|
||||
'sb/sb_valtable.cpp',
|
||||
'sfn/sfn_alu_defines.cpp',
|
||||
'sfn/sfn_alu_defines.h',
|
||||
'sfn/sfn_alu_readport_validation.cpp',
|
||||
'sfn/sfn_alu_readport_validation.h',
|
||||
'sfn/sfn_assembler.cpp',
|
||||
'sfn/sfn_assembler.h',
|
||||
'sfn/sfn_callstack.cpp',
|
||||
'sfn/sfn_callstack.h',
|
||||
'sfn/sfn_conditionaljumptracker.cpp',
|
||||
@ -114,73 +118,66 @@ files_r600 = files(
|
||||
'sfn/sfn_defines.h',
|
||||
'sfn/sfn_debug.cpp',
|
||||
'sfn/sfn_debug.h',
|
||||
'sfn/sfn_emitaluinstruction.cpp',
|
||||
'sfn/sfn_emitaluinstruction.h',
|
||||
'sfn/sfn_emitinstruction.cpp',
|
||||
'sfn/sfn_emitinstruction.h',
|
||||
'sfn/sfn_emitssboinstruction.cpp',
|
||||
'sfn/sfn_emitssboinstruction.h',
|
||||
'sfn/sfn_emittexinstruction.cpp',
|
||||
'sfn/sfn_emittexinstruction.h',
|
||||
'sfn/sfn_emitinstruction.h',
|
||||
'sfn/sfn_instruction_alu.cpp',
|
||||
'sfn/sfn_instruction_alu.h',
|
||||
'sfn/sfn_instruction_base.cpp',
|
||||
'sfn/sfn_instruction_base.h',
|
||||
'sfn/sfn_instruction_block.cpp',
|
||||
'sfn/sfn_instruction_block.h',
|
||||
'sfn/sfn_instruction_cf.cpp',
|
||||
'sfn/sfn_instruction_cf.h',
|
||||
'sfn/sfn_instruction_export.cpp',
|
||||
'sfn/sfn_instruction_export.h',
|
||||
'sfn/sfn_instruction_fetch.cpp',
|
||||
'sfn/sfn_instruction_fetch.h',
|
||||
'sfn/sfn_instruction_gds.cpp',
|
||||
'sfn/sfn_instruction_gds.h',
|
||||
'sfn/sfn_instruction_lds.cpp',
|
||||
'sfn/sfn_instruction_lds.h',
|
||||
'sfn/sfn_instruction_misc.cpp',
|
||||
'sfn/sfn_instruction_misc.h',
|
||||
'sfn/sfn_instruction_tex.cpp',
|
||||
'sfn/sfn_instruction_tex.h',
|
||||
'sfn/sfn_ir_to_assembly.cpp',
|
||||
'sfn/sfn_ir_to_assembly.h',
|
||||
'sfn/sfn_liverange.cpp',
|
||||
'sfn/sfn_liverange.h',
|
||||
'sfn/sfn_instr.cpp',
|
||||
'sfn/sfn_instr.h',
|
||||
'sfn/sfn_instr_alu.cpp',
|
||||
'sfn/sfn_instr_alu.h',
|
||||
'sfn/sfn_instr_alugroup.cpp',
|
||||
'sfn/sfn_instr_alugroup.h',
|
||||
'sfn/sfn_instr_controlflow.cpp',
|
||||
'sfn/sfn_instr_controlflow.h',
|
||||
'sfn/sfn_instr_export.cpp',
|
||||
'sfn/sfn_instr_export.h',
|
||||
'sfn/sfn_instr_fetch.cpp',
|
||||
'sfn/sfn_instr_fetch.h',
|
||||
'sfn/sfn_instr_mem.cpp',
|
||||
'sfn/sfn_instr_mem.h',
|
||||
'sfn/sfn_instr_lds.cpp',
|
||||
'sfn/sfn_instr_lds.h',
|
||||
'sfn/sfn_instr_tex.cpp',
|
||||
'sfn/sfn_instr_tex.h',
|
||||
'sfn/sfn_instrfactory.cpp',
|
||||
'sfn/sfn_instrfactory.h',
|
||||
'sfn/sfn_liverangeevaluator.cpp',
|
||||
'sfn/sfn_liverangeevaluator.h',
|
||||
'sfn/sfn_liverangeevaluator_helpers.cpp',
|
||||
'sfn/sfn_liverangeevaluator_helpers.h',
|
||||
'sfn/sfn_memorypool.cpp',
|
||||
'sfn/sfn_memorypool.h',
|
||||
'sfn/sfn_nir.cpp',
|
||||
'sfn/sfn_nir.h',
|
||||
'sfn/sfn_nir_legalize_image_load_store.cpp',
|
||||
'sfn/sfn_nir_lower_64bit.cpp',
|
||||
'sfn/sfn_nir_lower_alu.cpp',
|
||||
'sfn/sfn_nir_lower_alu.h',
|
||||
'sfn/sfn_nir_lower_tex.cpp',
|
||||
'sfn/sfn_nir_lower_tex.h',
|
||||
'sfn/sfn_nir_lower_fs_out_to_vector.cpp',
|
||||
'sfn/sfn_nir_lower_fs_out_to_vector.h',
|
||||
'sfn/sfn_nir_lower_tess_io.cpp',
|
||||
'sfn/sfn_nir_vectorize_vs_inputs.c',
|
||||
'sfn/sfn_shader_base.cpp',
|
||||
'sfn/sfn_shader_base.h',
|
||||
'sfn/sfn_shader_compute.cpp',
|
||||
'sfn/sfn_shader_compute.h',
|
||||
'sfn/sfn_shader_fragment.cpp',
|
||||
'sfn/sfn_shader_fragment.h',
|
||||
'sfn/sfn_shader_geometry.cpp',
|
||||
'sfn/sfn_shader_geometry.h',
|
||||
'sfn/sfn_shader_tcs.cpp',
|
||||
'sfn/sfn_shader_tcs.h',
|
||||
'sfn/sfn_shader_tess_eval.cpp',
|
||||
'sfn/sfn_shader_tess_eval.h',
|
||||
'sfn/sfn_shader_vertex.cpp',
|
||||
'sfn/sfn_shader_vertex.h',
|
||||
'sfn/sfn_shaderio.cpp',
|
||||
'sfn/sfn_shaderio.h',
|
||||
'sfn/sfn_value.cpp',
|
||||
'sfn/sfn_value.h',
|
||||
'sfn/sfn_value_gpr.cpp',
|
||||
'sfn/sfn_value_gpr.h',
|
||||
'sfn/sfn_valuepool.cpp',
|
||||
'sfn/sfn_valuepool.h',
|
||||
'sfn/sfn_vertexstageexport.cpp',
|
||||
'sfn/sfn_vertexstageexport.h',
|
||||
'sfn/sfn_optimizer.cpp',
|
||||
'sfn/sfn_peephole.cpp',
|
||||
'sfn/sfn_ra.cpp',
|
||||
'sfn/sfn_ra.h',
|
||||
'sfn/sfn_scheduler.cpp',
|
||||
'sfn/sfn_scheduler.h',
|
||||
'sfn/sfn_shader.cpp',
|
||||
'sfn/sfn_shader.h',
|
||||
'sfn/sfn_shader_cs.cpp',
|
||||
'sfn/sfn_shader_cs.h',
|
||||
'sfn/sfn_shader_fs.cpp',
|
||||
'sfn/sfn_shader_fs.h',
|
||||
'sfn/sfn_shader_gs.cpp',
|
||||
'sfn/sfn_shader_gs.h',
|
||||
'sfn/sfn_shader_tess.cpp',
|
||||
'sfn/sfn_shader_tess.h',
|
||||
'sfn/sfn_shader_vs.cpp',
|
||||
'sfn/sfn_shader_vs.h',
|
||||
'sfn/sfn_valuefactory.cpp',
|
||||
'sfn/sfn_valuefactory.h',
|
||||
'sfn/sfn_virtualvalues.cpp',
|
||||
'sfn/sfn_virtualvalues.h',
|
||||
)
|
||||
|
||||
egd_tables_h = custom_target(
|
||||
@ -200,11 +197,13 @@ libr600 = static_library(
|
||||
'r600',
|
||||
[files_r600, egd_tables_h],
|
||||
c_args : [r600_c_args, '-Wstrict-overflow=0'],
|
||||
cpp_args: '-std=c++17',
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
include_directories : [
|
||||
inc_src, inc_mapi, inc_mesa, inc_include, inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common,
|
||||
inc_gallium_drivers,
|
||||
],
|
||||
|
||||
dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm, idep_nir, idep_nir_headers],
|
||||
)
|
||||
|
||||
@ -212,3 +211,9 @@ driver_r600 = declare_dependency(
|
||||
compile_args : '-DGALLIUM_R600',
|
||||
link_with : [libr600, libmesa, libradeonwinsys],
|
||||
)
|
||||
|
||||
if with_tests
|
||||
subdir('sfn/tests')
|
||||
endif
|
||||
|
||||
|
||||
|
@ -407,8 +407,8 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *
|
||||
}
|
||||
assignment[4] = alu;
|
||||
} else {
|
||||
if (assignment[chan]) {
|
||||
assert(0); /* ALU.chan has already been allocated. */
|
||||
if (assignment[chan]) {
|
||||
assert(0); /* ALU.chan has already been allocated. */
|
||||
return -1;
|
||||
}
|
||||
assignment[chan] = alu;
|
||||
|
@ -355,6 +355,8 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
|
||||
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
|
||||
unsigned *num_format, unsigned *format_comp, unsigned *endian);
|
||||
|
||||
int r600_load_ar(struct r600_bytecode *bc);
|
||||
|
||||
static inline int fp64_switch(int i)
|
||||
{
|
||||
switch (i) {
|
||||
|
@ -420,11 +420,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
||||
if (is_nir_enabled(&rscreen->b))
|
||||
return 1;
|
||||
return 0;
|
||||
case PIPE_CAP_INT64_DIVMOD:
|
||||
/* it is actually not supported, but the nir lowering hdanles this corectly wheras
|
||||
* the glsl lowering path seems to not initialize the buildins correctly.
|
||||
*/
|
||||
return is_nir_enabled(&rscreen->b);
|
||||
|
||||
case PIPE_CAP_TWO_SIDED_COLOR:
|
||||
return !is_nir_enabled(&rscreen->b);
|
||||
case PIPE_CAP_INT64_DIVMOD:
|
||||
/* it is actually not supported, but the nir lowering handles this correctly whereas
|
||||
* the glsl lowering path seems to not initialize the builtins correctly.
|
||||
*/
|
||||
return is_nir_enabled(&rscreen->b);
|
||||
case PIPE_CAP_CULL_DISTANCE:
|
||||
return 1;
|
||||
|
||||
|
@ -46,7 +46,7 @@ const std::map<EAluOp, AluOp> alu_ops = {
|
||||
{op1_cos ,AluOp(1, 1, AluOp::t,"COS")},
|
||||
{op1_exp_ieee ,AluOp(1, 1, AluOp::t,"EXP_IEEE")},
|
||||
{op1_floor ,AluOp(1, 1, AluOp::a,"FLOOR")},
|
||||
{op1_flt_to_int ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")},
|
||||
{op1_flt_to_int ,AluOp(1, 0, AluOp::v,"FLT_TO_INT")},
|
||||
{op1_flt_to_uint ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")},
|
||||
{op1_flt_to_int_rpi ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")},
|
||||
{op1_flt_to_int_floor ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")},
|
||||
@ -84,15 +84,15 @@ const std::map<EAluOp, AluOp> alu_ops = {
|
||||
{op1_recipsqrt_ieee1 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")},
|
||||
{op1_recip_int ,AluOp(1, 0, AluOp::t,"RECIP_INT")},
|
||||
{op1_recip_uint ,AluOp(1, 0, AluOp::t,"RECIP_UINT")},
|
||||
{op1_recip_64 ,AluOp(1, 1, AluOp::t,"RECIP_64")},
|
||||
{op1_recip_clamped_64 ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")},
|
||||
{op1_recipsqrt_64 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")},
|
||||
{op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
|
||||
{op1_recip_64 ,AluOp(2, 1, AluOp::t,"RECIP_64")},
|
||||
{op1_recip_clamped_64 ,AluOp(2, 1, AluOp::t,"RECIP_CLAMPED_64")},
|
||||
{op1_recipsqrt_64 ,AluOp(2, 1, AluOp::t,"RECIPSQRT_64")},
|
||||
{op1_recipsqrt_clamped_64,AluOp(2, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
|
||||
{op1_rndne ,AluOp(1, 1, AluOp::a,"RNDNE")},
|
||||
{op1_sqrt_ieee ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")},
|
||||
{op1_sin ,AluOp(1, 1, AluOp::t,"SIN")},
|
||||
{op1_trunc ,AluOp(1, 1, AluOp::a,"TRUNC")},
|
||||
{op1_sqrt_64 ,AluOp(1, 1, AluOp::t,"SQRT_64")},
|
||||
{op1_sqrt_64 ,AluOp(2, 1, AluOp::t,"SQRT_64")},
|
||||
{op1_ubyte0_flt ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")},
|
||||
{op1_ubyte1_flt ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")},
|
||||
{op1_ubyte2_flt ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")},
|
||||
@ -273,53 +273,73 @@ const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = {
|
||||
};
|
||||
|
||||
const std::map<ESDOp, LDSOp> lds_ops = {
|
||||
{DS_OP_ADD , {2, "DS_ADD"}},
|
||||
{DS_OP_SUB , {2, "DS_SUB"}},
|
||||
{DS_OP_RSUB , {2, "DS_RSUB"}},
|
||||
{DS_OP_INC , {2, "DS_INC"}},
|
||||
{DS_OP_DEC , {2, "DS_DEC"}},
|
||||
{DS_OP_MIN_INT , {2, "DS_MIN_INT"}},
|
||||
{DS_OP_MAX_INT , {2, "DS_MAX_INT"}},
|
||||
{DS_OP_MIN_UINT , {2, "DS_MIN_UINT"}},
|
||||
{DS_OP_MAX_UINT , {2, "DS_MAX_UINT"}},
|
||||
{DS_OP_AND , {2, "DS_AND"}},
|
||||
{DS_OP_OR , {2, "DS_OR"}},
|
||||
{DS_OP_XOR , {2, "DS_XOR"}},
|
||||
{DS_OP_MSKOR , {3, "DS_MSKOR"}},
|
||||
{DS_OP_WRITE , {2, "DS_WRITE"}},
|
||||
{DS_OP_WRITE_REL , {3, "DS_WRITE_REL"}},
|
||||
{DS_OP_WRITE2 , {3, "DS_WRITE2"}},
|
||||
{DS_OP_CMP_STORE , {3, "DS_CMP_STORE"}},
|
||||
{DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}},
|
||||
{DS_OP_BYTE_WRITE , {2, "DS_BYTE_WRITE"}},
|
||||
{DS_OP_SHORT_WRITE , {2, "DS_SHORT_WRITE"}},
|
||||
{DS_OP_ADD_RET , {2, "DS_ADD_RET"}},
|
||||
{DS_OP_SUB_RET , {2, "DS_SUB_RET"}},
|
||||
{DS_OP_RSUB_RET , {2, "DS_RSUB_RET"}},
|
||||
{DS_OP_INC_RET , {2, "DS_INC_RET"}},
|
||||
{DS_OP_DEC_RET , {2, "DS_DEC_RET"}},
|
||||
{DS_OP_MIN_INT_RET , {2, "DS_MIN_INT_RET"}},
|
||||
{DS_OP_MAX_INT_RET , {2, "DS_MAX_INT_RET"}},
|
||||
{DS_OP_MIN_UINT_RET , {2, "DS_MIN_UINT_RET"}},
|
||||
{DS_OP_MAX_UINT_RET , {2, "DS_MAX_UINT_RET"}},
|
||||
{DS_OP_AND_RET , {2, "DS_AND_RET"}},
|
||||
{DS_OP_OR_RET , {2, "DS_OR_RET"}},
|
||||
{DS_OP_XOR_RET , {2, "DS_XOR_RET"}},
|
||||
{DS_OP_MSKOR_RET , {3, "DS_MSKOR_RET"}},
|
||||
{DS_OP_XCHG_RET , {2, "DS_XCHG_RET"}},
|
||||
{DS_OP_XCHG_REL_RET , {3, "DS_XCHG_REL_RET"}},
|
||||
{DS_OP_XCHG2_RET , {3, "DS_XCHG2_RET"}},
|
||||
{DS_OP_CMP_XCHG_RET , {3, "DS_CMP_XCHG_RET"}},
|
||||
{DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}},
|
||||
{DS_OP_READ_RET , {1, "DS_READ_RET"}},
|
||||
{DS_OP_READ_REL_RET , {1, "DS_READ_REL_RET"}},
|
||||
{DS_OP_READ2_RET , {2, "DS_READ2_RET"}},
|
||||
{DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}},
|
||||
{DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}},
|
||||
{DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}},
|
||||
{DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}},
|
||||
{DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}},
|
||||
{DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}}
|
||||
{DS_OP_ADD , {2, "ADD"}},
|
||||
{DS_OP_SUB , {2, "SUB"}},
|
||||
{DS_OP_RSUB , {2, "RSUB"}},
|
||||
{DS_OP_INC , {2, "INC"}},
|
||||
{DS_OP_DEC , {2, "DEC"}},
|
||||
{DS_OP_MIN_INT , {2, "MIN_INT"}},
|
||||
{DS_OP_MAX_INT , {2, "MAX_INT"}},
|
||||
{DS_OP_MIN_UINT , {2, "MIN_UINT"}},
|
||||
{DS_OP_MAX_UINT , {2, "MAX_UINT"}},
|
||||
{DS_OP_AND , {2, "AND"}},
|
||||
{DS_OP_OR , {2, "OR"}},
|
||||
{DS_OP_XOR , {2, "XOR"}},
|
||||
{DS_OP_MSKOR , {3, "MSKOR"}},
|
||||
{DS_OP_WRITE , {2, "WRITE"}},
|
||||
{DS_OP_WRITE_REL , {3, "WRITE_REL"}},
|
||||
{DS_OP_WRITE2 , {3, "WRITE2"}},
|
||||
{DS_OP_CMP_STORE , {3, "CMP_STORE"}},
|
||||
{DS_OP_CMP_STORE_SPF , {3, "CMP_STORE_SPF"}},
|
||||
{DS_OP_BYTE_WRITE , {2, "BYTE_WRITE"}},
|
||||
{DS_OP_SHORT_WRITE , {2, "SHORT_WRITE"}},
|
||||
{DS_OP_ADD_RET , {2, "ADD_RET"}},
|
||||
{DS_OP_SUB_RET , {2, "SUB_RET"}},
|
||||
{DS_OP_RSUB_RET , {2, "RSUB_RET"}},
|
||||
{DS_OP_INC_RET , {2, "INC_RET"}},
|
||||
{DS_OP_DEC_RET , {2, "DEC_RET"}},
|
||||
{DS_OP_MIN_INT_RET , {2, "MIN_INT_RET"}},
|
||||
{DS_OP_MAX_INT_RET , {2, "MAX_INT_RET"}},
|
||||
{DS_OP_MIN_UINT_RET , {2, "MIN_UINT_RET"}},
|
||||
{DS_OP_MAX_UINT_RET , {2, "MAX_UINT_RET"}},
|
||||
{DS_OP_AND_RET , {2, "AND_RET"}},
|
||||
{DS_OP_OR_RET , {2, "OR_RET"}},
|
||||
{DS_OP_XOR_RET , {2, "XOR_RET"}},
|
||||
{DS_OP_MSKOR_RET , {3, "MSKOR_RET"}},
|
||||
{DS_OP_XCHG_RET , {2, "XCHG_RET"}},
|
||||
{DS_OP_XCHG_REL_RET , {3, "XCHG_REL_RET"}},
|
||||
{DS_OP_XCHG2_RET , {3, "XCHG2_RET"}},
|
||||
{DS_OP_CMP_XCHG_RET , {3, "CMP_XCHG_RET"}},
|
||||
{DS_OP_CMP_XCHG_SPF_RET, {3, "CMP_XCHG_SPF_RET"}},
|
||||
{DS_OP_READ_RET , {1, "READ_RET"}},
|
||||
{DS_OP_READ_REL_RET , {1, "READ_REL_RET"}},
|
||||
{DS_OP_READ2_RET , {2, "READ2_RET"}},
|
||||
{DS_OP_READWRITE_RET , {3, "READWRITE_RET"}},
|
||||
{DS_OP_BYTE_READ_RET , {1, "BYTE_READ_RET"}},
|
||||
{DS_OP_UBYTE_READ_RET, {1, "UBYTE_READ_RET"}},
|
||||
{DS_OP_SHORT_READ_RET, {1, "SHORT_READ_RET"}},
|
||||
{DS_OP_USHORT_READ_RET, {1, "USHORT_READ_RET"}},
|
||||
{DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "ATOMIC_ORDERED_ALLOC_RET"}},
|
||||
{LDS_ADD_RET, {2, "LDS_ADD_RET"}},
|
||||
{LDS_ADD, {2, "LDS_ADD"}},
|
||||
{LDS_AND_RET, {2, "LDS_AND_RET"}},
|
||||
{LDS_AND, {2, "LDS_AND"}},
|
||||
{LDS_WRITE, {2, "LDS_WRITE"}},
|
||||
{LDS_OR_RET, {2, "LDS_OR_RET"}},
|
||||
{LDS_OR, {2, "LDS_OR"}},
|
||||
{LDS_MAX_INT_RET, {2, "LDS_MAX_INT_RET"}},
|
||||
{LDS_MAX_INT, {2, "LDS_MAX_INT"}},
|
||||
{LDS_MAX_UINT_RET, {2, "LDS_MAX_UINT_RET"}},
|
||||
{LDS_MAX_UINT, {2, "LDS_MAX_UINT"}},
|
||||
{LDS_MIN_INT_RET, {2, "LDS_MIN_INT_RET"}},
|
||||
{LDS_MIN_INT, {2, "LDS_MIN_INT"}},
|
||||
{LDS_MIN_UINT_RET, {2, "LDS_MIN_UINT_RET"}},
|
||||
{LDS_MIN_UINT, {2, "LDS_MIN_UINT"}},
|
||||
{LDS_XOR_RET, {2, "LDS_XOR_RET"}}, /* name now carries the _RET suffix like every other *_RET entry */
|
||||
{LDS_XOR, {2, "LDS_XOR"}},
|
||||
{LDS_XCHG_RET, {2, "LDS_XCHG_RET"}},
|
||||
{LDS_CMP_XCHG_RET, {3, "LDS_CMP_XCHG_RET"}},
|
||||
{LDS_WRITE_REL, {3, "LDS_WRITE_REL"}},
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -27,6 +27,8 @@
|
||||
#ifndef r600_sfn_alu_defines_h
|
||||
#define r600_sfn_alu_defines_h
|
||||
|
||||
#include "../r600_isa.h"
|
||||
|
||||
#include <map>
|
||||
#include <bitset>
|
||||
|
||||
@ -235,12 +237,71 @@ enum EAluOp {
|
||||
op3_cnde_int = 28<< 6,
|
||||
op3_cndgt_int = 29<< 6,
|
||||
op3_cndge_int = 30<< 6,
|
||||
op3_mul_lit = 31<< 6
|
||||
op3_mul_lit = 31<< 6,
|
||||
op_invalid = 0xffff
|
||||
};
|
||||
|
||||
/* Indices of the per-instruction ALU flags. The values are consecutive,
 * starting at zero; alu_flag_count is the number of flags and must stay
 * the last enumerator. */
enum AluModifiers {
   /* source 0 */
   alu_src0_neg,
   alu_src0_abs,
   alu_src0_rel,
   /* source 1 */
   alu_src1_neg,
   alu_src1_abs,
   alu_src1_rel,
   /* source 2 (there is no abs flag for this source) */
   alu_src2_neg,
   alu_src2_rel,
   /* destination */
   alu_dst_clamp,
   alu_dst_rel,
   /* instruction level flags */
   alu_last_instr,
   alu_update_exec,
   alu_update_pred,
   alu_write,
   alu_op3,
   alu_is_trans,
   alu_is_cayman_trans,
   /* LDS related flags */
   alu_is_lds,
   alu_lds_group_start,
   alu_lds_group_end,
   alu_lds_address,
   /* scheduling hints */
   alu_no_schedule_bias,
   alu_64bit_op,
   /* number of flags, not a flag itself */
   alu_flag_count
};
|
||||
|
||||
/* Output modifier selector for an ALU destination.
 * NOTE(review): judging by the names these select "off", "*2", "*4" and
 * "/2"; confirm against the ISA documentation. */
enum AluDstModifiers {
   omod_off   = 0,
   omod_mul2  = 1,
   omod_mul4  = 2,
   omod_divl2 = 3
};
|
||||
|
||||
using AluOpFlags=std::bitset<32>;
|
||||
/* Predicate selection values; note that the value 1 is not used. */
enum AluPredSel {
   pred_off  = 0,
   pred_zero = 2,
   pred_one  = 3
};
|
||||
|
||||
/* Bank swizzle selectors. The alu_vec_* and sq_alu_scl_* enumerators
 * share the same numeric range, so a value has to be interpreted
 * according to the kind of slot it is used for. The *_unknown values
 * are one past the last valid selector of each group and are used as
 * end markers and as array sizes. */
enum AluBankSwizzle {
   alu_vec_012 = 0,
   sq_alu_scl_201 = 0,
   alu_vec_021 = 1,
   sq_alu_scl_122 = 1,
   alu_vec_120 = 2,
   sq_alu_scl_212 = 2,
   alu_vec_102 = 3,
   sq_alu_scl_221 = 3,
   alu_vec_201 = 4,
   sq_alu_scl_unknown = 4,
   alu_vec_210 = 5,
   alu_vec_unknown = 6
};

/* Prefix increment: advance x to the next selector and return a
 * reference to it (the canonical form for a prefix operator). No range
 * check is done, the caller has to stop at the *_unknown end marker. */
inline AluBankSwizzle& operator ++(AluBankSwizzle& x) {
   x = static_cast<AluBankSwizzle>(x + 1);
   return x;
}
|
||||
|
||||
using AluOpFlags=std::bitset<alu_flag_count>;
|
||||
|
||||
struct AluOp {
|
||||
static constexpr int x = 1;
|
||||
@ -314,6 +375,8 @@ struct AluInlineConstantDescr {
|
||||
|
||||
extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;
|
||||
|
||||
#define LDSOP2(X) LDS_ ## X = LDS_OP2_LDS_ ## X
|
||||
|
||||
enum ESDOp {
|
||||
DS_OP_ADD = 0,
|
||||
DS_OP_SUB = 1,
|
||||
@ -362,9 +425,31 @@ enum ESDOp {
|
||||
DS_OP_SHORT_READ_RET = 56,
|
||||
DS_OP_USHORT_READ_RET = 57,
|
||||
DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
|
||||
DS_OP_INVALID = 64
|
||||
DS_OP_INVALID = 64,
|
||||
LDSOP2(ADD_RET),
|
||||
LDSOP2(ADD),
|
||||
LDSOP2(AND_RET),
|
||||
LDSOP2(AND),
|
||||
LDSOP2(WRITE),
|
||||
LDSOP2(OR_RET),
|
||||
LDSOP2(OR),
|
||||
LDSOP2(MAX_INT_RET),
|
||||
LDSOP2(MAX_INT),
|
||||
LDSOP2(MAX_UINT_RET),
|
||||
LDSOP2(MAX_UINT),
|
||||
LDSOP2(MIN_INT_RET),
|
||||
LDSOP2(MIN_INT),
|
||||
LDSOP2(MIN_UINT_RET),
|
||||
LDSOP2(MIN_UINT),
|
||||
LDSOP2(XOR_RET),
|
||||
LDSOP2(XOR),
|
||||
LDSOP2(XCHG_RET),
|
||||
LDS_CMP_XCHG_RET = LDS_OP3_LDS_CMP_XCHG_RET,
|
||||
LDS_WRITE_REL = LDS_OP3_LDS_WRITE_REL
|
||||
};
|
||||
|
||||
#undef LDSOP2
|
||||
|
||||
struct LDSOp {
|
||||
int nsrc;
|
||||
const char *name;
|
||||
@ -372,6 +457,18 @@ struct LDSOp {
|
||||
|
||||
extern const std::map<ESDOp, LDSOp> lds_ops;
|
||||
|
||||
/* Describes one constant cache line reservation: bank, address, length
 * and the lock mode. A default constructed line is all zero and free. */
struct KCacheLine {
   enum KCacheLockMode {
      free,
      lock_1,
      lock_2
   };

   int bank = 0;
   int addr = 0;
   int len = 0;
   KCacheLockMode mode = free;
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // r600_sfn_alu_defines_h
|
||||
|
329
src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp
Normal file
329
src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp
Normal file
@ -0,0 +1,329 @@
|
||||
#include "sfn_alu_readport_validation.h"

#include <cassert>
#include <cstring>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class ReserveReadport : public ConstRegisterVisitor {
|
||||
public:
|
||||
ReserveReadport(AluReadportReservation& reserv);
|
||||
|
||||
void visit(const LocalArray& value) override;
|
||||
void visit(const LiteralConstant& value) override;
|
||||
void visit(const InlineConstant& value) override;
|
||||
|
||||
void reserve_gpr(int sel, int chan);
|
||||
|
||||
AluReadportReservation& reserver;
|
||||
int cycle = -1;
|
||||
int isrc = -1;
|
||||
int src0_sel = -1;
|
||||
int src0_chan = -1;
|
||||
bool success = true;
|
||||
|
||||
static const int max_const_readports = 2;
|
||||
};
|
||||
|
||||
|
||||
class ReserveReadportVec : public ReserveReadport {
|
||||
public:
|
||||
using ReserveReadport::ReserveReadport;
|
||||
|
||||
void visit(const Register& value) override;
|
||||
void visit(const LocalArrayValue& value) override;
|
||||
void visit(const UniformValue& value) override;
|
||||
};
|
||||
|
||||
class ReserveReadportTrans : public ReserveReadport
|
||||
{
|
||||
public:
|
||||
ReserveReadportTrans(AluReadportReservation& reserv);
|
||||
|
||||
int n_consts;
|
||||
};
|
||||
|
||||
class ReserveReadportTransPass1 : public ReserveReadportTrans {
|
||||
public:
|
||||
using ReserveReadportTrans::ReserveReadportTrans;
|
||||
|
||||
void visit(const Register& value) override;
|
||||
void visit(const LocalArrayValue& value) override;
|
||||
void visit(const UniformValue& value) override;
|
||||
void visit(const InlineConstant& value) override;
|
||||
void visit(const LiteralConstant& value) override;
|
||||
};
|
||||
|
||||
|
||||
class ReserveReadportTransPass2 : public ReserveReadportTrans {
|
||||
public:
|
||||
using ReserveReadportTrans::ReserveReadportTrans;
|
||||
|
||||
void visit(const Register& value) override;
|
||||
void visit(const LocalArrayValue& value) override;
|
||||
void visit(const UniformValue& value) override;
|
||||
};
|
||||
|
||||
/* Try to reserve the read ports for the first `nsrc` entries of `src`
 * when they are read in a vector slot with bank swizzle `swz`.
 * All sources are visited, even if an earlier one already failed.
 * Returns true if all reservations succeeded. */
bool AluReadportReservation::schedule_vec_src(PVirtualValue src[3], int nsrc, AluBankSwizzle swz)
{
   ReserveReadportVec port_visitor(*this);

   /* Record where source 0 lives so that the visitor can skip source 1
    * if it reads the same register channel. If source 0 is not a
    * register, use marker values instead (presumably outside the range
    * of real registers -- TODO confirm). */
   if (!src[0]->as_register()) {
      port_visitor.src0_sel = 0xffff;
      port_visitor.src0_chan = 8;
   } else {
      port_visitor.src0_sel = src[0]->sel();
      port_visitor.src0_chan = src[0]->chan();
   }

   int idx = 0;
   while (idx < nsrc) {
      port_visitor.cycle = cycle_vec(swz, idx);
      port_visitor.isrc = idx;
      src[idx]->accept(port_visitor);
      ++idx;
   }

   return port_visitor.success;
}
|
||||
|
||||
bool AluReadportReservation::schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz)
|
||||
{
|
||||
ReserveReadportVec visitor(*this);
|
||||
|
||||
for (unsigned i = 0; i < alu.n_sources() && visitor.success; ++i) {
|
||||
visitor.cycle = cycle_vec(swz, i);
|
||||
visitor.isrc = i;
|
||||
if (i == 1 && alu.src(i).equal_to(alu.src(0)))
|
||||
continue;
|
||||
alu.src(i).accept(visitor);
|
||||
}
|
||||
return visitor.success;
|
||||
}
|
||||
|
||||
bool AluReadportReservation::schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz)
|
||||
{
|
||||
|
||||
ReserveReadportTransPass1 visitor1(*this);
|
||||
|
||||
for (unsigned i = 0; i < alu.n_sources(); ++i) {
|
||||
visitor1.cycle = cycle_trans(swz, i);
|
||||
alu.src(i).accept(visitor1);
|
||||
}
|
||||
if (!visitor1.success)
|
||||
return false;
|
||||
|
||||
|
||||
ReserveReadportTransPass2 visitor2(*this);
|
||||
visitor2.n_consts = visitor1.n_consts;
|
||||
|
||||
|
||||
for (unsigned i = 0; i < alu.n_sources(); ++i) {
|
||||
visitor2.cycle = cycle_trans(swz, i);
|
||||
|
||||
alu.src(i).accept(visitor2);
|
||||
}
|
||||
return visitor2.success;
|
||||
}
|
||||
|
||||
|
||||
/* Start with an empty reservation: all GPR and constant read port slots
 * are marked as free (-1). The literal storage needs no initialization
 * here because m_nliterals starts out as zero. */
AluReadportReservation::AluReadportReservation()
{
   for (auto& cycle_ports : m_hw_gpr)
      cycle_ports.fill(-1);

   m_hw_const_addr.fill(-1);
   m_hw_const_chan.fill(-1);
   m_hw_const_bank.fill(-1);
}
|
||||
|
||||
|
||||
bool AluReadportReservation::reserve_gpr(int sel, int chan, int cycle)
|
||||
{
|
||||
if (m_hw_gpr[cycle][chan] == -1) {
|
||||
m_hw_gpr[cycle][chan] = sel;
|
||||
}
|
||||
else if (m_hw_gpr[cycle][chan] != sel) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AluReadportReservation::reserve_const(const UniformValue& value)
|
||||
{
|
||||
int match = -1;
|
||||
int empty = -1;
|
||||
|
||||
for (int res = 0; res < ReserveReadport::max_const_readports; ++res) {
|
||||
if (m_hw_const_addr[res] == -1)
|
||||
empty = res;
|
||||
else if ((m_hw_const_addr[res] == value.sel()) &&
|
||||
(m_hw_const_bank[res] == value.kcache_bank()) &&
|
||||
(m_hw_const_chan[res] == (value.chan() >> 1)))
|
||||
match = res;
|
||||
}
|
||||
|
||||
if (match < 0) {
|
||||
if (empty >= 0) {
|
||||
m_hw_const_addr[empty] = value.sel();
|
||||
(m_hw_const_bank[empty] = value.kcache_bank());
|
||||
m_hw_const_chan[empty] = value.chan() >> 1;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AluReadportReservation::add_literal(uint32_t value)
|
||||
{
|
||||
for (unsigned i = 0; i < m_nliterals; ++i) {
|
||||
if (m_literals[i] == value)
|
||||
return true;
|
||||
}
|
||||
if (m_nliterals < m_literals.size()) {
|
||||
m_literals[m_nliterals++] = value;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return the read cycle in which source `src` is read when the vector
 * slot bank swizzle `swz` is used.
 *
 * `swz` must be one of the six alu_vec_* selectors (alu_vec_unknown is
 * only an end marker and the size of the table) and `src` must be in
 * [0, max_gpr_readports); this is checked in debug builds. */
int AluReadportReservation::cycle_vec(AluBankSwizzle swz, int src)
{
   static const int mapping[AluBankSwizzle::alu_vec_unknown][max_gpr_readports] = {
      {0, 1, 2},
      {0, 2, 1},
      {1, 0, 2},
      {1, 2, 0},
      {2, 0, 1},
      {2, 1, 0}
   };

   const int row = static_cast<int>(swz);

   /* Guard the table lookup against the end marker and bad indices */
   assert(row >= 0 && row < AluBankSwizzle::alu_vec_unknown);
   assert(src >= 0 && src < max_gpr_readports);

   return mapping[row][src];
}
|
||||
|
||||
/* Return the read cycle in which source `src` is read when the trans
 * slot bank swizzle `swz` is used.
 *
 * `swz` must be one of the four sq_alu_scl_* selectors
 * (sq_alu_scl_unknown is only an end marker and the size of the table)
 * and `src` must be in [0, max_gpr_readports); this is checked in debug
 * builds. */
int AluReadportReservation::cycle_trans(AluBankSwizzle swz, int src)
{
   static const int mapping[AluBankSwizzle::sq_alu_scl_unknown][max_gpr_readports] = {
      {2, 1, 0},
      {1, 2, 2},
      {2, 1, 2},
      {2, 2, 1},
   };

   const int row = static_cast<int>(swz);

   /* AluBankSwizzle also contains the larger alu_vec_* values, so the
    * row has to be checked against the size of this table. */
   assert(row >= 0 && row < AluBankSwizzle::sq_alu_scl_unknown);
   assert(src >= 0 && src < max_gpr_readports);

   return mapping[row][src];
}
|
||||
|
||||
|
||||
/* Bind the visitor to the reservation state it will update; all other
 * members keep their in-class defaults. */
ReserveReadport::ReserveReadport(AluReadportReservation& reserv)
   : reserver(reserv)
{}
|
||||
|
||||
void ReserveReadport::visit(const LocalArray& value)
|
||||
{
|
||||
(void)value;
|
||||
unreachable("a full array is not available here");
|
||||
}
|
||||
|
||||
void ReserveReadport::visit(const LiteralConstant& value)
|
||||
{
|
||||
success &= reserver.add_literal(value.value());
|
||||
}
|
||||
|
||||
/* Inline constants don't reserve anything in this visitor. */
void ReserveReadport::visit(const InlineConstant&)
{
}
|
||||
|
||||
void ReserveReadportVec::visit(const Register& value)
|
||||
{
|
||||
reserve_gpr(value.sel(), value.chan());
|
||||
}
|
||||
|
||||
void ReserveReadportVec::visit(const LocalArrayValue& value)
|
||||
{
|
||||
// Set the hightest non-sign bit to indicated that we use the
|
||||
// AR register
|
||||
reserve_gpr(0x4000000 | value.sel(), value.chan());
|
||||
}
|
||||
|
||||
void ReserveReadport::reserve_gpr(int sel, int chan)
|
||||
{
|
||||
if (isrc == 1 && src0_sel == sel && src0_chan == chan)
|
||||
return;
|
||||
success &= reserver.reserve_gpr(sel, chan, cycle);
|
||||
}
|
||||
|
||||
void ReserveReadportVec::visit(const UniformValue& value)
|
||||
{
|
||||
// kcache bank?
|
||||
success &= reserver.reserve_const(value);
|
||||
}
|
||||
|
||||
ReserveReadportTrans::ReserveReadportTrans(AluReadportReservation& reserv):
|
||||
ReserveReadport(reserv),
|
||||
n_consts(0)
|
||||
{}
|
||||
|
||||
/* Registers are not handled in the first pass. */
void ReserveReadportTransPass1::visit(const Register&)
{
}
|
||||
|
||||
/* Array elements are not handled in the first pass. */
void ReserveReadportTransPass1::visit(const LocalArrayValue&)
{
}
|
||||
|
||||
void ReserveReadportTransPass1::visit(const UniformValue& value)
|
||||
{
|
||||
if (n_consts >= max_const_readports) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
n_consts++;
|
||||
success &= reserver.reserve_const(value);
|
||||
}
|
||||
|
||||
void ReserveReadportTransPass1::visit(const InlineConstant& value)
|
||||
{
|
||||
(void)value;
|
||||
if (n_consts >= max_const_readports) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
n_consts++;
|
||||
}
|
||||
|
||||
void ReserveReadportTransPass1::visit(const LiteralConstant& value)
|
||||
{
|
||||
if (n_consts >= max_const_readports) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
n_consts++;
|
||||
success &= reserver.add_literal(value.value());
|
||||
}
|
||||
|
||||
void ReserveReadportTransPass2::visit(const Register& value)
|
||||
{
|
||||
if (cycle < n_consts) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
reserve_gpr(value.sel(), value.chan());
|
||||
}
|
||||
|
||||
void ReserveReadportTransPass2::visit(const LocalArrayValue& value)
|
||||
{
|
||||
if (cycle < n_consts) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
reserve_gpr(0x4000000 | value.sel(), value.chan());
|
||||
}
|
||||
|
||||
/* Uniforms are not handled in the second pass. */
void ReserveReadportTransPass2::visit(const UniformValue&)
{
}
|
||||
|
||||
|
||||
}
|
41
src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h
Normal file
41
src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h
Normal file
@ -0,0 +1,41 @@
|
||||
#ifndef ALUREADPORTVALIDATION_H
|
||||
#define ALUREADPORTVALIDATION_H
|
||||
|
||||
#include "sfn_instr_alu.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class AluReadportReservation {
|
||||
public:
|
||||
AluReadportReservation();
|
||||
AluReadportReservation(const AluReadportReservation& orig) = default;
|
||||
AluReadportReservation& operator = (const AluReadportReservation& orig) = default;
|
||||
|
||||
bool schedule_vec_src(PVirtualValue src[3], int nsrc, AluBankSwizzle swz);
|
||||
|
||||
bool schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz);
|
||||
bool schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz);
|
||||
|
||||
bool reserve_gpr(int sel, int chan, int cycle);
|
||||
bool reserve_const(const UniformValue& value);
|
||||
|
||||
bool add_literal(uint32_t value);
|
||||
|
||||
static int cycle_vec(AluBankSwizzle swz, int src);
|
||||
static int cycle_trans(AluBankSwizzle swz, int src);
|
||||
|
||||
static const int max_chan_channels = 4;
|
||||
static const int max_gpr_readports = 3;
|
||||
|
||||
std::array<std::array<int, max_chan_channels>, max_gpr_readports> m_hw_gpr;
|
||||
std::array<int, max_chan_channels> m_hw_const_addr;
|
||||
std::array<int, max_chan_channels> m_hw_const_chan;
|
||||
std::array<int, max_chan_channels> m_hw_const_bank;
|
||||
std::array<uint32_t, max_chan_channels> m_literals;
|
||||
uint32_t m_nliterals{0};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // ALUREADPORTVALIDATION_H
|
File diff suppressed because it is too large
Load Diff
26
src/gallium/drivers/r600/sfn/sfn_assembler.h
Normal file
26
src/gallium/drivers/r600/sfn/sfn_assembler.h
Normal file
@ -0,0 +1,26 @@
|
||||
#ifndef ASSEMBLER_H
|
||||
#define ASSEMBLER_H
|
||||
|
||||
#include "../r600_pipe.h"
|
||||
#include "../r600_shader.h"
|
||||
|
||||
#include "sfn_shader.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class Assembler
|
||||
{
|
||||
public:
|
||||
Assembler(r600_shader *sh, const r600_shader_key& key);
|
||||
|
||||
bool lower(Shader *shader);
|
||||
private:
|
||||
r600_shader *m_sh;
|
||||
const r600_shader_key& m_key;
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // ASSEMBLER_H
|
@ -38,10 +38,7 @@ enum JumpType {
|
||||
|
||||
/**
|
||||
Class to link the jump locations
|
||||
|
||||
*/
|
||||
|
||||
|
||||
class ConditionalJumpTracker
|
||||
{
|
||||
public:
|
||||
@ -49,7 +46,6 @@ public:
|
||||
~ConditionalJumpTracker();
|
||||
|
||||
/* Mark the start of a loop or a if/else */
|
||||
|
||||
void push(r600_bytecode_cf *start, JumpType type);
|
||||
|
||||
/* Mark the end of a loop or a if/else and fixup the jump sites */
|
||||
|
@ -61,6 +61,10 @@ static const struct debug_named_value sfn_debug_options[] = {
|
||||
{"nomerge", SfnLog::nomerge, "Skip register merge step"},
|
||||
{"tex", SfnLog::tex, "Log texture ops"},
|
||||
{"trans", SfnLog::trans, "Log generic translation messages"},
|
||||
{"schedule", SfnLog::schedule, "Log scheduling"},
|
||||
{"opt", SfnLog::opt, "Log optimization"},
|
||||
{"steps", SfnLog::steps, "Log shaders at transformation steps"},
|
||||
{"noopt", SfnLog::noopt, "Don't run backend optimizations"},
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
||||
|
@ -64,8 +64,12 @@ public:
|
||||
merge = 1 << 10,
|
||||
tex = 1 << 11,
|
||||
trans = 1 << 12,
|
||||
all = (1 << 13) - 1,
|
||||
schedule = 1 << 13,
|
||||
opt = 1 << 14,
|
||||
all = (1 << 15) - 1,
|
||||
nomerge = 1 << 16,
|
||||
steps = 1 << 17,
|
||||
noopt = 1 << 18
|
||||
};
|
||||
|
||||
SfnLog();
|
||||
|
@ -303,6 +303,9 @@ enum EVFetchFlagShift {
|
||||
vtx_alt_const,
|
||||
vtx_use_tc,
|
||||
vtx_vpm,
|
||||
vtx_is_mega_fetch,
|
||||
vtx_uncached,
|
||||
vtx_indexed,
|
||||
vtx_unknown
|
||||
};
|
||||
|
||||
|
@ -2,44 +2,33 @@
|
||||
|
||||
This code is an attempt to implement a NIR backend for r600.
|
||||
|
||||
Supported hardware: Cayman, Evergreen and NI (tested on CAYMAN, CEDAR and BARTS)
|
||||
|
||||
Thanks to soft fp64 the OpenGL version is now 4.5 also for EG.
|
||||
|
||||
sb can be enabled for nir; it still gives some improvements, e.g. with Xonotic.
|
||||
The aim is still to get rid of it.
|
||||
|
||||
|
||||
## State
|
||||
|
||||
Supported hardware: Evergreen and NI (tested on CEDAR and BARTS)
|
||||
|
||||
Thanks to soft fp64 the OpenGL version is now 4.5
|
||||
|
||||
sb has been enabled for nir to be able to run some more demanding work loads. The aim is
|
||||
still to get rid of it.
|
||||
|
||||
TODO:
|
||||
|
||||
piglits gpu passes mostly like with TGSI, there are some fixes but also a few regressions.
|
||||
|
||||
CTS gles
|
||||
- 2 passes like with TGSI
|
||||
- 3 no regressions, a few fixes compared to TGSI
|
||||
- 31
|
||||
* a few fixes with interpolation specifiers
|
||||
* synchronization has some unstable tests, this might be because global synchronization is missing (in both)
|
||||
|
||||
GL CTS:
|
||||
* a few regressions and a hang with KHR-GL43.compute_shader.shared-max
|
||||
|
||||
piglit:
|
||||
* spilling arrays is broken on Barts (but it works on Cedar)
|
||||
* a few tests fail because the register limit is exhausted, and needlessly so, because
|
||||
with better RA it would work
|
||||
* spilling arrays is broken on Barts and CAYMAN (but it works on Cedar)
|
||||
|
||||
## Needed optimizations:
|
||||
|
||||
- Register allocator and scheduler (Could the sb allocator and scheduler
|
||||
be ported?)
|
||||
|
||||
- peepholes:
|
||||
- compare + set predicate
|
||||
- compare + set predicate / kill
|
||||
- use clause local registers
|
||||
- reduce register usage
|
||||
- don't rely on the backend to schedule addr load and Index load as well
|
||||
- don't rely on the backend to merge some alu groups
|
||||
|
||||
## There are still some hangs
|
||||
|
||||
|
||||
|
||||
- copy propagation:
|
||||
- Moves from inputs are usually not required, they could be forwarded
|
||||
- texture operations often move additional parameters in extra registers
|
||||
but they are actually needed in the same registers they come from and
|
||||
could just be swizzled into the right place
|
||||
(lower in NIR, as is done e.g. in ETNAVIV)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,116 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_EMITALUINSTRUCTION_H
|
||||
#define SFN_EMITALUINSTRUCTION_H
|
||||
|
||||
#include "sfn_emitinstruction.h"
|
||||
|
||||
#include "sfn_alu_defines.h"
|
||||
#include "sfn_instruction_alu.h"
|
||||
#include "sfn_instruction_tex.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
|
||||
class EmitAluInstruction : public EmitInstruction
|
||||
{
|
||||
public:
|
||||
EmitAluInstruction(ShaderFromNirProcessor& processor);
|
||||
|
||||
private:
|
||||
|
||||
enum AluOp2Opts {
|
||||
op2_opt_none = 0,
|
||||
op2_opt_reverse = 1,
|
||||
op2_opt_neg_src1 = 1 << 1
|
||||
};
|
||||
|
||||
bool do_emit(nir_instr* instr) override;
|
||||
|
||||
void split_constants(const nir_alu_instr& instr, unsigned nsrc_comp);
|
||||
|
||||
bool emit_mov(const nir_alu_instr& instr);
|
||||
bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0);
|
||||
bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
|
||||
|
||||
bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode);
|
||||
bool emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode);
|
||||
|
||||
bool emit_alu_inot(const nir_alu_instr& instr);
|
||||
bool emit_alu_ineg(const nir_alu_instr& instr);
|
||||
bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
|
||||
|
||||
bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2});
|
||||
bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false);
|
||||
|
||||
bool emit_alu_b2f(const nir_alu_instr& instr);
|
||||
bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op);
|
||||
bool emit_dot(const nir_alu_instr& instr, int n);
|
||||
bool emit_create_vec(const nir_alu_instr& instr, unsigned nc);
|
||||
bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
|
||||
bool emit_any_iequal(const nir_alu_instr& instr, unsigned nc);
|
||||
|
||||
bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
|
||||
bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all);
|
||||
|
||||
bool emit_fdph(const nir_alu_instr &instr);
|
||||
bool emit_discard_if(const nir_intrinsic_instr *instr);
|
||||
|
||||
bool emit_alu_f2b32(const nir_alu_instr& instr);
|
||||
bool emit_b2i32(const nir_alu_instr& instr);
|
||||
bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op);
|
||||
bool emit_pack_64_2x32_split(const nir_alu_instr& instr);
|
||||
bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp);
|
||||
|
||||
bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine);
|
||||
bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr);
|
||||
bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr);
|
||||
bool emit_pack_32_2x16_split(const nir_alu_instr& instr);
|
||||
|
||||
bool emit_cube(const nir_alu_instr& instr);
|
||||
private:
|
||||
void make_last(AluInstruction *ir) const;
|
||||
void split_alu_modifiers(const nir_alu_src &src, const GPRVector::Values& v,
|
||||
GPRVector::Values& out, int ncomp);
|
||||
|
||||
void preload_src(const nir_alu_instr& instr);
|
||||
unsigned num_src_comp(const nir_alu_instr& instr);
|
||||
|
||||
using vreg = std::array<PValue, 4>;
|
||||
|
||||
std::array<PValue, 4> m_src[4];
|
||||
};
|
||||
|
||||
inline void EmitAluInstruction::make_last(AluInstruction *ir) const
|
||||
{
|
||||
if (ir)
|
||||
ir->set_flag(alu_last_instr);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_EMITALUINSTRUCTION_H
|
@ -1,169 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_emitinstruction.h"
|
||||
|
||||
#include "sfn_shader_base.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor):
|
||||
m_proc(processor)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
EmitInstruction::~EmitInstruction()
|
||||
{
|
||||
}
|
||||
|
||||
bool EmitInstruction::emit(nir_instr* instr)
|
||||
{
|
||||
return do_emit(instr);
|
||||
}
|
||||
|
||||
bool EmitInstruction::use_legacy_math_rules(void)
|
||||
{
|
||||
return m_proc.use_legacy_math_rules();
|
||||
}
|
||||
|
||||
PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
|
||||
{
|
||||
return m_proc.from_nir(v, component, swizzled);
|
||||
}
|
||||
|
||||
PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component)
|
||||
{
|
||||
return m_proc.from_nir(v, component);
|
||||
}
|
||||
|
||||
PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component)
|
||||
{
|
||||
return m_proc.from_nir(v, component);
|
||||
}
|
||||
|
||||
PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component)
|
||||
{
|
||||
return m_proc.from_nir(v, component);
|
||||
}
|
||||
|
||||
PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component)
|
||||
{
|
||||
return m_proc.from_nir(v, component);
|
||||
}
|
||||
|
||||
PValue EmitInstruction::from_nir(const nir_src& v, unsigned component)
|
||||
{
|
||||
return m_proc.from_nir(v, component);
|
||||
}
|
||||
|
||||
void EmitInstruction::emit_instruction(Instruction *ir)
|
||||
{
|
||||
return m_proc.emit_instruction(ir);
|
||||
}
|
||||
|
||||
void EmitInstruction::emit_instruction(AluInstruction *ir)
|
||||
{
|
||||
return m_proc.emit_instruction(ir);
|
||||
}
|
||||
|
||||
bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest,
|
||||
std::vector<PValue> src0,
|
||||
const std::set<AluModifiers>& m_flags)
|
||||
{
|
||||
return m_proc.emit_instruction(opcode, dest,src0, m_flags);
|
||||
}
|
||||
|
||||
const nir_variable *
|
||||
EmitInstruction::get_deref_location(const nir_src& v) const
|
||||
{
|
||||
return m_proc.get_deref_location(v);
|
||||
}
|
||||
|
||||
PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
|
||||
{
|
||||
return m_proc.from_nir_with_fetch_constant(src, component, channel);
|
||||
}
|
||||
|
||||
GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
|
||||
const GPRVector::Swizzle& swizzle, bool match)
|
||||
{
|
||||
return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match);
|
||||
}
|
||||
|
||||
PGPRValue EmitInstruction::get_temp_register(int channel)
|
||||
{
|
||||
return m_proc.get_temp_register(channel);
|
||||
}
|
||||
|
||||
GPRVector EmitInstruction::get_temp_vec4(const GPRVector::Swizzle& swizzle)
|
||||
{
|
||||
return m_proc.get_temp_vec4(swizzle);
|
||||
}
|
||||
|
||||
PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle)
|
||||
{
|
||||
return m_proc.create_register_from_nir_src(src, swizzle);
|
||||
}
|
||||
|
||||
enum amd_gfx_level EmitInstruction::get_chip_class(void) const
|
||||
{
|
||||
return m_proc.get_chip_class();
|
||||
}
|
||||
|
||||
PValue EmitInstruction::literal(uint32_t value)
|
||||
{
|
||||
return m_proc.literal(value);
|
||||
}
|
||||
|
||||
GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components)
|
||||
{
|
||||
return m_proc.vec_from_nir(dst, num_components);
|
||||
}
|
||||
|
||||
bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle,
|
||||
const PValue& reg, bool map)
|
||||
{
|
||||
return m_proc.inject_register(sel, swizzle, reg, map);
|
||||
}
|
||||
|
||||
int EmitInstruction::remap_atomic_base(int base)
|
||||
{
|
||||
return m_proc.remap_atomic_base(base);
|
||||
}
|
||||
|
||||
void EmitInstruction::set_has_txs_cube_array_comp()
|
||||
{
|
||||
m_proc.sh_info().has_txq_cube_array_z_comp = 1;
|
||||
}
|
||||
|
||||
const std::set<AluModifiers> EmitInstruction::empty = {};
|
||||
const std::set<AluModifiers> EmitInstruction::write = {alu_write};
|
||||
const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};
|
||||
const std::set<AluModifiers> EmitInstruction::last = {alu_last_instr};
|
||||
|
||||
}
|
||||
|
@ -1,102 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef EMITINSTRUCTION_H
|
||||
#define EMITINSTRUCTION_H
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "sfn_defines.h"
|
||||
#include "sfn_value.h"
|
||||
#include "sfn_instruction_alu.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class ShaderFromNirProcessor;
|
||||
|
||||
class EmitInstruction
|
||||
{
|
||||
public:
|
||||
EmitInstruction(ShaderFromNirProcessor& processor);
|
||||
virtual ~EmitInstruction();
|
||||
bool emit(nir_instr* instr);
|
||||
|
||||
static const std::set<AluModifiers> empty;
|
||||
static const std::set<AluModifiers> write;
|
||||
static const std::set<AluModifiers> last_write;
|
||||
static const std::set<AluModifiers> last;
|
||||
|
||||
protected:
|
||||
virtual bool do_emit(nir_instr* instr) = 0;
|
||||
|
||||
// forwards from ValuePool
|
||||
PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
|
||||
PValue from_nir(const nir_src& v, unsigned component);
|
||||
PValue from_nir(const nir_alu_src& v, unsigned component);
|
||||
PValue from_nir(const nir_tex_src& v, unsigned component);
|
||||
PValue from_nir(const nir_alu_dest& v, unsigned component);
|
||||
PValue from_nir(const nir_dest& v, unsigned component);
|
||||
|
||||
PValue create_register_from_nir_src(const nir_src& src, unsigned comp);
|
||||
|
||||
PGPRValue get_temp_register(int channel = -1);
|
||||
GPRVector get_temp_vec4(const GPRVector::Swizzle& swizzle = {0,1,2,3});
|
||||
|
||||
// forwards from ShaderFromNirProcessor
|
||||
void emit_instruction(Instruction *ir);
|
||||
void emit_instruction(AluInstruction *ir);
|
||||
bool emit_instruction(EAluOp opcode, PValue dest,
|
||||
std::vector<PValue> src0,
|
||||
const std::set<AluModifiers>& m_flags);
|
||||
bool use_legacy_math_rules(void);
|
||||
|
||||
PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
|
||||
GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
|
||||
const GPRVector::Swizzle& swizzle, bool match = false);
|
||||
|
||||
const nir_variable *get_deref_location(const nir_src& v) const;
|
||||
|
||||
enum amd_gfx_level get_chip_class(void) const;
|
||||
|
||||
PValue literal(uint32_t value);
|
||||
|
||||
GPRVector vec_from_nir(const nir_dest& dst, int num_components);
|
||||
|
||||
bool inject_register(unsigned sel, unsigned swizzle,
|
||||
const PValue& reg, bool map);
|
||||
|
||||
int remap_atomic_base(int base);
|
||||
|
||||
void set_has_txs_cube_array_comp();
|
||||
private:
|
||||
|
||||
ShaderFromNirProcessor& m_proc;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif // EMITINSTRUCTION_H
|
@ -1,741 +0,0 @@
|
||||
#include "sfn_emitssboinstruction.h"
|
||||
|
||||
#include "sfn_instruction_fetch.h"
|
||||
#include "sfn_instruction_gds.h"
|
||||
#include "sfn_instruction_misc.h"
|
||||
#include "sfn_instruction_tex.h"
|
||||
#include "../r600_pipe.h"
|
||||
#include "../r600_asm.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
|
||||
|
||||
/* Start without a pending RAT return address request and with an image
 * offset of zero; both can be changed later through the setters. */
EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor)
   : EmitInstruction(processor)
   , m_require_rat_return_address(false)
   , m_ssbo_image_offset(0)
{
}
|
||||
|
||||
void EmitSSBOInstruction::set_ssbo_offset(int offset)
|
||||
{
|
||||
m_ssbo_image_offset = offset;
|
||||
}
|
||||
|
||||
|
||||
void EmitSSBOInstruction::set_require_rat_return_address()
|
||||
{
|
||||
m_require_rat_return_address = true;
|
||||
}
|
||||
|
||||
bool
|
||||
EmitSSBOInstruction::load_rat_return_address()
|
||||
{
|
||||
if (m_require_rat_return_address) {
|
||||
m_rat_return_address = get_temp_vec4();
|
||||
emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
|
||||
emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
|
||||
emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
|
||||
literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
|
||||
emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
|
||||
m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
|
||||
{alu_write, alu_last_instr}));
|
||||
m_require_rat_return_address = false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Dispatch an intrinsic to the matching emitter. `instr` is converted with
 * nir_instr_as_intrinsic() without a prior type check. Returns false for
 * intrinsics this class does not handle, otherwise the result of the
 * selected emitter. */
bool EmitSSBOInstruction::do_emit(nir_instr* instr)
{
   const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   switch (intr->intrinsic) {
   /* SSBO access */
   case nir_intrinsic_load_ssbo:
      return emit_load_ssbo(intr);
   case nir_intrinsic_store_ssbo:
      return emit_store_ssbo(intr);
   case nir_intrinsic_get_ssbo_size:
      return emit_buffer_size(intr);

   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return emit_ssbo_atomic_op(intr);

   /* Image access: loads and atomics share one emitter */
   case nir_intrinsic_image_store:
      return emit_image_store(intr);
   case nir_intrinsic_image_size:
      return emit_image_size(intr);

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
      return emit_image_load(intr);

   /* Atomic counters */
   case nir_intrinsic_atomic_counter_inc:
      return emit_atomic_inc(intr);
   case nir_intrinsic_atomic_counter_pre_dec:
      return emit_atomic_pre_dec(intr);

   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
      return emit_unary_atomic(intr);

   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
      return emit_atomic(intr);

   /* Barriers */
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_group_memory_barrier:
      return make_stores_ack_and_waitack();

   default:
      return false;
   }
}
|
||||
|
||||
bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
|
||||
{
|
||||
bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
|
||||
|
||||
ESDOp op = read_result ? get_opcode(instr->intrinsic) :
|
||||
get_opcode_wo(instr->intrinsic);
|
||||
|
||||
if (DS_OP_INVALID == op)
|
||||
return false;
|
||||
|
||||
|
||||
|
||||
GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
|
||||
|
||||
int base = remap_atomic_base(nir_intrinsic_base(instr));
|
||||
|
||||
PValue uav_id = from_nir(instr->src[0], 0);
|
||||
|
||||
PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
|
||||
|
||||
GDSInstr *ir = nullptr;
|
||||
if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) {
|
||||
PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
|
||||
ir = new GDSInstr(op, dest, value, value2, uav_id, base);
|
||||
} else {
|
||||
ir = new GDSInstr(op, dest, value, uav_id, base);
|
||||
}
|
||||
|
||||
emit_instruction(ir);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
|
||||
{
|
||||
bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
|
||||
|
||||
ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);
|
||||
|
||||
if (DS_OP_INVALID == op)
|
||||
return false;
|
||||
|
||||
GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
|
||||
|
||||
PValue uav_id = from_nir(instr->src[0], 0);
|
||||
|
||||
auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
|
||||
|
||||
emit_instruction(ir);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Map an atomic counter intrinsic to the GDS opcode that returns a result.
 * pre_dec and every other intrinsic map to DS_OP_INVALID. */
ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_read:      return DS_OP_READ_RET;
   case nir_intrinsic_atomic_counter_inc:       return DS_OP_INC_RET;
   case nir_intrinsic_atomic_counter_post_dec:  return DS_OP_DEC_RET;
   case nir_intrinsic_atomic_counter_add:       return DS_OP_ADD_RET;
   case nir_intrinsic_atomic_counter_and:       return DS_OP_AND_RET;
   case nir_intrinsic_atomic_counter_or:        return DS_OP_OR_RET;
   case nir_intrinsic_atomic_counter_xor:       return DS_OP_XOR_RET;
   case nir_intrinsic_atomic_counter_min:       return DS_OP_MIN_UINT_RET;
   case nir_intrinsic_atomic_counter_max:       return DS_OP_MAX_UINT_RET;
   case nir_intrinsic_atomic_counter_exchange:  return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_comp_swap: return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:   /* no opcode here, handled by emit_atomic_pre_dec() */
   default:
      return DS_OP_INVALID;
   }
}
|
||||
|
||||
/* Map an atomic counter intrinsic to the GDS opcode used when the result is
 * not read. Exchange and comp_swap still map to their *_RET opcodes; read,
 * pre_dec and all other intrinsics map to DS_OP_INVALID. */
ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_inc:       return DS_OP_INC;
   case nir_intrinsic_atomic_counter_post_dec:  return DS_OP_DEC;
   case nir_intrinsic_atomic_counter_add:       return DS_OP_ADD;
   case nir_intrinsic_atomic_counter_and:       return DS_OP_AND;
   case nir_intrinsic_atomic_counter_or:        return DS_OP_OR;
   case nir_intrinsic_atomic_counter_xor:       return DS_OP_XOR;
   case nir_intrinsic_atomic_counter_min:       return DS_OP_MIN_UINT;
   case nir_intrinsic_atomic_counter_max:       return DS_OP_MAX_UINT;
   /* these two keep the returning opcode even in the write-only table */
   case nir_intrinsic_atomic_counter_exchange:  return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_comp_swap: return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}
|
||||
|
||||
/* Map an SSBO/image atomic (or image load) intrinsic to the returning RAT
 * opcode. `format` only matters for comp_swap, where it selects between the
 * float and the integer compare-exchange. Any other intrinsic is treated as
 * unreachable. */
RatInstruction::ERatOp
EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
{
   switch (opcode) {
   case nir_intrinsic_image_load:
      return RatInstruction::NOP_RTN;

   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return RatInstruction::ADD_RTN;

   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return RatInstruction::AND_RTN;

   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return RatInstruction::OR_RTN;

   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor:
      return RatInstruction::XOR_RTN;

   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imin:
      return RatInstruction::MIN_INT_RTN;

   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imax:
      return RatInstruction::MAX_INT_RTN;

   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umin:
      return RatInstruction::MIN_UINT_RTN;

   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umax:
      return RatInstruction::MAX_UINT_RTN;

   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange:
      return RatInstruction::XCHG_RTN;

   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return util_format_is_float(format) ? RatInstruction::CMPXCHG_FLT_RTN
                                          : RatInstruction::CMPXCHG_INT_RTN;

   default:
      unreachable("Unsupported RAT instruction");
   }
}
|
||||
|
||||
/* Map an SSBO/image atomic intrinsic to the RAT opcode without return
 * value. There is no entry for exchange or image load here; those and all
 * other intrinsics are treated as unreachable. `format` selects the float
 * or integer variant of compare-exchange. */
RatInstruction::ERatOp
EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const
{
   switch (opcode) {
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return RatInstruction::ADD;

   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return RatInstruction::AND;

   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return RatInstruction::OR;

   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor:
      return RatInstruction::XOR;

   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imin:
      return RatInstruction::MIN_INT;

   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imax:
      return RatInstruction::MAX_INT;

   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umin:
      return RatInstruction::MIN_UINT;

   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umax:
      return RatInstruction::MAX_UINT;

   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return util_format_is_float(format) ? RatInstruction::CMPXCHG_FLT
                                          : RatInstruction::CMPXCHG_INT;

   default:
      unreachable("Unsupported WO RAT instruction");
   }
}
|
||||
|
||||
bool EmitSSBOInstruction::load_atomic_inc_limits()
|
||||
{
|
||||
m_atomic_update = get_temp_register();
|
||||
m_atomic_update->set_keep_alive();
|
||||
emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
|
||||
{alu_write, alu_last_instr}));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
|
||||
{
|
||||
bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
|
||||
PValue uav_id = from_nir(instr->src[0], 0);
|
||||
GPRVector dest = read_result ? make_dest(instr): GPRVector(0, {7,7,7,7});
|
||||
auto ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest,
|
||||
m_atomic_update, uav_id,
|
||||
remap_atomic_base(nir_intrinsic_base(instr)));
|
||||
emit_instruction(ir);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
|
||||
{
|
||||
GPRVector dest = make_dest(instr);
|
||||
|
||||
PValue uav_id = from_nir(instr->src[0], 0);
|
||||
|
||||
auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
|
||||
remap_atomic_base(nir_intrinsic_base(instr)));
|
||||
emit_instruction(ir);
|
||||
|
||||
emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
|
||||
{
|
||||
GPRVector dest = make_dest(instr);
|
||||
|
||||
/** src0 not used, should be some offset */
|
||||
auto addr = from_nir(instr->src[1], 0);
|
||||
PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
|
||||
|
||||
/** Should be lowered in nir */
|
||||
emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
|
||||
{alu_write, alu_last_instr}));
|
||||
|
||||
const EVTXDataFormat formats[4] = {
|
||||
fmt_32,
|
||||
fmt_32_32,
|
||||
fmt_32_32_32,
|
||||
fmt_32_32_32_32
|
||||
};
|
||||
|
||||
const std::array<int,4> dest_swt[4] = {
|
||||
{0,7,7,7},
|
||||
{0,1,7,7},
|
||||
{0,1,2,7},
|
||||
{0,1,2,3}
|
||||
};
|
||||
|
||||
/* TODO fix resource index */
|
||||
auto ir = new FetchInstruction(dest, addr_temp,
|
||||
R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset
|
||||
, from_nir(instr->src[0], 0),
|
||||
formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
|
||||
ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
|
||||
ir->set_flag(vtx_use_tc);
|
||||
|
||||
emit_instruction(ir);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
|
||||
{
|
||||
|
||||
GPRVector::Swizzle swz = {7,7,7,7};
|
||||
for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i)
|
||||
swz[i] = i;
|
||||
|
||||
auto orig_addr = from_nir(instr->src[2], 0);
|
||||
|
||||
GPRVector addr_vec = get_temp_vec4({0,1,2,7});
|
||||
|
||||
auto temp2 = get_temp_vec4();
|
||||
|
||||
auto rat_id = from_nir(instr->src[1], 0);
|
||||
|
||||
emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
|
||||
PValue(new LiteralValue(2)), write));
|
||||
emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
|
||||
emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
|
||||
|
||||
|
||||
auto values = vec_from_nir_with_fetch_constant(instr->src[0],
|
||||
(1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
|
||||
|
||||
auto cf_op = cf_mem_rat;
|
||||
//auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
|
||||
auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
|
||||
values, addr_vec, m_ssbo_image_offset, rat_id, 1,
|
||||
1, 0, false);
|
||||
emit_instruction(store);
|
||||
m_store_ops.push_back(store);
|
||||
|
||||
for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
|
||||
emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN ? last_write : write));
|
||||
emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
|
||||
{addr_vec.reg_i(0), Value::one_i}, last_write));
|
||||
store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
|
||||
temp2, addr_vec, m_ssbo_image_offset, rat_id, 1,
|
||||
1, 0, false);
|
||||
emit_instruction(store);
|
||||
if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
|
||||
m_store_ops.push_back(store);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
|
||||
{
|
||||
int imageid = 0;
|
||||
PValue image_offset;
|
||||
|
||||
if (nir_src_is_const(intrin->src[0]))
|
||||
imageid = nir_src_as_int(intrin->src[0]);
|
||||
else
|
||||
image_offset = from_nir(intrin->src[0], 0);
|
||||
|
||||
auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
|
||||
auto undef = from_nir(intrin->src[2], 0);
|
||||
auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3});
|
||||
auto unknown = from_nir(intrin->src[4], 0);
|
||||
|
||||
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
|
||||
nir_intrinsic_image_array(intrin)) {
|
||||
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
|
||||
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
|
||||
}
|
||||
|
||||
auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
|
||||
auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
|
||||
image_offset, 1, 0xf, 0, false);
|
||||
|
||||
//if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
|
||||
m_store_ops.push_back(store);
|
||||
|
||||
emit_instruction(store);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
|
||||
{
|
||||
int imageid = 0;
|
||||
PValue image_offset;
|
||||
|
||||
if (nir_src_is_const(intrin->src[0]))
|
||||
imageid = nir_src_as_int(intrin->src[0]);
|
||||
else
|
||||
image_offset = from_nir(intrin->src[0], 0);
|
||||
|
||||
bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
|
||||
auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
|
||||
get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
|
||||
|
||||
auto coord_orig = from_nir(intrin->src[1], 0, 0);
|
||||
auto coord = get_temp_register(0);
|
||||
|
||||
emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
|
||||
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
|
||||
from_nir(intrin->src[3], 0), {alu_write}));
|
||||
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
|
||||
from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
|
||||
} else {
|
||||
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
|
||||
from_nir(intrin->src[2], 0), {alu_write}));
|
||||
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
|
||||
}
|
||||
|
||||
|
||||
GPRVector out_vec({coord, coord, coord, coord});
|
||||
|
||||
auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
|
||||
image_offset, 1, 0xf, 0, true);
|
||||
emit_instruction(atomic);
|
||||
|
||||
if (read_result) {
|
||||
emit_instruction(new WaitAck(0));
|
||||
|
||||
GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
|
||||
auto fetch = new FetchInstruction(vc_fetch,
|
||||
no_index_offset,
|
||||
fmt_32,
|
||||
vtx_nf_int,
|
||||
vtx_es_none,
|
||||
m_rat_return_address.reg_i(1),
|
||||
dest,
|
||||
0,
|
||||
false,
|
||||
0xf,
|
||||
R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
|
||||
0,
|
||||
bim_none,
|
||||
false,
|
||||
false,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
image_offset,
|
||||
{0,7,7,7});
|
||||
fetch->set_flag(vtx_srf_mode);
|
||||
fetch->set_flag(vtx_use_tc);
|
||||
fetch->set_flag(vtx_vpm);
|
||||
emit_instruction(fetch);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
bool
|
||||
EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
|
||||
{
|
||||
int imageid = 0;
|
||||
PValue image_offset;
|
||||
|
||||
if (nir_src_is_const(intrin->src[0]))
|
||||
imageid = nir_src_as_int(intrin->src[0]);
|
||||
else
|
||||
image_offset = from_nir(intrin->src[0], 0);
|
||||
|
||||
bool read_retvalue = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
|
||||
auto rat_op = read_retvalue ? get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)):
|
||||
get_rat_opcode_wo(intrin->intrinsic, nir_intrinsic_format(intrin));
|
||||
|
||||
GPRVector::Swizzle swz = {0,1,2,3};
|
||||
auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
|
||||
|
||||
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
|
||||
nir_intrinsic_image_array(intrin)) {
|
||||
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
|
||||
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
|
||||
}
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_image_load) {
|
||||
if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
|
||||
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
|
||||
from_nir(intrin->src[4], 0), {alu_write}));
|
||||
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
|
||||
from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
|
||||
} else {
|
||||
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
|
||||
from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
|
||||
}
|
||||
}
|
||||
auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
|
||||
|
||||
auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
|
||||
image_offset, 1, 0xf, 0, true);
|
||||
emit_instruction(store);
|
||||
return read_retvalue ? fetch_return_value(intrin) : true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
|
||||
{
|
||||
emit_instruction(new WaitAck(0));
|
||||
|
||||
pipe_format format = nir_intrinsic_format(intrin);
|
||||
unsigned fmt = fmt_32;
|
||||
unsigned num_format = 0;
|
||||
unsigned format_comp = 0;
|
||||
unsigned endian = 0;
|
||||
|
||||
int imageid = 0;
|
||||
PValue image_offset;
|
||||
|
||||
if (nir_src_is_const(intrin->src[0]))
|
||||
imageid = nir_src_as_int(intrin->src[0]);
|
||||
else
|
||||
image_offset = from_nir(intrin->src[0], 0);
|
||||
|
||||
r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
|
||||
|
||||
GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
|
||||
|
||||
auto fetch = new FetchInstruction(vc_fetch,
|
||||
no_index_offset,
|
||||
(EVTXDataFormat)fmt,
|
||||
(EVFetchNumFormat)num_format,
|
||||
(EVFetchEndianSwap)endian,
|
||||
m_rat_return_address.reg_i(1),
|
||||
dest,
|
||||
0,
|
||||
false,
|
||||
0x3,
|
||||
R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
|
||||
0,
|
||||
bim_none,
|
||||
false,
|
||||
false,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
image_offset, {0,1,2,3});
|
||||
fetch->set_flag(vtx_srf_mode);
|
||||
fetch->set_flag(vtx_use_tc);
|
||||
fetch->set_flag(vtx_vpm);
|
||||
if (format_comp)
|
||||
fetch->set_flag(vtx_format_comp_signed);
|
||||
|
||||
emit_instruction(fetch);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
|
||||
{
|
||||
GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
|
||||
GPRVector src{0,{4,4,4,4}};
|
||||
|
||||
assert(nir_src_as_uint(intrin->src[1]) == 0);
|
||||
|
||||
auto const_offset = nir_src_as_const_value(intrin->src[0]);
|
||||
auto dyn_offset = PValue();
|
||||
int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
|
||||
if (const_offset)
|
||||
res_id += const_offset[0].u32;
|
||||
else
|
||||
dyn_offset = from_nir(intrin->src[0], 0);
|
||||
|
||||
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
|
||||
emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
|
||||
res_id,
|
||||
bim_none));
|
||||
return true;
|
||||
} else {
|
||||
emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
|
||||
0/* ?? */,
|
||||
res_id, dyn_offset));
|
||||
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
|
||||
nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
|
||||
/* Need to load the layers from a const buffer */
|
||||
|
||||
set_has_txs_cube_array_comp();
|
||||
|
||||
if (const_offset) {
|
||||
unsigned lookup_resid = const_offset[0].u32;
|
||||
emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
|
||||
PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
|
||||
R600_BUFFER_INFO_CONST_BUFFER)),
|
||||
EmitInstruction::last_write));
|
||||
} else {
|
||||
/* If the adressing is indirect we have to get the z-value by using a binary search */
|
||||
GPRVector trgt;
|
||||
GPRVector help;
|
||||
|
||||
auto addr = help.reg_i(0);
|
||||
auto comp = help.reg_i(1);
|
||||
auto low_bit = help.reg_i(2);
|
||||
auto high_bit = help.reg_i(3);
|
||||
|
||||
emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0),
|
||||
literal(2), EmitInstruction::write));
|
||||
emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0),
|
||||
literal(3), EmitInstruction::last_write));
|
||||
|
||||
emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL,
|
||||
R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none));
|
||||
|
||||
emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2),
|
||||
EmitInstruction::write));
|
||||
emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3),
|
||||
EmitInstruction::last_write));
|
||||
|
||||
emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write));
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
|
||||
{
|
||||
std::array<PValue,4> dst_elms;
|
||||
|
||||
|
||||
for (uint16_t i = 0; i < 4; ++i) {
|
||||
dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
|
||||
}
|
||||
|
||||
GPRVector dst(dst_elms);
|
||||
GPRVector src(0,{4,4,4,4});
|
||||
|
||||
auto const_offset = nir_src_as_const_value(intr->src[0]);
|
||||
auto dyn_offset = PValue();
|
||||
int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
|
||||
if (const_offset)
|
||||
res_id += const_offset[0].u32;
|
||||
else
|
||||
assert(0 && "dynamic buffer offset not supported in buffer_size");
|
||||
|
||||
emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
|
||||
res_id, bim_none));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EmitSSBOInstruction::make_stores_ack_and_waitack()
|
||||
{
|
||||
for (auto&& store: m_store_ops)
|
||||
store->set_ack();
|
||||
|
||||
if (!m_store_ops.empty())
|
||||
emit_instruction(new WaitAck(0));
|
||||
|
||||
m_store_ops.clear();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Build a four-channel vector from components 0..3 of the destination of
 * the given intrinsic. */
GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
{
   GPRVector::Values channels;
   for (int chan = 0; chan < 4; ++chan)
      channels[chan] = from_nir(ir->dest, chan);

   return GPRVector(channels);
}
|
||||
|
||||
}
|
@ -1,60 +0,0 @@
|
||||
#ifndef SFN_EMITSSBOINSTRUCTION_H
|
||||
#define SFN_EMITSSBOINSTRUCTION_H
|
||||
|
||||
#include "sfn_emitinstruction.h"
|
||||
#include "sfn_instruction_gds.h"
|
||||
#include "sfn_value_gpr.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class EmitSSBOInstruction: public EmitInstruction {
|
||||
public:
|
||||
EmitSSBOInstruction(ShaderFromNirProcessor& processor);
|
||||
|
||||
void set_ssbo_offset(int offset);
|
||||
|
||||
void set_require_rat_return_address();
|
||||
bool load_rat_return_address();
|
||||
bool load_atomic_inc_limits();
|
||||
|
||||
private:
|
||||
bool do_emit(nir_instr *instr);
|
||||
|
||||
bool emit_atomic(const nir_intrinsic_instr* instr);
|
||||
bool emit_unary_atomic(const nir_intrinsic_instr* instr);
|
||||
bool emit_atomic_inc(const nir_intrinsic_instr* instr);
|
||||
bool emit_atomic_pre_dec(const nir_intrinsic_instr* instr);
|
||||
|
||||
bool emit_load_ssbo(const nir_intrinsic_instr* instr);
|
||||
bool emit_store_ssbo(const nir_intrinsic_instr* instr);
|
||||
|
||||
bool emit_image_size(const nir_intrinsic_instr *intrin);
|
||||
bool emit_image_load(const nir_intrinsic_instr *intrin);
|
||||
bool emit_image_store(const nir_intrinsic_instr *intrin);
|
||||
bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin);
|
||||
bool emit_buffer_size(const nir_intrinsic_instr *intrin);
|
||||
|
||||
bool fetch_return_value(const nir_intrinsic_instr *intrin);
|
||||
|
||||
bool make_stores_ack_and_waitack();
|
||||
|
||||
ESDOp get_opcode(nir_intrinsic_op opcode) const;
|
||||
ESDOp get_opcode_wo(const nir_intrinsic_op opcode) const;
|
||||
|
||||
RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const;
|
||||
RatInstruction::ERatOp get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const;
|
||||
|
||||
|
||||
GPRVector make_dest(const nir_intrinsic_instr* instr);
|
||||
|
||||
PGPRValue m_atomic_update;
|
||||
|
||||
bool m_require_rat_return_address;
|
||||
GPRVector m_rat_return_address;
|
||||
int m_ssbo_image_offset;
|
||||
std::vector<RatInstruction *> m_store_ops;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_EMITSSBOINSTRUCTION_H
|
@ -1,671 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_emittexinstruction.h"
|
||||
#include "sfn_shader_base.h"
|
||||
#include "sfn_instruction_fetch.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Construct the texture instruction emitter; the processor is only handed
 * on to the EmitInstruction base class. */
EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor)
   : EmitInstruction(processor)
{
}
|
||||
|
||||
/* Dispatch a NIR texture instruction to the emitter for its texture op.
 *
 * Buffer samplers only support txf and txs. Returns false when the inputs
 * cannot be collected or the texture op is not handled, otherwise the
 * result of the selected emitter. */
bool EmitTexInstruction::do_emit(nir_instr* instr)
{
   nir_tex_instr *tex = nir_instr_as_tex(instr);

   TexInputs inputs;
   if (!get_inputs(*tex, inputs))
      return false;

   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
      if (tex->op == nir_texop_txf)
         return emit_buf_txf(tex, inputs);
      if (tex->op == nir_texop_txs)
         return emit_tex_txs(tex, inputs, {0,1,2,3});
      return false;
   }

   switch (tex->op) {
   case nir_texop_tex:
      return emit_tex_tex(tex, inputs);
   case nir_texop_txf:
      return emit_tex_txf(tex, inputs);
   case nir_texop_txb:
      return emit_tex_txb(tex, inputs);
   case nir_texop_txl:
      return emit_tex_txl(tex, inputs);
   case nir_texop_txd:
      return emit_tex_txd(tex, inputs);
   case nir_texop_txs:
      return emit_tex_txs(tex, inputs, {0,1,2,3});
   case nir_texop_lod:
      return emit_tex_lod(tex, inputs);
   case nir_texop_tg4:
      return emit_tex_tg4(tex, inputs);
   case nir_texop_txf_ms:
      return emit_tex_txf_ms(tex, inputs);
   case nir_texop_query_levels:
      /* Same instruction as txs, but with a different destination swizzle */
      return emit_tex_txs(tex, inputs, {3,7,7,7});
   case nir_texop_texture_samples:
      return emit_tex_texture_samples(tex, inputs, {3,7,7,7});
   default:
      return false;
   }
}
|
||||
|
||||
/* Emit a txf on a buffer sampler as a vertex fetch.
 *
 * The x coordinate is the fetch source and the resource id is the texture
 * index plus R600_MAX_CONST_BUFFERS. Always returns true. */
bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src)
{
   auto dest = make_dest(*instr);

   const auto resource_id = instr->texture_index + R600_MAX_CONST_BUFFERS;

   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, src.coord.reg_i(0), 0,
                                     resource_id,
                                     src.texture_offset, bim_none);
   fetch->set_flag(vtx_use_const_field);
   emit_instruction(fetch);
   return true;
}
|
||||
|
||||
/* Emit a nir_texop_tex as a sample instruction.
 *
 * For shadow samplers the comparator is moved into the w coordinate and
 * sample_c is used instead of sample. Always returns true. */
bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
   assert(!sampler.indirect);

   const bool is_shadow = instr->is_shadow;
   if (is_shadow)
      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
                                          {alu_last_instr, alu_write}));

   const auto tex_op = is_shadow ? TexInstruction::sample_c : TexInstruction::sample;

   auto dest = make_dest(*instr);
   auto tex_ir = new TexInstruction(tex_op, dest, src.coord, sampler.id,
                                    sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
   if (instr->is_array)
      handle_array_index(*instr, src.coord, tex_ir);

   set_rect_coordinate_flags(instr, tex_ir);
   set_offsets(tex_ir, src.offset);

   emit_instruction(tex_ir);
   return true;
}
|
||||
|
||||
/* Emit a nir_texop_txd: two gradient setup instructions (horizontal from
 * src.ddx, vertical from src.ddy) followed by a sample_g instruction.
 *
 * For shadow samplers the comparator is moved into the w coordinate and
 * sample_c_g is used. Always returns true. */
bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   auto dest = make_dest(*instr);

   /* The gradient setup instructions do not write a result */
   GPRVector no_dest(0,{7,7,7,7});

   const bool is_shadow = instr->is_shadow;
   if (is_shadow)
      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
                                          {alu_last_instr, alu_write}));

   const auto tex_op = is_shadow ? TexInstruction::sample_c_g : TexInstruction::sample_g;

   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
   assert(!sampler.indirect && "Indirect sampler selection not yet supported");

   TexInstruction *grad_h = new TexInstruction(TexInstruction::set_gradient_h, no_dest, src.ddx,
                                               sampler.id,
                                               sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
   grad_h->set_dest_swizzle({7,7,7,7});

   TexInstruction *grad_v = new TexInstruction(TexInstruction::set_gradient_v, no_dest, src.ddy,
                                               sampler.id,
                                               sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
   grad_v->set_dest_swizzle({7,7,7,7});

   TexInstruction *sample = new TexInstruction(tex_op, dest, src.coord, sampler.id,
                                               sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
   if (instr->is_array)
      handle_array_index(*instr, src.coord, sample);

   set_rect_coordinate_flags(instr, sample);
   set_offsets(sample, src.offset);

   emit_instruction(grad_h);
   emit_instruction(grad_v);
   emit_instruction(sample);
   return true;
}
|
||||
|
||||
/* Emit a nir_texop_txf as an ld instruction.
 *
 * The LOD is placed in the w coordinate: with a move when it is in a
 * different register than the coordinates, otherwise by replacing the w
 * element of the coordinate vector. Offsets, if present, are added to the
 * coordinates with integer adds. Always returns true. */
bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   auto dest = make_dest(*instr);

   if (*src.coord.reg_i(3) != *src.lod) {
      if (src.coord.sel() == src.lod->sel())
         src.coord.set_reg_i(3, src.lod);
      else
         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr}));
   }

   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
   assert(!sampler.indirect);

   /* txf doesn't need rounding for the array index, but 1D has the array index
    * in the z component */
   const bool is_1d_array = instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D;
   if (is_1d_array)
      src.coord.set_reg_i(2, src.coord.reg_i(1));

   auto tex_ir = new TexInstruction(TexInstruction::ld, dest, src.coord,
                                    sampler.id,
                                    sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);

   if (src.offset) {
      assert(src.offset->is_ssa);

      /* One add per offset component, the last one gets alu_last_instr */
      AluInstruction *last_add = nullptr;
      for (unsigned chan = 0; chan < src.offset->ssa->num_components; ++chan) {
         last_add = new AluInstruction(op2_add_int, src.coord.reg_i(chan),
                                       {src.coord.reg_i(chan), from_nir(*src.offset, chan, chan)}, {alu_write});
         emit_instruction(last_add);
      }
      if (last_add != nullptr)
         last_add->set_flag(alu_last_instr);
   }

   if (instr->is_array)
      tex_ir->set_flag(TexInstruction::z_unnormalized);

   emit_instruction(tex_ir);
   return true;
}
|
||||
|
||||
/* Emit a nir_texop_lod as a get_tex_lod instruction with the destination
 * swizzle {1,0,7,7}. Always returns true. */
bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src)
{
   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
   assert(!sampler.indirect && "Indirect sampler selection not yet supported");

   auto dest = make_dest(*instr);
   auto lod_query = new TexInstruction(TexInstruction::get_tex_lod, dest, src.coord, sampler.id,
                                       sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
   lod_query->set_dest_swizzle({1,0,7,7});
   emit_instruction(lod_query);

   return true;
}
|
||||
|
||||
/* Emit a nir_texop_txl as a sample_l instruction (sample_c_l for shadow
 * samplers).
 *
 * The comparator goes into the z coordinate and the LOD into the w
 * coordinate; each is moved when it is in a different register than the
 * coordinates and otherwise replaces the element of the coordinate
 * vector. Always returns true. */
bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   const bool is_shadow = instr->is_shadow;
   if (is_shadow) {
      if (src.coord.sel() == src.comperator->sel())
         src.coord.set_reg_i(2, src.comperator);
      else
         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
   }
   const auto tex_op = is_shadow ? TexInstruction::sample_c_l : TexInstruction::sample_l;

   if (src.coord.sel() == src.lod->sel())
      src.coord.set_reg_i(3, src.lod);
   else
      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {last_write}));

   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
   assert(!sampler.indirect && "Indirect sampler selection not yet supported");

   auto dest = make_dest(*instr);
   auto tex_ir = new TexInstruction(tex_op, dest, src.coord, sampler.id,
                                    sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);

   if (instr->is_array)
      handle_array_index(*instr, src.coord, tex_ir);

   set_rect_coordinate_flags(instr, tex_ir);
   set_offsets(tex_ir, src.offset);

   emit_instruction(tex_ir);
   return true;
}
|
||||
|
||||
/* Emit a nir_texop_txb as a sample_lb instruction (sample_c_lb for shadow
 * samplers).
 *
 * The comparator goes into the z coordinate and the bias into the w
 * coordinate; each is moved when it is in a different register than the
 * coordinates and otherwise replaces the element of the coordinate
 * vector. Always returns true. */
bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src)
{
   const bool is_shadow = instr->is_shadow;
   if (is_shadow) {
      if (src.coord.sel() == src.comperator->sel())
         src.coord.set_reg_i(2, src.comperator);
      else
         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
   }
   const auto tex_op = is_shadow ? TexInstruction::sample_c_lb : TexInstruction::sample_lb;

   if (src.coord.sel() == src.bias->sel())
      src.coord.set_reg_i(3, src.bias);
   else
      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, {last_write}));

   /* The texture source is built from the coordinates with this swizzle */
   std::array<uint8_t, 4> coord_swizzle = {0,1,2,3};
   GPRVector tex_src(src.coord, coord_swizzle);

   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
   assert(!sampler.indirect && "Indirect sampler selection not yet supported");

   auto dest = make_dest(*instr);
   auto tex_ir = new TexInstruction(tex_op, dest, tex_src, sampler.id,
                                    sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
   if (instr->is_array)
      handle_array_index(*instr, tex_src, tex_ir);

   set_rect_coordinate_flags(instr, tex_ir);
   set_offsets(tex_ir, src.offset);

   emit_instruction(tex_ir);
   return true;
}
|
||||
|
||||
/* Emit a size query.
 *
 * Buffer samplers use a fetch instruction; everything else uses a
 * get_resinfo texture instruction whose four source channels are all the
 * LOD and whose destination swizzle is dest_swz. For cube arrays the z
 * component is additionally read from the buffer-info constant buffer.
 *
 * Destination channels beyond the number of SSA components are requested
 * from from_nir() with component index 7. Always returns true. */
bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src,
                                      const std::array<int,4>& dest_swz)
{
   std::array<PValue,4> dest_channels;
   for (int chan = 0; chan < 4; ++chan) {
      const int component = (chan < instr->dest.ssa.num_components) ? chan : 7;
      dest_channels[chan] = from_nir(instr->dest, component);
   }

   GPRVector dest(dest_channels);

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
      emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
                                            instr->sampler_index + R600_MAX_CONST_BUFFERS,
                                            bim_none));
      return true;
   }

   std::array<PValue,4> lod_channels;
   lod_channels.fill(tex_src.lod);
   GPRVector lod_vec(lod_channels);

   auto sampler = get_sampler_id(instr->sampler_index, tex_src.sampler_deref);
   assert(!sampler.indirect && "Indirect sampler selection not yet supported");

   auto resinfo = new TexInstruction(TexInstruction::get_resinfo, dest, lod_vec,
                                     sampler.id,
                                     sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
   resinfo->set_dest_swizzle(dest_swz);
   emit_instruction(resinfo);

   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      PValue layer_count(new UniformValue(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2),
                                          sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER));

      auto mov = new AluInstruction(op1_mov, dest[2], layer_count, {last_write});
      emit_instruction(mov);
      set_has_txs_cube_array_comp();
   }

   return true;
}
|
||||
|
||||
/* Emit a get_nsampled texture instruction for nir_texop_texture_samples.
 *
 * The resource id is R600_MAX_CONST_BUFFERS plus the sampler index, the
 * sampler id passed to the instruction is 0, and dest_swz is applied as
 * the destination swizzle. Always returns true. */
bool EmitTexInstruction::emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
                                                  const std::array<int, 4> &dest_swz)
{
   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
   GPRVector help{0,{4,4,4,4}};

   int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index;

   auto ir = new TexInstruction(TexInstruction::get_nsampled, dest, help,
                                0, res_id, src.sampler_offset);
   ir->set_dest_swizzle(dest_swz);
   emit_instruction(ir);
   return true;
}
|
||||
|
||||
/* Emit a nir_texop_tg4 as a gather4 instruction.
 *
 * Shadow samplers move the comparator into the w coordinate and use the
 * gather4_c variants. Constant offsets are applied with set_offsets() on
 * the gather instruction itself; non-constant offsets select the _o
 * variants and are passed in by a separate set_offsets texture instruction
 * that is emitted directly before the gather. Always returns true. */
bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   TexInstruction *offset_setup = nullptr;

   auto tex_op = TexInstruction::gather4;

   if (instr->is_shadow) {
      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
                                          {alu_last_instr, alu_write}));
      tex_op = TexInstruction::gather4_c;
   }

   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
   assert(!sampler.indirect && "Indirect sampler selection not yet supported");

   bool literal_offset = false;
   if (src.offset) {
      literal_offset = nir_src_as_const_value(*src.offset) != nullptr;
      r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
                       (literal_offset ? "literal" : "varying") <<
                       "\n";

      if (!literal_offset) {
         GPRVector::Swizzle offset_swizzle = {4,4,4,4};
         for (unsigned chan = 0; chan < instr->coord_components; ++chan)
            offset_swizzle[chan] = chan;

         /* The array index does not get an offset */
         const int noffsets = instr->coord_components - (instr->is_array ? 1 : 0);

         auto offsets = vec_from_nir_with_fetch_constant(*src.offset,
                                                         ( 1 << noffsets) - 1,
                                                         offset_swizzle);
         GPRVector no_dest(0, {7,7,7,7});
         if (tex_op == TexInstruction::gather4_c)
            tex_op = TexInstruction::gather4_c_o;
         else
            tex_op = TexInstruction::gather4_o;

         offset_setup = new TexInstruction(TexInstruction::set_offsets, no_dest,
                                           offsets, sampler.id,
                                           sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
         offset_setup->set_dest_swizzle({7,7,7,7});
      }
   }

   auto dest = make_dest(*instr);
   auto gather = new TexInstruction(tex_op, dest, src.coord, sampler.id,
                                    sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);

   /* pre CAYMAN needs swizzle */
   if (get_chip_class() != CAYMAN)
      gather->set_dest_swizzle({1,2,0,3});
   gather->set_gather_comp(instr->component);

   if (instr->is_array)
      handle_array_index(*instr, src.coord, gather);

   if (literal_offset) {
      r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
      set_offsets(gather, src.offset);
   }

   set_rect_coordinate_flags(instr, gather);

   if (offset_setup != nullptr)
      emit_instruction(offset_setup);

   emit_instruction(gather);
   return true;
}
|
||||
|
||||
/* Emit a texel fetch from a multisample texture.
 *
 * Sequence, in emission order:
 *  1. copy the requested ms_index into coord.w,
 *  2. issue an LD with instruction mode 1 and unnormalized coordinates into
 *     a temporary register,
 *  3. unless ms_index is the literal 0, shift the temporary right by
 *     4 * ms_index bits,
 *  4. mask the temporary with 15 and store the result in coord.w,
 *  5. optionally add the texel offsets to the coordinates,
 *  6. issue the final LD into the NIR destination.
 *
 * NOTE(review): steps 2-4 look like a lookup of the sample location in a
 * packed 4-bit-per-sample table - confirm against the hardware docs.
 *
 * Always returns true.
 */
bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src)
{
   assert(instr->src[0].src.is_ssa);

   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;

   /* Temporary that receives the result of the first fetch; only the
    * channel of the allocated register is written, all others are masked. */
   PGPRValue sample_reg = get_temp_register();
   const auto sample_chan = sample_reg->chan();

   GPRVector sample_vec(sample_reg->sel(), {7,7,7,7});
   sample_vec.set_reg_i(sample_chan, sample_reg);

   std::array<int,4> sample_swz = {7,7,7,7};
   sample_swz[sample_chan] = 0;

   emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3),
                                       src.ms_index,
                                       {alu_write, alu_last_instr}));

   auto fetch_sample = new TexInstruction(TexInstruction::ld, sample_vec, src.coord,
                                          sampler.id, resource_id,
                                          src.sampler_offset);
   fetch_sample->set_flag(TexInstruction::x_unnormalized);
   fetch_sample->set_flag(TexInstruction::y_unnormalized);
   fetch_sample->set_flag(TexInstruction::z_unnormalized);
   fetch_sample->set_flag(TexInstruction::w_unnormalized);
   fetch_sample->set_inst_mode(1);
   fetch_sample->set_dest_swizzle(sample_swz);

   emit_instruction(fetch_sample);

   /* The shift can be dropped only when the sample index is known to be 0 */
   const bool ms_index_is_zero =
         src.ms_index->type() == Value::literal &&
         static_cast<const LiteralValue&>(*src.ms_index).value() == 0;

   if (!ms_index_is_zero) {
      PValue shift = get_temp_register();

      emit_instruction(new AluInstruction(op2_lshl_int, shift,
                                          src.ms_index, literal(2),
                                          {alu_write, alu_last_instr}));

      emit_instruction(new AluInstruction(op2_lshr_int, sample_reg,
                                          {sample_reg, shift},
                                          {alu_write, alu_last_instr}));
   }

   emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3),
                                       {sample_reg, PValue(new LiteralValue(15))},
                                       {alu_write, alu_last_instr}));

   auto dst = make_dest(*instr);

   /* txf doesn't need rounding for the array index, but 1D has the array
    * index in the z component */
   const bool is_1d_array = instr->is_array &&
                            instr->sampler_dim == GLSL_SAMPLER_DIM_1D;
   if (is_1d_array)
      src.coord.set_reg_i(2, src.coord.reg_i(1));

   auto fetch_texel = new TexInstruction(TexInstruction::ld, dst, src.coord,
                                         sampler.id, resource_id,
                                         src.sampler_offset);

   if (src.offset) {
      assert(src.offset->is_ssa);

      /* One add per offset component; only the last one closes the group */
      AluInstruction *last_add = nullptr;
      for (unsigned c = 0; c < src.offset->ssa->num_components; ++c) {
         last_add = new AluInstruction(op2_add_int, src.coord.reg_i(c),
                                       {src.coord.reg_i(c), from_nir(*src.offset, c, c)},
                                       {alu_write});
         emit_instruction(last_add);
      }
      if (last_add)
         last_add->set_flag(alu_last_instr);
   }

   emit_instruction(fetch_texel);
   return true;
}
|
||||
|
||||
bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src)
|
||||
{
|
||||
sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
|
||||
|
||||
unsigned grad_components = instr.coord_components;
|
||||
if (instr.is_array && !instr.array_is_lowered_cube)
|
||||
--grad_components;
|
||||
|
||||
|
||||
src.offset = nullptr;
|
||||
bool retval = true;
|
||||
for (unsigned i = 0; i < instr.num_srcs; ++i) {
|
||||
switch (instr.src[i].src_type) {
|
||||
case nir_tex_src_bias:
|
||||
src.bias = from_nir(instr.src[i], 0);
|
||||
break;
|
||||
|
||||
case nir_tex_src_coord: {
|
||||
src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src,
|
||||
(1 << instr.coord_components) - 1,
|
||||
{0,1,2,3});
|
||||
} break;
|
||||
case nir_tex_src_comparator:
|
||||
src.comperator = from_nir(instr.src[i], 0);
|
||||
break;
|
||||
case nir_tex_src_ddx: {
|
||||
sfn_log << SfnLog::tex << "Get DDX ";
|
||||
src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src,
|
||||
(1 << grad_components) - 1,
|
||||
swizzle_from_comps(grad_components));
|
||||
sfn_log << SfnLog::tex << src.ddx << "\n";
|
||||
} break;
|
||||
case nir_tex_src_ddy:{
|
||||
sfn_log << SfnLog::tex << "Get DDY ";
|
||||
src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src,
|
||||
(1 << grad_components) - 1,
|
||||
swizzle_from_comps(grad_components));
|
||||
sfn_log << SfnLog::tex << src.ddy << "\n";
|
||||
} break;
|
||||
case nir_tex_src_lod:
|
||||
src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0);
|
||||
break;
|
||||
case nir_tex_src_offset:
|
||||
sfn_log << SfnLog::tex << " -- Find offset\n";
|
||||
src.offset = &instr.src[i].src;
|
||||
break;
|
||||
case nir_tex_src_sampler_deref:
|
||||
src.sampler_deref = get_deref_location(instr.src[i].src);
|
||||
break;
|
||||
case nir_tex_src_texture_deref:
|
||||
src.texture_deref = get_deref_location(instr.src[i].src);
|
||||
break;
|
||||
case nir_tex_src_ms_index:
|
||||
src.ms_index = from_nir(instr.src[i], 0);
|
||||
break;
|
||||
case nir_tex_src_texture_offset:
|
||||
src.texture_offset = from_nir(instr.src[i], 0);
|
||||
break;
|
||||
case nir_tex_src_sampler_offset:
|
||||
src.sampler_offset = from_nir(instr.src[i], 0);
|
||||
break;
|
||||
case nir_tex_src_plane:
|
||||
case nir_tex_src_projector:
|
||||
case nir_tex_src_min_lod:
|
||||
default:
|
||||
sfn_log << SfnLog::tex << "Texture source type " << instr.src[i].src_type << " not supported\n";
|
||||
retval = false;
|
||||
}
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* Build the destination vector for a texture instruction.
 *
 * Channel i of the result maps to component i of the NIR destination as
 * long as the destination has that many components; the remaining
 * channels are requested with component index 7.
 */
GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr)
{
   int ncomp;
   if (instr.dest.is_ssa)
      ncomp = instr.dest.ssa.num_components;
   else
      ncomp = instr.dest.reg.reg->num_components;

   std::array<PValue,4> channels;
   for (int chan = 0; chan < 4; ++chan) {
      const int comp = (chan < ncomp) ? chan : 7;
      channels[chan] = from_nir(instr.dest, comp);
   }
   return GPRVector(channels);
}
|
||||
|
||||
|
||||
/* Build the destination vector for a texture instruction with a swizzle.
 *
 * Channel i of the result maps to component swizzle[i] of the NIR
 * destination; if swizzle[i] is not a valid component of the destination,
 * component index 7 is requested instead.
 */
GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr,
                                        const std::array<int, 4>& swizzle)
{
   int ncomp;
   if (instr.dest.is_ssa)
      ncomp = instr.dest.ssa.num_components;
   else
      ncomp = instr.dest.reg.reg->num_components;

   std::array<PValue,4> channels;
   for (int chan = 0; chan < 4; ++chan) {
      const int wanted = swizzle[chan];
      channels[chan] = from_nir(instr.dest, (wanted < ncomp) ? wanted : 7);
   }
   return GPRVector(channels);
}
|
||||
|
||||
/* Flag the x and y coordinates of @ir as unnormalized when the NIR
 * instruction samples a RECT texture; other sampler dimensions leave @ir
 * untouched. */
void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr,
                                                   TexInstruction* ir) const
{
   if (instr->sampler_dim != GLSL_SAMPLER_DIM_RECT)
      return;

   ir->set_flag(TexInstruction::x_unnormalized);
   ir->set_flag(TexInstruction::y_unnormalized);
}
|
||||
|
||||
/* Copy constant texel offsets into the texture instruction.
 *
 * A null @offset is a no-op. Otherwise the source must be an SSA value
 * with a constant value (both asserted), and each of its components is
 * passed to ir->set_offset() as a signed 32 bit integer.
 */
void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset)
{
   if (offset) {
      assert(offset->is_ssa);

      auto const_values = nir_src_as_const_value(*offset);
      assert(const_values);

      const int ncomp = offset->ssa->num_components;
      for (int c = 0; c < ncomp; ++c)
         ir->set_offset(c, const_values[c].i32);
   }
}
|
||||
|
||||
void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir)
|
||||
{
|
||||
int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2;
|
||||
emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx),
|
||||
{alu_last_instr, alu_write}));
|
||||
ir->set_flag(TexInstruction::z_unnormalized);
|
||||
}
|
||||
|
||||
/* Resolve the sampler to use for a texture instruction.
 *
 * Without a deref variable the passed sampler index is used as is; with a
 * deref (asserted to be of a sampler type) the binding of that variable is
 * used instead. The result is never marked as indirect.
 */
EmitTexInstruction::SamplerId
EmitTexInstruction::get_sampler_id(int sampler_id, const nir_variable *deref)
{
   EmitTexInstruction::SamplerId sampler;
   sampler.id = sampler_id;
   sampler.indirect = false;

   if (!deref)
      return sampler;

   assert(glsl_type_is_sampler(deref->type));
   sampler.id = deref->data.binding;
   return sampler;
}
|
||||
|
||||
/* Start with no deref variables and no offset source; all other members
 * are default constructed. */
EmitTexInstruction::TexInputs::TexInputs()
   : sampler_deref(nullptr)
   , texture_deref(nullptr)
   , offset(nullptr)
{
}
|
||||
|
||||
}
|
@ -1,96 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_EMITTEXINSTRUCTION_H
|
||||
#define SFN_EMITTEXINSTRUCTION_H
|
||||
|
||||
#include "sfn_emitinstruction.h"
|
||||
#include "sfn_instruction_tex.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class EmitTexInstruction : public EmitInstruction
|
||||
{
|
||||
public:
|
||||
EmitTexInstruction(ShaderFromNirProcessor& processor);
|
||||
|
||||
private:
|
||||
struct TexInputs {
|
||||
TexInputs();
|
||||
const nir_variable *sampler_deref;
|
||||
const nir_variable *texture_deref;
|
||||
GPRVector coord;
|
||||
PValue bias;
|
||||
PValue comperator;
|
||||
PValue lod;
|
||||
GPRVector ddx;
|
||||
GPRVector ddy;
|
||||
nir_src *offset;
|
||||
PValue gather_comp;
|
||||
PValue ms_index;
|
||||
PValue sampler_offset;
|
||||
PValue texture_offset;
|
||||
};
|
||||
|
||||
bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src);
|
||||
|
||||
bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src);
|
||||
bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src);
|
||||
bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src);
|
||||
bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src);
|
||||
bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src,
|
||||
const std::array<int, 4> &dest_swz);
|
||||
bool emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
|
||||
const std::array<int, 4> &dest_swz);
|
||||
bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src);
|
||||
bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src);
|
||||
bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src);
|
||||
bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src);
|
||||
|
||||
bool get_inputs(const nir_tex_instr& instr, TexInputs &src);
|
||||
|
||||
void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const;
|
||||
|
||||
bool do_emit(nir_instr* instr) override;
|
||||
|
||||
GPRVector make_dest(nir_tex_instr& instr);
|
||||
GPRVector make_dest(nir_tex_instr &instr, const std::array<int, 4> &swizzle);
|
||||
|
||||
void set_offsets(TexInstruction* ir, nir_src *offset);
|
||||
void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir);
|
||||
|
||||
struct SamplerId {
|
||||
int id;
|
||||
bool indirect;
|
||||
};
|
||||
|
||||
SamplerId get_sampler_id(int sampler_id, const nir_variable *deref);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_EMITTEXINSTRUCTION_H
|
522
src/gallium/drivers/r600/sfn/sfn_instr.cpp
Normal file
522
src/gallium/drivers/r600/sfn/sfn_instr.cpp
Normal file
@ -0,0 +1,522 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2021 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_instr_alugroup.h"
|
||||
#include "sfn_instr_export.h"
|
||||
#include "sfn_instr_fetch.h"
|
||||
#include "sfn_instr_mem.h"
|
||||
#include "sfn_instr_lds.h"
|
||||
#include "sfn_instr_tex.h"
|
||||
#include "sfn_instr_controlflow.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <numeric>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
/* A new instruction has no uses and is not yet part of a block: block id
 * and index are set to the largest int as "unset" markers until
 * set_blockid() is called. */
Instr::Instr()
   : m_use_count(0)
   , m_block_id(std::numeric_limits<int>::max())
   , m_index(std::numeric_limits<int>::max())
{
}
|
||||
|
||||
/* Nothing to release; the destructor is defined out of line only. */
Instr::~Instr() = default;
|
||||
|
||||
void Instr::print(std::ostream& os) const
|
||||
{
|
||||
do_print(os);
|
||||
}
|
||||
|
||||
bool Instr::ready() const
|
||||
{
|
||||
for (auto& i : m_required_instr)
|
||||
if (!i->ready())
|
||||
return false;
|
||||
return do_ready();
|
||||
}
|
||||
|
||||
/* Parse the integer that follows @prefix in @str, e.g. ("R12", "R") -> 12.
 *
 * If @str does not start with @prefix a message is printed to stderr and
 * the function asserts.
 *
 * Returns the parsed value, or 0 if no number follows the prefix. The
 * result is initialized explicitly because stream extraction leaves its
 * target untouched when the stream is already empty (str == prefix), which
 * previously returned an indeterminate value.
 */
int int_from_string_with_prefix(const std::string& str, const std::string& prefix)
{
   /* compare() checks the leading characters without creating a temporary */
   if (str.compare(0, prefix.length(), prefix) != 0) {
      std::cerr << "Expect '" << prefix << "' as start of '" << str << "'\n";
      assert(0);
   }

   std::stringstream help(str.substr(prefix.length()));
   int retval = 0;
   help >> retval;
   return retval;
}
|
||||
|
||||
int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle &swz, bool& is_ssa)
|
||||
{
|
||||
assert(str[0] == 'R' || str[0] == '_' || str[0] == 'S');
|
||||
int sel = 0;
|
||||
|
||||
auto istr = str.begin() + 1;
|
||||
|
||||
if (str[0] == '_') {
|
||||
while (istr != str.end() && *istr == '_')
|
||||
++istr;
|
||||
sel = std::numeric_limits<int>::max();
|
||||
} else {
|
||||
while (istr != str.end() && isdigit(*istr)) {
|
||||
sel *= 10;
|
||||
sel += *istr - '0';
|
||||
++istr;
|
||||
}
|
||||
}
|
||||
|
||||
assert(*istr == '.');
|
||||
istr++;
|
||||
|
||||
int i = 0;
|
||||
while (istr != str.end()) {
|
||||
switch (*istr) {
|
||||
case 'x': swz[i] = 0; break;
|
||||
case 'y': swz[i] = 1; break;
|
||||
case 'z': swz[i] = 2; break;
|
||||
case 'w': swz[i] = 3; break;
|
||||
case '0': swz[i] = 4; break;
|
||||
case '1': swz[i] = 5; break;
|
||||
case '_': swz[i] = 7; break;
|
||||
default:
|
||||
unreachable("Unknown swizzle character");
|
||||
}
|
||||
++istr;
|
||||
++i;
|
||||
}
|
||||
|
||||
is_ssa = str[0] == 'S';
|
||||
|
||||
return sel;
|
||||
}
|
||||
|
||||
bool Instr::is_last() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Instr::set_dead()
|
||||
{
|
||||
if (m_instr_flags.test(always_keep))
|
||||
return false;
|
||||
bool is_dead = propagate_death();
|
||||
m_instr_flags.set(dead);
|
||||
return is_dead;
|
||||
}
|
||||
|
||||
/* Default implementation of the set_dead() hook: nothing to propagate,
 * report success. */
bool Instr::propagate_death()
{
   const bool propagated = true;
   return propagated;
}
|
||||
|
||||
/* Default implementation: the instruction has no replaceable sources, so
 * both arguments are ignored and false is returned. */
bool Instr::replace_source(PRegister old_src, PVirtualValue new_src)
{
   static_cast<void>(old_src);
   static_cast<void>(new_src);

   const bool replaced = false;
   return replaced;
}
|
||||
|
||||
/* Record that this instruction requires @instr (must not be null), and
 * register this instruction as a dependent of @instr. */
void Instr::add_required_instr(Instr *instr)
{
   assert(instr);

   auto& dependents_of_required = instr->m_dependend_instr;

   m_required_instr.push_back(instr);
   dependents_of_required.push_back(this);
}
|
||||
|
||||
/* Replace every occurrence of @old_instr in the list of required
 * instructions by @new_instr. Only this list is touched; the dependent
 * lists of @old_instr and @new_instr are not updated here. */
void Instr::replace_required_instr(Instr *old_instr, Instr *new_instr)
{
   for (auto& required : m_required_instr) {
      if (required == old_instr)
         required = new_instr;
   }
}
|
||||
|
||||
/* Default implementation: the destination can not be replaced, so both
 * arguments are ignored and false is returned. */
bool Instr::replace_dest(PRegister new_dest, r600::AluInstr *move_instr)
{
   static_cast<void>(new_dest);
   static_cast<void>(move_instr);

   const bool replaced = false;
   return replaced;
}
|
||||
|
||||
void Instr::set_blockid(int id, int index)
|
||||
{
|
||||
m_block_id = id;
|
||||
m_index = index;
|
||||
forward_set_blockid(id, index);
|
||||
}
|
||||
|
||||
|
||||
/* Default implementation of the set_blockid() hook: nothing to forward. */
void Instr::forward_set_blockid(int id, int index)
{
   static_cast<void>(id);
   static_cast<void>(index);
}
|
||||
|
||||
/* Store the destination vector and its swizzle, and register this
 * instruction as parent of every destination channel whose swizzle value
 * is below 6. */
InstrWithVectorResult::InstrWithVectorResult(const RegisterVec4& dest,
                                             const RegisterVec4::Swizzle& dest_swizzle)
   : m_dest(dest)
   , m_dest_swizzle(dest_swizzle)
{
   for (int chan = 0; chan < 4; ++chan) {
      if (m_dest_swizzle[chan] >= 6)
         continue;
      m_dest[chan]->add_parent(this);
   }
}
|
||||
|
||||
void InstrWithVectorResult::print_dest(std::ostream& os) const
|
||||
{
|
||||
os << (m_dest[0]->is_ssa() ? 'S' : 'R' ) << m_dest.sel();
|
||||
os << ".";
|
||||
for (int i = 0; i < 4; ++i)
|
||||
os << VirtualValue::chanchar[m_dest_swizzle[i]];
|
||||
}
|
||||
|
||||
bool InstrWithVectorResult::comp_dest(const RegisterVec4& dest,
|
||||
const RegisterVec4::Swizzle& dest_swizzle) const
|
||||
{
|
||||
for(int i = 0; i < 4; ++i) {
|
||||
if (!m_dest[i]->equal_to(*dest[i])) {
|
||||
return false;
|
||||
}
|
||||
if (m_dest_swizzle[i] != dest_swizzle[i])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Block::do_print(std::ostream& os) const
|
||||
{
|
||||
for (int j = 0; j < 2 * m_nesting_depth; ++j)
|
||||
os << ' ';
|
||||
os << "BLOCK START\n";
|
||||
for (auto& i : m_instructions) {
|
||||
for (int j = 0; j < 2 * (m_nesting_depth + i->nesting_corr()) + 2; ++j)
|
||||
os << ' ';
|
||||
os << *i << "\n";
|
||||
}
|
||||
for (int j = 0; j < 2 * m_nesting_depth; ++j)
|
||||
os << ' ';
|
||||
os << "BLOCK END\n";
|
||||
}
|
||||
|
||||
bool Block::is_equal_to(const Block& lhs) const
|
||||
{
|
||||
if (m_id != lhs.m_id || m_nesting_depth != lhs.m_nesting_depth)
|
||||
return false;
|
||||
|
||||
if (m_instructions.size() != lhs.m_instructions.size())
|
||||
return false;
|
||||
|
||||
return std::inner_product(m_instructions.begin(), m_instructions.end(), lhs.m_instructions.begin(),
|
||||
true,
|
||||
[] (bool l, bool r) { return l && r;},
|
||||
[](PInst l, PInst r) { return l->equal_to(*r);});
|
||||
}
|
||||
|
||||
inline bool operator != (const Block& lhs, const Block& rhs)
|
||||
{
|
||||
return !lhs.is_equal_to(rhs);
|
||||
}
|
||||
|
||||
/* Remove the instruction @node refers to from the instruction list of the
 * block. */
void Block::erase(iterator node)
{
   auto& instructions = m_instructions;
   instructions.erase(node);
}
|
||||
|
||||
/* Set the block type and reset the number of remaining slots: vtx, gds
 * and tex blocks get 8 slots, every other type gets 0xffff, the value
 * push_back() treats as "no limit". */
void Block::set_type(Type t)
{
   m_blocK_type = t;

   const bool limited = (t == vtx) || (t == gds) || (t == tex);
   if (limited)
      m_remaining_slots = 8; /* TODO: 16 for >= EVERGREEN */
   else
      m_remaining_slots = 0xffff;
}
|
||||
|
||||
/* Create an empty block with the given nesting depth and id; instruction
 * indices handed out by push_back() start at 0. */
Block::Block(int nesting_depth, int id)
   : m_nesting_depth(nesting_depth)
   , m_id(id)
   , m_next_index(0)
{
   /* A freshly created block must not have the force_cf flag set */
   assert(!has_instr_flag(force_cf));
}
|
||||
|
||||
/* Visitor dispatch for read-only visitors: the block is passed by const
 * reference. */
void Block::accept(ConstInstrVisitor& visitor) const
{
   const Block& self = *this;
   visitor.visit(self);
}
|
||||
|
||||
/* Visitor dispatch for modifying visitors: the block is passed as
 * pointer. */
void Block::accept(InstrVisitor& visitor)
{
   Block *self = this;
   visitor.visit(self);
}
|
||||
|
||||
/* Append @instr to the block.
 *
 * The instruction gets the id of the block and the next free index. If
 * the block has a slot limit (remaining slots != 0xffff) the slots of the
 * instruction are subtracted from it, and while an LDS group is open they
 * are added to the requirement of that group.
 */
void Block::push_back(PInst instr)
{
   instr->set_blockid(m_id, m_next_index++);

   const bool has_slot_limit = m_remaining_slots != 0xffff;
   if (has_slot_limit) {
      const uint32_t used = instr->slots();
      m_remaining_slots -= used;
   }

   if (m_lds_group_start != nullptr)
      m_lds_group_requirement += instr->slots();

   m_instructions.push_back(instr);
}
|
||||
|
||||
bool Block::try_reserve_kcache(const AluGroup& group)
|
||||
{
|
||||
auto kcache_constants = group.get_kconsts();
|
||||
for (auto& kc : kcache_constants) {
|
||||
auto u = kc->as_uniform();
|
||||
assert(u);
|
||||
if (!try_reserve_kcache(*u))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Block::try_reserve_kcache(const UniformValue& u)
|
||||
{
|
||||
const int kcache_banks = 4; // TODO: handle pre-evergreen
|
||||
|
||||
int bank = u.kcache_bank();
|
||||
int sel = (u.sel() - 512);
|
||||
int line = sel >> 4;
|
||||
|
||||
bool found = false;
|
||||
|
||||
for (int i = 0; i < kcache_banks && !found; ++i) {
|
||||
if (m_kcache[i].mode) {
|
||||
if (m_kcache[i].bank < bank)
|
||||
continue;
|
||||
|
||||
if ((m_kcache[i].bank == bank &&
|
||||
m_kcache[i].addr > line + 1) ||
|
||||
m_kcache[i].bank > bank) {
|
||||
if (m_kcache[kcache_banks - 1].mode)
|
||||
return false;
|
||||
|
||||
memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
|
||||
m_kcache[i].mode = KCacheLine::lock_1;
|
||||
m_kcache[i].bank = bank;
|
||||
m_kcache[i].addr = line;
|
||||
return true;
|
||||
}
|
||||
|
||||
int d = line - m_kcache[i].addr;
|
||||
|
||||
if (d == -1) {
|
||||
m_kcache[i].addr--;
|
||||
if (m_kcache[i].mode == KCacheLine::lock_2) {
|
||||
/* we are prepending the line to the current set,
|
||||
* discarding the existing second line,
|
||||
* so we'll have to insert line+2 after it */
|
||||
line += 2;
|
||||
continue;
|
||||
} else if (m_kcache[i].mode == KCacheLine::lock_1) {
|
||||
m_kcache[i].mode = KCacheLine::lock_2;
|
||||
return true;
|
||||
} else {
|
||||
/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
|
||||
return false;
|
||||
}
|
||||
} else if (d == 1) {
|
||||
m_kcache[i].mode = KCacheLine::lock_2;
|
||||
return true;
|
||||
} else if (d == 0)
|
||||
return true;
|
||||
} else { /* free kcache set - use it */
|
||||
m_kcache[i].mode = KCacheLine::lock_1;
|
||||
m_kcache[i].bank = bank;
|
||||
m_kcache[i].addr = line;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Open an LDS group that starts with @alu. No other group may be open
 * (asserted); the slot requirement of the new group starts at zero. */
void Block::lds_group_start(AluInstr *alu)
{
   assert(m_lds_group_start == nullptr);

   m_lds_group_requirement = 0;
   m_lds_group_start = alu;
}
|
||||
|
||||
void Block::lds_group_end()
|
||||
{
|
||||
assert(m_lds_group_start);
|
||||
m_lds_group_start->set_required_slots(m_lds_group_requirement);
|
||||
m_lds_group_start = 0;
|
||||
}
|
||||
|
||||
/* Copy destination and swizzle from @orig. The Instr base class is not
 * named in the initializer list and is therefore default constructed, and
 * the copy is not registered as parent of the destination registers. */
InstrWithVectorResult::InstrWithVectorResult(const InstrWithVectorResult& orig)
   : m_dest(orig.m_dest)
   , m_dest_swizzle(orig.m_dest_swizzle)
{
}
|
||||
|
||||
class InstrComparer : public ConstInstrVisitor {
|
||||
public:
|
||||
InstrComparer() = default;
|
||||
bool result {false};
|
||||
|
||||
#define DECLARE_MEMBER(TYPE) \
|
||||
InstrComparer(const TYPE *instr) \
|
||||
{ \
|
||||
this_ ## TYPE = instr; \
|
||||
} \
|
||||
\
|
||||
void visit(const TYPE& instr) \
|
||||
{ \
|
||||
result = false; \
|
||||
if (!this_ ## TYPE) \
|
||||
return; \
|
||||
result = this_ ## TYPE->is_equal_to(instr); \
|
||||
} \
|
||||
\
|
||||
const TYPE *this_ ## TYPE{nullptr};
|
||||
|
||||
DECLARE_MEMBER(AluInstr);
|
||||
DECLARE_MEMBER(AluGroup);
|
||||
DECLARE_MEMBER(TexInstr);
|
||||
DECLARE_MEMBER(ExportInstr);
|
||||
DECLARE_MEMBER(FetchInstr);
|
||||
DECLARE_MEMBER(Block);
|
||||
DECLARE_MEMBER(ControlFlowInstr);
|
||||
DECLARE_MEMBER(IfInstr);
|
||||
DECLARE_MEMBER(WriteScratchInstr);
|
||||
DECLARE_MEMBER(StreamOutInstr);
|
||||
DECLARE_MEMBER(MemRingOutInstr);
|
||||
DECLARE_MEMBER(EmitVertexInstr);
|
||||
DECLARE_MEMBER(GDSInstr);
|
||||
DECLARE_MEMBER(WriteTFInstr);
|
||||
DECLARE_MEMBER(LDSAtomicInstr);
|
||||
DECLARE_MEMBER(LDSReadInstr);
|
||||
DECLARE_MEMBER(RatInstr);
|
||||
};
|
||||
|
||||
class InstrCompareForward: public ConstInstrVisitor {
|
||||
public:
|
||||
|
||||
void visit(const AluInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const AluGroup& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const TexInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const ExportInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const FetchInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const Block& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const ControlFlowInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const IfInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const WriteScratchInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const StreamOutInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const MemRingOutInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const EmitVertexInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const GDSInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const WriteTFInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const LDSAtomicInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const LDSReadInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
void visit(const RatInstr& instr) override {
|
||||
m_comparer = InstrComparer(&instr);
|
||||
}
|
||||
|
||||
InstrComparer m_comparer;
|
||||
};
|
||||
|
||||
|
||||
bool Instr::equal_to(const Instr& lhs) const
|
||||
{
|
||||
InstrCompareForward cmp;
|
||||
accept(cmp);
|
||||
lhs.accept(cmp.m_comparer);
|
||||
|
||||
return cmp.m_comparer.result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
} // ns r600
|
314
src/gallium/drivers/r600/sfn/sfn_instr.h
Normal file
314
src/gallium/drivers/r600/sfn/sfn_instr.h
Normal file
@ -0,0 +1,314 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2021 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "sfn_virtualvalues.h"
|
||||
#include "sfn_alu_defines.h"
|
||||
#include "sfn_defines.h"
|
||||
#include <set>
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class ConstInstrVisitor;
|
||||
|
||||
class InstrVisitor;
|
||||
class AluInstr;
|
||||
class AluGroup;
|
||||
class TexInstr;
|
||||
class ExportInstr;
|
||||
class FetchInstr;
|
||||
class ControlFlowInstr;
|
||||
class IfInstr;
|
||||
class WriteScratchInstr;
|
||||
class StreamOutInstr;
|
||||
class MemRingOutInstr;
|
||||
class EmitVertexInstr;
|
||||
class GDSInstr;
|
||||
class WriteTFInstr;
|
||||
class LDSAtomicInstr;
|
||||
class LDSReadInstr;
|
||||
class RatInstr;
|
||||
|
||||
|
||||
int int_from_string_with_prefix(const std::string& str, const std::string& prefix);
|
||||
int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle& swz, bool& is_ssa);
|
||||
|
||||
class Instr : public Allocate {
|
||||
public:
|
||||
|
||||
enum Flags {
|
||||
always_keep,
|
||||
dead,
|
||||
scheduled,
|
||||
vpm,
|
||||
force_cf,
|
||||
ack_rat_return_write,
|
||||
nflags
|
||||
};
|
||||
|
||||
Instr();
|
||||
|
||||
Instr(const Instr& orig) = default;
|
||||
|
||||
virtual ~Instr();
|
||||
|
||||
using Pointer = R600_POINTER_TYPE(Instr);
|
||||
|
||||
void print(std::ostream& os) const;
|
||||
bool equal_to(const Instr& lhs) const;
|
||||
|
||||
virtual void accept(ConstInstrVisitor& visitor) const = 0;
|
||||
virtual void accept(InstrVisitor& visitor) = 0;
|
||||
virtual bool end_group() const { return true;}
|
||||
|
||||
virtual bool is_last() const;
|
||||
|
||||
void set_always_keep() {m_instr_flags.set(always_keep);}
|
||||
bool set_dead();
|
||||
virtual void set_scheduled() { m_instr_flags.set(scheduled); forward_set_scheduled();}
|
||||
void add_use() {++m_use_count;}
|
||||
void dec_use() {assert(m_use_count > 0); --m_use_count;}
|
||||
bool is_dead() const {return m_instr_flags.test(dead);}
|
||||
bool is_scheduled() const {return m_instr_flags.test(scheduled);}
|
||||
bool keep() const {return m_instr_flags.test(always_keep);}
|
||||
bool has_uses() const {return m_use_count > 0;}
|
||||
|
||||
bool has_instr_flag(Flags f) const {return m_instr_flags.test(f);}
|
||||
void set_instr_flag(Flags f) { m_instr_flags.set(f);}
|
||||
|
||||
virtual bool replace_source(PRegister old_src, PVirtualValue new_src);
|
||||
virtual bool replace_dest(PRegister new_dest, AluInstr *move_instr);
|
||||
|
||||
virtual int nesting_corr() const { return 0;}
|
||||
|
||||
virtual bool end_block() const { return false;}
|
||||
virtual int nesting_offset() const { return 0;}
|
||||
|
||||
void set_blockid(int id, int index);
|
||||
int block_id() const {return m_block_id;}
|
||||
int index() const { return m_index;}
|
||||
|
||||
void add_required_instr(Instr *instr);
|
||||
void replace_required_instr(Instr *old_instr, Instr *new_instr);
|
||||
|
||||
bool ready() const;
|
||||
|
||||
virtual uint32_t slots() const {return 0;};
|
||||
|
||||
using InstrList = std::list<Instr *, Allocator<Instr *>>;
|
||||
|
||||
const InstrList& dependend_instr() { return m_dependend_instr;}
|
||||
|
||||
protected:
|
||||
|
||||
const InstrList& required_instr() const {return m_required_instr; }
|
||||
|
||||
private:
|
||||
virtual void forward_set_blockid(int id, int index);
|
||||
|
||||
virtual bool do_ready() const = 0;
|
||||
|
||||
virtual void do_print(std::ostream& os) const = 0;
|
||||
virtual bool propagate_death();
|
||||
virtual void forward_set_scheduled() {}
|
||||
|
||||
InstrList m_required_instr;
|
||||
InstrList m_dependend_instr;
|
||||
|
||||
int m_use_count;
|
||||
int m_block_id;
|
||||
int m_index;
|
||||
std::bitset<nflags> m_instr_flags{0};
|
||||
|
||||
};
|
||||
using PInst = Instr::Pointer;
|
||||
|
||||
class Block : public Instr {
|
||||
public:
|
||||
|
||||
enum Type {
|
||||
cf,
|
||||
alu,
|
||||
tex,
|
||||
vtx,
|
||||
gds,
|
||||
unknown
|
||||
};
|
||||
|
||||
using Instructions = std::list<Instr *, Allocator<Instr *>>;
|
||||
using Pointer = R600_POINTER_TYPE(Block);
|
||||
using iterator = Instructions::iterator;
|
||||
using reverse_iterator = Instructions::reverse_iterator;
|
||||
using const_iterator = Instructions::const_iterator;
|
||||
|
||||
Block(int nesting_depth, int id);
|
||||
Block(const Block& orig) = delete;
|
||||
|
||||
void push_back(PInst instr);
|
||||
iterator begin() { return m_instructions.begin(); }
|
||||
iterator end() { return m_instructions.end(); }
|
||||
reverse_iterator rbegin() { return m_instructions.rbegin(); }
|
||||
reverse_iterator rend() { return m_instructions.rend(); }
|
||||
|
||||
const_iterator begin() const { return m_instructions.begin();}
|
||||
const_iterator end() const { return m_instructions.end();}
|
||||
|
||||
bool empty() const { return m_instructions.empty();}
|
||||
|
||||
void erase(iterator node);
|
||||
|
||||
bool is_equal_to(const Block& lhs) const;
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
int nesting_depth() const { return m_nesting_depth;}
|
||||
|
||||
int id() const {return m_id;}
|
||||
|
||||
auto type() const {return m_blocK_type; }
|
||||
void set_type(Type t);
|
||||
uint32_t remaining_slots() const { return m_remaining_slots;}
|
||||
|
||||
bool try_reserve_kcache(const AluGroup& group);
|
||||
|
||||
auto last_lds_instr() {return m_last_lds_instr;}
|
||||
void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;}
|
||||
|
||||
void lds_group_start(AluInstr *alu);
|
||||
void lds_group_end();
|
||||
bool lds_group_active() { return m_lds_group_start != nullptr;}
|
||||
|
||||
size_t size() const { return m_instructions.size();}
|
||||
|
||||
private:
|
||||
bool try_reserve_kcache(const UniformValue& u);
|
||||
|
||||
bool do_ready() const override {return true;};
|
||||
void do_print(std::ostream& os) const override;
|
||||
Instructions m_instructions;
|
||||
int m_nesting_depth;
|
||||
int m_id;
|
||||
int m_next_index;
|
||||
|
||||
Type m_blocK_type{unknown};
|
||||
uint32_t m_remaining_slots{0xffff};
|
||||
|
||||
std::array<KCacheLine, 4> m_kcache;
|
||||
|
||||
Instr *m_last_lds_instr{nullptr};
|
||||
|
||||
int m_lds_group_requirement{0};
|
||||
AluInstr *m_lds_group_start{nullptr};
|
||||
};
|
||||
|
||||
class InstrWithVectorResult : public Instr {
|
||||
public:
|
||||
InstrWithVectorResult(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle);
|
||||
|
||||
void set_dest_swizzle(const RegisterVec4::Swizzle& swz) {m_dest_swizzle = swz;}
|
||||
int dest_swizzle(int i) const { return m_dest_swizzle[i];}
|
||||
const RegisterVec4::Swizzle& all_dest_swizzle() const { return m_dest_swizzle;}
|
||||
const RegisterVec4& dst() const {return m_dest;}
|
||||
|
||||
protected:
|
||||
InstrWithVectorResult(const InstrWithVectorResult& orig);
|
||||
|
||||
void print_dest(std::ostream& os) const;
|
||||
bool comp_dest(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle) const;
|
||||
|
||||
private:
|
||||
RegisterVec4 m_dest;
|
||||
RegisterVec4::Swizzle m_dest_swizzle;
|
||||
};
|
||||
|
||||
inline bool operator == (const Instr& lhs, const Instr& rhs) {
|
||||
return lhs.equal_to(rhs);
|
||||
}
|
||||
|
||||
inline bool operator != (const Instr& lhs, const Instr& rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
inline std::ostream& operator << (std::ostream& os, const Instr& instr)
|
||||
{
|
||||
instr.print(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Instr, T>>>
|
||||
std::ostream& operator<<(std::ostream& os, const T& instr) {
|
||||
instr.print(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
class ConstInstrVisitor {
|
||||
public:
|
||||
virtual void visit(const AluInstr& instr) = 0;
|
||||
virtual void visit(const AluGroup& instr) = 0;
|
||||
virtual void visit(const TexInstr& instr) = 0;
|
||||
virtual void visit(const ExportInstr& instr) = 0;
|
||||
virtual void visit(const FetchInstr& instr) = 0;
|
||||
virtual void visit(const Block& instr) = 0;
|
||||
virtual void visit(const ControlFlowInstr& instr) = 0;
|
||||
virtual void visit(const IfInstr& instr) = 0;
|
||||
virtual void visit(const WriteScratchInstr& instr) = 0;
|
||||
virtual void visit(const StreamOutInstr& instr) = 0;
|
||||
virtual void visit(const MemRingOutInstr& instr) = 0;
|
||||
virtual void visit(const EmitVertexInstr& instr) = 0;
|
||||
virtual void visit(const GDSInstr& instr) = 0;
|
||||
virtual void visit(const WriteTFInstr& instr) = 0;
|
||||
virtual void visit(const LDSAtomicInstr& instr) = 0;
|
||||
virtual void visit(const LDSReadInstr& instr) = 0;
|
||||
virtual void visit(const RatInstr& instr) = 0;
|
||||
};
|
||||
|
||||
class InstrVisitor {
|
||||
public:
|
||||
virtual void visit(AluInstr *instr) = 0;
|
||||
virtual void visit(AluGroup *instr) = 0;
|
||||
virtual void visit(TexInstr *instr) = 0;
|
||||
virtual void visit(ExportInstr *instr) = 0;
|
||||
virtual void visit(FetchInstr *instr) = 0;
|
||||
virtual void visit(Block *instr) = 0;
|
||||
virtual void visit(ControlFlowInstr *instr) = 0;
|
||||
virtual void visit(IfInstr *instr) = 0;
|
||||
virtual void visit(WriteScratchInstr *instr) = 0;
|
||||
virtual void visit(StreamOutInstr *instr) = 0;
|
||||
virtual void visit(MemRingOutInstr *instr) = 0;
|
||||
virtual void visit(EmitVertexInstr *instr) = 0;
|
||||
virtual void visit(GDSInstr *instr) = 0;
|
||||
virtual void visit(WriteTFInstr *instr) = 0;
|
||||
virtual void visit(LDSAtomicInstr *instr) = 0;
|
||||
virtual void visit(LDSReadInstr *instr) = 0;
|
||||
virtual void visit(RatInstr *instr) = 0;
|
||||
};
|
||||
|
||||
|
||||
} // ns r600
|
2449
src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
Normal file
2449
src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
Normal file
File diff suppressed because it is too large
Load Diff
193
src/gallium/drivers/r600/sfn/sfn_instr_alu.h
Normal file
193
src/gallium/drivers/r600/sfn/sfn_instr_alu.h
Normal file
@ -0,0 +1,193 @@
|
||||
#ifndef INSTRALU_H
|
||||
#define INSTRALU_H
|
||||
|
||||
#include "sfn_instr.h"
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
struct nir_alu_instr;
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class Shader;
|
||||
class ValueFactory;
|
||||
|
||||
class AluInstr : public Instr {
|
||||
public:
|
||||
|
||||
using SrcValues = std::vector<PVirtualValue, Allocator<PVirtualValue>>;
|
||||
|
||||
enum Op2Options {
|
||||
op2_opt_none = 0,
|
||||
op2_opt_reverse = 1,
|
||||
op2_opt_neg_src1 = 1 << 1,
|
||||
op2_opt_abs_src0 = 1 << 2
|
||||
};
|
||||
|
||||
static constexpr const AluBankSwizzle bs[6] = {
|
||||
alu_vec_012,
|
||||
alu_vec_021,
|
||||
alu_vec_120,
|
||||
alu_vec_102,
|
||||
alu_vec_201,
|
||||
alu_vec_210
|
||||
};
|
||||
|
||||
static const AluModifiers src_abs_flags[2];
|
||||
static const AluModifiers src_neg_flags[3];
|
||||
static const AluModifiers src_rel_flags[3];
|
||||
|
||||
AluInstr(EAluOp opcode);
|
||||
AluInstr(EAluOp opcode, int chan);
|
||||
AluInstr(EAluOp opcode, PRegister dest,
|
||||
SrcValues src0,
|
||||
const std::set<AluModifiers>& flags, int alu_slot);
|
||||
|
||||
AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0,
|
||||
const std::set<AluModifiers>& flags);
|
||||
|
||||
AluInstr(EAluOp opcode, PRegister dest,
|
||||
PVirtualValue src0, PVirtualValue src1,
|
||||
const std::set<AluModifiers>& flags);
|
||||
|
||||
AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1,
|
||||
PVirtualValue src2,
|
||||
const std::set<AluModifiers>& flags);
|
||||
|
||||
AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address);
|
||||
AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags);
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
auto opcode() const {assert(!has_alu_flag(alu_is_lds)); return m_opcode;}
|
||||
auto lds_opcode() const {assert(has_alu_flag(alu_is_lds)); return m_lds_opcode;}
|
||||
|
||||
bool can_propagate_src() const;
|
||||
bool can_propagate_dest() const;
|
||||
|
||||
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
|
||||
bool replace_dest(PRegister new_dest, AluInstr *move_instr) override;
|
||||
|
||||
void set_op(EAluOp op) {m_opcode = op;}
|
||||
|
||||
PRegister dest() const {return m_dest;}
|
||||
unsigned n_sources() const {return m_src.size();}
|
||||
|
||||
int dest_chan() const {return m_dest ? m_dest->chan() : m_fallback_chan;}
|
||||
|
||||
PVirtualValue psrc(unsigned i) {return i < m_src.size() ? m_src[i] : nullptr;}
|
||||
VirtualValue& src(unsigned i) {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
|
||||
const VirtualValue& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
|
||||
|
||||
void set_sources(SrcValues src);
|
||||
const SrcValues& sources() const {return m_src;}
|
||||
void pin_sources_to_chan();
|
||||
|
||||
int register_priority() const;
|
||||
|
||||
void reset_alu_flag(AluModifiers flag) {m_alu_flags.reset(flag);}
|
||||
void set_alu_flag(AluModifiers flag) {m_alu_flags.set(flag);}
|
||||
bool has_alu_flag(AluModifiers f) const {return m_alu_flags.test(f);}
|
||||
|
||||
ECFAluOpCode cf_type() const {return m_cf_type;}
|
||||
void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
|
||||
void set_bank_swizzle(AluBankSwizzle swz) {m_bank_swizzle = swz;}
|
||||
AluBankSwizzle bank_swizzle() const {return m_bank_swizzle;}
|
||||
|
||||
void set_index_offset(unsigned offs) {m_idx_offset = offs;}
|
||||
auto index_offset() const {return m_idx_offset;}
|
||||
|
||||
bool is_equal_to(const AluInstr& lhs) const;
|
||||
|
||||
bool has_lds_access() const;
|
||||
|
||||
static const std::map<ECFAluOpCode, std::string> cf_map;
|
||||
static const std::map<AluBankSwizzle, std::string> bank_swizzle_map;
|
||||
static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory, AluGroup *);
|
||||
static bool from_nir(nir_alu_instr *alu, Shader& shader);
|
||||
|
||||
int alu_slots() const {return m_alu_slots;}
|
||||
|
||||
AluGroup *split(ValueFactory &vf);
|
||||
|
||||
bool end_group() const override { return m_alu_flags.test(alu_last_instr);}
|
||||
|
||||
static const std::set<AluModifiers> empty;
|
||||
static const std::set<AluModifiers> write;
|
||||
static const std::set<AluModifiers> last;
|
||||
static const std::set<AluModifiers> last_write;
|
||||
|
||||
std::pair<PRegister, bool> indirect_addr() const;
|
||||
|
||||
void add_extra_dependency(PVirtualValue reg);
|
||||
|
||||
void set_required_slots(int nslots) { m_required_slots = nslots;}
|
||||
unsigned required_slots() const { return m_required_slots;}
|
||||
|
||||
void add_priority(int priority) { m_priority += priority;}
|
||||
int priority() const { return m_priority;}
|
||||
void inc_priority() { ++m_priority;}
|
||||
|
||||
void set_parent_group(AluGroup *group) { m_parent_group = group;}
|
||||
|
||||
private:
|
||||
friend class AluGroup;
|
||||
|
||||
void update_uses();
|
||||
|
||||
bool do_ready() const override;
|
||||
|
||||
bool can_copy_propagate() const;
|
||||
|
||||
bool check_readport_validation(PRegister old_src, PVirtualValue new_src) const;
|
||||
|
||||
void set_alu_flags(const AluOpFlags& flags) { m_alu_flags = flags; }
|
||||
bool propagate_death() override;
|
||||
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
union {
|
||||
EAluOp m_opcode;
|
||||
ESDOp m_lds_opcode;
|
||||
};
|
||||
|
||||
PRegister m_dest{nullptr};
|
||||
SrcValues m_src;
|
||||
|
||||
AluOpFlags m_alu_flags;
|
||||
AluBankSwizzle m_bank_swizzle{alu_vec_unknown};
|
||||
ECFAluOpCode m_cf_type{cf_alu};
|
||||
int m_alu_slots{1};
|
||||
int m_fallback_chan{0};
|
||||
unsigned m_idx_offset{0};
|
||||
unsigned m_required_slots{0};
|
||||
int m_priority{0};
|
||||
std::set<PRegister, std::less<PRegister>, Allocator<PRegister>> m_extra_dependencies;
|
||||
AluGroup *m_parent_group{nullptr};
|
||||
};
|
||||
|
||||
class AluInstrVisitor : public InstrVisitor {
|
||||
public:
|
||||
void visit(AluGroup *instr) override;
|
||||
void visit(Block *instr) override;
|
||||
void visit(IfInstr *instr) override;
|
||||
|
||||
void visit(TexInstr *instr) override {(void)instr;}
|
||||
void visit(ExportInstr *instr) override {(void)instr;}
|
||||
void visit(FetchInstr *instr) override {(void)instr;}
|
||||
void visit(ControlFlowInstr *instr) override {(void)instr;}
|
||||
void visit(WriteScratchInstr *instr) override {(void)instr;}
|
||||
void visit(StreamOutInstr *instr) override {(void)instr;}
|
||||
void visit(MemRingOutInstr *instr) override {(void)instr;}
|
||||
void visit(EmitVertexInstr *instr) override {(void)instr;}
|
||||
void visit(GDSInstr *instr) override {(void)instr;};
|
||||
void visit(WriteTFInstr *instr) override {(void)instr;};
|
||||
void visit(LDSAtomicInstr *instr) override {(void)instr;};
|
||||
void visit(LDSReadInstr *instr) override {(void)instr;};
|
||||
void visit(RatInstr *instr) override {(void)instr;};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif // INSTRALU_H
|
361
src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp
Normal file
361
src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp
Normal file
@ -0,0 +1,361 @@
|
||||
#include "sfn_instr_alugroup.h"
|
||||
#include "sfn_debug.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
AluGroup::AluGroup()
|
||||
{
|
||||
std::fill(m_slots.begin(), m_slots.end(), nullptr);
|
||||
}
|
||||
|
||||
/**
 * Try to place an ALU instruction into this group.
 *
 * Instructions flagged alu_is_trans are offered to the trans slot first.
 * Otherwise (or if that fails) a vector slot is tried, and as a last resort
 * the trans slot is tried when the chip has one and the opcode may run there.
 *
 * \param instr instruction to add
 * \returns true if the instruction was stored in one of the slots
 */
bool AluGroup::add_instruction(AluInstr *instr)
{
   /* we can only schedule one op that accesses LDS or
      the LDS read queue */
   if (m_has_lds_op && instr->has_lds_access())
      return false;

   if (instr->has_alu_flag(alu_is_trans) && add_trans_instructions(instr))
      return true;

   if (add_vec_instructions(instr)) {
      instr->set_parent_group(this);
      return true;
   }

   /* LDS instructions store an ESDOp in the opcode union, so opcode() must
    * not be queried for them (it asserts on alu_is_lds).
    * add_trans_instructions() rejects LDS instructions anyway, so there is
    * no fallback left to try. */
   if (instr->has_alu_flag(alu_is_lds))
      return false;

   auto opinfo = alu_ops.find(instr->opcode());
   assert(opinfo != alu_ops.end());

   if (s_max_slots > 4 &&
       opinfo->second.can_channel(AluOp::t) &&
       add_trans_instructions(instr)) {
      instr->set_parent_group(this);
      return true;
   }

   return false;
}
|
||||
|
||||
/**
 * Try to place an instruction into the trans slot (slot index 4).
 *
 * The attempt fails if the slot is taken, the chip has fewer than five
 * slots, the indirect address conflicts with the one already used by the
 * group, the instruction is an LDS op, the opcode can not run in the trans
 * channel, or no scalar bank swizzle satisfies the read port constraints.
 *
 * \param instr instruction to add
 * \returns true if the instruction now occupies the trans slot
 */
bool AluGroup::add_trans_instructions(AluInstr *instr)
{
   if (m_slots[4] || s_max_slots < 5)
      return false;

   if (!update_indirect_access(instr))
      return false;

   /* LDS instructions have to be scheduled in X */
   if (instr->has_alu_flag(alu_is_lds))
      return false;

   auto opinfo = alu_ops.find(instr->opcode());
   assert(opinfo != alu_ops.end());

   if (!opinfo->second.can_channel(AluOp::t))
      return false;

   /* if we schedule a non-trans instr into the trans slot, we have to make
    * sure that the corresponding vector slot is already occupied, otherwise
    * the hardware will schedule it as vector op and the bank-swizzle as
    * checked here (and in r600_asm.c) will not catch conflicts.
    */
   if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
      if (instr->dest() && instr->dest()->pin() == pin_free) {
         int used_slot = 3;
         /* Test the index before using it, so that m_slots[-1] is never
          * read when all four vector slots are empty. */
         while (used_slot >= 0 && !m_slots[used_slot])
            --used_slot;

         // if we schedule a non-trans instr into the trans slot,
         // there should always be some slot that is already used
         assert(used_slot >= 0);
         instr->dest()->set_chan(used_slot);
      }
   }

   for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown ; ++i) {
      /* Evaluate on a copy; the group's reservation is only replaced when
       * the instruction could actually be scheduled with this swizzle. */
      AluReadportReservation readports_evaluator = m_readports_evaluator;
      if (readports_evaluator.schedule_trans_instruction(*instr, i)) {
         m_readports_evaluator = readports_evaluator;
         m_slots[4] = instr;
         instr->pin_sources_to_chan();
         sfn_log << SfnLog::schedule << "T: " << *instr << "\n";

         /* We added a vector op in the trans channel, so we have to
          * make sure the corresponding vector channel is used */
         if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
            m_slots[instr->dest_chan()] =
                  new AluInstr(op0_nop, instr->dest_chan());
         return true;
      }
   }
   return false;
}
|
||||
|
||||
int AluGroup::free_slots() const
|
||||
{
|
||||
int free_mask = 0;
|
||||
for(int i = 0; i < s_max_slots; ++i) {
|
||||
if (!m_slots[i])
|
||||
free_mask |= 1 << i;
|
||||
}
|
||||
return free_mask;
|
||||
}
|
||||
|
||||
class AluAllowSlotSwitch : public AluInstrVisitor {
|
||||
public:
|
||||
using AluInstrVisitor::visit;
|
||||
|
||||
void visit(AluInstr *alu) {
|
||||
yes = (alu->alu_slots() == 1 || alu->has_alu_flag(alu_is_cayman_trans));
|
||||
}
|
||||
|
||||
bool yes{false};
|
||||
|
||||
};
|
||||
|
||||
/**
 * Try to place an instruction into one of the vector slots.
 *
 * The slot given by the destination channel is preferred. If it is taken
 * and the destination register is not pinned, the first empty slot among
 * the four vector slots is tried instead, provided all users of the
 * register allow the channel to change.
 *
 * \param instr instruction to add
 * \returns true if the instruction was stored in a vector slot
 */
bool AluGroup::add_vec_instructions(AluInstr *instr)
{
   if (!update_indirect_access(instr))
      return false;

   /* Selector of an inline constant source relative to ALU_SRC_PARAM_BASE;
    * with several inline constants the last one is kept. */
   int param_src = -1;
   for (auto& src : instr->sources()) {
      auto inline_const = src->as_inline_const();
      if (inline_const)
         param_src = inline_const->sel() - ALU_SRC_PARAM_BASE;
   }

   /* Only one such value may be referenced per group */
   if (param_src >= 0) {
      if (m_param_used < 0)
         m_param_used = param_src;
      else if (m_param_used != param_src)
         return false;
   }

   if (m_has_lds_op && instr->has_lds_access())
      return false;

   /* Use the bank swizzle fixed on the instruction if there is one,
    * otherwise probe all vector bank swizzles in order. */
   auto reserve_readports = [this, instr]() {
      if (instr->bank_swizzle() != alu_vec_unknown)
         return try_readport(instr, instr->bank_swizzle());

      for (AluBankSwizzle swz = alu_vec_012; swz != alu_vec_unknown; ++swz) {
         if (try_readport(instr, swz))
            return true;
      }
      return false;
   };

   if (!m_slots[instr->dest_chan()])
      return reserve_readports();

   /* The preferred slot is occupied; moving is only possible for a
    * destination register that is not pinned. */
   auto dest = instr->dest();
   if (!dest || dest->pin() != pin_free)
      return false;

   for (auto user : dest->uses()) {
      AluAllowSlotSwitch switch_check;
      user->accept(switch_check);
      if (!switch_check.yes)
         return false;
   }

   int free_chan = 0;
   while (m_slots[free_chan] && free_chan < 4)
      free_chan++;

   if (free_chan >= 4)
      return false;

   sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
   dest->set_chan(free_chan);
   return reserve_readports();
}
|
||||
|
||||
/**
 * Try to schedule an instruction into the vector slot of its destination
 * channel using the given bank swizzle.
 *
 * \param instr instruction to schedule
 * \param cycle bank swizzle to evaluate
 * \returns true if the read port reservation succeeded and the instruction
 *          was stored in the slot
 */
bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
{
   const int slot = instr->dest_chan();

   /* Evaluate on a copy so that a failed attempt leaves the reservation
    * of the group unchanged. */
   AluReadportReservation trial = m_readports_evaluator;
   if (!trial.schedule_vec_instruction(*instr, cycle))
      return false;

   m_readports_evaluator = trial;
   m_slots[slot] = instr;
   if (instr->has_lds_access())
      m_has_lds_op = true;
   sfn_log << SfnLog::schedule << "V: " << *instr << "\n";

   /* The channel is decided now, so a free destination gets pinned to it */
   auto dest = instr->dest();
   if (dest && dest->pin() == pin_free)
      dest->set_pin(pin_chan);
   instr->pin_sources_to_chan();
   return true;
}
|
||||
|
||||
bool AluGroup::update_indirect_access(AluInstr *instr)
|
||||
{
|
||||
auto indirect_addr = instr->indirect_addr();
|
||||
|
||||
if (indirect_addr.first) {
|
||||
if (!m_addr_used) {
|
||||
m_addr_used = indirect_addr.first;
|
||||
m_addr_is_index = indirect_addr.second;
|
||||
} else if (!indirect_addr.first->equal_to(*m_addr_used)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void AluGroup::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void AluGroup::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
void AluGroup::set_scheduled()
|
||||
{
|
||||
for (int i = 0; i < s_max_slots; ++i) {
|
||||
if (m_slots[i])
|
||||
m_slots[i]->set_scheduled();
|
||||
}
|
||||
}
|
||||
|
||||
void AluGroup::fix_last_flag()
|
||||
{
|
||||
bool last_seen = false;
|
||||
for (int i = s_max_slots - 1; i >= 0; --i) {
|
||||
if (m_slots[i]) {
|
||||
if (!last_seen) {
|
||||
m_slots[i]->set_alu_flag(alu_last_instr);
|
||||
last_seen = true;
|
||||
} else {
|
||||
m_slots[i]->reset_alu_flag(alu_last_instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool AluGroup::is_equal_to(const AluGroup& other) const
|
||||
{
|
||||
for (int i = 0; i < s_max_slots; ++i) {
|
||||
if (!other.m_slots[i]) {
|
||||
if (!m_slots[i])
|
||||
continue;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_slots[i]) {
|
||||
if (!other.m_slots[i])
|
||||
return false;
|
||||
else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AluGroup::has_lds_group_end() const
|
||||
{
|
||||
for (int i = 0; i < s_max_slots; ++i) {
|
||||
if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AluGroup::do_ready() const
|
||||
{
|
||||
for (int i = 0; i < s_max_slots; ++i) {
|
||||
if (m_slots[i] && !m_slots[i]->ready())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void AluGroup::forward_set_blockid(int id, int index)
|
||||
{
|
||||
for (int i = 0; i < s_max_slots; ++i) {
|
||||
if (m_slots[i]) {
|
||||
m_slots[i]->set_blockid(id, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t AluGroup::slots() const
|
||||
{
|
||||
uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
|
||||
for (int i = 0; i < s_max_slots; ++i) {
|
||||
if (m_slots[i])
|
||||
++result;
|
||||
}
|
||||
if (m_addr_used) {
|
||||
++result;
|
||||
if (m_addr_is_index)
|
||||
++result;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void AluGroup::do_print(std::ostream& os) const
|
||||
{
|
||||
const char slotname[] = "xyzwt";
|
||||
|
||||
os << "ALU_GROUP_BEGIN\n";
|
||||
for (int i = 0; i < s_max_slots; ++i) {
|
||||
if (m_slots[i]) {
|
||||
for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
|
||||
os << ' ';
|
||||
os << slotname[i] << ": ";
|
||||
m_slots[i]->print(os);
|
||||
os << "\n";
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
|
||||
os << ' ';
|
||||
os << "ALU_GROUP_END";
|
||||
}
|
||||
|
||||
/**
 * Collect the uniform sources of all instructions in the group.
 *
 * \returns the sources for which as_uniform() is non-null, in slot order
 *          and source order; a value used more than once appears more
 *          than once
 */
AluInstr::SrcValues AluGroup::get_kconsts() const
{
   AluInstr::SrcValues uniforms;

   for (auto slot : *this) {
      if (!slot)
         continue;

      for (auto src : slot->sources()) {
         if (src->as_uniform())
            uniforms.push_back(src);
      }
   }
   return uniforms;
}
|
||||
|
||||
/**
 * Set the number of slots per ALU group for the given chip class:
 * four on ISA_CC_CAYMAN, five on every other chip class.
 */
void AluGroup::set_chipclass(r600_chip_class chip_class)
{
   s_max_slots = (chip_class == ISA_CC_CAYMAN) ? 4 : 5;
}
|
||||
|
||||
int AluGroup::s_max_slots = 5;
|
||||
|
||||
}
|
89
src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h
Normal file
89
src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h
Normal file
@ -0,0 +1,89 @@
|
||||
#ifndef ALUGROUP_H
|
||||
#define ALUGROUP_H
|
||||
|
||||
#include "sfn_instr_alu.h"
|
||||
#include "sfn_alu_readport_validation.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class AluGroup : public Instr
|
||||
{
|
||||
public:
|
||||
using Slots = std::array<AluInstr *, 5>;
|
||||
|
||||
AluGroup();
|
||||
|
||||
using iterator = Slots::iterator;
|
||||
using const_iterator = Slots::const_iterator;
|
||||
|
||||
bool add_instruction(AluInstr *instr);
|
||||
bool add_trans_instructions(AluInstr *instr);
|
||||
bool add_vec_instructions(AluInstr *instr);
|
||||
|
||||
bool is_equal_to(const AluGroup& other) const;
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
auto begin() {return m_slots.begin(); }
|
||||
auto end() {return m_slots.begin() + s_max_slots; }
|
||||
auto begin() const {return m_slots.begin(); }
|
||||
auto end() const {return m_slots.begin() + s_max_slots; }
|
||||
|
||||
bool end_group() const override { return true; }
|
||||
|
||||
void set_scheduled() override;
|
||||
|
||||
void set_nesting_depth(int depth) {m_nesting_depth = depth;}
|
||||
|
||||
void fix_last_flag();
|
||||
|
||||
static void set_chipclass(r600_chip_class chip_class);
|
||||
|
||||
int free_slots() const;
|
||||
|
||||
auto addr() const {return std::make_pair(m_addr_used, m_addr_is_index);}
|
||||
|
||||
uint32_t slots() const override;
|
||||
|
||||
AluInstr::SrcValues get_kconsts() const;
|
||||
|
||||
bool has_lds_group_start() const { return m_slots[0] ?
|
||||
m_slots[0]->has_alu_flag(alu_lds_group_start) : false;}
|
||||
|
||||
bool has_lds_group_end() const;
|
||||
|
||||
const auto& readport_reserer() const { return m_readports_evaluator; }
|
||||
void set_readport_reserer(const AluReadportReservation& rr) {
|
||||
m_readports_evaluator = rr;
|
||||
};
|
||||
|
||||
static bool has_t() { return s_max_slots == 5;}
|
||||
|
||||
private:
|
||||
void forward_set_blockid(int id, int index) override;
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
bool update_indirect_access(AluInstr *instr);
|
||||
bool try_readport(AluInstr *instr, AluBankSwizzle cycle);
|
||||
|
||||
Slots m_slots;
|
||||
|
||||
AluReadportReservation m_readports_evaluator;
|
||||
|
||||
static int s_max_slots;
|
||||
|
||||
PRegister m_addr_used{nullptr};
|
||||
|
||||
int m_param_used{-1};
|
||||
|
||||
int m_nesting_depth{0};
|
||||
bool m_has_lds_op{false};
|
||||
bool m_addr_is_index{false};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // ALUGROUP_H
|
176
src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp
Normal file
176
src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp
Normal file
@ -0,0 +1,176 @@
|
||||
#include "sfn_instr_controlflow.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
ControlFlowInstr::ControlFlowInstr(CFType type):
|
||||
m_type(type)
|
||||
{
|
||||
}
|
||||
|
||||
bool ControlFlowInstr::do_ready() const
|
||||
{
|
||||
/* TODO: rework this; for now a control flow instruction is always reported as ready */
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool ControlFlowInstr::is_equal_to(const ControlFlowInstr& rhs) const
|
||||
{
|
||||
return m_type == rhs.m_type;
|
||||
}
|
||||
|
||||
void ControlFlowInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void ControlFlowInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
void ControlFlowInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
switch (m_type) {
|
||||
case cf_else: os << "ELSE"; break;
|
||||
case cf_endif: os << "ENDIF";break;
|
||||
case cf_loop_begin: os << "LOOP_BEGIN"; break;
|
||||
case cf_loop_end: os << "LOOP_END"; break;
|
||||
case cf_loop_break: os << "BREAK"; break;
|
||||
case cf_loop_continue: os << "CONTINUE"; break;
|
||||
case cf_wait_ack: os << "WAIT_ACK"; break;
|
||||
default:
|
||||
unreachable("Unknown CF type");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Create a control flow instruction from its textual name.
 *
 * \param type_str one of "ELSE", "ENDIF", "LOOP_BEGIN", "LOOP_END",
 *                 "BREAK", "CONTINUE", or "WAIT_ACK"
 * \returns the new instruction, or nullptr if the name is not known
 */
Instr::Pointer ControlFlowInstr::from_string(std::string type_str)
{
   static const struct {
      const char *name;
      CFType type;
   } known_types[] = {
      {"ELSE", cf_else},
      {"ENDIF", cf_endif},
      {"LOOP_BEGIN", cf_loop_begin},
      {"LOOP_END", cf_loop_end},
      {"BREAK", cf_loop_break},
      {"CONTINUE", cf_loop_continue},
      {"WAIT_ACK", cf_wait_ack},
   };

   for (const auto& entry : known_types) {
      if (type_str == entry.name)
         return new ControlFlowInstr(entry.type);
   }
   return nullptr;
}
|
||||
|
||||
int ControlFlowInstr::nesting_corr() const
|
||||
{
|
||||
switch (m_type) {
|
||||
case cf_else:
|
||||
case cf_endif:
|
||||
case cf_loop_end: return -1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int ControlFlowInstr::nesting_offset() const
|
||||
{
|
||||
switch (m_type) {
|
||||
case cf_endif:
|
||||
case cf_loop_end: return -1;
|
||||
case cf_loop_begin: return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
IfInstr::IfInstr(AluInstr *pred):
|
||||
m_predicate(pred)
|
||||
{
|
||||
assert(pred);
|
||||
}
|
||||
|
||||
IfInstr::IfInstr(const IfInstr& orig)
|
||||
{
|
||||
m_predicate = new AluInstr(*orig.m_predicate);
|
||||
}
|
||||
|
||||
bool IfInstr::is_equal_to(const IfInstr& rhs) const
|
||||
{
|
||||
return m_predicate->equal_to(*rhs.m_predicate);
|
||||
}
|
||||
|
||||
void IfInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void IfInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
bool IfInstr::replace_source(PRegister old_src, PVirtualValue new_src)
|
||||
{
|
||||
return m_predicate->replace_source(old_src, new_src);
|
||||
}
|
||||
|
||||
bool IfInstr::do_ready() const
|
||||
{
|
||||
return m_predicate->ready();
|
||||
}
|
||||
|
||||
void IfInstr::forward_set_scheduled()
|
||||
{
|
||||
m_predicate->set_scheduled();
|
||||
}
|
||||
|
||||
void IfInstr::forward_set_blockid(int id, int index)
|
||||
{
|
||||
m_predicate->set_blockid(id, index);
|
||||
}
|
||||
|
||||
void IfInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "IF (( " << *m_predicate << " ))";
|
||||
}
|
||||
|
||||
void IfInstr::set_predicate(AluInstr *new_predicate)
|
||||
{
|
||||
m_predicate = new_predicate;
|
||||
m_predicate->set_blockid(block_id(), index());
|
||||
}
|
||||
|
||||
/**
 * Parse an IF instruction of the form "(( ALU <alu instruction> ))".
 *
 * \param is stream positioned behind the "IF" keyword
 * \param value_factory factory passed on to AluInstr::from_string
 * \returns the new IfInstr, or nullptr if the delimiters are missing, the
 *          predicate is not introduced by "ALU", or the predicate could
 *          not be parsed
 */
Instr::Pointer IfInstr::from_string(std::istream &is, ValueFactory& value_factory)
{
   std::string pred_start;
   is >> pred_start;
   if (pred_start != "((")
      return nullptr;

   /* Read the predicate text up to (not including) the first ')' */
   char buf[2048];
   is.get(buf, sizeof(buf), ')');

   std::string pred_end;
   is >> pred_end;
   if (pred_end != "))")
      return nullptr;

   std::istringstream bufstr(buf);

   std::string instr_type;
   bufstr >> instr_type;
   if (instr_type != "ALU")
      return nullptr;

   /* The IfInstr constructor requires a valid predicate, so a predicate
    * that failed to parse is reported like the other parse errors. */
   auto pred = AluInstr::from_string(bufstr, value_factory, nullptr);
   if (!pred)
      return nullptr;

   return new IfInstr(static_cast<AluInstr*>(pred));
}
|
||||
|
||||
}
|
81
src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h
Normal file
81
src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h
Normal file
@ -0,0 +1,81 @@
|
||||
#ifndef CONTROLFLOWINSTR_H
|
||||
#define CONTROLFLOWINSTR_H
|
||||
|
||||
#include "sfn_instr_alu.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class ControlFlowInstr : public Instr
|
||||
{
|
||||
public:
|
||||
enum CFType {
|
||||
cf_else,
|
||||
cf_endif,
|
||||
cf_loop_begin,
|
||||
cf_loop_end,
|
||||
cf_loop_break,
|
||||
cf_loop_continue,
|
||||
cf_stream_write,
|
||||
cf_wait_ack
|
||||
};
|
||||
|
||||
ControlFlowInstr(CFType type);
|
||||
|
||||
ControlFlowInstr(const ControlFlowInstr& orig) = default;
|
||||
|
||||
bool is_equal_to(const ControlFlowInstr& lhs) const;
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
CFType cf_type() const { return m_type;}
|
||||
|
||||
int nesting_corr() const override;
|
||||
|
||||
static Instr::Pointer from_string(std::string type_str);
|
||||
|
||||
bool end_block() const override { return true;}
|
||||
|
||||
int nesting_offset() const override;
|
||||
|
||||
private:
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
CFType m_type;
|
||||
};
|
||||
|
||||
class IfInstr : public Instr {
|
||||
public:
|
||||
|
||||
IfInstr(AluInstr *pred);
|
||||
IfInstr(const IfInstr& orig);
|
||||
|
||||
bool is_equal_to(const IfInstr& lhs) const;
|
||||
|
||||
void set_predicate(AluInstr *new_predicate);
|
||||
|
||||
AluInstr *predicate() const { return m_predicate; }
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
|
||||
|
||||
static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory);
|
||||
|
||||
bool end_block() const override { return true;}
|
||||
int nesting_offset() const override { return 1;}
|
||||
|
||||
private:
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
void forward_set_blockid(int id, int index) override;
|
||||
void forward_set_scheduled() override;
|
||||
|
||||
AluInstr *m_predicate;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // CONTROLFLOWINSTR_H
|
524
src/gallium/drivers/r600/sfn/sfn_instr_export.cpp
Normal file
524
src/gallium/drivers/r600/sfn/sfn_instr_export.cpp
Normal file
@ -0,0 +1,524 @@
|
||||
#include "sfn_instr_export.h"
|
||||
#include "sfn_valuefactory.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
using std::string;
|
||||
|
||||
/* Render the low four bits of `writemask` as a swizzle string.
 *
 * Bit i set yields the i-th character of "xyzw", a cleared bit yields '_'
 * (e.g. 0x5 -> "x_z_"). The result is NUL-terminated so it can be streamed
 * as a C string; `buf` must therefore provide room for at least five
 * characters. Returns `buf`. */
static char *writemask_to_swizzle(int writemask, char *buf)
{
   const char *swz = "xyzw";
   for (int i = 0; i < 4; ++i) {
      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
   }
   /* Without the terminator a caller printing the buffer reads past the
    * four characters written above. */
   buf[4] = '\0';
   return buf;
}
|
||||
|
||||
/* Stores a copy of the written value, registers this instruction as a user
 * of that value, and marks the instruction as always kept. */
WriteOutInstr::WriteOutInstr(const RegisterVec4& value):
   m_value(value)
{
   m_value.add_use(this);
   set_always_keep();
}
|
||||
|
||||
void WriteOutInstr::override_chan(int i, int chan)
|
||||
{
|
||||
m_value.set_value(i,
|
||||
new Register(m_value[i]->sel(), chan,
|
||||
m_value[i]->pin()));
|
||||
}
|
||||
|
||||
/* Create an export of `value` with the given type and location.
 * A new export is never flagged as the last one. */
ExportInstr::ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value):
   WriteOutInstr(value),
   m_type(type),
   m_loc(loc),
   m_is_last(false)
{
}
|
||||
|
||||
void ExportInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void ExportInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
|
||||
bool ExportInstr::is_equal_to(const ExportInstr& lhs) const
|
||||
{
|
||||
return
|
||||
|
||||
(m_type == lhs.m_type &&
|
||||
m_loc == lhs.m_loc &&
|
||||
value() == lhs.value() &&
|
||||
m_is_last == lhs.m_is_last);
|
||||
}
|
||||
|
||||
/* Translate an export type name into the matching ExportType.
 *
 * Recognises the names written by do_print ("PARAM", "POS", "PIXEL").
 * Any other text yields `param`, which is what this function returned
 * for every input before it evaluated its argument. */
ExportInstr::ExportType ExportInstr::type_from_string(const std::string& s)
{
   if (s == "PIXEL")
      return pixel;

   if (s == "POS")
      return pos;

   return param;
}
|
||||
|
||||
void ExportInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "EXPORT";
|
||||
if (m_is_last)
|
||||
os << "_DONE";
|
||||
|
||||
switch (m_type) {
|
||||
case param: os << " PARAM "; break;
|
||||
case pos: os << " POS "; break;
|
||||
case pixel: os << " PIXEL "; break;
|
||||
}
|
||||
os << m_loc << " ";
|
||||
value().print(os);
|
||||
}
|
||||
|
||||
bool ExportInstr::do_ready() const
|
||||
{
|
||||
return value().ready(block_id(), index());
|
||||
}
|
||||
|
||||
/* Parse an export that is not flagged as the last one. */
Instr::Pointer ExportInstr::from_string(std::istream& is, ValueFactory& vf)
{
   return from_string_impl(is, vf);
}
|
||||
|
||||
/* Parse an export and flag it as the last export. */
Instr::Pointer ExportInstr::last_from_string(std::istream& is, ValueFactory &vf)
{
   auto parsed = from_string_impl(is, vf);
   parsed->set_is_last_export(true);
   return parsed;
}
|
||||
|
||||
/* Shared parser for "<TYPE> <location> <value>", where TYPE is one of
 * PARAM, POS or PIXEL and <value> is resolved by the value factory. */
ExportInstr::Pointer ExportInstr::from_string_impl(std::istream& is, ValueFactory &vf)
{
   string type_name;
   int location;
   string value_token;

   is >> type_name;
   is >> location;
   is >> value_token;

   ExportInstr::ExportType export_type;

   if (type_name == "PARAM") {
      export_type = ExportInstr::param;
   } else if (type_name == "POS") {
      export_type = ExportInstr::pos;
   } else if (type_name == "PIXEL") {
      export_type = ExportInstr::pixel;
   } else {
      unreachable("Unknown export type");
   }

   RegisterVec4 exported = vf.src_vec4_from_string(value_token);

   return new ExportInstr(export_type, location, exported);
}
|
||||
|
||||
/* Indirectly addressed scratch write.
 *
 * `addr` is the address register; this instruction registers itself as a
 * user of it, so it must not be null. The array size is stored minus one
 * (do_print adds the one back). */
WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value,
                                     PRegister addr,
                                     int align,
                                     int align_offset,
                                     int writemask,
                                     int array_size):
   WriteOutInstr(value),
   m_address(addr),
   m_align(align),
   m_align_offset(align_offset),
   m_writemask(writemask),
   m_array_size(array_size - 1)
{
   addr->add_use(this);
}
|
||||
|
||||
/* Directly addressed scratch write to the fixed location `loc`.
 * Address register and array size keep their in-class defaults. */
WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value,
                                     int loc,
                                     int align,
                                     int align_offset,
                                     int writemask):
   WriteOutInstr(value),
   m_loc(loc),
   m_align(align),
   m_align_offset(align_offset),
   m_writemask(writemask)
{
}
|
||||
|
||||
void WriteScratchInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void WriteScratchInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
bool WriteScratchInstr::is_equal_to(const WriteScratchInstr& lhs) const
|
||||
{
|
||||
if (m_address) {
|
||||
if (!lhs.m_address)
|
||||
return false;
|
||||
if (! m_address->equal_to(*lhs.m_address))
|
||||
return false;
|
||||
} else if (lhs.m_address)
|
||||
return false;
|
||||
|
||||
return m_loc == lhs.m_loc &&
|
||||
m_align == lhs.m_align &&
|
||||
m_align_offset == lhs.m_align_offset &&
|
||||
m_writemask == lhs.m_writemask &&
|
||||
m_array_size == lhs.m_array_size &&
|
||||
value().sel() == lhs.value().sel();
|
||||
}
|
||||
|
||||
bool WriteScratchInstr::do_ready() const
|
||||
{
|
||||
return value().ready(block_id(), index()) &&
|
||||
(!m_address || m_address->ready(block_id(), index()));
|
||||
}
|
||||
|
||||
void WriteScratchInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
char buf[6];
|
||||
|
||||
os << "WRITE_SCRATCH ";
|
||||
if (m_address)
|
||||
os << "@" << *m_address << "[" << m_array_size + 1<<"]";
|
||||
else
|
||||
os << m_loc;
|
||||
|
||||
os << (value()[0]->is_ssa() ? " S" : " R")
|
||||
<< value().sel() << "." << writemask_to_swizzle(m_writemask, buf)
|
||||
<< " " << "AL:" << m_align << " ALO:" << m_align_offset;
|
||||
}
|
||||
|
||||
/* Parse "<loc> <value> AL:<n> ALO:<n>".
 *
 * <loc> is either a plain integer (direct write) or "@<addr>[<size>]"
 * (indirect write through an address register). The write mask is derived
 * from the parsed value: channel i is enabled when component i sits in
 * channel i. */
auto WriteScratchInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
{
   string loc_token;
   string value_token;
   string align_token;
   string align_offset_token;

   is >> loc_token >> value_token >> align_token >> align_offset_token;

   std::istringstream loc_stream(loc_token);

   auto align = int_from_string_with_prefix(align_token, "AL:");
   auto align_offset = int_from_string_with_prefix(align_offset_token, "ALO:");
   auto value = vf.src_vec4_from_string(value_token);

   int writemask = 0;
   for (int chan = 0; chan < 4; ++chan) {
      if (value[chan]->chan() == chan)
         writemask |= 1 << chan;
   }

   if (loc_token[0] != '@') {
      /* Direct write: the location token is just the offset. */
      int offset;
      loc_stream >> offset;
      return new WriteScratchInstr(value, offset, align, align_offset, writemask);
   }

   /* Indirect write: skip '@', then collect the register name up to '['. */
   string addr_name;
   char ch;
   loc_stream >> ch;
   loc_stream >> ch;

   while (!loc_stream.eof() && ch != '[') {
      addr_name.append(1, ch);
      loc_stream >> ch;
   }

   PVirtualValue addr_reg = vf.src_from_string(addr_name);
   assert(addr_reg && addr_reg->as_register());

   int array_size = 0;
   loc_stream >> array_size;
   loc_stream >> ch;
   assert(ch == ']');

   return new WriteScratchInstr(value, addr_reg->as_register(), align,
                                align_offset, writemask, array_size);
}
|
||||
|
||||
/* Stream-out write of `value`.
 *
 * The element size is stored as num_components - 1, except that three
 * components are stored as 3. Burst count and array size keep their
 * in-class defaults. */
StreamOutInstr::StreamOutInstr(const RegisterVec4& value,
                               int num_components,
                               int array_base,
                               int comp_mask,
                               int out_buffer,
                               int stream):
   WriteOutInstr(value),
   m_element_size(num_components == 3 ? 3 : num_components - 1),
   m_array_base(array_base),
   m_writemask(comp_mask),
   m_output_buffer(out_buffer),
   m_stream(stream)
{
}
|
||||
|
||||
unsigned StreamOutInstr::op() const
|
||||
{
|
||||
int op = 0;
|
||||
switch (m_output_buffer) {
|
||||
case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
|
||||
case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
|
||||
case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
|
||||
case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
|
||||
}
|
||||
return 4 * m_stream + op;
|
||||
}
|
||||
|
||||
bool StreamOutInstr::is_equal_to(const StreamOutInstr& oth) const
|
||||
{
|
||||
|
||||
return value() == oth.value() &&
|
||||
m_element_size == oth.m_element_size &&
|
||||
m_burst_count == oth.m_burst_count &&
|
||||
m_array_base == oth.m_array_base &&
|
||||
m_array_size == oth.m_array_size &&
|
||||
m_writemask == oth.m_writemask &&
|
||||
m_output_buffer == oth.m_output_buffer &&
|
||||
m_stream == oth.m_stream;
|
||||
}
|
||||
|
||||
void StreamOutInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "WRITE STREAM(" << m_stream << ") " << value()
|
||||
<< " ES:" << m_element_size
|
||||
<< " BC:" << m_burst_count
|
||||
<< " BUF:" << m_output_buffer
|
||||
<< " ARRAY:" << m_array_base;
|
||||
if (m_array_size != 0xfff)
|
||||
os << "+" << m_array_size;
|
||||
}
|
||||
|
||||
bool StreamOutInstr::do_ready() const
|
||||
{
|
||||
return value().ready(block_id(), index());
|
||||
}
|
||||
|
||||
void StreamOutInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void StreamOutInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
|
||||
/* Ring-buffer write.
 *
 * `ring` must be one of the four cf_mem_ring* opcodes and `ncomp` at most
 * four (both asserted). `index` may be null; when given, this instruction
 * registers itself as a user of it. */
MemRingOutInstr::MemRingOutInstr(ECFOpCode ring,
                                 EMemWriteType type,
                                 const RegisterVec4& value,
                                 unsigned base_addr,
                                 unsigned ncomp,
                                 PRegister index):
   WriteOutInstr(value),
   m_ring_op(ring),
   m_type(type),
   m_base_address(base_addr),
   m_num_comp(ncomp),
   m_export_index(index)
{
   assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1||
          m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3);
   assert(m_num_comp <= 4);

   if (m_export_index)
      m_export_index->add_use(this);
}
|
||||
|
||||
unsigned MemRingOutInstr::ncomp() const
|
||||
{
|
||||
switch (m_num_comp) {
|
||||
case 1: return 0;
|
||||
case 2: return 1;
|
||||
case 3:
|
||||
case 4: return 3;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return 3;
|
||||
}
|
||||
|
||||
bool MemRingOutInstr::is_equal_to(const MemRingOutInstr& oth) const
|
||||
{
|
||||
|
||||
bool equal = value() == oth.value() &&
|
||||
m_ring_op == oth.m_ring_op &&
|
||||
m_type == oth.m_type &&
|
||||
m_num_comp == oth.m_num_comp &&
|
||||
m_base_address == oth.m_base_address;
|
||||
|
||||
if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
|
||||
equal &= (*m_export_index == *oth.m_export_index);
|
||||
return equal;
|
||||
|
||||
}
|
||||
|
||||
static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
|
||||
void MemRingOutInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
|
||||
os << "MEM_RING " << (m_ring_op == cf_mem_ring ? 0 : m_ring_op - cf_mem_ring1 + 1);
|
||||
os << " " << write_type_str[m_type] << " " << m_base_address;
|
||||
os << " " << value();
|
||||
if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
|
||||
os << " @" << *m_export_index;
|
||||
os << " ES:" << m_num_comp;
|
||||
}
|
||||
|
||||
void MemRingOutInstr::patch_ring(int stream, PRegister index)
|
||||
{
|
||||
const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
|
||||
|
||||
assert(stream < 4);
|
||||
m_ring_op = ring_op[stream];
|
||||
m_export_index = index;
|
||||
}
|
||||
|
||||
bool MemRingOutInstr::do_ready() const
|
||||
{
|
||||
if (m_export_index && !m_export_index->ready(block_id(), index()))
|
||||
return false;
|
||||
|
||||
return value().ready(block_id(), index());
|
||||
}
|
||||
|
||||
void MemRingOutInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void MemRingOutInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
static const std::map<string, MemRingOutInstr::EMemWriteType> type_lookop =
|
||||
{
|
||||
{"WRITE", MemRingOutInstr::mem_write},
|
||||
{"WRITE_IDX", MemRingOutInstr::mem_write_ind},
|
||||
{"WRITE_ACK", MemRingOutInstr::mem_write_ack},
|
||||
{"WRITE_IDX_ACK", MemRingOutInstr::mem_write_ind_ack}
|
||||
};
|
||||
|
||||
auto MemRingOutInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
|
||||
{
|
||||
string type_str;
|
||||
|
||||
int ring;
|
||||
|
||||
int base_address;
|
||||
string value_str;
|
||||
|
||||
is >> ring >> type_str >> base_address >> value_str;
|
||||
assert(ring < 4);
|
||||
|
||||
auto itype = type_lookop.find(type_str);
|
||||
assert(itype != type_lookop.end());
|
||||
|
||||
auto type = itype->second;
|
||||
|
||||
PVirtualValue index{nullptr};
|
||||
if (type == mem_write_ind || type == mem_write_ind_ack) {
|
||||
char c;
|
||||
string index_str;
|
||||
is >> c >> index_str;
|
||||
assert('@' == c );
|
||||
index = vf.src_from_string(index_str);
|
||||
}
|
||||
|
||||
string elm_size_str;
|
||||
is >> elm_size_str;
|
||||
|
||||
int num_comp = int_from_string_with_prefix(elm_size_str, "ES:");
|
||||
|
||||
auto value = vf.src_vec4_from_string(value_str);
|
||||
|
||||
ECFOpCode opcodes[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
|
||||
assert(ring < 4);
|
||||
|
||||
return new MemRingOutInstr(opcodes[ring], type, value, base_address, num_comp, index->as_register());
|
||||
}
|
||||
|
||||
/* Vertex emission on `stream`; `cut` selects the cut-vertex variant. */
EmitVertexInstr::EmitVertexInstr(int stream, bool cut):
   m_stream(stream),
   m_cut(cut)
{
}
|
||||
|
||||
|
||||
bool EmitVertexInstr::is_equal_to(const EmitVertexInstr& oth) const
|
||||
{
|
||||
return oth.m_stream == m_stream &&
|
||||
oth.m_cut == m_cut;
|
||||
}
|
||||
|
||||
void EmitVertexInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void EmitVertexInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
bool EmitVertexInstr::do_ready() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void EmitVertexInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << (m_cut ? "EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream;
|
||||
}
|
||||
|
||||
/* Parse "@<stream>"; the caller decides via `cut` which variant to build. */
auto EmitVertexInstr::from_string(std::istream& is, bool cut) -> Pointer
{
   char marker;
   is >> marker;
   assert(marker == '@');

   int stream_id;
   is >> stream_id;

   return new EmitVertexInstr(stream_id, cut);
}
|
||||
|
||||
void WriteTFInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void WriteTFInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
bool WriteTFInstr::is_equal_to(const WriteTFInstr& rhs) const
|
||||
{
|
||||
return value() == rhs.value();
|
||||
}
|
||||
|
||||
/* Parse a single value token and build the write for it. */
auto WriteTFInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
{
   string value_token;
   is >> value_token;

   auto written = vf.src_vec4_from_string(value_token);
   return new WriteTFInstr(written);
}
|
||||
|
||||
bool WriteTFInstr::do_ready() const
|
||||
{
|
||||
return value().ready(block_id(), index());
|
||||
}
|
||||
|
||||
void WriteTFInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "WRITE_TF " << value();
|
||||
}
|
||||
|
||||
}
|
||||
|
213
src/gallium/drivers/r600/sfn/sfn_instr_export.h
Normal file
213
src/gallium/drivers/r600/sfn/sfn_instr_export.h
Normal file
@ -0,0 +1,213 @@
|
||||
#ifndef INSTR_EXPORT_H
|
||||
#define INSTR_EXPORT_H
|
||||
|
||||
#include "sfn_instr.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class ValueFactory;
|
||||
|
||||
|
||||
class WriteOutInstr: public Instr {
|
||||
public:
|
||||
WriteOutInstr(const RegisterVec4& value);
|
||||
WriteOutInstr(const WriteOutInstr& orig) = delete;
|
||||
|
||||
void override_chan(int i, int chan);
|
||||
|
||||
const RegisterVec4& value() const {return m_value;};
|
||||
RegisterVec4& value() {return m_value;};
|
||||
private:
|
||||
|
||||
RegisterVec4 m_value;
|
||||
};
|
||||
|
||||
class ExportInstr: public WriteOutInstr {
|
||||
public:
|
||||
enum ExportType {
|
||||
pixel,
|
||||
pos,
|
||||
param
|
||||
};
|
||||
|
||||
using Pointer = R600_POINTER_TYPE(ExportInstr);
|
||||
|
||||
ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value);
|
||||
ExportInstr(const ExportInstr& orig) = delete;
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
bool is_equal_to(const ExportInstr& lhs) const;
|
||||
|
||||
|
||||
static ExportType type_from_string(const std::string& s);
|
||||
|
||||
ExportType export_type() const {return m_type;}
|
||||
|
||||
unsigned location() const {return m_loc;}
|
||||
|
||||
void set_is_last_export(bool value) {m_is_last = value;}
|
||||
bool is_last_export() const {return m_is_last;}
|
||||
|
||||
static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
|
||||
static Instr::Pointer last_from_string(std::istream& is, ValueFactory &vf);
|
||||
|
||||
private:
|
||||
static ExportInstr::Pointer from_string_impl(std::istream& is, ValueFactory &vf);
|
||||
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ExportType m_type;
|
||||
unsigned m_loc;
|
||||
bool m_is_last;
|
||||
};
|
||||
|
||||
class WriteScratchInstr : public WriteOutInstr {
|
||||
public:
|
||||
WriteScratchInstr(const RegisterVec4& value, PRegister addr,
|
||||
int align, int align_offset, int writemask, int array_size);
|
||||
WriteScratchInstr(const RegisterVec4& value, int addr, int align, int align_offset,
|
||||
int writemask);
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
bool is_equal_to(const WriteScratchInstr& lhs) const;
|
||||
|
||||
unsigned location() const { return m_loc;};
|
||||
int write_mask() const { return m_writemask;}
|
||||
auto address() const { return m_address;}
|
||||
bool indirect() const { return !!m_address;}
|
||||
int array_size() const { return m_array_size;}
|
||||
|
||||
static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
|
||||
private:
|
||||
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
unsigned m_loc{0};
|
||||
PRegister m_address {nullptr};
|
||||
unsigned m_align;
|
||||
unsigned m_align_offset;
|
||||
unsigned m_writemask;
|
||||
int m_array_size{0};
|
||||
};
|
||||
|
||||
class StreamOutInstr: public WriteOutInstr {
|
||||
public:
|
||||
StreamOutInstr(const RegisterVec4& value, int num_components,
|
||||
int array_base, int comp_mask, int out_buffer,
|
||||
int stream);
|
||||
int element_size() const { return m_element_size;}
|
||||
int burst_count() const { return m_burst_count;}
|
||||
int array_base() const { return m_array_base;}
|
||||
int array_size() const { return m_array_size;}
|
||||
int comp_mask() const { return m_writemask;}
|
||||
unsigned op() const;
|
||||
|
||||
bool is_equal_to(const StreamOutInstr& lhs) const;
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
private:
|
||||
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
int m_element_size{0};
|
||||
int m_burst_count{1};
|
||||
int m_array_base{0};
|
||||
int m_array_size{0xfff};
|
||||
int m_writemask{0};
|
||||
int m_output_buffer{0};
|
||||
int m_stream{0};
|
||||
};
|
||||
|
||||
class MemRingOutInstr: public WriteOutInstr {
|
||||
public:
|
||||
|
||||
enum EMemWriteType {
|
||||
mem_write = 0,
|
||||
mem_write_ind = 1,
|
||||
mem_write_ack = 2,
|
||||
mem_write_ind_ack = 3,
|
||||
};
|
||||
|
||||
MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
|
||||
const RegisterVec4& value, unsigned base_addr,
|
||||
unsigned ncomp, PRegister m_index);
|
||||
|
||||
unsigned op() const{return m_ring_op;}
|
||||
unsigned ncomp() const;
|
||||
unsigned addr() const {return m_base_address;}
|
||||
EMemWriteType type() const {return m_type;}
|
||||
unsigned index_reg() const {assert(m_export_index->sel() >= 0); return m_export_index->sel();}
|
||||
unsigned array_base() const {return m_base_address; }
|
||||
PVirtualValue export_index() const {return m_export_index;}
|
||||
|
||||
void patch_ring(int stream, PRegister index);
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
bool is_equal_to(const MemRingOutInstr& lhs) const;
|
||||
|
||||
static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
|
||||
|
||||
private:
|
||||
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ECFOpCode m_ring_op;
|
||||
EMemWriteType m_type;
|
||||
unsigned m_base_address;
|
||||
unsigned m_num_comp;
|
||||
PRegister m_export_index;
|
||||
};
|
||||
|
||||
class EmitVertexInstr : public Instr {
|
||||
public:
|
||||
EmitVertexInstr(int stream, bool cut);
|
||||
ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
|
||||
int stream() const { return m_stream;}
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
bool is_equal_to(const EmitVertexInstr& lhs) const;
|
||||
|
||||
static auto from_string(std::istream& is, bool cut) -> Pointer;
|
||||
|
||||
private:
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
int m_stream;
|
||||
bool m_cut;
|
||||
};
|
||||
|
||||
class WriteTFInstr : public WriteOutInstr {
|
||||
public:
|
||||
using WriteOutInstr::WriteOutInstr;
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
bool is_equal_to(const WriteTFInstr& rhs) const;
|
||||
|
||||
static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
|
||||
|
||||
private:
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // INSTR_EXPORT_H
|
659
src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp
Normal file
659
src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp
Normal file
@ -0,0 +1,659 @@
|
||||
#include "sfn_instr_fetch.h"
|
||||
#include "sfn_valuefactory.h"
|
||||
#include "sfn_defines.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
using std::string;
|
||||
using std::istringstream;
|
||||
|
||||
/* Create a fetch instruction.
 *
 * Mega fetch count, array base, array size and element size start at
 * zero. The opcode selects the printed mnemonic; for GET_BUF_RESINFO the
 * mega fetch count, format and fetch type are excluded from printing.
 * The instruction registers itself as a user of the source register and
 * of the resource offset register when they are set. */
FetchInstr::FetchInstr(EVFetchInstr opcode,
                       const RegisterVec4& dst,
                       const RegisterVec4::Swizzle& dest_swizzle,
                       PRegister src,
                       uint32_t src_offset,
                       EVFetchType fetch_type,
                       EVTXDataFormat data_format,
                       EVFetchNumFormat num_format,
                       EVFetchEndianSwap endian_swap,
                       uint32_t resource_id,
                       PRegister resource_offset):
   InstrWithVectorResult(dst, dest_swizzle),
   m_opcode(opcode),
   m_src(src),
   m_src_offset(src_offset),
   m_fetch_type(fetch_type),
   m_data_format(data_format),
   m_num_format(num_format),
   m_endian_swap(endian_swap),
   m_resource_id(resource_id),
   m_resource_offset(resource_offset),
   m_mega_fetch_count(0),
   m_array_base(0),
   m_array_size(0),
   m_elm_size(0)
{
   switch (m_opcode) {
   case vc_fetch:
      m_opname ="VFETCH";
      break;
   case vc_semantic:
      m_opname = "FETCH_SEMANTIC";
      break;
   case vc_get_buf_resinfo:
      set_print_skip(mfc);
      set_print_skip(fmt);
      set_print_skip(ftype);
      m_opname = "GET_BUF_RESINFO";
      break;
   case vc_read_scratch:
      m_opname = "READ_SCRATCH";
      break;
   default:
      unreachable("Unknwon fetch instruction");
   }

   if (m_src)
      m_src->add_use(this);

   if (m_resource_offset) {
      auto offset_reg = m_resource_offset->as_register();
      if (offset_reg)
         offset_reg->add_use(this);
   }
}
|
||||
|
||||
void FetchInstr::accept(ConstInstrVisitor& visitor) const
|
||||
{
|
||||
visitor.visit(*this);
|
||||
}
|
||||
|
||||
void FetchInstr::accept(InstrVisitor& visitor)
|
||||
{
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
bool FetchInstr::is_equal_to(const FetchInstr& rhs) const
|
||||
{
|
||||
if (m_src) {
|
||||
if (rhs.m_src) {
|
||||
if (!m_src->equal_to(*rhs.m_src))
|
||||
return false;
|
||||
} else
|
||||
return false;
|
||||
} else if (rhs.m_src)
|
||||
return false;
|
||||
|
||||
if (!comp_dest(rhs.dst(), rhs.all_dest_swizzle()))
|
||||
return false;
|
||||
|
||||
if (m_tex_flags != rhs.m_tex_flags)
|
||||
return false;
|
||||
|
||||
if (m_resource_offset && rhs.m_resource_offset) {
|
||||
if (!m_resource_offset->equal_to(*rhs.m_resource_offset))
|
||||
return false;
|
||||
} else if (!(!!m_resource_offset == !!rhs.m_resource_offset))
|
||||
return false;
|
||||
|
||||
return m_opcode == rhs.m_opcode &&
|
||||
m_src_offset == rhs.m_src_offset &&
|
||||
m_fetch_type == rhs.m_fetch_type &&
|
||||
m_data_format == rhs.m_data_format &&
|
||||
m_num_format == rhs.m_num_format &&
|
||||
m_endian_swap == rhs.m_endian_swap &&
|
||||
m_resource_id == rhs.m_resource_id &&
|
||||
m_mega_fetch_count == rhs.m_mega_fetch_count &&
|
||||
m_array_base == rhs.m_array_base &&
|
||||
m_array_size == rhs.m_array_size &&
|
||||
m_elm_size == rhs.m_elm_size;
|
||||
}
|
||||
|
||||
bool FetchInstr::propagate_death()
|
||||
{
|
||||
auto reg = m_src->as_register();
|
||||
if (reg)
|
||||
reg->del_use(this);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Replace every use of `old_src` (as source register and/or as resource
 * offset) by `new_src`, keeping the use lists of the registers in sync.
 *
 * Only a register can act as replacement; for any other value nothing is
 * changed. Returns true when at least one operand was replaced.
 *
 * The source register is optional, so it is checked before comparing it
 * with `old_src`. */
bool FetchInstr::replace_source(PRegister old_src, PVirtualValue new_src)
{
   bool success = false;
   auto new_reg = new_src->as_register();
   if (new_reg) {
      if (m_src && old_src->equal_to(*m_src)) {
         m_src->del_use(this);
         m_src = new_reg;
         new_reg->add_use(this);
         success = true;
      }
      if (m_resource_offset && old_src->equal_to(*m_resource_offset)) {
         m_resource_offset->del_use(this);
         m_resource_offset = new_reg;
         new_reg->add_use(this);
         success = true;
      }
   }
   return success;
}
|
||||
|
||||
bool FetchInstr::do_ready() const
|
||||
{
|
||||
for (auto i: required_instr()) {
|
||||
if (!i->is_scheduled())
|
||||
return false;
|
||||
}
|
||||
|
||||
bool result = m_src && m_src->ready(block_id(), index());
|
||||
if (m_resource_offset) {
|
||||
auto r = m_resource_offset->as_register();
|
||||
if (r)
|
||||
result &= r->ready(block_id(), index());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void FetchInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << m_opname << ' ';
|
||||
|
||||
print_dest(os);
|
||||
|
||||
os << " :";
|
||||
|
||||
if (m_opcode != vc_get_buf_resinfo) {
|
||||
|
||||
if (m_src && m_src->chan() < 7) {
|
||||
os << " " << *m_src;
|
||||
if (m_src_offset)
|
||||
os << " + " << m_src_offset << "b";
|
||||
}
|
||||
}
|
||||
|
||||
if (m_opcode != vc_read_scratch)
|
||||
os << " RID:" << m_resource_id;
|
||||
|
||||
if (m_resource_offset) {
|
||||
os << " + ";
|
||||
m_resource_offset->print(os);
|
||||
}
|
||||
|
||||
if (!m_skip_print.test(ftype)) {
|
||||
switch (m_fetch_type) {
|
||||
case vertex_data : os << " VERTEX"; break;
|
||||
case instance_data : os << " INSTANCE_DATA"; break;
|
||||
case no_index_offset : os << " NO_IDX_OFFSET"; break;
|
||||
default:
|
||||
unreachable("Unknwon fetch instruction type");
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_skip_print.test(fmt)) {
|
||||
os << " FMT(";
|
||||
auto fmt = s_data_format_map.find(m_data_format);
|
||||
if (fmt != s_data_format_map.end())
|
||||
os << fmt->second << ",";
|
||||
else
|
||||
unreachable("unknwon data format");
|
||||
|
||||
if (m_tex_flags.test(format_comp_signed))
|
||||
os << "S";
|
||||
else
|
||||
os << "U";
|
||||
|
||||
switch (m_num_format) {
|
||||
case vtx_nf_norm : os << "NORM"; break;
|
||||
case vtx_nf_int : os << "INT"; break;
|
||||
case vtx_nf_scaled: os << "SCALED"; break;
|
||||
default:
|
||||
unreachable("Unknwon number format");
|
||||
}
|
||||
|
||||
os << ")";
|
||||
}
|
||||
|
||||
if (m_array_base) {
|
||||
if (m_opcode != vc_read_scratch)
|
||||
os << " BASE:" << m_array_base;
|
||||
else
|
||||
os << " L[0x" << std::uppercase << std::hex << m_array_base << std::dec << "]";
|
||||
}
|
||||
|
||||
if (m_array_size)
|
||||
os << " SIZE:" << m_array_size + 1;
|
||||
|
||||
if (m_tex_flags.test(is_mega_fetch) && !m_skip_print.test(mfc))
|
||||
os << " MFC:" << m_mega_fetch_count;
|
||||
|
||||
if (m_elm_size)
|
||||
os << " ES:" << m_elm_size;
|
||||
|
||||
if (m_tex_flags.test(fetch_whole_quad)) os << " WQ";
|
||||
if (m_tex_flags.test(use_const_field)) os << " UCF";
|
||||
if (m_tex_flags.test(srf_mode)) os << " SRF";
|
||||
if (m_tex_flags.test(buf_no_stride)) os << " BNS";
|
||||
if (m_tex_flags.test(alt_const)) os << " AC";
|
||||
if (m_tex_flags.test(use_tc)) os << " TC";
|
||||
if (m_tex_flags.test(vpm)) os << " VPM";
|
||||
if (m_tex_flags.test(uncached) && m_opcode != vc_read_scratch) os << " UNCACHED";
|
||||
if (m_tex_flags.test(indexed) && m_opcode != vc_read_scratch) os << " INDEXED";
|
||||
}
|
||||
|
||||
/* Parse the textual form of a fetch, using the vc_fetch opcode. */
Instr::Pointer FetchInstr::from_string(std::istream& is, ValueFactory& vf)
{
   const EVFetchInstr opcode = vc_fetch;
   return from_string_impl(is, opcode, vf);
}
|
||||
|
||||
/* Parse the operands of a fetch instruction with the given opcode.
 *
 * Expected token sequence:
 *   <dest> : <src> [+ <n>b] RID:<id> <fetch type>
 *   FMT(<data format>,<S|U><num format>) [<param>:<n> | <flag>]...
 *
 * <fetch type> is one of the names written by do_print: VERTEX,
 * INSTANCE_DATA or NO_IDX_OFFSET. Trailing tokens containing ':' are
 * handed to set_param_from_string, all others to set_flag_from_string. */
Instr::Pointer FetchInstr::from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory& vf)
{
   std::string deststr;
   is >> deststr;

   RegisterVec4::Swizzle dst_swz;
   auto dest_reg = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);

   char help;
   is >> help;
   assert(help == ':');

   string srcstr;
   is >> srcstr;

   /* NOTE(review): looks like leftover debug output; kept because it is
    * part of what the function currently emits. */
   std::cerr << "Get source " << srcstr << "\n";

   auto src_reg = vf.src_from_string(srcstr)->as_register();
   assert(src_reg);

   string res_id_str;
   string next;
   is >> next;

   int src_offset_val = 0;

   /* Optional "+ <n>b" source offset in front of the resource id. */
   if (next == "+") {
      is >> src_offset_val;
      is >> help;
      assert(help == 'b');
      is >> res_id_str;
   } else {
      res_id_str = next;
   }

   int res_id = int_from_string_with_prefix(res_id_str, "RID:");

   string fetch_type_str;
   is >> fetch_type_str;

   EVFetchType fetch_type = vertex_data;
   if (fetch_type_str == "VERTEX") {
      fetch_type = vertex_data;
   } else if (fetch_type_str == "INSTANCE_DATA") {
      fetch_type = instance_data;
   } else if (fetch_type_str == "NO_IDX_OFFSET") {
      fetch_type = no_index_offset;
   } else {
      /* A bare string literal is always true inside assert(); negate it
       * so an unknown fetch type is reported in debug builds. Release
       * builds fall back to vertex_data. */
      assert(!"Fetch type not yet implemented");
   }

   string format_str;
   is >> format_str;

   assert(!strncmp(format_str.c_str(), "FMT(", 4));
   string data_format;
   string num_format_str;

   istringstream fmt_stream(format_str.substr(4));
   bool is_num_fmr = false;
   assert(!fmt_stream.eof());

   /* Split "<data format>,<num format>)" at the comma. Looping on the
    * extraction result keeps the failed read at the end of the stream
    * from appending an indeterminate character. */
   char c;
   while (fmt_stream >> c) {
      if (c == ',') {
         is_num_fmr = true;
         continue;
      }

      if (!is_num_fmr)
         data_format.append(1, c);
      else
         num_format_str.append(1, c);
   }

   EVTXDataFormat fmt = fmt_invalid;

   for (auto& [f, name] : s_data_format_map) {
      if (data_format == name) {
         fmt = f;
         break;
      }
   }

   assert(fmt != fmt_invalid);

   /* The number format starts with a sign marker and ends at ')'. */
   bool fmt_signed = num_format_str[0] == 'S';
   assert(fmt_signed || num_format_str[0] == 'U');

   size_t num_format_end = num_format_str.find(')');
   num_format_str = num_format_str.substr(1, num_format_end - 1) ;

   EVFetchNumFormat num_fmt;
   if (num_format_str == "NORM")
      num_fmt = vtx_nf_norm;
   else if (num_format_str == "INT")
      num_fmt = vtx_nf_int;
   else if (num_format_str == "SCALED")
      num_fmt = vtx_nf_scaled;
   else {
      std::cerr << "Number format: '" << num_format_str << "' : ";
      unreachable("Unknown number format");
   }

   auto fetch = new FetchInstr(opcode, dest_reg, dst_swz,
                               src_reg, src_offset_val, fetch_type, fmt, num_fmt,
                               vtx_es_none, res_id, nullptr);
   if (fmt_signed)
      fetch->set_fetch_flag(format_comp_signed);

   /* Remaining tokens are "<name>:<value>" parameters or plain flags. */
   while (!is.eof() && is.good()) {
      std::string next_token;
      is >> next_token;

      if (next_token.empty())
         break;

      if (next_token.find(':') != string::npos) {
         fetch->set_param_from_string(next_token);
      } else {
         fetch->set_flag_from_string(next_token);
      }
   }

   return fetch;
}
|
||||
|
||||
void FetchInstr::set_param_from_string(const std::string& token)
|
||||
{
|
||||
if (token.substr(0,4) == "MFC:")
|
||||
set_mfc(int_from_string_with_prefix(token, "MFC:"));
|
||||
else if (token.substr(0,5) == "ARRB:")
|
||||
set_array_base(int_from_string_with_prefix(token, "ARRB:"));
|
||||
else if (token.substr(0,5) == "ARRS:")
|
||||
set_array_size(int_from_string_with_prefix(token, "ARRS:"));
|
||||
else if (token.substr(0,3) == "ES:")
|
||||
set_element_size(int_from_string_with_prefix(token, "ES:"));
|
||||
else {
|
||||
std::cerr << "Token '" << token << "': ";
|
||||
unreachable("Unknown token in fetch param list");
|
||||
}
|
||||
}
|
||||
|
||||
void FetchInstr::set_flag_from_string(const std::string& token)
|
||||
{
|
||||
auto flag = s_flag_map.find(token.c_str());
|
||||
if (flag != s_flag_map.end())
|
||||
set_fetch_flag(flag->second);
|
||||
else {
|
||||
std::cerr << "Token: " << token << " : ";
|
||||
unreachable("Unknown token in fetch flag list");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const std::map<const char *, FetchInstr::EFlags> FetchInstr::s_flag_map = {
|
||||
{"WQ", fetch_whole_quad},
|
||||
{"UCF", use_const_field},
|
||||
{"SRF", srf_mode},
|
||||
{"BNS", buf_no_stride},
|
||||
{"AC", alt_const},
|
||||
{"TC", use_tc},
|
||||
{"VPM", vpm},
|
||||
{"UNCACHED", uncached},
|
||||
{"INDEXED", indexed}
|
||||
};
|
||||
|
||||
const std::map<EVTXDataFormat, const char *> FetchInstr::s_data_format_map = {
|
||||
{fmt_invalid, "INVALID"},
|
||||
{fmt_8, "8"},
|
||||
{fmt_4_4, "4_4"},
|
||||
{fmt_3_3_2, "3_3_2"},
|
||||
{fmt_reserved_4, "RESERVED_4"},
|
||||
{fmt_16, "16"},
|
||||
{fmt_16_float, "16F"},
|
||||
{fmt_8_8, "8_8"},
|
||||
{fmt_5_6_5, "5_6_5"},
|
||||
{fmt_6_5_5, "6_5_5"},
|
||||
{fmt_1_5_5_5, "1_5_5_5"},
|
||||
{fmt_4_4_4_4, "4_4_4_4"},
|
||||
{fmt_5_5_5_1, "5_5_5_1"},
|
||||
{fmt_32, "32"},
|
||||
{fmt_32_float, "32F"},
|
||||
{fmt_16_16, "16_16"},
|
||||
{fmt_16_16_float, "16_16F"},
|
||||
{fmt_8_24, "8_24"},
|
||||
{fmt_8_24_float, "8_24F"},
|
||||
{fmt_24_8, "24_8"},
|
||||
{fmt_24_8_float, "24_8F"},
|
||||
{fmt_10_11_11, "10_11_11"},
|
||||
{fmt_10_11_11_float, "10_11_11F"},
|
||||
{fmt_11_11_10, "11_11_10"},
|
||||
{fmt_10_11_11_float, "11_11_10F"},
|
||||
{fmt_2_10_10_10, "2_10_10_10"},
|
||||
{fmt_8_8_8_8, "8_8_8_8"},
|
||||
{fmt_10_10_10_2, "10_10_10_2"},
|
||||
{fmt_x24_8_32_float, "X24_8_32F"},
|
||||
{fmt_32_32, "32_32"},
|
||||
{fmt_32_32_float, "32_32F"},
|
||||
{fmt_16_16_16_16, "16_16_16_16"},
|
||||
{fmt_16_16_16_16_float, "16_16_16_16F"},
|
||||
{fmt_reserved_33, "RESERVED_33"},
|
||||
{fmt_32_32_32_32, "32_32_32_32"},
|
||||
{fmt_32_32_32_32_float, "32_32_32_32F"},
|
||||
{fmt_reserved_36, "RESERVED_36"},
|
||||
{fmt_1, "1"},
|
||||
{fmt_1_reversed, "1_REVERSED"},
|
||||
{fmt_gb_gr, "GB_GR"},
|
||||
{fmt_bg_rg, "BG_RG"},
|
||||
{fmt_32_as_8, "32_AS_8"},
|
||||
{fmt_32_as_8_8, "32_AS_8_8"},
|
||||
{fmt_5_9_9_9_sharedexp, "5_9_9_9_SHAREDEXP"},
|
||||
{fmt_8_8_8, "8_8_8"},
|
||||
{fmt_16_16_16, "16_16_16"},
|
||||
{fmt_16_16_16_float, "16_16_16F"},
|
||||
{fmt_32_32_32, "32_32_32"},
|
||||
{fmt_32_32_32_float, "32_32_32F"},
|
||||
{fmt_bc1, "BC1"},
|
||||
{fmt_bc2, "BC2"},
|
||||
{fmt_bc3, "BC3"},
|
||||
{fmt_bc4, "BC4"},
|
||||
{fmt_bc5, "BC5"},
|
||||
{fmt_apc0, "APC0"},
|
||||
{fmt_apc1, "APC1"},
|
||||
{fmt_apc2, "APC2"},
|
||||
{fmt_apc3, "APC3"},
|
||||
{fmt_apc4, "APC4"},
|
||||
{fmt_apc5, "APC5"},
|
||||
{fmt_apc6, "APC6"},
|
||||
{fmt_apc7, "APC7"},
|
||||
{fmt_ctx1, "CTX1"},
|
||||
{fmt_reserved_63, "RESERVED_63"}
|
||||
};
|
||||
|
||||
|
||||
/* Fetch instruction using the vc_get_buf_resinfo opcode on resource `resid`.
 * The source is a newly created Register(0, 7, pin_fully); the result is
 * written to `dst` using `dst_swz`. MFC, format and fetch type are excluded
 * from the printed form. */
QueryBufferSizeInstr::QueryBufferSizeInstr(const RegisterVec4& dst,
                                           const RegisterVec4::Swizzle& dst_swz,
                                           uint32_t resid):
   FetchInstr(vc_get_buf_resinfo, dst, dst_swz,
              new Register( 0, 7, pin_fully), 0,
              no_index_offset, fmt_32_32_32_32, vtx_nf_norm, vtx_es_none,
              resid, nullptr)
{
   set_fetch_flag(format_comp_signed);

   set_print_skip(mfc);
   set_print_skip(fmt);
   set_print_skip(ftype);
}
|
||||
|
||||
/* Parse "<dest vec4> : RID:<id>" and create the matching instruction. */
Instr::Pointer QueryBufferSizeInstr::from_string(std::istream& is, ValueFactory& vf)
{
   std::string dest_token;
   is >> dest_token;

   char separator;
   is >> separator;
   assert(separator == ':');

   std::string resource_token;
   is >> resource_token;

   RegisterVec4::Swizzle dst_swz;
   auto dst = vf.dest_vec4_from_string(dest_token, dst_swz, pin_group);
   int res_id = int_from_string_with_prefix(resource_token, "RID:");

   return new QueryBufferSizeInstr( dst, dst_swz, res_id);
}
|
||||
|
||||
/* vc_fetch instruction that is printed under the name "LOAD_BUF".
 * Reads from resource `resid` (plus optional `res_offset`) at `addr` +
 * `addr_offset` with the given data format; the number format is always
 * vtx_nf_scaled and the mega fetch count is set to 16. */
LoadFromBuffer::LoadFromBuffer(const RegisterVec4& dst,
                               const RegisterVec4::Swizzle& dst_swizzle,
                               PRegister addr, uint32_t addr_offset,
                               uint32_t resid, PRegister res_offset,
                               EVTXDataFormat data_format):
   FetchInstr(vc_fetch, dst, dst_swizzle, addr, addr_offset, no_index_offset,
              data_format, vtx_nf_scaled, vtx_es_none, resid, res_offset)
{
   set_fetch_flag(format_comp_signed);
   set_mfc(16);
   override_opname("LOAD_BUF");

   /* These fields are not part of the printed form */
   set_print_skip(mfc);
   set_print_skip(fmt);
   set_print_skip(ftype);
}
|
||||
|
||||
/* Parse "<dest vec4> : <addr> [ + <n>b ] RID:<id> [ + <res offset> ] [SRF]"
 * and create a LoadFromBuffer with the fmt_32_32_32_32_float data format. */
Instr::Pointer LoadFromBuffer::from_string(std::istream& is, ValueFactory& vf)
{
   std::string dest_token;
   is >> dest_token;

   RegisterVec4::Swizzle dst_swz;
   auto dst = vf.dest_vec4_from_string(dest_token, dst_swz, pin_group);

   char separator;
   is >> separator;
   assert(separator == ':');

   std::string addr_token;
   is >> addr_token;
   auto addr_reg = vf.src_from_string(addr_token)->as_register();

   std::string token;
   is >> token;

   /* Optional constant address offset, written as "+ <n>b" */
   int addr_offset_val = 0;
   std::string res_id_token;
   if (token != "+") {
      res_id_token = token;
   } else {
      is >> addr_offset_val;
      is >> separator;
      assert(separator == 'b');
      is >> res_id_token;
   }

   int res_id = int_from_string_with_prefix(res_id_token, "RID:");

   /* Optional resource offset register, written as "+ <reg>" */
   token.clear();
   is >> token;
   PRegister res_offset = nullptr;
   if (token == "+") {
      std::string res_offset_token;
      is >> res_offset_token;
      res_offset = vf.src_from_string(res_offset_token)->as_register();
   }

   auto fetch = new LoadFromBuffer( dst, dst_swz,
                                    addr_reg, addr_offset_val,
                                    res_id, res_offset, fmt_32_32_32_32_float);

   /* `token` is deliberately not cleared here: if the stream is already
    * exhausted this extraction fails and the previously read token (which
    * may be "SRF") is what gets tested below. */
   is >> token;
   if (token == "SRF")
      fetch->set_fetch_flag(srf_mode);

   return fetch;
}
|
||||
|
||||
class AddrResolver: public RegisterVisitor {
|
||||
public:
|
||||
AddrResolver(LoadFromScratch *lfs) : m_lfs(lfs) {}
|
||||
|
||||
void visit(Register& value) {
|
||||
m_lfs->set_fetch_flag(FetchInstr::indexed);
|
||||
m_lfs->set_src(&value);
|
||||
value.add_use(m_lfs);
|
||||
}
|
||||
void visit(LocalArray& value) {assert(0);(void)value;}
|
||||
void visit(LocalArrayValue& value) {assert(0);(void)value;}
|
||||
void visit(UniformValue& value) {assert(0);(void)value;}
|
||||
void visit(LiteralConstant& value) {
|
||||
m_lfs->set_array_base(value.value());
|
||||
m_lfs->set_src(new Register( 0, 7, pin_none));
|
||||
}
|
||||
void visit(InlineConstant& value) {assert(0);(void)value;}
|
||||
|
||||
LoadFromScratch *m_lfs;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/* vc_read_scratch instruction. The array size is set to `scratch_size` - 1
 * and the address is resolved from `addr` through AddrResolver, which either
 * sets an indexed register source or a literal array base. */
LoadFromScratch::LoadFromScratch(const RegisterVec4& dst,
                                 const RegisterVec4::Swizzle& dst_swz,
                                 PVirtualValue addr, uint32_t scratch_size):
   FetchInstr(vc_read_scratch, dst, dst_swz,
              nullptr, 0,
              no_index_offset, fmt_32_32_32_32, vtx_nf_int, vtx_es_none,
              0, nullptr)
{
   set_fetch_flag(uncached);
   set_fetch_flag(wait_ack);

   assert(scratch_size >= 1);
   set_array_size(scratch_size - 1);

   /* Default base; the resolver overrides it for a literal address */
   set_array_base(0);
   AddrResolver resolver(this);
   addr->accept(resolver);

   set_print_skip(mfc);
   set_print_skip(fmt);
   set_print_skip(ftype);
   set_element_size(3);
}
|
||||
|
||||
/* Parse "<dest vec4> : <addr> SIZE:<n>" and create a LoadFromScratch. */
Instr::Pointer LoadFromScratch::from_string(std::istream& is, ValueFactory &vf)
{
   std::string dest_token;
   is >> dest_token;

   RegisterVec4::Swizzle dst_swz;
   auto dest = vf.dest_vec4_from_string(dest_token, dst_swz, pin_group);

   char separator;
   is >> separator;
   assert(separator == ':');

   std::string addr_token;
   is >> addr_token;
   auto addr_reg = vf.src_from_string(addr_token);

   std::string size_token;
   is >> size_token;
   int size = int_from_string_with_prefix(size_token, "SIZE:");
   assert(size >= 1);

   return new LoadFromScratch( dest, dst_swz, addr_reg, size);
}
|
||||
|
||||
}
|
||||
|
152
src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
Normal file
152
src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
Normal file
@ -0,0 +1,152 @@
|
||||
#ifndef INSTR_FETCH_H
|
||||
#define INSTR_FETCH_H
|
||||
|
||||
#include "sfn_instr.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class ValueFactory;
|
||||
|
||||
class FetchInstr : public InstrWithVectorResult {
|
||||
public:
|
||||
|
||||
enum EFlags {
|
||||
fetch_whole_quad,
|
||||
use_const_field,
|
||||
format_comp_signed,
|
||||
srf_mode,
|
||||
buf_no_stride,
|
||||
alt_const,
|
||||
use_tc,
|
||||
vpm,
|
||||
is_mega_fetch,
|
||||
uncached,
|
||||
indexed,
|
||||
wait_ack,
|
||||
unknown
|
||||
};
|
||||
|
||||
enum EPrintSkip {
|
||||
fmt,
|
||||
ftype,
|
||||
mfc,
|
||||
count
|
||||
};
|
||||
|
||||
FetchInstr(EVFetchInstr opcode,
|
||||
const RegisterVec4& dst,
|
||||
const RegisterVec4::Swizzle& dest_swizzle,
|
||||
PRegister src,
|
||||
uint32_t src_offset,
|
||||
EVFetchType fetch_type,
|
||||
EVTXDataFormat data_format,
|
||||
EVFetchNumFormat num_format,
|
||||
EVFetchEndianSwap endian_swap,
|
||||
uint32_t resource_id,
|
||||
PRegister resource_offset);
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
void set_src(PRegister src) { m_src = src; }
|
||||
const auto& src() const {assert(m_src); return *m_src;}
|
||||
uint32_t src_offset() const {return m_src_offset;}
|
||||
|
||||
uint32_t resource_id() const {return m_resource_id;}
|
||||
auto resource_offset() const {return m_resource_offset;}
|
||||
|
||||
EVFetchType fetch_type() const {return m_fetch_type;}
|
||||
EVTXDataFormat data_format() const {return m_data_format;}
|
||||
void set_num_format(EVFetchNumFormat nf) {m_num_format = nf;}
|
||||
EVFetchNumFormat num_format() const {return m_num_format;}
|
||||
EVFetchEndianSwap endian_swap() const {return m_endian_swap;}
|
||||
|
||||
uint32_t mega_fetch_count() const {return m_mega_fetch_count;}
|
||||
uint32_t array_base() const {return m_array_base;}
|
||||
uint32_t array_size() const {return m_array_size;}
|
||||
uint32_t elm_size() const {return m_elm_size;}
|
||||
|
||||
void reset_fetch_flag(EFlags flag) {m_tex_flags.reset(flag);}
|
||||
void set_fetch_flag(EFlags flag) {m_tex_flags.set(flag);}
|
||||
bool has_fetch_flag(EFlags flag) const { return m_tex_flags.test(flag);}
|
||||
|
||||
EVFetchInstr opcode() const {return m_opcode;}
|
||||
|
||||
bool is_equal_to(const FetchInstr& rhs) const;
|
||||
|
||||
static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
|
||||
|
||||
void set_mfc(int mfc) {m_tex_flags.set(is_mega_fetch); m_mega_fetch_count = mfc;}
|
||||
void set_array_base(int arrb) {m_array_base = arrb;}
|
||||
void set_array_size(int arrs) {m_array_size = arrs;}
|
||||
|
||||
void set_element_size(int size) { m_elm_size = size;}
|
||||
void set_print_skip(EPrintSkip skip) {m_skip_print.set(skip);}
|
||||
uint32_t slots() const override {return 1;};
|
||||
|
||||
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
|
||||
|
||||
protected:
|
||||
static Instr::Pointer from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory &vf);
|
||||
|
||||
void override_opname(const char *opname) { m_opname = opname;}
|
||||
|
||||
private:
|
||||
bool do_ready() const override;
|
||||
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
void set_param_from_string(const std::string& next_token);
|
||||
void set_flag_from_string(const std::string& next_token);
|
||||
|
||||
static const std::map<EVTXDataFormat, const char *> s_data_format_map;
|
||||
static const std::map<const char *, EFlags> s_flag_map;
|
||||
|
||||
bool propagate_death() override;
|
||||
|
||||
EVFetchInstr m_opcode;
|
||||
|
||||
PRegister m_src;
|
||||
uint32_t m_src_offset;
|
||||
|
||||
EVFetchType m_fetch_type;
|
||||
EVTXDataFormat m_data_format;
|
||||
EVFetchNumFormat m_num_format;
|
||||
EVFetchEndianSwap m_endian_swap;
|
||||
|
||||
uint32_t m_resource_id;
|
||||
PRegister m_resource_offset;
|
||||
|
||||
std::bitset<EFlags::unknown> m_tex_flags;
|
||||
std::bitset<EPrintSkip::count> m_skip_print;
|
||||
|
||||
uint32_t m_mega_fetch_count;
|
||||
uint32_t m_array_base;
|
||||
uint32_t m_array_size;
|
||||
uint32_t m_elm_size;
|
||||
|
||||
std::string m_opname;
|
||||
};
|
||||
|
||||
class QueryBufferSizeInstr : public FetchInstr {
|
||||
public:
|
||||
QueryBufferSizeInstr(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, uint32_t resid);
|
||||
static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
|
||||
};
|
||||
|
||||
class LoadFromBuffer : public FetchInstr {
|
||||
public:
|
||||
LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle,
|
||||
PRegister addr, uint32_t addr_offset,
|
||||
uint32_t resid, PRegister res_offset, EVTXDataFormat data_format);
|
||||
static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
|
||||
};
|
||||
|
||||
class LoadFromScratch : public FetchInstr {
|
||||
public:
|
||||
LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, PVirtualValue addr, uint32_t offset);
|
||||
static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
|
||||
};
|
||||
|
||||
}
|
||||
#endif // INSTR_FETCH_H
|
411
src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp
Normal file
411
src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp
Normal file
@ -0,0 +1,411 @@
|
||||
#include "sfn_instr_lds.h"
|
||||
#include "sfn_instr_alu.h"
|
||||
#include "sfn_debug.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
using std::istream;
|
||||
|
||||
/* Create an LDS read with one address per destination value. The instruction
 * registers itself as parent of every destination and as a use of every
 * address that is a register. */
LDSReadInstr::LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
                           AluInstr::SrcValues& address):
   m_address(address),
   m_dest_value(value)
{
   assert(m_address.size() == m_dest_value.size());

   for (auto& dest: value)
      dest->add_parent(this);

   for (auto& addr: m_address) {
      if (addr->as_register())
         addr->as_register()->add_use(this);
   }
}
|
||||
|
||||
/* Dispatch to the read-only visitor, passing this instruction by reference. */
void LDSReadInstr::accept(ConstInstrVisitor& visitor) const
{
   const LDSReadInstr& self = *this;
   visitor.visit(self);
}
|
||||
|
||||
/* Dispatch to the mutating visitor, passing this instruction by pointer. */
void LDSReadInstr::accept(InstrVisitor& visitor)
{
   LDSReadInstr *self = this;
   visitor.visit(self);
}
|
||||
|
||||
bool LDSReadInstr::remove_unused_components()
|
||||
{
|
||||
uint8_t inactive_mask = 0;
|
||||
for (size_t i = 0; i < m_dest_value.size(); ++i) {
|
||||
if (m_dest_value[i]->uses().empty())
|
||||
inactive_mask |= 1 << i;
|
||||
}
|
||||
|
||||
if (!inactive_mask)
|
||||
return false;
|
||||
|
||||
auto new_addr = AluInstr::SrcValues();
|
||||
auto new_dest = std::vector<PRegister, Allocator<PRegister>>();
|
||||
|
||||
for (size_t i = 0; i < m_dest_value.size(); ++i) {
|
||||
if ((1 << i) & inactive_mask) {
|
||||
if (m_address[i]->as_register())
|
||||
m_address[i]->as_register()->del_use(this);
|
||||
m_dest_value[i]->del_parent(this);
|
||||
} else {
|
||||
new_dest.push_back(m_dest_value[i]);
|
||||
new_addr.push_back(m_address[i]);
|
||||
}
|
||||
}
|
||||
|
||||
m_dest_value.swap(new_dest);
|
||||
m_address.swap(new_addr);
|
||||
|
||||
return m_address.size() != new_addr.size();
|
||||
}
|
||||
|
||||
class SetLDSAddrProperty : public AluInstrVisitor {
|
||||
using AluInstrVisitor::visit;
|
||||
void visit(AluInstr *instr) override {
|
||||
instr->set_alu_flag(alu_lds_address);
|
||||
}
|
||||
};
|
||||
|
||||
/* Lower this LDS read to ALU instructions appended to `out_block`:
 * one DS_OP_READ_RET per address, followed by one MOV from
 * ALU_SRC_LDS_OQ_A_POP per destination. The new instructions are chained
 * through add_required_instr, starting at `last_lds_instr` if given.
 * Returns the last instruction of the emitted group. */
AluInstr *LDSReadInstr::split(std::vector<AluInstr*>& out_block, AluInstr *last_lds_instr)
{
   AluInstr *group_head = nullptr;
   SetLDSAddrProperty mark_lds_addr;

   for (auto& addr: m_address) {
      if (auto addr_reg = addr->as_register()) {
         addr_reg->del_use(this);
         /* With exactly one parent, flag that parent as an LDS address
          * calculation */
         if (addr_reg->parents().size() == 1) {
            for (auto& parent: addr_reg->parents())
               parent->accept(mark_lds_addr);
         }
      }

      auto read = new AluInstr(DS_OP_READ_RET, nullptr, nullptr, addr);
      read->set_blockid(block_id(), index());

      if (last_lds_instr)
         read->add_required_instr(last_lds_instr);
      out_block.push_back(read);
      last_lds_instr = read;

      if (group_head) {
         /* To let the scheduler keep the loads of a group close together,
          * all addresses must already be available when the first read is
          * emitted. Otherwise the loads and the reads from the queue could
          * be split across ALU CF clauses, which is not allowed. */
         group_head->add_extra_dependency(addr);
      } else {
         group_head = read;
         group_head->set_alu_flag(alu_lds_group_start);
      }
   }

   for (auto& dest: m_dest_value) {
      dest->del_parent(this);

      auto pop = new AluInstr(op1_mov, dest,
                              new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
                              AluInstr::last_write);
      pop->add_required_instr(last_lds_instr);
      pop->set_blockid(block_id(), index());
      out_block.push_back(pop);
      last_lds_instr = pop;
   }

   if (last_lds_instr)
      last_lds_instr->set_alu_flag(alu_lds_group_end);

   return last_lds_instr;
}
|
||||
|
||||
bool LDSReadInstr::do_ready() const
|
||||
{
|
||||
unreachable("This instruction is not handled by the schduler");
|
||||
return false;
|
||||
}
|
||||
|
||||
void LDSReadInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "LDS_READ ";
|
||||
|
||||
os << "[ ";
|
||||
for (auto d: m_dest_value) {
|
||||
os << *d << " ";
|
||||
}
|
||||
os << "] : [ ";
|
||||
for (auto a: m_address) {
|
||||
os << *a << " ";
|
||||
}
|
||||
os << "]";
|
||||
}
|
||||
|
||||
bool LDSReadInstr::is_equal_to(const LDSReadInstr& rhs) const
|
||||
{
|
||||
if (m_address.size() != rhs.m_address.size())
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < num_values(); ++i) {
|
||||
if (!m_address[i]->equal_to(*rhs.m_address[i]))
|
||||
return false;
|
||||
if (!m_dest_value[i]->equal_to(*rhs.m_dest_value[i]))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Parse the token sequence "[ d1 d2 ... ] : [ a1 a2 ... ]" (all tokens
 * separated by whitespace) and create the LDS read. The number of
 * destinations must equal the number of addresses and be non-zero. */
auto LDSReadInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
{
   std::string token;

   is >> token;
   assert(token == "[");

   std::vector<PRegister, Allocator<PRegister> > dests;
   AluInstr::SrcValues srcs;

   /* Destination list, terminated by "]" */
   for (is >> token; token != "]"; is >> token) {
      auto dst = value_factory.dest_from_string(token);
      assert(dst);
      dests.push_back(dst);
   }

   is >> token;
   assert(token == ":");
   is >> token;
   assert(token == "[");

   /* Address list, terminated by "]" */
   for (is >> token; token != "]"; is >> token) {
      auto src = value_factory.src_from_string(token);
      assert(src);
      srcs.push_back(src);
   }

   assert(srcs.size() == dests.size() && !dests.empty());

   return new LDSReadInstr(dests, srcs);
}
|
||||
|
||||
/* Create an LDS atomic operation. `dest` may be null. The instruction
 * registers itself as parent of the destination (if any) and as a use of
 * the address and of every source that is a register. */
LDSAtomicInstr::LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address,
                               const SrcValues& srcs):
   m_opcode(op),
   m_address(address),
   m_dest(dest),
   m_srcs(srcs)
{
   if (m_dest)
      m_dest->add_parent(this);

   if (auto addr_reg = m_address->as_register())
      addr_reg->add_use(this);

   for (auto& src: m_srcs) {
      if (auto src_reg = src->as_register())
         src_reg->add_use(this);
   }
}
|
||||
|
||||
|
||||
/* Dispatch to the read-only visitor, passing this instruction by reference. */
void LDSAtomicInstr::accept(ConstInstrVisitor& visitor) const
{
   const LDSAtomicInstr& self = *this;
   visitor.visit(self);
}
|
||||
|
||||
/* Dispatch to the mutating visitor, passing this instruction by pointer. */
void LDSAtomicInstr::accept(InstrVisitor& visitor)
{
   LDSAtomicInstr *self = this;
   visitor.visit(self);
}
|
||||
|
||||
/* Lower this LDS atomic to ALU instructions appended to `out_block`: one
 * instruction with the LDS opcode taking the address followed by the
 * sources, and - only when there is a destination - a MOV from
 * ALU_SRC_LDS_OQ_A_POP into it.
 * Returns the MOV when a destination exists, otherwise the unchanged
 * `last_lds_instr`. */
AluInstr *LDSAtomicInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr)
{
   /* Operand list of the ALU op: address first, then the sources */
   AluInstr::SrcValues srcs = {m_address};
   for (auto& src : m_srcs)
      srcs.push_back(src);

   for (auto& operand : srcs) {
      if (auto operand_reg = operand->as_register())
         operand_reg->del_use(this);
   }

   SetLDSAddrProperty mark_lds_addr;
   if (auto addr_reg = srcs[0]->as_register()) {
      addr_reg->del_use(this);
      /* With exactly one parent, flag that parent as an LDS address
       * calculation */
      if (addr_reg->parents().size() == 1) {
         for (auto& parent: addr_reg->parents())
            parent->accept(mark_lds_addr);
      }
   }

   auto op_instr = new AluInstr(m_opcode, srcs, {});
   op_instr->set_blockid(block_id(), index());

   if (last_lds_instr)
      op_instr->add_required_instr(last_lds_instr);

   out_block.push_back(op_instr);

   if (!m_dest)
      return last_lds_instr;

   op_instr->set_alu_flag(alu_lds_group_start);
   m_dest->del_parent(this);

   auto read_instr = new AluInstr(op1_mov, m_dest,
                                  new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
                                  AluInstr::last_write);
   read_instr->add_required_instr(op_instr);
   read_instr->set_blockid(block_id(), index());
   read_instr->set_alu_flag(alu_lds_group_end);
   out_block.push_back(read_instr);

   return read_instr;
}
|
||||
|
||||
/* Replace every source equal to `old_src` by `new_src`.
 * Returns true if at least one source was replaced; in that case the use
 * lists of the involved registers are updated. The replacement is refused
 * (false) when it could exceed the uniform limit, when `old_src` is an array
 * element, or when it would mix different indirect addresses. */
bool LDSAtomicInstr::replace_source(PRegister old_src, PVirtualValue new_src)
{
   if (new_src->as_uniform() && m_srcs.size() > 2) {
      int other_uniforms = 0;
      for (auto& src : m_srcs) {
         if (src->as_uniform() && !src->equal_to(*old_src))
            ++other_uniforms;
      }
      /* Conservative check: two kcache values can always be used, three
       * might be a problem; don't care for now, just reject. */
      if (other_uniforms > 2)
         return false;
   }

   /* If the old source is an array element there might have been an
    * (untracked) indirect access, so don't replace it. */
   if (old_src->pin() == pin_array)
      return false;

   if (new_src->get_addr()) {
      for (auto& src : m_srcs) {
         auto addr = src->get_addr();
         /* An instruction can't use two different indirect addresses */
         if (addr && !addr->equal_to(*new_src->get_addr()))
            return false;
      }
   }

   bool replaced = false;
   for (auto& src : m_srcs) {
      if (old_src->equal_to(*src)) {
         src = new_src;
         replaced = true;
      }
   }

   if (!replaced)
      return false;

   if (auto new_reg = new_src->as_register())
      new_reg->add_use(this);
   old_src->del_use(this);

   return true;
}
|
||||
|
||||
bool LDSAtomicInstr::do_ready() const
|
||||
{
|
||||
unreachable("This instruction is not handled by the schduler");
|
||||
return false;
|
||||
}
|
||||
|
||||
void LDSAtomicInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
auto ii = lds_ops.find(m_opcode);
|
||||
assert(ii != lds_ops.end());
|
||||
|
||||
os << "LDS " << ii->second.name << " ";
|
||||
if (m_dest)
|
||||
os << *m_dest;
|
||||
else
|
||||
os << "__.x";
|
||||
|
||||
os << " [ " << *m_address << " ] : " << *m_srcs[0];
|
||||
if (m_srcs.size() > 1)
|
||||
os << " " << *m_srcs[1];
|
||||
}
|
||||
|
||||
bool LDSAtomicInstr::is_equal_to(const LDSAtomicInstr& rhs) const
|
||||
{
|
||||
if (m_srcs.size() != rhs.m_srcs.size())
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < m_srcs.size(); ++i) {
|
||||
if (!m_srcs[i]->equal_to(*rhs.m_srcs[i]))
|
||||
return false;
|
||||
}
|
||||
|
||||
return m_opcode == rhs.m_opcode &&
|
||||
sfn_value_equal(m_address, rhs.m_address) &&
|
||||
sfn_value_equal(m_dest, rhs.m_dest);
|
||||
}
|
||||
|
||||
|
||||
/* Parse "<op> <dest> [ <address> ] : <src> ..." and create the instruction.
 * Examples:
 *   LDS WRITE2 __.x [ R1.x ] : R2.y R3.z
 *   LDS WRITE __.x [ R1.x ] : R2.y
 * A destination token starting with '_' means "no destination". The number
 * of sources read is the `nsrc` value of the lds_ops entry minus one. */
auto LDSAtomicInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
{
   std::string token;
   is >> token;

   /* Look the opcode up by name */
   ESDOp opcode = DS_OP_INVALID;
   int nsrc = 0;
   for (auto& [op, opinfo] : lds_ops) {
      if (token != opinfo.name)
         continue;
      opcode = op;
      nsrc = opinfo.nsrc;
      break;
   }
   assert(opcode != DS_OP_INVALID);

   is >> token;
   PRegister dest = nullptr;
   if (token[0] != '_')
      dest = value_factory.dest_from_string(token);

   is >> token;
   assert(token == "[");

   is >> token;
   auto addr = value_factory.src_from_string(token);

   is >> token;
   assert(token == "]");

   is >> token;
   assert(token == ":");

   AluInstr::SrcValues srcs;
   for (int remaining = nsrc - 1; remaining > 0; --remaining) {
      is >> token;
      auto src = value_factory.src_from_string(token);
      assert(src);
      srcs.push_back(src);
   }

   return new LDSAtomicInstr(opcode, dest, addr, srcs);
}
|
||||
|
||||
|
||||
}
|
80
src/gallium/drivers/r600/sfn/sfn_instr_lds.h
Normal file
80
src/gallium/drivers/r600/sfn/sfn_instr_lds.h
Normal file
@ -0,0 +1,80 @@
|
||||
#ifndef LDSINSTR_H
|
||||
#define LDSINSTR_H
|
||||
|
||||
#include "sfn_instr_alu.h"
|
||||
#include "sfn_valuefactory.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class LDSReadInstr : public Instr {
|
||||
public:
|
||||
LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
|
||||
AluInstr::SrcValues& address);
|
||||
|
||||
unsigned num_values() const { return m_dest_value.size();}
|
||||
auto address(unsigned i) const { return m_address[i];}
|
||||
auto dest(unsigned i) const { return m_dest_value[i];}
|
||||
|
||||
auto address(unsigned i){ return m_address[i];}
|
||||
auto dest(unsigned i) { return m_dest_value[i];}
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
AluInstr *split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr);
|
||||
bool is_equal_to(const LDSReadInstr& lhs) const;
|
||||
|
||||
static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
|
||||
|
||||
bool remove_unused_components();
|
||||
|
||||
private:
|
||||
|
||||
bool do_ready() const override;
|
||||
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
AluInstr::SrcValues m_address;
|
||||
std::vector<PRegister, Allocator<PRegister>> m_dest_value;
|
||||
};
|
||||
|
||||
class LDSAtomicInstr : public Instr {
|
||||
public:
|
||||
using SrcValues = AluInstr::SrcValues;
|
||||
|
||||
LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address, const SrcValues& src);
|
||||
|
||||
auto address() const { return m_address;}
|
||||
auto dest() const { return m_dest;}
|
||||
auto src0() const { return m_srcs[0];}
|
||||
auto src1() const { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;}
|
||||
|
||||
PVirtualValue address() { return m_address;}
|
||||
PRegister dest() { return m_dest;}
|
||||
PVirtualValue src0() { return m_srcs[0];}
|
||||
PVirtualValue src1() { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;}
|
||||
|
||||
unsigned op() const {return m_opcode;}
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
AluInstr *split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr);
|
||||
bool is_equal_to(const LDSAtomicInstr& lhs) const;
|
||||
|
||||
static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
|
||||
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
|
||||
|
||||
private:
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ESDOp m_opcode;
|
||||
PVirtualValue m_address{nullptr};
|
||||
PRegister m_dest{nullptr};
|
||||
SrcValues m_srcs;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // LDSINSTR_H
|
844
src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
Normal file
844
src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
Normal file
@ -0,0 +1,844 @@
|
||||
#include "sfn_instr_mem.h"
|
||||
#include "sfn_instr_fetch.h"
|
||||
#include "sfn_instr_tex.h"
|
||||
#include "sfn_shader.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
|
||||
/* Create a GDS instruction. It is marked "always keep", and registers
 * itself as a use of the source vector and of `uav_id` (if given) and as
 * parent of `dest`. */
GDSInstr::GDSInstr(ESDOp op, Register *dest,
                   const RegisterVec4& src, int uav_base,
                   PRegister uav_id):
   m_op(op),
   m_dest(dest),
   m_src(src),
   m_uav_base(uav_base),
   m_uav_id(uav_id)
{
   set_always_keep();

   m_src.add_use(this);
   m_dest->add_parent(this);

   if (m_uav_id != nullptr)
      m_uav_id->add_use(this);
}
|
||||
|
||||
bool GDSInstr::is_equal_to(const GDSInstr& rhs) const
|
||||
{
|
||||
#define NE(X) (X != rhs. X)
|
||||
|
||||
if (NE(m_op) ||
|
||||
NE(m_src) ||
|
||||
NE(m_uav_base))
|
||||
return false;
|
||||
|
||||
sfn_value_equal(m_dest, rhs.m_dest);
|
||||
|
||||
return sfn_value_equal(m_uav_id, rhs.m_uav_id);
|
||||
}
|
||||
|
||||
/* Dispatch to the read-only visitor, passing this instruction by reference. */
void GDSInstr::accept(ConstInstrVisitor& visitor) const
{
   const GDSInstr& self = *this;
   visitor.visit(self);
}
|
||||
|
||||
/* Dispatch to the mutating visitor, passing this instruction by pointer. */
void GDSInstr::accept(InstrVisitor& visitor)
{
   GDSInstr *self = this;
   visitor.visit(self);
}
|
||||
|
||||
bool GDSInstr::do_ready() const
|
||||
{
|
||||
return m_src.ready(block_id(), index()) &&
|
||||
(!m_uav_id || m_uav_id->ready(block_id(), index()));
|
||||
}
|
||||
|
||||
void GDSInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "GDS " << lds_ops.at(m_op).name
|
||||
<< *m_dest;
|
||||
os << " " << m_src;
|
||||
os << " BASE:" << m_uav_base;
|
||||
|
||||
if (m_uav_id)
|
||||
os << " UAV:" << *m_uav_id;
|
||||
}
|
||||
|
||||
/* Entry point for atomic counter intrinsics: dispatch to the emitter that
 * handles the given intrinsic. Returns false for intrinsics that are not
 * atomic counter operations, otherwise the result of the chosen emitter. */
bool GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
{
   const auto intrinsic = intr->intrinsic;

   switch (intrinsic) {
   /* Operations that take a data operand. */
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
      return emit_atomic_op2(intr, shader);

   /* Both are emitted through the plain read path. */
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
      return emit_atomic_read(intr, shader);

   case nir_intrinsic_atomic_counter_inc:
      return emit_atomic_inc(intr, shader);

   case nir_intrinsic_atomic_counter_pre_dec:
      return emit_atomic_pre_dec(intr, shader);

   default:
      break;
   }

   return false;
}
|
||||
|
||||
/* Map an atomic counter intrinsic to the DS opcode variant that returns a
 * value (the *_RET opcodes). Intrinsics without a direct equivalent,
 * including atomic_counter_pre_dec, yield DS_OP_INVALID. */
static ESDOp get_opcode(const nir_intrinsic_op opcode)
{
   ESDOp ds_op = DS_OP_INVALID;

   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      ds_op = DS_OP_ADD_RET;
      break;
   case nir_intrinsic_atomic_counter_and:
      ds_op = DS_OP_AND_RET;
      break;
   case nir_intrinsic_atomic_counter_exchange:
      ds_op = DS_OP_XCHG_RET;
      break;
   case nir_intrinsic_atomic_counter_inc:
      ds_op = DS_OP_INC_RET;
      break;
   case nir_intrinsic_atomic_counter_max:
      ds_op = DS_OP_MAX_UINT_RET;
      break;
   case nir_intrinsic_atomic_counter_min:
      ds_op = DS_OP_MIN_UINT_RET;
      break;
   case nir_intrinsic_atomic_counter_or:
      ds_op = DS_OP_OR_RET;
      break;
   case nir_intrinsic_atomic_counter_read:
      ds_op = DS_OP_READ_RET;
      break;
   case nir_intrinsic_atomic_counter_xor:
      ds_op = DS_OP_XOR_RET;
      break;
   case nir_intrinsic_atomic_counter_post_dec:
      ds_op = DS_OP_DEC_RET;
      break;
   case nir_intrinsic_atomic_counter_comp_swap:
      ds_op = DS_OP_CMP_XCHG_RET;
      break;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      /* No direct opcode; keep DS_OP_INVALID. */
      break;
   }

   return ds_op;
}
|
||||
|
||||
/* Map an atomic counter intrinsic to the DS opcode used when the result of
 * the operation is not read. Compare-and-swap and exchange still map to
 * their *_RET opcodes here. Intrinsics without an equivalent, including
 * atomic_counter_pre_dec, yield DS_OP_INVALID. */
static ESDOp get_opcode_wo(const nir_intrinsic_op opcode)
{
   ESDOp ds_op = DS_OP_INVALID;

   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      ds_op = DS_OP_ADD;
      break;
   case nir_intrinsic_atomic_counter_and:
      ds_op = DS_OP_AND;
      break;
   case nir_intrinsic_atomic_counter_inc:
      ds_op = DS_OP_INC;
      break;
   case nir_intrinsic_atomic_counter_max:
      ds_op = DS_OP_MAX_UINT;
      break;
   case nir_intrinsic_atomic_counter_min:
      ds_op = DS_OP_MIN_UINT;
      break;
   case nir_intrinsic_atomic_counter_or:
      ds_op = DS_OP_OR;
      break;
   case nir_intrinsic_atomic_counter_xor:
      ds_op = DS_OP_XOR;
      break;
   case nir_intrinsic_atomic_counter_post_dec:
      ds_op = DS_OP_DEC;
      break;
   case nir_intrinsic_atomic_counter_comp_swap:
      ds_op = DS_OP_CMP_XCHG_RET;
      break;
   case nir_intrinsic_atomic_counter_exchange:
      ds_op = DS_OP_XCHG_RET;
      break;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      /* No direct opcode; keep DS_OP_INVALID. */
      break;
   }

   return ds_op;
}
|
||||
|
||||
|
||||
/* Emit an atomic counter operation that takes a data operand (src[1]).
 *
 * The returning or non-returning opcode is selected depending on whether the
 * NIR destination has any uses. Returns false if the intrinsic has no DS
 * opcode, true once the instruction has been emitted.
 *
 * Before Cayman the counter is addressed through uav base/id on the
 * instruction itself; on Cayman the address (4 * offset, plus 4 * uav_id
 * when the id is dynamic) and the data are moved into a register group and
 * the instruction is created with base 0 and no uav id. */
bool GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   const bool result_is_used =
      !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   ESDOp op = result_is_used ? get_opcode(instr->intrinsic)
                             : get_opcode_wo(instr->intrinsic);
   if (op == DS_OP_INVALID)
      return false;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += nir_intrinsic_base(instr);

   auto dest = vf.dest(instr->dest, 0, pin_free);

   /* The pre-Cayman path needs the operand in a register; copy it into a
    * temporary when the source value is not a register. */
   auto src_val = vf.src(instr->src[1], 0);
   PRegister src_as_register = src_val->as_register();
   if (!src_as_register) {
      auto temp_src_val = vf.temp_register();
      shader.emit_instruction(new AluInstr(op1_mov, temp_src_val, src_val,
                                           AluInstr::last_write));
      src_as_register = temp_src_val;
   }

   if (uav_id != nullptr)
      shader.set_flag(Shader::sh_indirect_atomic);

   GDSInstr *ir = nullptr;
   if (shader.chip_class() >= ISA_CC_CAYMAN) {
      /* The destination is looked up a second time here, as the original
       * code did. */
      auto cayman_dest = vf.dest(instr->dest, 0, pin_free);
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});

      /* tmp[0] = address */
      if (uav_id) {
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id,
                                              vf.literal(4), vf.literal(4 * offset),
                                              AluInstr::write));
      } else {
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0],
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      }

      /* tmp[1] = data operand */
      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val,
                                           AluInstr::last_write));

      ir = new GDSInstr(op, cayman_dest, tmp, 0, nullptr);
   } else {
      RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
      ir = new GDSInstr(op, dest, src, offset, uav_id);
   }

   shader.emit_instruction(ir);
   return true;
}
|
||||
|
||||
/* Emit a DS_OP_READ_RET for an atomic counter. Always returns true.
 *
 * Before Cayman the counter is addressed through uav base/id on the
 * instruction; on Cayman the address (4 * offset, plus 4 * uav_id when the
 * id is dynamic) is written to a temporary register first. */
bool GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto dest = vf.dest(instr->dest, 0, pin_free);

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      auto src = RegisterVec4(0, true, {7, 7, 7, 7});
      shader.emit_instruction(new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id));
      return true;
   }

   /* Cayman: only the address channel of the group is used. */
   auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
   if (uav_id) {
      shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id,
                                           vf.literal(4), vf.literal(4 * offset),
                                           AluInstr::write));
   } else {
      shader.emit_instruction(new AluInstr(op1_mov, tmp[0],
                                           vf.literal(4 * offset),
                                           AluInstr::write));
   }

   shader.emit_instruction(new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr));
   return true;
}
|
||||
|
||||
/* Emit an atomic counter increment as an ADD of shader.atomic_update().
 * DS_OP_ADD_RET is used when the NIR destination has uses, DS_OP_ADD
 * otherwise. Always returns true.
 *
 * Before Cayman the counter is addressed through uav base/id on the
 * instruction; on Cayman address and update value are moved into a register
 * group first. */
bool GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   const bool result_is_used =
      !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
   const ESDOp add_op = result_is_used ? DS_OP_ADD_RET : DS_OP_ADD;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   /* Both hardware paths start by looking up the destination. */
   auto dest = vf.dest(instr->dest, 0, pin_free);

   GDSInstr *ir = nullptr;
   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(add_op, dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});

      /* tmp[0] = address */
      if (uav_id) {
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id,
                                              vf.literal(4), vf.literal(4 * offset),
                                              AluInstr::write));
      } else {
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0],
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      }

      /* tmp[1] = value to add */
      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(),
                                           AluInstr::last_write));

      ir = new GDSInstr(add_op, dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   return true;
}
|
||||
|
||||
/* Emit an atomic counter pre-decrement: a DS_OP_SUB_RET of
 * shader.atomic_update() into a temporary, followed by an integer subtract
 * of one from that temporary into the NIR destination. Always returns true.
 *
 * Before Cayman the counter is addressed through uav base/id on the
 * instruction; on Cayman address and update value are moved into a register
 * group first. */
bool GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   /* Receives the value returned by the SUB_RET operation. */
   auto *returned_value = vf.temp_register();

   GDSInstr *ir = nullptr;
   if (shader.chip_class() >= ISA_CC_CAYMAN) {
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});

      /* tmp[0] = address */
      if (uav_id) {
         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id,
                                              vf.literal(4), vf.literal(4 * offset),
                                              AluInstr::write));
      } else {
         shader.emit_instruction(new AluInstr(op1_mov, tmp[0],
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      }

      /* tmp[1] = value to subtract */
      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(),
                                           AluInstr::last_write));

      ir = new GDSInstr(DS_OP_SUB_RET, returned_value, tmp, 0, nullptr);
   } else {
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(DS_OP_SUB_RET, returned_value, src, offset, uav_id);
   }
   shader.emit_instruction(ir);

   /* dest = returned value - 1 */
   auto *final_sub = new AluInstr(op2_sub_int, vf.dest(instr->dest, 0, pin_free),
                                  returned_value, vf.one_i(), AluInstr::last_write);
   shader.emit_instruction(final_sub);
   return true;
}
|
||||
|
||||
|
||||
/* Create a RAT memory instruction and register it as a user of its data
 * vector, its index (address) vector and, when present, the register that
 * holds the dynamic RAT id offset. */
RatInstr::RatInstr(ECFOpCode cf_opcode, ERatOp rat_op, const RegisterVec4& data,
                   const RegisterVec4& index, int rat_id, PRegister rat_id_offset,
                   int burst_count, int comp_mask, int element_size):
   m_cf_opcode(cf_opcode),
   m_rat_op(rat_op),
   m_data(data),
   m_index(index),
   m_rat_id_offset(rat_id_offset),
   m_rat_id(rat_id),
   m_burst_count(burst_count),
   m_comp_mask(comp_mask),
   m_element_size(element_size)
{
   /* Presumably prevents the instruction from being removed as dead code;
    * confirm against Instr::set_always_keep(). */
   set_always_keep();

   m_data.add_use(this);
   m_index.add_use(this);

   if (m_rat_id_offset != nullptr)
      m_rat_id_offset->add_use(this);
}
|
||||
|
||||
|
||||
/* Read-only visitor dispatch: hands this instruction to the visitor by
 * const reference. */
void RatInstr::accept(ConstInstrVisitor& visitor) const
{
   const RatInstr& self = *this;
   visitor.visit(self);
}
|
||||
|
||||
/* Mutable visitor dispatch: hands a pointer to this instruction to the
 * visitor. */
void RatInstr::accept(InstrVisitor& visitor)
{
   RatInstr *self = this;
   visitor.visit(self);
}
|
||||
|
||||
bool RatInstr::is_equal_to(const RatInstr& lhs) const
|
||||
{
|
||||
(void)lhs;
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool RatInstr::do_ready() const
|
||||
{
|
||||
if (m_rat_op != STORE_TYPED) {
|
||||
for (auto i: required_instr()) {
|
||||
if (!i->is_scheduled()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return m_data.ready(block_id(), index()) &&
|
||||
m_index.ready(block_id(), index());
|
||||
}
|
||||
|
||||
void RatInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "MEM_RAT RAT " << m_rat_id;
|
||||
if (m_rat_id_offset)
|
||||
os << "+" << *m_rat_id_offset;
|
||||
os << " @" << m_index;
|
||||
os << " OP:" << m_rat_op << " " << m_data;
|
||||
os << " BC:" << m_burst_count
|
||||
<< " MASK:" << m_comp_mask
|
||||
<< " ES:" << m_element_size;
|
||||
if (m_need_ack)
|
||||
os << " ACK";
|
||||
}
|
||||
|
||||
/* Map an SSBO/image intrinsic to the RAT opcode variant that returns a
 * value (the *_RTN opcodes); image_load maps to NOP_RTN.
 *
 * `format` is only consulted for compare-and-swap, where it selects between
 * the float and the integer comparison opcode.
 *
 * Any other intrinsic is a programming error and hits unreachable(). The
 * diagnostic used to read "Unsupported WO RAT instruction", copied from the
 * write-only lookup; it now names the right table. */
static RatInstr::ERatOp
get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format)
{
   switch (opcode) {
   case nir_intrinsic_image_load:
      return RatInstr::NOP_RTN;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return RatInstr::ADD_RTN;
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return RatInstr::AND_RTN;
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return RatInstr::OR_RTN;
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return RatInstr::MIN_INT_RTN;
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return RatInstr::MAX_INT_RTN;
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return RatInstr::MIN_UINT_RTN;
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return RatInstr::MAX_UINT_RTN;
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return RatInstr::XOR_RTN;
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      if (util_format_is_float(format))
         return RatInstr::CMPXCHG_FLT_RTN;
      else
         return RatInstr::CMPXCHG_INT_RTN;
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported RAT instruction");
   }
}
|
||||
|
||||
/* Map an SSBO/image atomic intrinsic to the RAT opcode used when the result
 * of the operation is not read.
 *
 * `format` is only consulted for compare-and-swap, where it selects between
 * the float and the integer comparison opcode. Exchange maps to XCHG_RTN
 * here as well. Any other intrinsic (including image_load) hits
 * unreachable(). */
static RatInstr::ERatOp
get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format)
{
   switch (opcode) {
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return RatInstr::ADD;

   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return RatInstr::AND;

   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return RatInstr::OR;

   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imin:
      return RatInstr::MIN_INT;

   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imax:
      return RatInstr::MAX_INT;

   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umin:
      return RatInstr::MIN_UINT;

   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umax:
      return RatInstr::MAX_UINT;

   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor:
      return RatInstr::XOR;

   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return util_format_is_float(format) ? RatInstr::CMPXCHG_FLT
                                          : RatInstr::CMPXCHG_INT;

   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange:
      return RatInstr::XCHG_RTN;

   default:
      unreachable("Unsupported WO RAT instruction");
   }
}
|
||||
|
||||
/* Entry point for SSBO and image intrinsics: dispatch to the emitter that
 * handles the given intrinsic. Returns false for intrinsics that are not
 * handled here, otherwise the result of the chosen emitter. */
bool RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
{
   const auto intrinsic = intr->intrinsic;

   switch (intrinsic) {
   /* SSBO access */
   case nir_intrinsic_load_ssbo:
      return emit_ssbo_load(intr, shader);
   case nir_intrinsic_store_ssbo:
      return emit_ssbo_store(intr, shader);
   case nir_intrinsic_get_ssbo_size:
      return emit_ssbo_size(intr, shader);

   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_exchange:
      return emit_ssbo_atomic_op(intr, shader);

   /* Image access */
   case nir_intrinsic_image_store:
      return emit_image_store(intr, shader);
   case nir_intrinsic_image_size:
      return emit_image_size(intr, shader);

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_imax:
      return emit_image_load_or_atomic(intr, shader);

   default:
      break;
   }

   return false;
}
|
||||
|
||||
/* Emit an SSBO load as a buffer fetch. Always returns true.
 *
 * The address in src[1] is shifted right by two (presumably converting a
 * byte offset into a dword index -- confirm) and the fetch format and
 * destination swizzle are chosen from the number of destination components
 * (1 to 4). */
bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->dest, pin_group);

   /** src0 not used, should be some offset */
   auto byte_addr = vf.src(intr->src[1], 0);
   auto shifted_addr = vf.temp_register();

   /** Should be lowered in nir */
   shader.emit_instruction(new AluInstr(op2_lshr_int, shifted_addr, byte_addr,
                                        vf.literal(2),
                                        {alu_write, alu_last_instr}));

   /* Both tables are indexed by (number of components - 1). */
   const EVTXDataFormat fetch_formats[4] = {
      fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32
   };

   RegisterVec4::Swizzle fetch_swizzles[4] = {
      {0, 7, 7, 7},
      {0, 1, 7, 7},
      {0, 1, 2, 7},
      {0, 1, 2, 3}
   };

   int comp_idx = nir_dest_num_components(intr->dest) - 1;

   auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0);

   auto res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + offset +
                 shader.ssbo_image_offset();

   auto load = new LoadFromBuffer(dest, fetch_swizzles[comp_idx], shifted_addr, 0,
                                  res_id, res_offset, fetch_formats[comp_idx]);
   load->set_fetch_flag(FetchInstr::use_tc);
   load->set_num_format(vtx_nf_int);

   shader.emit_instruction(load);
   return true;
}
|
||||
|
||||
/* Emit an SSBO store as one STORE_TYPED RAT instruction per component of
 * the value in src[0]. Always returns true.
 *
 * The address in src[2] is shifted right by two once (presumably converting
 * a byte offset into a dword index -- confirm); component i is then stored
 * at that base plus i. */
bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
{
   /* Force the scheduler to not move the preparation too far away, by
    * starting a new block (TODO: better priority handling in the scheduler) */
   if (nir_src_num_components(instr->src[0]) > 2)
      shader.start_new_block(0);

   auto& vf = shader.value_factory();
   auto orig_addr = vf.src(instr->src[2], 0);

   auto addr_base = vf.temp_register();

   auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);

   shader.emit_instruction(new AluInstr(op2_lshr_int, addr_base, orig_addr,
                                        vf.literal(2), AluInstr::write));

   for (unsigned comp = 0; comp < nir_src_num_components(instr->src[0]); ++comp) {
      /* Address of this component: base for the first one, base + comp for
       * the others. */
      auto addr_vec = vf.temp_vec4(pin_group, {0, 1, 2, 7});
      AluInstr *addr_op = nullptr;
      if (comp != 0) {
         addr_op = new AluInstr(op2_add_int, addr_vec[0], addr_base,
                                vf.literal(comp), AluInstr::last_write);
      } else {
         addr_op = new AluInstr(op1_mov, addr_vec[0], addr_base,
                                AluInstr::last_write);
      }
      shader.emit_instruction(addr_op);

      /* Copy the component into a register on channel 0 (argument of
       * temp_register) and store it. */
      auto value = vf.src(instr->src[0], comp);
      PRegister value_reg = vf.temp_register(0);
      shader.emit_instruction(new AluInstr(op1_mov, value_reg, value,
                                           AluInstr::last_write));

      auto value_vec = RegisterVec4(value_reg, nullptr, nullptr, nullptr, pin_chan);
      auto store = new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED,
                                value_vec, addr_vec,
                                offset + shader.ssbo_image_offset(),
                                rat_id, 1, 1, 0);
      shader.emit_instruction(store);
   }

   return true;
}
|
||||
|
||||
/* Emit an SSBO atomic operation as a RAT instruction. Always returns true.
 *
 * The returning or non-returning RAT opcode is selected depending on whether
 * the NIR destination has any uses. When the result is used, a vertex fetch
 * that waits for the RAT acknowledge reads the value back through
 * shader.rat_return_address(). */
bool RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0);

   const bool result_is_used =
      !intr->dest.is_ssa || !list_is_empty(&intr->dest.ssa.uses);
   auto opcode = result_is_used
                    ? get_rat_opcode(intr->intrinsic, PIPE_FORMAT_R32_UINT)
                    : get_rat_opcode_wo(intr->intrinsic, PIPE_FORMAT_R32_UINT);

   auto coord_orig = vf.src(intr->src[1], 0);
   auto coord = vf.temp_register(0);

   auto data_vec4 = vf.temp_vec4(pin_group, {0, 1, 2, 3});

   /* Address: src[1] shifted right by two. */
   shader.emit_instruction(new AluInstr(op2_lshr_int, coord, coord_orig,
                                        vf.literal(2), AluInstr::last_write));

   /* Channel 1 of the data vector carries the return address. */
   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1],
                                        shader.rat_return_address(),
                                        AluInstr::write));

   if (intr->intrinsic != nir_intrinsic_ssbo_atomic_comp_swap) {
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
                                           vf.src(intr->src[2], 0),
                                           AluInstr::last_write));
   } else {
      /* Compare-and-swap: src[3] goes to channel 0, src[2] to channel 2 on
       * Cayman and to channel 3 on all other chips. */
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
                                           vf.src(intr->src[3], 0),
                                           AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov,
                                           data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                                           vf.src(intr->src[2], 0),
                                           {alu_last_instr, alu_write}));
   }

   /* The address register is replicated to all four channels. */
   RegisterVec4 out_vec(coord, coord, coord, coord, pin_group);

   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, out_vec,
                              imageid + shader.ssbo_image_offset(),
                              image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();

   if (!result_is_used)
      return true;

   atomic->set_instr_flag(ack_rat_return_write);
   auto dest = vf.dest_vec4(intr->dest, pin_group);

   auto fetch = new FetchInstr(vc_fetch,
                               dest, {0, 1, 2, 3},
                               shader.rat_return_address(),
                               0,
                               no_index_offset,
                               fmt_32,
                               vtx_nf_int,
                               vtx_es_none,
                               R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                               image_offset);
   fetch->set_mfc(15);
   fetch->set_fetch_flag(FetchInstr::srf_mode);
   fetch->set_fetch_flag(FetchInstr::use_tc);
   fetch->set_fetch_flag(FetchInstr::vpm);
   fetch->set_fetch_flag(FetchInstr::wait_ack);

   /* The read-back is registered as depending on the atomic. */
   fetch->add_required_instr(atomic);
   shader.chain_ssbo_read(fetch);
   shader.emit_instruction(fetch);

   return true;
}
|
||||
|
||||
/* Emit a buffer size query for an SSBO. Always returns true.
 *
 * Only a constant buffer index in src[0] is supported; a dynamic index
 * asserts in debug builds and otherwise queries the base resource id. */
bool RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->dest, pin_group);

   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;

   auto const_offset = nir_src_as_const_value(intr->src[0]);
   if (!const_offset) {
      assert(0 && "dynamic buffer offset not supported in buffer_size");
   } else {
      res_id += const_offset[0].u32;
   }

   auto query = new QueryBufferSizeInstr(dest, {0,1,2,3},res_id);
   shader.emit_instruction(query);
   return true;
}
|
||||
|
||||
/* Emit an image store as a STORE_TYPED RAT instruction. Always returns true.
 *
 * Coordinates (src[1]) and value (src[3]) are copied into register groups
 * first. For 1D array images coordinate channels 1 and 2 are swapped.
 *
 * The store requests an acknowledge only when the intrinsic's access flags
 * contain ACCESS_COHERENT. The previous code tested
 * nir_intrinsic_has_access(), which only reports whether the intrinsic has
 * an access index at all, instead of reading the flags with
 * nir_intrinsic_access(). */
bool RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);

   auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_group);

   auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
   auto value = vf.temp_vec4(pin_group);

   RegisterVec4::Swizzle swizzle = {0,1,2,3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0,2,1,3};

   /* Copy the coordinates, applying the swizzle to the target channel. */
   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
   }

   /* Copy the value to store. */
   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
   }

   auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
   auto store = new RatInstr(op, RatInstr::STORE_TYPED, value, coord, imageid,
                             image_offset, 1, 0xf, 0);

   if (nir_intrinsic_access(intrin) & ACCESS_COHERENT)
      store->set_ack();

   shader.emit_instruction(store);
   return true;
}
|
||||
|
||||
/* Emit an image load or image atomic as a RAT instruction. Always returns
 * true.
 *
 * The returning or non-returning RAT opcode is selected depending on whether
 * the NIR destination has any uses. When the result is used, a vertex fetch
 * that waits for the RAT acknowledge reads the value back through
 * shader.rat_return_address(), using the fetch format derived from the
 * image format. For 1D array images coordinate channels 1 and 2 are
 * swapped.
 *
 * NOTE(review): get_rat_opcode_wo() has no entry for image_load, so an
 * image load whose result is unused would reach its unreachable(). */
bool RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);

   const bool result_is_used =
      !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
   auto opcode = result_is_used
                    ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT)
                    : get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);

   auto coord_orig = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_group);

   auto data_vec4 = vf.temp_vec4(pin_group, {0, 1, 2, 3});

   RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0, 2, 1, 3};

   /* Copy the coordinates, applying the swizzle to the target channel. */
   for (int chan = 0; chan < 4; ++chan) {
      auto flags = chan == 3 ? AluInstr::last_write : AluInstr::write;
      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[chan]],
                                           coord_orig[chan], flags));
   }

   /* Channel 1 of the data vector carries the return address. */
   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1],
                                        shader.rat_return_address(),
                                        AluInstr::write));

   if (intrin->intrinsic != nir_intrinsic_image_atomic_comp_swap) {
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
                                           vf.src(intrin->src[3], 0),
                                           AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[2], vf.zero(),
                                           AluInstr::last_write));
   } else {
      /* Compare-and-swap: src[4] goes to channel 0, src[3] to channel 2 on
       * Cayman and to channel 3 on all other chips. */
      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
                                           vf.src(intrin->src[4], 0),
                                           AluInstr::write));
      shader.emit_instruction(new AluInstr(op1_mov,
                                           data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                                           vf.src(intrin->src[3], 0),
                                           AluInstr::last_write));
   }

   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid,
                              image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();

   if (!result_is_used)
      return true;

   atomic->set_instr_flag(ack_rat_return_write);
   auto dest = vf.dest_vec4(intrin->dest, pin_group);

   /* Derive the fetch parameters from the image format; the values set
    * here are the defaults handed to r600_vertex_data_type(). */
   pipe_format format = nir_intrinsic_format(intrin);
   unsigned fmt = fmt_32;
   unsigned num_format = 0;
   unsigned format_comp = 0;
   unsigned endian = 0;
   r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);

   auto fetch = new FetchInstr(vc_fetch,
                               dest, {0, 1, 2, 3},
                               shader.rat_return_address(),
                               0,
                               no_index_offset,
                               (EVTXDataFormat)fmt,
                               (EVFetchNumFormat)num_format,
                               (EVFetchEndianSwap)endian,
                               R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                               image_offset);
   fetch->set_mfc(3);
   fetch->set_fetch_flag(FetchInstr::srf_mode);
   fetch->set_fetch_flag(FetchInstr::use_tc);
   fetch->set_fetch_flag(FetchInstr::vpm);
   fetch->set_fetch_flag(FetchInstr::wait_ack);
   if (format_comp)
      fetch->set_fetch_flag(FetchInstr::format_comp_signed);

   shader.chain_ssbo_read(fetch);
   shader.emit_instruction(fetch);

   return true;
}
|
||||
|
||||
#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
|
||||
|
||||
/* Emit the instructions that query the size of an image. Always returns
 * true.
 *
 * - Buffer images use a buffer size query (a dynamic image index is not
 *   passed on to that query).
 * - Cube array images whose result has more than two components use a
 *   resinfo query for channels 0, 1 and 3 and read channel 2 from the
 *   buffer info constant buffer.
 * - All other images use a plain resinfo query.
 *
 * src[1] is asserted to be zero. */
bool RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto src = RegisterVec4(0, true, {4, 4, 4, 4});

   assert(nir_src_as_uint(intrin->src[1]) == 0);

   /* The image index is either folded into the resource id (constant) or
    * loaded into a register (dynamic). */
   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;
   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   /* Every path below starts by looking up the destination. */
   auto dest = vf.dest_vec4(intrin->dest, pin_group);

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
      return true;
   }

   const bool is_cube_array_with_layers =
      nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
      nir_intrinsic_image_array(intrin) &&
      nir_dest_num_components(intrin->dest) > 2;

   if (!is_cube_array_with_layers) {
      shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0, 1, 2, 3},
                                           src, 0/* ?? */, res_id, dyn_offset));
      return true;
   }

   /* Need to load the layers from a const buffer */
   shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0, 1, 7, 3},
                                        src, 0/* ?? */, res_id, dyn_offset));

   shader.set_flag(Shader::sh_txs_cube_array_comp);

   if (const_offset) {
      /* Constant index: the value sits at vec4 (index / 4), channel
       * (index % 4) of the buffer info area. */
      unsigned lookup_resid = const_offset[0].u32;
      auto layers = vf.uniform(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL,
                               lookup_resid % 4,
                               R600_BUFFER_INFO_CONST_BUFFER);
      shader.emit_instruction(new AluInstr(op1_mov, dest[2], layers,
                                           AluInstr::last_write));
      return true;
   }

   /* If the addressing is indirect we have to get the z-value by using a
    * binary search: fetch the vec4 at (index >> 2) and select the channel
    * from the two low bits of the index. */
   auto addr = vf.temp_register();
   auto comp1 = vf.temp_register();
   auto comp2 = vf.temp_register();
   auto low_bit = vf.temp_register();
   auto high_bit = vf.temp_register();

   auto trgt = vf.temp_vec4(pin_group);

   shader.emit_instruction(new AluInstr(op2_lshr_int, addr, vf.src(intrin->src[0], 0),
                                        vf.literal(2), AluInstr::write));
   shader.emit_instruction(new AluInstr(op2_and_int, low_bit, vf.src(intrin->src[0], 0),
                                        vf.one_i(), AluInstr::write));
   shader.emit_instruction(new AluInstr(op2_and_int, high_bit, vf.src(intrin->src[0], 0),
                                        vf.literal(2), AluInstr::last_write));

   shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, addr,
                                              R600_SHADER_BUFFER_INFO_SEL,
                                              R600_BUFFER_INFO_CONST_BUFFER,
                                              nullptr, fmt_32_32_32_32_float));

   // this may be wrong
   shader.emit_instruction(new AluInstr(op3_cnde_int, comp1, high_bit, trgt[0], trgt[2],
                                        AluInstr::write));
   shader.emit_instruction(new AluInstr(op3_cnde_int, comp2, high_bit, trgt[1], trgt[3],
                                        AluInstr::last_write));
   shader.emit_instruction(new AluInstr(op3_cnde_int, dest[2], low_bit, comp1, comp2,
                                        AluInstr::last_write));

   return true;
}
|
||||
|
||||
}
|
177
src/gallium/drivers/r600/sfn/sfn_instr_mem.h
Normal file
177
src/gallium/drivers/r600/sfn/sfn_instr_mem.h
Normal file
@ -0,0 +1,177 @@
|
||||
#ifndef GDSINSTR_H
|
||||
#define GDSINSTR_H
|
||||
|
||||
#include "sfn_instr.h"
|
||||
#include "sfn_valuefactory.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class Shader;
|
||||
|
||||
class GDSInstr : public Instr {
|
||||
public:
|
||||
|
||||
GDSInstr(ESDOp op, Register *dest,
|
||||
const RegisterVec4& src, int uav_base,
|
||||
PRegister uav_id);
|
||||
|
||||
bool is_equal_to(const GDSInstr& lhs) const;
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
bool do_ready() const override;
|
||||
|
||||
auto opcode() const {return m_op;}
|
||||
auto src() const { return m_src;}
|
||||
|
||||
const auto& dest() const { return m_dest;}
|
||||
auto& dest() { return m_dest;}
|
||||
|
||||
auto uav_id() const {return m_uav_id;}
|
||||
auto uav_base() const {return m_uav_base;}
|
||||
|
||||
static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
|
||||
|
||||
static bool emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader);
|
||||
uint32_t slots() const override {return 1;};
|
||||
|
||||
private:
|
||||
|
||||
static bool emit_atomic_read(nir_intrinsic_instr *intr, Shader& shader);
|
||||
static bool emit_atomic_op2(nir_intrinsic_instr *intr, Shader& shader);
|
||||
static bool emit_atomic_inc(nir_intrinsic_instr *intr, Shader& shader);
|
||||
static bool emit_atomic_pre_dec(nir_intrinsic_instr *intr, Shader& shader);
|
||||
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ESDOp m_op{DS_OP_INVALID};
|
||||
Register *m_dest;
|
||||
|
||||
RegisterVec4 m_src;
|
||||
|
||||
int m_uav_base{0};
|
||||
PRegister m_uav_id{nullptr};
|
||||
std::bitset<8> m_tex_flags;
|
||||
};
|
||||
|
||||
|
||||
class RatInstr : public Instr {
|
||||
|
||||
public:
|
||||
enum ERatOp {
|
||||
NOP,
|
||||
STORE_TYPED,
|
||||
STORE_RAW,
|
||||
STORE_RAW_FDENORM,
|
||||
CMPXCHG_INT,
|
||||
CMPXCHG_FLT,
|
||||
CMPXCHG_FDENORM,
|
||||
ADD,
|
||||
SUB,
|
||||
RSUB,
|
||||
MIN_INT,
|
||||
MIN_UINT,
|
||||
MAX_INT,
|
||||
MAX_UINT,
|
||||
AND,
|
||||
OR,
|
||||
XOR,
|
||||
MSKOR,
|
||||
INC_UINT,
|
||||
DEC_UINT,
|
||||
NOP_RTN = 32,
|
||||
XCHG_RTN = 34,
|
||||
XCHG_FDENORM_RTN,
|
||||
CMPXCHG_INT_RTN,
|
||||
CMPXCHG_FLT_RTN,
|
||||
CMPXCHG_FDENORM_RTN,
|
||||
ADD_RTN,
|
||||
SUB_RTN,
|
||||
RSUB_RTN,
|
||||
MIN_INT_RTN,
|
||||
MIN_UINT_RTN,
|
||||
MAX_INT_RTN,
|
||||
MAX_UINT_RTN,
|
||||
AND_RTN,
|
||||
OR_RTN,
|
||||
XOR_RTN,
|
||||
MSKOR_RTN,
|
||||
UINT_RTN,
|
||||
UNSUPPORTED
|
||||
};
|
||||
|
||||
RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
|
||||
const RegisterVec4& data, const RegisterVec4& index,
|
||||
int rat_id, PRegister rat_id_offset,
|
||||
int burst_count, int comp_mask, int element_size);
|
||||
|
||||
auto rat_id_offset() const { return m_rat_id_offset;}
|
||||
int rat_id() const { return m_rat_id;}
|
||||
|
||||
ERatOp rat_op() const {return m_rat_op;}
|
||||
|
||||
const auto& value() const { return m_data;}
|
||||
auto& value() { return m_data;}
|
||||
|
||||
const auto& addr() const { return m_index;}
|
||||
auto& addr() { return m_index;}
|
||||
|
||||
int data_gpr() const {return m_data.sel();}
|
||||
int index_gpr() const {return m_index.sel();}
|
||||
int elm_size() const {return m_element_size;}
|
||||
|
||||
int comp_mask() const {return m_comp_mask;}
|
||||
|
||||
bool need_ack() const {return m_need_ack;}
|
||||
int burst_count() const {return m_burst_count;}
|
||||
|
||||
int data_swz(int chan) const {return m_data[chan]->chan();}
|
||||
|
||||
ECFOpCode cf_opcode() const { return m_cf_opcode;}
|
||||
|
||||
void set_ack() {m_need_ack = true; set_mark(); }
|
||||
void set_mark() {m_need_mark = true; }
|
||||
bool mark() {return m_need_mark;}
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
bool is_equal_to(const RatInstr& lhs) const;
|
||||
|
||||
static bool emit(nir_intrinsic_instr *intr, Shader& shader);
|
||||
|
||||
private:
|
||||
|
||||
static bool emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader);
|
||||
static bool emit_ssbo_store(nir_intrinsic_instr *intr, Shader& shader);
|
||||
static bool emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader);
|
||||
static bool emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader);
|
||||
|
||||
static bool emit_image_store(nir_intrinsic_instr *intr, Shader& shader);
|
||||
static bool emit_image_load_or_atomic(nir_intrinsic_instr *intr, Shader& shader);
|
||||
static bool emit_image_size(nir_intrinsic_instr *intr, Shader& shader);
|
||||
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ECFOpCode m_cf_opcode;
|
||||
ERatOp m_rat_op;
|
||||
|
||||
RegisterVec4 m_data;
|
||||
RegisterVec4 m_index;
|
||||
PRegister m_rat_id_offset{nullptr};
|
||||
|
||||
int m_rat_id{0};
|
||||
int m_burst_count{0};
|
||||
int m_comp_mask{15};
|
||||
int m_element_size{3};
|
||||
bool m_need_ack{false};
|
||||
bool m_need_mark{false};
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // GDSINSTR_H
|
1011
src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
Normal file
1011
src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
Normal file
File diff suppressed because it is too large
Load Diff
166
src/gallium/drivers/r600/sfn/sfn_instr_tex.h
Normal file
166
src/gallium/drivers/r600/sfn/sfn_instr_tex.h
Normal file
@ -0,0 +1,166 @@
|
||||
#ifndef INSTR_TEX_H
|
||||
#define INSTR_TEX_H
|
||||
|
||||
#include "sfn_instr.h"
|
||||
#include "sfn_valuefactory.h"
|
||||
|
||||
#include "sfn_shader.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class TexInstr : public InstrWithVectorResult {
|
||||
public:
|
||||
enum Opcode {
|
||||
ld = FETCH_OP_LD,
|
||||
get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
|
||||
get_nsamples = FETCH_OP_GET_NUMBER_OF_SAMPLES,
|
||||
get_tex_lod = FETCH_OP_GET_LOD,
|
||||
get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
|
||||
get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
|
||||
set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
|
||||
keep_gradients = FETCH_OP_KEEP_GRADIENTS,
|
||||
set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
|
||||
set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
|
||||
sample = FETCH_OP_SAMPLE,
|
||||
sample_l = FETCH_OP_SAMPLE_L,
|
||||
sample_lb = FETCH_OP_SAMPLE_LB,
|
||||
sample_lz = FETCH_OP_SAMPLE_LZ,
|
||||
sample_g = FETCH_OP_SAMPLE_G,
|
||||
sample_g_lb = FETCH_OP_SAMPLE_G_L,
|
||||
gather4 = FETCH_OP_GATHER4,
|
||||
gather4_o = FETCH_OP_GATHER4_O,
|
||||
|
||||
sample_c = FETCH_OP_SAMPLE_C,
|
||||
sample_c_l = FETCH_OP_SAMPLE_C_L,
|
||||
sample_c_lb = FETCH_OP_SAMPLE_C_LB,
|
||||
sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
|
||||
sample_c_g = FETCH_OP_SAMPLE_C_G,
|
||||
sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
|
||||
gather4_c = FETCH_OP_GATHER4_C,
|
||||
gather4_c_o = FETCH_OP_GATHER4_C_O,
|
||||
unknown = 255
|
||||
};
|
||||
|
||||
enum Flags {
|
||||
x_unnormalized,
|
||||
y_unnormalized,
|
||||
z_unnormalized,
|
||||
w_unnormalized,
|
||||
grad_fine,
|
||||
num_tex_flag
|
||||
};
|
||||
|
||||
struct Inputs {
|
||||
Inputs(const nir_tex_instr& instr, ValueFactory &vf);
|
||||
const nir_variable *sampler_deref;
|
||||
const nir_variable *texture_deref;
|
||||
RegisterVec4 coord;
|
||||
PVirtualValue bias;
|
||||
PVirtualValue comperator;
|
||||
PVirtualValue lod;
|
||||
RegisterVec4 ddx;
|
||||
RegisterVec4 ddy;
|
||||
nir_src *offset;
|
||||
PVirtualValue gather_comp;
|
||||
PVirtualValue ms_index;
|
||||
PVirtualValue sampler_offset;
|
||||
PVirtualValue texture_offset;
|
||||
|
||||
RegisterVec4::Swizzle swizzle_from_ncomps(int comps) const;
|
||||
|
||||
Opcode opcode;
|
||||
private:
|
||||
auto get_opcode(const nir_tex_instr& instr) -> Opcode;
|
||||
};
|
||||
|
||||
TexInstr(Opcode op, const RegisterVec4& dest,
|
||||
const RegisterVec4::Swizzle& dest_swizzle,
|
||||
const RegisterVec4& src, unsigned sid, unsigned rid,
|
||||
PVirtualValue sampler_offs = nullptr);
|
||||
|
||||
TexInstr(const TexInstr& orig) = delete;
|
||||
TexInstr(const TexInstr&& orig) = delete;
|
||||
TexInstr& operator =(const TexInstr& orig) = delete;
|
||||
TexInstr& operator =(const TexInstr&& orig) = delete;
|
||||
|
||||
void accept(ConstInstrVisitor& visitor) const override;
|
||||
void accept(InstrVisitor& visitor) override;
|
||||
|
||||
const auto& src() const {return m_src;}
|
||||
auto& src() {return m_src;}
|
||||
|
||||
unsigned opcode() const {return m_opcode;}
|
||||
unsigned sampler_id() const {return m_sampler_id;}
|
||||
unsigned resource_id() const {return m_resource_id;}
|
||||
|
||||
void set_offset(unsigned index, int32_t val);
|
||||
int get_offset(unsigned index) const;
|
||||
|
||||
void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
|
||||
int inst_mode() const { return m_inst_mode;}
|
||||
|
||||
void set_tex_flag(Flags flag) {m_tex_flags.set(flag);}
|
||||
bool has_tex_flag(Flags flag) const {return m_tex_flags.test(flag);}
|
||||
|
||||
void set_sampler_offset(PVirtualValue ofs) {m_sampler_offset = ofs;}
|
||||
auto* sampler_offset() const {return m_sampler_offset;}
|
||||
|
||||
void set_gather_comp(int cmp);
|
||||
bool is_equal_to(const TexInstr& lhs) const;
|
||||
|
||||
static Opcode op_from_string(const std::string& s);
|
||||
static Instr::Pointer from_string(std::istream& is, ValueFactory& value_fctory);
|
||||
|
||||
static bool from_nir(nir_tex_instr *tex, Shader& shader);
|
||||
|
||||
uint32_t slots() const override {return 1;};
|
||||
|
||||
auto prepare_instr() const { return m_prepare_instr;}
|
||||
|
||||
private:
|
||||
|
||||
bool do_ready() const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
bool propagate_death() override;
|
||||
|
||||
static const char *opname(Opcode code);
|
||||
static bool is_gather(Opcode op);
|
||||
|
||||
void read_tex_coord_normalitazion(const std::string& next_token);
|
||||
void set_tex_param(const std::string& next_token);
|
||||
|
||||
static auto prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader &shader) -> RegisterVec4;
|
||||
|
||||
static bool emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
|
||||
static bool emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
|
||||
static bool emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader);
|
||||
static bool emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader);
|
||||
static bool emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader);
|
||||
static bool emit_tex_txs(nir_tex_instr *tex, Inputs& src,
|
||||
RegisterVec4::Swizzle dest_swz, Shader& shader);
|
||||
static bool emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader);
|
||||
static bool emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader);
|
||||
static bool emit_tex_tg4(nir_tex_instr* instr, Inputs& src , Shader& shader);
|
||||
static bool emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader);
|
||||
|
||||
void set_coord_offsets(nir_src *offset);
|
||||
void set_rect_coordinate_flags(nir_tex_instr* instr);
|
||||
void add_prepare_instr(TexInstr *ir) {m_prepare_instr.push_back(ir);};
|
||||
|
||||
Opcode m_opcode;
|
||||
|
||||
RegisterVec4 m_src;
|
||||
PVirtualValue m_sampler_offset;
|
||||
std::bitset<num_tex_flag> m_tex_flags;
|
||||
int m_offset[3];
|
||||
int m_inst_mode;
|
||||
unsigned m_sampler_id;
|
||||
unsigned m_resource_id;
|
||||
|
||||
static const std::map<Opcode, std::string> s_opcode_map;
|
||||
std::list<TexInstr *> m_prepare_instr;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // INSTR_TEX_H
|
188
src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp
Normal file
188
src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp
Normal file
@ -0,0 +1,188 @@
|
||||
#include "sfn_instrfactory.h"
|
||||
|
||||
#include "sfn_instr_alugroup.h"
|
||||
#include "sfn_debug.h"
|
||||
#include "sfn_instr_controlflow.h"
|
||||
#include "sfn_instr_export.h"
|
||||
#include "sfn_instr_fetch.h"
|
||||
#include "sfn_instr_lds.h"
|
||||
#include "sfn_instr_mem.h"
|
||||
#include "sfn_instr_tex.h"
|
||||
|
||||
#include "sfn_alu_defines.h"
|
||||
|
||||
#include "sfn_shader.h"
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
/* A fresh factory has no ALU group under construction. */
InstrFactory::InstrFactory() : group(nullptr) {}
|
||||
|
||||
/* Create an instruction from its textual form.
 *
 * The first non-empty token of `s` selects the instruction class and the
 * rest of the stream is handed to that class' from_string parser.
 * ALU_GROUP_BEGIN opens a new AluGroup (nothing is returned for it) and
 * ALU_GROUP_END returns the group opened before. Any token not listed
 * here is passed to ControlFlowInstr::from_string.
 *
 * A null result is reported on stderr unless a group is currently open. */
PInst InstrFactory::from_string(const std::string& s, int nesting_depth)
{
   std::istringstream is(s);
   string type;

   /* Extract tokens until one is non-empty or the stream gives up */
   do {
      is >> type;
   } while (type.empty() && is.good());

   /* The group markers only change the state of the factory */
   if (type == "ALU_GROUP_BEGIN") {
      group = new AluGroup();
      group->set_nesting_depth(nesting_depth);
      return nullptr;
   }

   if (type == "ALU_GROUP_END") {
      AluGroup *finished = group;
      group = nullptr;
      return finished;
   }

   PInst result = nullptr;

   if (type == "ALU")
      result = AluInstr::from_string(is, m_value_factory, group);
   else if (type == "TEX")
      result = TexInstr::from_string(is, m_value_factory);
   else if (type == "EXPORT")
      result = ExportInstr::from_string(is, m_value_factory);
   else if (type == "EXPORT_DONE")
      result = ExportInstr::last_from_string(is, m_value_factory);
   else if (type == "VFETCH")
      result = FetchInstr::from_string(is, m_value_factory);
   else if (type == "GET_BUF_RESINFO")
      result = QueryBufferSizeInstr::from_string(is, m_value_factory);
   else if (type == "LOAD_BUF")
      result = LoadFromBuffer::from_string(is, m_value_factory);
   else if (type == "READ_SCRATCH")
      result = LoadFromScratch::from_string(is, m_value_factory);
   else if (type == "IF")
      result = IfInstr::from_string(is, m_value_factory);
   else if (type == "WRITE_SCRATCH")
      result = WriteScratchInstr::from_string(is, m_value_factory);
   else if (type == "MEM_RING")
      result = MemRingOutInstr::from_string(is, m_value_factory);
   else if (type == "EMIT_VERTEX")
      result = EmitVertexInstr::from_string(is, false);
   else if (type == "EMIT_CUT_VERTEX")
      result = EmitVertexInstr::from_string(is, true);
   else if (type == "LDS_READ")
      result = LDSReadInstr::from_string(is, m_value_factory);
   else if (type == "LDS")
      result = LDSAtomicInstr::from_string(is, m_value_factory);
   else if (type == "WRITE_TF")
      result = WriteTFInstr::from_string(is, m_value_factory);
   else
      result = ControlFlowInstr::from_string(type);

   /* With an open group a null result is not treated as an error */
   if (!result && !group)
      std::cerr << "Error translating '" << s << "'\n";

   return result;
}
|
||||
|
||||
/* Dispatch a NIR instruction to the handler for its type.
 *
 * Returns the result of the handler; instruction types without a handler
 * are reported on stderr and yield false. */
bool InstrFactory::from_nir(nir_instr *instr, Shader& shader)
{
   switch (instr->type) {
   case nir_instr_type_alu: {
      auto alu = nir_instr_as_alu(instr);
      return AluInstr::from_nir(alu, shader);
   }
   case nir_instr_type_intrinsic: {
      auto intr = nir_instr_as_intrinsic(instr);
      return shader.process_intrinsic(intr);
   }
   case nir_instr_type_load_const: {
      auto lc = nir_instr_as_load_const(instr);
      return load_const(lc, shader);
   }
   case nir_instr_type_tex: {
      auto tex = nir_instr_as_tex(instr);
      return TexInstr::from_nir(tex, shader);
   }
   case nir_instr_type_jump: {
      auto jump = nir_instr_as_jump(instr);
      return process_jump(jump, shader);
   }
   case nir_instr_type_ssa_undef: {
      auto undef = nir_instr_as_ssa_undef(instr);
      return process_undef(undef, shader);
   }
   default:
      break;
   }

   fprintf(stderr, "Instruction type %d not supported\n", instr->type);
   return false;
}
|
||||
|
||||
/* Emit MOV instructions that load the components of a NIR constant.
 *
 * 64 bit constants are written as two 32 bit moves per component (low
 * word to channel 2*i, high word to channel 2*i + 1). For other bit sizes
 * each component gets one move; the values 0, 1, 0xffffffff, 0x3f800000 and
 * 0x3f000000 are taken from inline constants, everything else becomes a
 * literal. The final move of that sequence is flagged alu_last_instr.
 *
 * Always returns true. */
bool InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader)
{
   auto& def = literal->def;

   if (def.bit_size == 64) {
      for (int i = 0; i < def.num_components; ++i) {
         auto lo_dest = m_value_factory.dest(def, 2 * i, pin_none);
         auto lo_src = m_value_factory.literal(literal->value[i].u64 & 0xffffffff);
         shader.emit_instruction(new AluInstr(op1_mov, lo_dest, lo_src, {alu_write}));

         auto hi_dest = m_value_factory.dest(def, 2 * i + 1, pin_none);
         auto hi_src = m_value_factory.literal((literal->value[i].u64 >> 32) & 0xffffffff);
         shader.emit_instruction(new AluInstr(op1_mov, hi_dest, hi_src, AluInstr::last_write));
      }
      return true;
   }

   /* Select the source value for one 32 bit pattern */
   auto source_for = [this](uint32_t v) -> PVirtualValue {
      if (v == 0)
         return m_value_factory.zero();
      if (v == 1)
         return m_value_factory.one_i();
      if (v == 0xffffffff)
         return m_value_factory.inline_const(ALU_SRC_M_1_INT, 0);
      if (v == 0x3f800000)
         return m_value_factory.inline_const(ALU_SRC_1, 0);
      if (v == 0x3f000000)
         return m_value_factory.inline_const(ALU_SRC_0_5, 0);
      return m_value_factory.literal(v);
   };

   /* A single component may be pinned freely, vectors are not pinned */
   Pin pin = def.num_components == 1 ? pin_free : pin_none;

   AluInstr *last_mov = nullptr;
   for (int i = 0; i < def.num_components; ++i) {
      auto dest = m_value_factory.dest(def, i, pin);
      uint32_t v = literal->value[i].i32;
      PVirtualValue src = source_for(v);

      last_mov = new AluInstr(op1_mov, dest, src, {alu_write});
      shader.emit_instruction(last_mov);
   }

   /* Only the final move of the sequence carries the "last" flag */
   if (last_mov)
      last_mov->set_alu_flag(alu_last_instr);

   return true;
}
|
||||
|
||||
/* Translate a NIR jump into a control flow instruction.
 *
 * break and continue are mapped to cf_loop_break and cf_loop_continue; the
 * instruction is emitted and a new block is started afterwards. Every other
 * jump type is logged as an error and false is returned. */
bool InstrFactory::process_jump(nir_jump_instr *instr, Shader& shader)
{
   ControlFlowInstr::CFType type;
   switch (instr->type) {
   case nir_jump_break:
      type = ControlFlowInstr::cf_loop_break;
      break;

   case nir_jump_continue:
      type = ControlFlowInstr::cf_loop_continue;
      break;

   default:
      /* Log through the embedded nir_instr base member; this does not rely
       * on the struct layout the way a reinterpret_cast of the pointer
       * would. */
      sfn_log << SfnLog::err << "Jump instrunction " << instr->instr << " not supported\n";
      return false;
   }

   shader.emit_instruction(new ControlFlowInstr(type));
   shader.start_new_block(0);

   return true;
}
|
||||
|
||||
/* Give every component of an undefined SSA value a definition by moving
 * zero into it. Always returns true. */
bool InstrFactory::process_undef(nir_ssa_undef_instr *undef, Shader& shader)
{
   auto& def = undef->def;

   for (int chan = 0; chan < def.num_components; ++chan) {
      auto dest = shader.value_factory().undef(def.index, chan);
      auto mov = new AluInstr(op1_mov, dest,
                              value_factory().zero(),
                              AluInstr::last_write);
      shader.emit_instruction(mov);
   }

   return true;
}
|
||||
|
||||
|
||||
}
|
34
src/gallium/drivers/r600/sfn/sfn_instrfactory.h
Normal file
34
src/gallium/drivers/r600/sfn/sfn_instrfactory.h
Normal file
@ -0,0 +1,34 @@
|
||||
#ifndef INSTRFACTORY_H
|
||||
#define INSTRFACTORY_H
|
||||
|
||||
#include "sfn_instr.h"
|
||||
#include "sfn_valuefactory.h"
|
||||
|
||||
|
||||
#include <iosfwd>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class Shader;
|
||||
class InstrFactory : public Allocate {
|
||||
public:
|
||||
InstrFactory();
|
||||
|
||||
PInst from_string(const std::string &s, int nesting_depth);
|
||||
bool from_nir(nir_instr *instr, Shader& shader);
|
||||
auto& value_factory() { return m_value_factory;}
|
||||
|
||||
private:
|
||||
bool load_const(nir_load_const_instr *lc, Shader& shader);
|
||||
bool process_jump(nir_jump_instr *instr, Shader& shader);
|
||||
bool process_undef(nir_ssa_undef_instr *undef, Shader& shader);
|
||||
|
||||
Instr::Pointer export_from_string(std::istream& is, bool is_last);
|
||||
|
||||
ValueFactory m_value_factory;
|
||||
AluGroup *group;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // INSTRFACTORY_H
|
@ -1,183 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_instruction_alu.h"
|
||||
#include "sfn_valuepool.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Per-source lookup tables: entry i holds the modifier flag that applies
 * to source i. There is no abs entry for a third source. */
const AluModifiers AluInstruction::src_abs_flags[2] = {
   alu_src0_abs,
   alu_src1_abs
};

const AluModifiers AluInstruction::src_neg_flags[3] = {
   alu_src0_neg,
   alu_src1_neg,
   alu_src2_neg
};

const AluModifiers AluInstruction::src_rel_flags[3] = {
   alu_src0_rel,
   alu_src1_rel,
   alu_src2_rel
};
|
||||
|
||||
/* Create an instruction with only the opcode set: the source vector is
 * sized according to the opcode table but its entries, like the
 * destination, stay unset. */
AluInstruction::AluInstruction(EAluOp opcode):
   Instruction (Instruction::alu),
   m_opcode(opcode),
   m_src(alu_ops.at(opcode).nsrc),
   m_bank_swizzle(alu_vec_unknown),
   m_cf_type(cf_alu)
{
   const bool has_three_sources = alu_ops.at(opcode).nsrc == 3;
   if (has_three_sources)
      m_flags.set(alu_op3);
}
|
||||
|
||||
/* Create a fully specified instruction.
 *
 * The source vector is taken over from `src`, the given flags are set
 * (plus alu_op3 when the opcode table lists three sources), and all sources
 * and the destination are registered as remappable values. `dest` must not
 * be null. */
AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
                               std::vector<PValue> src,
                               const std::set<AluModifiers>& flags):
   Instruction (Instruction::alu),
   m_opcode(opcode),
   m_dest(dest),
   m_bank_swizzle(alu_vec_unknown),
   m_cf_type(cf_alu)
{
   assert(dest);
   m_src.swap(src);

   for (auto modifier : flags)
      m_flags.set(modifier);

   const bool has_three_sources = alu_ops.at(opcode).nsrc == 3;
   if (has_three_sources)
      m_flags.set(alu_op3);

   for (auto &source: m_src)
      add_remappable_src_value(&source);

   add_remappable_dst_value(&m_dest);
}
|
||||
|
||||
/* Convenience constructors for one, two and three sources: each packs its
 * sources into a vector and delegates to the vector based constructor. */
AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
                               const std::set<AluModifiers>& flags):
   AluInstruction(opcode, dest, std::vector<PValue>{src0}, flags)
{
}

AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
                               PValue src0, PValue src1,
                               const std::set<AluModifiers>& flags):
   AluInstruction(opcode, dest, {src0, src1}, flags)
{
}

AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
                               PValue src1, PValue src2,
                               const std::set<AluModifiers>& flags):
   AluInstruction(opcode, dest, {src0, src1, src2}, flags)
{
}
|
||||
|
||||
bool AluInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
assert(lhs.type() == alu);
|
||||
const auto& oth = static_cast<const AluInstruction&>(lhs);
|
||||
|
||||
if (m_opcode != oth.m_opcode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (*m_dest != *oth.m_dest)
|
||||
return false;
|
||||
|
||||
if (m_src.size() != oth.m_src.size())
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < m_src.size(); ++i)
|
||||
if (*m_src[i] != *oth.m_src[i]) {
|
||||
return false;
|
||||
}
|
||||
return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type);
|
||||
}
|
||||
|
||||
void AluInstruction::replace_values(const ValueSet& candidates, PValue new_value)
|
||||
{
|
||||
for (auto c: candidates) {
|
||||
if (*c == *m_dest)
|
||||
m_dest = new_value;
|
||||
|
||||
for (auto& s: m_src) {
|
||||
if (*c == *s)
|
||||
s = new_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Look up the rename entry for the register's sel. If it is valid, the
 * register is replaced by the value for (new_reg, same channel) obtained
 * from `values`. The map entry for the sel of the resulting register is
 * marked as used, and that register is returned. */
PValue AluInstruction::remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
                                           ValueMap &values)
{
   const auto rename = map[reg->sel()];

   if (rename.valid)
      reg = values.get_or_inject(rename.new_reg, reg->chan());

   auto& entry = map[reg->sel()];
   entry.used = true;
   return reg;
}
|
||||
|
||||
|
||||
void AluInstruction::set_flag(AluModifiers flag)
|
||||
{
|
||||
m_flags.set(flag);
|
||||
}
|
||||
|
||||
void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz)
|
||||
{
|
||||
m_bank_swizzle = bswz;
|
||||
}
|
||||
|
||||
unsigned AluInstruction::n_sources() const
|
||||
{
|
||||
return m_src.size();
|
||||
}
|
||||
|
||||
void AluInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "ALU " << alu_ops.at(m_opcode).name;
|
||||
if (m_flags.test(alu_dst_clamp))
|
||||
os << "_CLAMP";
|
||||
if (m_dest)
|
||||
os << ' ' << *m_dest << " : " ;
|
||||
|
||||
for (unsigned i = 0; i < m_src.size(); ++i) {
|
||||
int pflags = 0;
|
||||
if (i)
|
||||
os << ' ';
|
||||
if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg;
|
||||
if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel;
|
||||
if (i < 2)
|
||||
if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs;
|
||||
m_src[i]->print(os, Value::PrintFlags(0, pflags));
|
||||
}
|
||||
os << " {";
|
||||
os << (m_flags.test(alu_write) ? 'W' : ' ');
|
||||
os << (m_flags.test(alu_last_instr) ? 'L' : ' ');
|
||||
os << (m_flags.test(alu_update_exec) ? 'E' : ' ');
|
||||
os << (m_flags.test(alu_update_pred) ? 'P' : ' ');
|
||||
os << "}";
|
||||
|
||||
os << " BS:" << m_bank_swizzle;
|
||||
os << " CF:" << m_cf_type;
|
||||
}
|
||||
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef sfn_r600_instruction_alu_h
|
||||
#define sfn_r600_instruction_alu_h
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
#include "sfn_alu_defines.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Flags of an ALU instruction; the enumerators are used as bit indices
 * (they are passed to set()/test() on the instruction's flag set). */
enum AluModifiers {
   /* source 0 */
   alu_src0_neg,
   alu_src0_abs,
   alu_src0_rel,
   /* source 1 */
   alu_src1_neg,
   alu_src1_abs,
   alu_src1_rel,
   /* source 2 (no abs flag) */
   alu_src2_neg,
   alu_src2_rel,
   /* destination */
   alu_dst_clamp,
   alu_dst_rel,
   /* instruction level */
   alu_last_instr,
   alu_update_exec,
   alu_update_pred,
   alu_write,
   alu_op3
};
|
||||
|
||||
/* Output modifier values for the destination. */
enum AluDstModifiers {
   omod_off   = 0,
   omod_mul2  = 1,
   omod_mul4  = 2,
   omod_divl2 = 3
};
|
||||
|
||||
/* Predicate selection values; note that 1 is not used. */
enum AluPredSel {
   pred_off  = 0,
   pred_zero = 2,
   pred_one  = 3
};
|
||||
|
||||
/* Bank swizzle values. The alu_vec_* and sq_alu_scl_* names share the
 * numeric values 0..3; alu_vec_unknown marks a swizzle that has not been
 * chosen. */
enum AluBankSwizzle {
   alu_vec_012     = 0,
   sq_alu_scl_201  = 0,
   alu_vec_021     = 1,
   sq_alu_scl_122  = 1,
   alu_vec_120     = 2,
   sq_alu_scl_212  = 2,
   alu_vec_102     = 3,
   sq_alu_scl_221  = 3,
   alu_vec_201     = 4,
   alu_vec_210     = 5,
   alu_vec_unknown = 6
};
|
||||
|
||||
class AluInstruction : public Instruction {
|
||||
public:
|
||||
|
||||
static const AluModifiers src_abs_flags[2];
|
||||
static const AluModifiers src_neg_flags[3];
|
||||
static const AluModifiers src_rel_flags[3];
|
||||
|
||||
AluInstruction(EAluOp opcode);
|
||||
AluInstruction(EAluOp opcode, PValue dest,
|
||||
std::vector<PValue> src0,
|
||||
const std::set<AluModifiers>& m_flags);
|
||||
|
||||
AluInstruction(EAluOp opcode, PValue dest, PValue src0,
|
||||
const std::set<AluModifiers>& m_flags);
|
||||
|
||||
AluInstruction(EAluOp opcode, PValue dest,
|
||||
PValue src0, PValue src1,
|
||||
const std::set<AluModifiers>& m_flags);
|
||||
|
||||
AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1,
|
||||
PValue src2,
|
||||
const std::set<AluModifiers>& m_flags);
|
||||
|
||||
void set_flag(AluModifiers flag);
|
||||
unsigned n_sources() const;
|
||||
|
||||
PValue dest() {return m_dest;}
|
||||
EAluOp opcode() const {return m_opcode;}
|
||||
const Value *dest() const {return m_dest.get();}
|
||||
Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
|
||||
PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];}
|
||||
bool is_last() const {return m_flags.test(alu_last_instr);}
|
||||
bool write() const {return m_flags.test(alu_write);}
|
||||
bool flag(AluModifiers f) const {return m_flags.test(f);}
|
||||
void set_bank_swizzle(AluBankSwizzle swz);
|
||||
int bank_swizzle() const {return m_bank_swizzle;}
|
||||
ECFAluOpCode cf_type() const {return m_cf_type;}
|
||||
void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
|
||||
|
||||
void replace_values(const ValueSet& candidates, PValue new_value) override;
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
PValue remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
|
||||
ValueMap &values);
|
||||
|
||||
|
||||
EAluOp m_opcode;
|
||||
PValue m_dest;
|
||||
std::vector<PValue> m_src;
|
||||
AluOpFlags m_flags;
|
||||
AluBankSwizzle m_bank_swizzle;
|
||||
ECFAluOpCode m_cf_type;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,187 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
#include "sfn_liverange.h"
|
||||
#include "sfn_valuepool.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Keep references to the rename map and to the value map that is used to
 * obtain the replacement registers. */
ValueRemapper::ValueRemapper(std::vector<rename_reg_pair>& m, ValueMap& values):
   m_map(m),
   m_values(values)
{
}
|
||||
|
||||
/* Remap a single value in place; unset values are ignored.
 *
 *  - GPR: the value itself is replaced according to the rename map.
 *  - GPR array value: the contained value and, if it is a GPR, the indirect
 *    address are remapped; afterwards every map entry in the range
 *    [sel, sel + array_size) is marked as used.
 *  - kconst: only an address that is a GPR is remapped.
 *
 * Other value types are left untouched. */
void ValueRemapper::remap(PValue& v)
{
   if (!v)
      return;

   if (v->type() == Value::gpr) {
      v = remap_one_registers(v);
      return;
   }

   if (v->type() == Value::gpr_array_value) {
      GPRArrayValue& array_value = static_cast<GPRArrayValue&>(*v);
      auto value = array_value.value();
      auto addr = array_value.indirect();

      array_value.reset_value(remap_one_registers(value));

      if (addr && addr->type() == Value::gpr)
         array_value.reset_addr(remap_one_registers(addr));

      /* sel() is read only after the value has been reset */
      size_t range_start = array_value.sel();
      size_t range_end = range_start + array_value.array_size();
      for (size_t sel = range_start; sel < range_end; ++sel)
         m_map[sel].used = true;
      return;
   }

   if (v->type() == Value::kconst) {
      auto& uniform = static_cast<UniformValue&>(*v);
      auto addr = uniform.addr();
      if (addr && addr->type() == Value::gpr)
         uniform.reset_addr(remap_one_registers(addr));
   }
}
|
||||
|
||||
/* Remap the four components of a vector. Unset components are skipped;
 * for the others a valid rename entry replaces the register (keeping the
 * channel), and the map entry of the resulting register is marked used. */
void ValueRemapper::remap(GPRVector& v)
{
   for (int i = 0; i < 4; ++i) {
      if (!v.reg_i(i))
         continue;

      auto& rename = m_map[v.reg_i(i)->sel()];
      if (rename.valid) {
         auto replacement = m_values.get_or_inject(rename.new_reg, v.reg_i(i)->chan());
         v.set_reg_i(i, replacement);
      }

      m_map[v.reg_i(i)->sel()].used = true;
   }
}
|
||||
|
||||
/* Apply the rename map to one register. `reg` is updated in place when its
 * rename entry is valid; the map entry of the resulting register is marked
 * as used and the resulting register is also returned. */
PValue ValueRemapper::remap_one_registers(PValue& reg)
{
   const auto rename = m_map[reg->sel()];

   if (rename.valid)
      reg = m_values.get_or_inject(rename.new_reg, reg->chan());

   auto& entry = m_map[reg->sel()];
   entry.used = true;
   return reg;
}
|
||||
|
||||
|
||||
/* Store the instruction type; it is used by operator== to decide whether
 * two instructions can be compared at all. */
Instruction::Instruction(instr_type t) : m_type(t) {}

/* Nothing to release here. */
Instruction::~Instruction() {}
|
||||
|
||||
void Instruction::print(std::ostream& os) const
|
||||
{
|
||||
os << "OP:";
|
||||
do_print(os);
|
||||
}
|
||||
|
||||
|
||||
/* Run the remapper over all registered values: source registers, source
 * vectors, destination registers and destination vectors, in that order.
 * The instruction is logged before and after on the merge channel. */
void Instruction::remap_registers(ValueRemapper& map)
{
   sfn_log << SfnLog::merge << "REMAP " << *this << "\n";

   for (auto& src_reg: m_mappable_src_registers)
      map.remap(*src_reg);

   for (auto& src_vec: m_mappable_src_vectors)
      map.remap(*src_vec);

   for (auto& dst_reg: m_mappable_dst_registers)
      map.remap(*dst_reg);

   for (auto& dst_vec: m_mappable_dst_vectors)
      map.remap(*dst_vec);

   sfn_log << SfnLog::merge << "TO    " << *this << "\n\n";
}
|
||||
|
||||
/* Register the location of a source value for later remapping and liveness
 * evaluation. Locations that currently hold no value are not registered. */
void Instruction::add_remappable_src_value(PValue *v)
{
   if (!*v)
      return;

   m_mappable_src_registers.push_back(v);
}
|
||||
|
||||
void Instruction::add_remappable_src_value(GPRVector *v)
|
||||
{
|
||||
m_mappable_src_vectors.push_back(v);
|
||||
}
|
||||
|
||||
/* Register the location of a destination value for later remapping and
 * liveness evaluation. Only the location pointer itself is tested here;
 * whether it currently holds a value is not checked at registration time. */
void Instruction::add_remappable_dst_value(PValue *v)
{
   if (!v)
      return;

   m_mappable_dst_registers.push_back(v);
}
|
||||
|
||||
void Instruction::add_remappable_dst_value(GPRVector *v)
|
||||
{
|
||||
m_mappable_dst_vectors.push_back(v);
|
||||
}
|
||||
|
||||
void Instruction::replace_values(UNUSED const ValueSet& candidates, UNUSED PValue new_value)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/* Report all registered values to the live range evaluator: sources are
 * recorded as reads, destinations as writes. Register locations that hold
 * no value are skipped. Finally the instruction specific hook
 * do_evalue_liveness() is invoked. */
void Instruction::evalue_liveness(LiverangeEvaluator& eval) const
{
   sfn_log << SfnLog::merge << "Scan " << *this << "\n";

   for (PValue *src_reg : m_mappable_src_registers) {
      if (*src_reg)
         eval.record_read(**src_reg);
   }

   for (GPRVector *src_vec : m_mappable_src_vectors)
      eval.record_read(*src_vec);

   for (PValue *dst_reg : m_mappable_dst_registers) {
      if (*dst_reg)
         eval.record_write(**dst_reg);
   }

   for (GPRVector *dst_vec : m_mappable_dst_vectors)
      eval.record_write(*dst_vec);

   do_evalue_liveness(eval);
}
|
||||
|
||||
void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
bool operator == (const Instruction& lhs, const Instruction& rhs)
|
||||
{
|
||||
if (rhs.m_type != lhs.m_type)
|
||||
return false;
|
||||
|
||||
return lhs.is_equal_to(rhs);
|
||||
}
|
||||
|
||||
}
|
@ -1,155 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef sfn_r600_instr_h
|
||||
#define sfn_r600_instr_h
|
||||
|
||||
#include "sfn_instructionvisitor.h"
|
||||
#include "sfn_value_gpr.h"
|
||||
#include "sfn_defines.h"
|
||||
|
||||
#include "gallium/drivers/r600/r600_isa.h"
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
struct rename_reg_pair {
|
||||
bool valid;
|
||||
bool used;
|
||||
int new_reg;
|
||||
};
|
||||
|
||||
class LiverangeEvaluator;
|
||||
class ValueMap;
|
||||
|
||||
|
||||
class ValueRemapper {
|
||||
public:
|
||||
ValueRemapper(std::vector<rename_reg_pair>& m,
|
||||
ValueMap& values);
|
||||
|
||||
void remap(PValue& v);
|
||||
void remap(GPRVector& v);
|
||||
private:
|
||||
PValue remap_one_registers(PValue& reg);
|
||||
|
||||
std::vector<rename_reg_pair>& m_map;
|
||||
ValueMap& m_values;
|
||||
};
|
||||
|
||||
|
||||
using OutputRegisterMap = std::map<unsigned, const GPRVector *>;
|
||||
|
||||
class Instruction {
|
||||
public:
|
||||
enum instr_type {
|
||||
alu,
|
||||
exprt,
|
||||
tex,
|
||||
vtx,
|
||||
wait_ack,
|
||||
cond_if,
|
||||
cond_else,
|
||||
cond_endif,
|
||||
lds_atomic,
|
||||
lds_read,
|
||||
lds_write,
|
||||
loop_begin,
|
||||
loop_end,
|
||||
loop_break,
|
||||
loop_continue,
|
||||
phi,
|
||||
streamout,
|
||||
ring,
|
||||
emit_vtx,
|
||||
mem_wr_scratch,
|
||||
gds,
|
||||
rat,
|
||||
tf_write,
|
||||
block,
|
||||
unknown
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<Instruction> Pointer;
|
||||
|
||||
friend bool operator == (const Instruction& lhs, const Instruction& rhs);
|
||||
|
||||
Instruction(instr_type t);
|
||||
|
||||
virtual ~Instruction();
|
||||
|
||||
instr_type type() const { return m_type;}
|
||||
|
||||
void print(std::ostream& os) const;
|
||||
|
||||
virtual void replace_values(const ValueSet& candidates, PValue new_value);
|
||||
|
||||
void evalue_liveness(LiverangeEvaluator& eval) const;
|
||||
|
||||
void remap_registers(ValueRemapper& map);
|
||||
|
||||
virtual bool accept(InstructionVisitor& visitor) = 0;
|
||||
virtual bool accept(ConstInstructionVisitor& visitor) const = 0;
|
||||
|
||||
protected:
|
||||
|
||||
void add_remappable_src_value(PValue *v);
|
||||
void add_remappable_src_value(GPRVector *v);
|
||||
void add_remappable_dst_value(PValue *v);
|
||||
void add_remappable_dst_value(GPRVector *v);
|
||||
|
||||
private:
|
||||
|
||||
virtual void do_evalue_liveness(LiverangeEvaluator& eval) const;
|
||||
|
||||
virtual bool is_equal_to(const Instruction& lhs) const = 0;
|
||||
|
||||
instr_type m_type;
|
||||
|
||||
virtual void do_print(std::ostream& os) const = 0;
|
||||
|
||||
std::vector<PValue*> m_mappable_src_registers;
|
||||
std::vector<GPRVector*> m_mappable_src_vectors;
|
||||
std::vector<PValue*> m_mappable_dst_registers;
|
||||
std::vector<GPRVector*> m_mappable_dst_vectors;
|
||||
};
|
||||
|
||||
using PInstruction=Instruction::Pointer;
|
||||
|
||||
inline std::ostream& operator << (std::ostream& os, const Instruction& instr)
|
||||
{
|
||||
instr.print(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
bool operator == (const Instruction& lhs, const Instruction& rhs);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,57 +0,0 @@
|
||||
#include "sfn_instruction_block.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
|
||||
InstructionBlock::InstructionBlock(unsigned nesting_depth, unsigned block_number):
|
||||
Instruction(block),
|
||||
m_block_number(block_number),
|
||||
m_nesting_depth(nesting_depth)
|
||||
{
|
||||
}
|
||||
|
||||
void InstructionBlock::emit(PInstruction instr)
|
||||
{
|
||||
m_block.push_back(instr);
|
||||
}
|
||||
|
||||
/* Forward the register remapping to every instruction stored in this block. */
void InstructionBlock::remap_registers(ValueRemapper& map)
{
   for (const PInstruction& instr : m_block)
      instr->remap_registers(map);
}
|
||||
|
||||
/* Let every instruction stored in this block report its reads and writes
 * to the live range evaluator, in emission order. */
void InstructionBlock::do_evalue_liveness(LiverangeEvaluator& eval) const
{
   for (const PInstruction& instr : m_block)
      instr->evalue_liveness(eval);
}
|
||||
|
||||
bool InstructionBlock::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
assert(lhs.type() == block);
|
||||
auto& l = static_cast<const InstructionBlock&>(lhs);
|
||||
|
||||
if (m_block.size() != l.m_block.size())
|
||||
return false;
|
||||
|
||||
if (m_block_number != l.m_block_number)
|
||||
return false;
|
||||
|
||||
return std::equal(m_block.begin(), m_block.end(), l.m_block.begin(),
|
||||
[](PInstruction ri, PInstruction li) {return *ri == *li;});
|
||||
}
|
||||
|
||||
/* Return the most recently emitted instruction, or a null pointer if the
 * block is empty. */
PInstruction InstructionBlock::last_instruction()
{
   if (m_block.empty())
      return nullptr;

   return m_block.back();
}
|
||||
|
||||
void InstructionBlock::do_print(std::ostream& os) const
|
||||
{
|
||||
std::string space(" ", 2 * m_nesting_depth);
|
||||
for(auto& i: m_block)
|
||||
os << space << *i << "\n";
|
||||
}
|
||||
|
||||
}
|
@ -1,82 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef sfn_instruction_block_h
|
||||
#define sfn_instruction_block_h
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class InstructionBlock : public Instruction
|
||||
{
|
||||
public:
|
||||
InstructionBlock(unsigned nesting_depth, unsigned block_number);
|
||||
|
||||
void emit(PInstruction instr);
|
||||
|
||||
|
||||
std::vector<PInstruction>::const_iterator begin() const {
|
||||
return m_block.begin();
|
||||
}
|
||||
std::vector<PInstruction>::const_iterator end() const {
|
||||
return m_block.end();
|
||||
}
|
||||
|
||||
void remap_registers(ValueRemapper& map);
|
||||
|
||||
size_t size() const {
|
||||
return m_block.size();
|
||||
}
|
||||
|
||||
const PInstruction& operator [] (int i) const {
|
||||
return m_block[i];
|
||||
}
|
||||
|
||||
unsigned number() const {
|
||||
return m_block_number;
|
||||
}
|
||||
|
||||
PInstruction last_instruction();
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
std::vector<PInstruction> m_block;
|
||||
|
||||
unsigned m_block_number;
|
||||
unsigned m_nesting_depth;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // sfn_instruction_block_h
|
@ -1,195 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_instruction_cf.h"
|
||||
#include "sfn_liverange.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
CFInstruction::CFInstruction(instr_type type):Instruction(type)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
IfElseInstruction::IfElseInstruction(instr_type type):
|
||||
CFInstruction (type)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
IfInstruction::IfInstruction(AluInstruction *pred):
|
||||
IfElseInstruction(cond_if),
|
||||
m_pred(pred)
|
||||
{
|
||||
PValue *v = m_pred->psrc(0);
|
||||
add_remappable_src_value(v);
|
||||
pred->set_cf_type(cf_alu_push_before);
|
||||
}
|
||||
|
||||
void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
|
||||
{
|
||||
eval.scope_if();
|
||||
}
|
||||
|
||||
bool IfInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
assert(lhs.type() == cond_if);
|
||||
const IfInstruction& l = static_cast<const IfInstruction&>(lhs);
|
||||
return *l.m_pred == *m_pred;
|
||||
}
|
||||
|
||||
void IfInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "PRED = " << *m_pred << "\n";
|
||||
os << "IF (PRED)";
|
||||
}
|
||||
|
||||
ElseInstruction::ElseInstruction(IfInstruction *jump_src):
|
||||
IfElseInstruction(cond_else),
|
||||
m_jump_src(jump_src)
|
||||
{
|
||||
}
|
||||
|
||||
void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
|
||||
{
|
||||
eval.scope_else();
|
||||
}
|
||||
|
||||
|
||||
bool ElseInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
if (lhs.type() != cond_else)
|
||||
return false;
|
||||
auto& l = static_cast<const ElseInstruction&>(lhs);
|
||||
return (*m_jump_src == *l.m_jump_src);
|
||||
}
|
||||
|
||||
void ElseInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "ELSE";
|
||||
}
|
||||
|
||||
IfElseEndInstruction::IfElseEndInstruction():
|
||||
IfElseInstruction(cond_endif)
|
||||
{
|
||||
}
|
||||
|
||||
void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
|
||||
{
|
||||
eval.scope_endif();
|
||||
}
|
||||
|
||||
bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
if (lhs.type() != cond_endif)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void IfElseEndInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "ENDIF";
|
||||
}
|
||||
|
||||
LoopBeginInstruction::LoopBeginInstruction():
|
||||
CFInstruction(loop_begin)
|
||||
{
|
||||
}
|
||||
|
||||
void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
|
||||
{
|
||||
eval.scope_loop_begin();
|
||||
}
|
||||
|
||||
bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
assert(lhs.type() == loop_begin);
|
||||
return true;
|
||||
}
|
||||
|
||||
void LoopBeginInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "BGNLOOP";
|
||||
}
|
||||
|
||||
LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start):
|
||||
CFInstruction (loop_end),
|
||||
m_start(start)
|
||||
{
|
||||
}
|
||||
|
||||
void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
|
||||
{
|
||||
eval.scope_loop_end();
|
||||
}
|
||||
|
||||
bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
assert(lhs.type() == loop_end);
|
||||
const auto& other = static_cast<const LoopEndInstruction&>(lhs);
|
||||
return *m_start == *other.m_start;
|
||||
}
|
||||
|
||||
void LoopEndInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "ENDLOOP";
|
||||
}
|
||||
|
||||
LoopBreakInstruction::LoopBreakInstruction():
|
||||
CFInstruction (loop_break)
|
||||
{
|
||||
}
|
||||
|
||||
void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
|
||||
{
|
||||
eval.scope_loop_break();
|
||||
}
|
||||
|
||||
bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void LoopBreakInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "BREAK";
|
||||
}
|
||||
|
||||
LoopContInstruction::LoopContInstruction():
|
||||
CFInstruction (loop_continue)
|
||||
{
|
||||
}
|
||||
|
||||
bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
void LoopContInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "CONTINUE";
|
||||
}
|
||||
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_IFELSEINSTRUCTION_H
|
||||
#define SFN_IFELSEINSTRUCTION_H
|
||||
|
||||
#include "sfn_instruction_alu.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class CFInstruction : public Instruction {
|
||||
protected:
|
||||
CFInstruction(instr_type type);
|
||||
};
|
||||
|
||||
class IfElseInstruction : public CFInstruction {
|
||||
public:
|
||||
IfElseInstruction(instr_type type);
|
||||
|
||||
};
|
||||
|
||||
class IfInstruction : public IfElseInstruction {
|
||||
public:
|
||||
IfInstruction(AluInstruction *pred);
|
||||
const AluInstruction& pred() const {return *m_pred;}
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
std::shared_ptr<AluInstruction> m_pred;
|
||||
};
|
||||
|
||||
class ElseInstruction : public IfElseInstruction {
|
||||
public:
|
||||
ElseInstruction(IfInstruction *jump_src);
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
IfElseInstruction *m_jump_src;
|
||||
};
|
||||
|
||||
class IfElseEndInstruction : public IfElseInstruction {
|
||||
public:
|
||||
IfElseEndInstruction();
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
};
|
||||
|
||||
class LoopBeginInstruction: public CFInstruction {
|
||||
public:
|
||||
LoopBeginInstruction();
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
};
|
||||
|
||||
class LoopEndInstruction: public CFInstruction {
|
||||
public:
|
||||
LoopEndInstruction(LoopBeginInstruction *start);
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
LoopBeginInstruction *m_start;
|
||||
};
|
||||
|
||||
class LoopBreakInstruction: public CFInstruction {
|
||||
public:
|
||||
LoopBreakInstruction();
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
};
|
||||
|
||||
class LoopContInstruction: public CFInstruction {
|
||||
public:
|
||||
LoopContInstruction();
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_IFELSEINSTRUCTION_H
|
@ -1,341 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "sfn_instruction_export.h"
|
||||
#include "sfn_liverange.h"
|
||||
#include "sfn_valuepool.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value):
|
||||
Instruction(t),
|
||||
m_value(value)
|
||||
{
|
||||
add_remappable_src_value(&m_value);
|
||||
}
|
||||
|
||||
void WriteoutInstruction::replace_values(const ValueSet& candidates, PValue new_value)
|
||||
{
|
||||
// I wonder whether we can actually end up here ...
|
||||
for (auto c: candidates) {
|
||||
if (*c == *m_value.reg_i(c->chan()))
|
||||
m_value.set_reg_i(c->chan(), new_value);
|
||||
}
|
||||
|
||||
replace_values_child(candidates, new_value);
|
||||
}
|
||||
|
||||
void WriteoutInstruction::replace_values_child(UNUSED const ValueSet& candidates,
|
||||
UNUSED PValue new_value)
|
||||
{
|
||||
}
|
||||
|
||||
void WriteoutInstruction::remap_registers_child(UNUSED std::vector<rename_reg_pair>& map,
|
||||
UNUSED ValueMap& values)
|
||||
{
|
||||
}
|
||||
|
||||
ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type):
|
||||
WriteoutInstruction(Instruction::exprt, value),
|
||||
m_type(type),
|
||||
m_loc(loc),
|
||||
m_is_last(false)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool ExportInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
assert(lhs.type() == exprt);
|
||||
const auto& oth = static_cast<const ExportInstruction&>(lhs);
|
||||
|
||||
return (gpr() == oth.gpr()) &&
|
||||
(m_type == oth.m_type) &&
|
||||
(m_loc == oth.m_loc) &&
|
||||
(m_is_last == oth.m_is_last);
|
||||
}
|
||||
|
||||
void ExportInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << (m_is_last ? "EXPORT_DONE ":"EXPORT ");
|
||||
switch (m_type) {
|
||||
case et_pixel: os << "PIXEL "; break;
|
||||
case et_pos: os << "POS "; break;
|
||||
case et_param: os << "PARAM "; break;
|
||||
}
|
||||
os << m_loc << " " << gpr();
|
||||
}
|
||||
|
||||
void ExportInstruction::update_output_map(OutputRegisterMap& map) const
|
||||
{
|
||||
map[m_loc] = gpr_ptr();
|
||||
}
|
||||
|
||||
void ExportInstruction::set_last()
|
||||
{
|
||||
m_is_last = true;
|
||||
}
|
||||
|
||||
WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value,
|
||||
int align, int align_offset, int writemask):
|
||||
WriteoutInstruction (Instruction::mem_wr_scratch, value),
|
||||
m_loc(loc),
|
||||
m_align(align),
|
||||
m_align_offset(align_offset),
|
||||
m_writemask(writemask),
|
||||
m_array_size(0)
|
||||
{
|
||||
}
|
||||
|
||||
WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value,
|
||||
int align, int align_offset, int writemask, int array_size):
|
||||
WriteoutInstruction (Instruction::mem_wr_scratch, value),
|
||||
m_loc(0),
|
||||
m_address(address),
|
||||
m_align(align),
|
||||
m_align_offset(align_offset),
|
||||
m_writemask(writemask),
|
||||
m_array_size(array_size - 1)
|
||||
{
|
||||
add_remappable_src_value(&m_address);
|
||||
}
|
||||
|
||||
bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
if (lhs.type() != Instruction::mem_wr_scratch)
|
||||
return false;
|
||||
const auto& other = static_cast<const WriteScratchInstruction&>(lhs);
|
||||
|
||||
if (m_address) {
|
||||
if (!other.m_address)
|
||||
return false;
|
||||
if (*m_address != *other.m_address)
|
||||
return false;
|
||||
} else {
|
||||
if (other.m_address)
|
||||
return false;
|
||||
}
|
||||
|
||||
return gpr() == other.gpr() &&
|
||||
m_loc == other.m_loc &&
|
||||
m_align == other.m_align &&
|
||||
m_align_offset == other.m_align_offset &&
|
||||
m_writemask == other.m_writemask;
|
||||
}
|
||||
|
||||
/* Convert the low four bits of a write mask into a swizzle string.
 *
 * For bit i (0..3) that is set, the i-th character is the matching letter
 * of "xyzw"; otherwise it is '_'.
 *
 * @param writemask  component mask, only bits 0..3 are evaluated
 * @param buf        output buffer, must provide room for at least 5 chars
 * @return buf, holding a NUL-terminated string of four characters
 */
static char *writemask_to_swizzle(int writemask, char *buf)
{
   const char *swz = "xyzw";
   for (int i = 0; i < 4; ++i) {
      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
   }
   /* The result is streamed as a C string, so it has to be terminated. */
   buf[4] = '\0';
   return buf;
}
|
||||
|
||||
void WriteScratchInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
char buf[5];
|
||||
|
||||
os << "MEM_SCRATCH_WRITE ";
|
||||
if (m_address)
|
||||
os << "@" << *m_address << "+";
|
||||
|
||||
os << m_loc << "." << writemask_to_swizzle(m_writemask, buf)
|
||||
<< " " << gpr() << " AL:" << m_align << " ALO:" << m_align_offset;
|
||||
}
|
||||
|
||||
void WriteScratchInstruction::replace_values_child(const ValueSet& candidates, PValue new_value)
|
||||
{
|
||||
if (!m_address)
|
||||
return;
|
||||
|
||||
for (auto c: candidates) {
|
||||
if (*c == *m_address)
|
||||
m_address = new_value;
|
||||
}
|
||||
}
|
||||
|
||||
void WriteScratchInstruction::remap_registers_child(std::vector<rename_reg_pair>& map,
|
||||
ValueMap& values)
|
||||
{
|
||||
if (!m_address)
|
||||
return;
|
||||
sfn_log << SfnLog::merge << "Remap " << *m_address << " of type " << m_address->type() << "\n";
|
||||
assert(m_address->type() == Value::gpr);
|
||||
auto new_index = map[m_address->sel()];
|
||||
if (new_index.valid)
|
||||
m_address = values.get_or_inject(new_index.new_reg, m_address->chan());
|
||||
map[m_address->sel()].used = true;
|
||||
}
|
||||
|
||||
StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
|
||||
int array_base, int comp_mask, int out_buffer,
|
||||
int stream):
|
||||
WriteoutInstruction(Instruction::streamout, value),
|
||||
m_element_size(num_components == 3 ? 3 : num_components - 1),
|
||||
m_burst_count(1),
|
||||
m_array_base(array_base),
|
||||
m_array_size(0xfff),
|
||||
m_writemask(comp_mask),
|
||||
m_output_buffer(out_buffer),
|
||||
m_stream(stream)
|
||||
{
|
||||
}
|
||||
|
||||
unsigned StreamOutIntruction::op() const
|
||||
{
|
||||
int op = 0;
|
||||
switch (m_output_buffer) {
|
||||
case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
|
||||
case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
|
||||
case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
|
||||
case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
|
||||
}
|
||||
return 4 * m_stream + op;
|
||||
}
|
||||
|
||||
bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
assert(lhs.type() == streamout);
|
||||
const auto& oth = static_cast<const StreamOutIntruction&>(lhs);
|
||||
|
||||
return gpr() == oth.gpr() &&
|
||||
m_element_size == oth.m_element_size &&
|
||||
m_burst_count == oth.m_burst_count &&
|
||||
m_array_base == oth.m_array_base &&
|
||||
m_array_size == oth.m_array_size &&
|
||||
m_writemask == oth.m_writemask &&
|
||||
m_output_buffer == oth.m_output_buffer &&
|
||||
m_stream == oth.m_stream;
|
||||
}
|
||||
|
||||
void StreamOutIntruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "WRITE STREAM(" << m_stream << ") " << gpr()
|
||||
<< " ES:" << m_element_size
|
||||
<< " BC:" << m_burst_count
|
||||
<< " BUF:" << m_output_buffer
|
||||
<< " ARRAY:" << m_array_base;
|
||||
if (m_array_size != 0xfff)
|
||||
os << "+" << m_array_size;
|
||||
}
|
||||
|
||||
MemRingOutIntruction::MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
|
||||
const GPRVector& value,
|
||||
unsigned base_addr, unsigned ncomp,
|
||||
PValue index):
|
||||
WriteoutInstruction(Instruction::ring, value),
|
||||
m_ring_op(ring),
|
||||
m_type(type),
|
||||
m_base_address(base_addr),
|
||||
m_num_comp(ncomp),
|
||||
m_index(index)
|
||||
{
|
||||
add_remappable_src_value(&m_index);
|
||||
|
||||
assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1||
|
||||
m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3);
|
||||
assert(m_num_comp <= 4);
|
||||
}
|
||||
|
||||
unsigned MemRingOutIntruction::ncomp() const
|
||||
{
|
||||
switch (m_num_comp) {
|
||||
case 1: return 0;
|
||||
case 2: return 1;
|
||||
case 3:
|
||||
case 4: return 3;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return 3;
|
||||
}
|
||||
|
||||
bool MemRingOutIntruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
assert(lhs.type() == streamout);
|
||||
const auto& oth = static_cast<const MemRingOutIntruction&>(lhs);
|
||||
|
||||
bool equal = gpr() == oth.gpr() &&
|
||||
m_ring_op == oth.m_ring_op &&
|
||||
m_type == oth.m_type &&
|
||||
m_num_comp == oth.m_num_comp &&
|
||||
m_base_address == oth.m_base_address;
|
||||
|
||||
if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
|
||||
equal &= (*m_index == *oth.m_index);
|
||||
return equal;
|
||||
|
||||
}
|
||||
|
||||
static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
|
||||
void MemRingOutIntruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "MEM_RING " << m_ring_op;
|
||||
os << " " << write_type_str[m_type] << " " << m_base_address;
|
||||
os << " " << gpr();
|
||||
if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
|
||||
os << " @" << *m_index;
|
||||
os << " ES:" << m_num_comp;
|
||||
}
|
||||
|
||||
|
||||
void MemRingOutIntruction::replace_values_child(const ValueSet& candidates,
|
||||
PValue new_value)
|
||||
{
|
||||
if (!m_index)
|
||||
return;
|
||||
|
||||
for (auto c: candidates) {
|
||||
if (*c == *m_index)
|
||||
m_index = new_value;
|
||||
}
|
||||
}
|
||||
|
||||
void MemRingOutIntruction::remap_registers_child(std::vector<rename_reg_pair>& map,
|
||||
ValueMap& values)
|
||||
{
|
||||
if (!m_index)
|
||||
return;
|
||||
|
||||
assert(m_index->type() == Value::gpr);
|
||||
auto new_index = map[m_index->sel()];
|
||||
if (new_index.valid)
|
||||
m_index = values.get_or_inject(new_index.new_reg, m_index->chan());
|
||||
map[m_index->sel()].used = true;
|
||||
}
|
||||
|
||||
void MemRingOutIntruction::patch_ring(int stream, PValue index)
|
||||
{
|
||||
const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
|
||||
|
||||
assert(stream < 4);
|
||||
m_ring_op = ring_op[stream];
|
||||
m_index = index;
|
||||
}
|
||||
|
||||
}
|
@ -1,185 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_EXPORTINSTRUCTION_H
|
||||
#define SFN_EXPORTINSTRUCTION_H
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class WriteoutInstruction: public Instruction {
|
||||
public:
|
||||
void replace_values(const ValueSet& candidates, PValue new_value) override;
|
||||
const GPRVector& gpr() const {return m_value;}
|
||||
const GPRVector *gpr_ptr() const {return &m_value;}
|
||||
protected:
|
||||
WriteoutInstruction(instr_type t, const GPRVector& value);
|
||||
private:
|
||||
virtual void replace_values_child(const ValueSet& candidates, PValue new_value);
|
||||
virtual void remap_registers_child(std::vector<rename_reg_pair>& map,
|
||||
ValueMap& values);
|
||||
|
||||
GPRVector m_value;
|
||||
};
|
||||
|
||||
class ExportInstruction : public WriteoutInstruction {
|
||||
public:
|
||||
enum ExportType {
|
||||
et_pixel,
|
||||
et_pos,
|
||||
et_param
|
||||
};
|
||||
|
||||
ExportInstruction(unsigned loc, const GPRVector& value, ExportType type);
|
||||
void set_last();
|
||||
|
||||
ExportType export_type() const {return m_type;}
|
||||
|
||||
unsigned location() const {return m_loc;}
|
||||
bool is_last_export() const {return m_is_last;}
|
||||
|
||||
void update_output_map(OutputRegisterMap& map) const;
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
|
||||
private:
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ExportType m_type;
|
||||
unsigned m_loc;
|
||||
bool m_is_last;
|
||||
};
|
||||
|
||||
class WriteScratchInstruction : public WriteoutInstruction {
|
||||
public:
|
||||
|
||||
WriteScratchInstruction(unsigned loc, const GPRVector& value, int align,
|
||||
int align_offset, int writemask);
|
||||
WriteScratchInstruction(const PValue& address, const GPRVector& value,
|
||||
int align, int align_offset, int writemask, int array_size);
|
||||
unsigned location() const {return m_loc;}
|
||||
|
||||
int write_mask() const { return m_writemask;}
|
||||
int address() const { assert(m_address); return m_address->sel();}
|
||||
bool indirect() const { return !!m_address;}
|
||||
int array_size() const { return m_array_size;}
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
void replace_values_child(const ValueSet& candidates, PValue new_value) override;
|
||||
void remap_registers_child(std::vector<rename_reg_pair>& map,
|
||||
ValueMap& values)override;
|
||||
|
||||
unsigned m_loc;
|
||||
PValue m_address;
|
||||
unsigned m_align;
|
||||
unsigned m_align_offset;
|
||||
unsigned m_writemask;
|
||||
int m_array_size;
|
||||
};
|
||||
|
||||
|
||||
class StreamOutIntruction: public WriteoutInstruction {
|
||||
public:
|
||||
StreamOutIntruction(const GPRVector& value, int num_components,
|
||||
int array_base, int comp_mask, int out_buffer,
|
||||
int stream);
|
||||
int element_size() const { return m_element_size;}
|
||||
int burst_count() const { return m_burst_count;}
|
||||
int array_base() const { return m_array_base;}
|
||||
int array_size() const { return m_array_size;}
|
||||
int comp_mask() const { return m_writemask;}
|
||||
unsigned op() const;
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
int m_element_size;
|
||||
int m_burst_count;
|
||||
int m_array_base;
|
||||
int m_array_size;
|
||||
int m_writemask;
|
||||
int m_output_buffer;
|
||||
int m_stream;
|
||||
};
|
||||
|
||||
/* Memory write types. Bit 0 selects the indexed variant and bit 1 the
 * variant with acknowledge, as the numeric values below show.
 */
enum EMemWriteType {
   mem_write         = 0,   /* plain write */
   mem_write_ind     = 1,   /* indexed write */
   mem_write_ack     = 2,   /* write with ack */
   mem_write_ind_ack = 3    /* indexed write with ack */
};
|
||||
|
||||
class MemRingOutIntruction: public WriteoutInstruction {
|
||||
public:
|
||||
|
||||
MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
|
||||
const GPRVector& value, unsigned base_addr,
|
||||
unsigned ncomp, PValue m_index);
|
||||
|
||||
unsigned op() const{return m_ring_op;}
|
||||
unsigned ncomp() const;
|
||||
unsigned addr() const {return m_base_address;}
|
||||
EMemWriteType type() const {return m_type;}
|
||||
unsigned index_reg() const {return m_index->sel();}
|
||||
unsigned array_base() const {return m_base_address; }
|
||||
void replace_values_child(const ValueSet& candidates, PValue new_value) override;
|
||||
void remap_registers_child(std::vector<rename_reg_pair>& map,
|
||||
ValueMap& values) override;
|
||||
void patch_ring(int stream, PValue index);
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ECFOpCode m_ring_op;
|
||||
EMemWriteType m_type;
|
||||
unsigned m_base_address;
|
||||
unsigned m_num_comp;
|
||||
PValue m_index;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif // SFN_EXPORTINSTRUCTION_H
|
@ -1,480 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_instruction_fetch.h"
|
||||
|
||||
#include "gallium/drivers/r600/r600_pipe.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* refactor this to add static create methods for specific tasks */
|
||||
/* Generic fetch with mega-fetch enabled (count 16) and identity swizzle.
 *
 * With use_const_field set, the "use const field" flag is raised and the
 * format is left invalid/norm. Otherwise the fetch is flagged as signed and
 * uses the 32_32_32_32 float format with scaled numbers.
 */
FetchInstruction::FetchInstruction(EVFetchInstr op,
                                   EVFetchType type,
                                   GPRVector dst,
                                   PValue src, int offset,
                                   int buffer_id, PValue buffer_offset,
                                   EBufferIndexMode cp_rel,
                                   bool use_const_field):
   Instruction(vtx),
   m_vc_opcode(op), m_fetch_type(type),
   m_endian_swap(vtx_es_none),
   m_src(src), m_dst(dst), m_offset(offset),
   m_is_mega_fetch(1), m_mega_fetch_count(16),
   m_buffer_id(buffer_id), m_semantic_id(0),
   m_buffer_index_mode(cp_rel),
   m_flags(0),
   m_uncached(false), m_indexed(false),
   m_array_base(0), m_array_size(0), m_elm_size(0),
   m_buffer_offset(buffer_offset),
   m_dest_swizzle({0,1,2,3})
{
   if (!use_const_field) {
      m_flags.set(vtx_format_comp_signed);
      m_data_format = fmt_32_32_32_32_float;
      m_num_format = vtx_nf_scaled;
   } else {
      m_flags.set(vtx_use_const_field);
      m_data_format = fmt_invalid;
      m_num_format = vtx_nf_norm;
   }

   add_remappable_src_value(&m_src);
   add_remappable_src_value(&m_buffer_offset);

   add_remappable_dst_value(&m_dst);
}
|
||||
|
||||
/* Fully parameterized constructor: every field is supplied by the caller */
|
||||
/* Fetch with every field given explicitly by the caller. No flags are set
 * here; source, destination and buffer offset are registered as remappable.
 */
FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode,
                                   EVFetchType fetch_type,
                                   EVTXDataFormat data_format,
                                   EVFetchNumFormat num_format,
                                   EVFetchEndianSwap endian_swap,
                                   const PValue src,
                                   const GPRVector dst,
                                   uint32_t offset,
                                   bool is_mega_fetch,
                                   uint32_t mega_fetch_count,
                                   uint32_t buffer_id,
                                   uint32_t semantic_id,

                                   EBufferIndexMode buffer_index_mode,
                                   bool uncached,
                                   bool indexed,
                                   int array_base,
                                   int array_size,
                                   int elm_size,
                                   PValue buffer_offset,
                                   const std::array<int, 4>& dest_swizzle):
   Instruction(vtx),
   m_vc_opcode(vc_opcode), m_fetch_type(fetch_type),
   m_data_format(data_format), m_num_format(num_format),
   m_endian_swap(endian_swap),
   m_src(src), m_dst(dst), m_offset(offset),
   m_is_mega_fetch(is_mega_fetch), m_mega_fetch_count(mega_fetch_count),
   m_buffer_id(buffer_id), m_semantic_id(semantic_id),
   m_buffer_index_mode(buffer_index_mode),
   m_uncached(uncached), m_indexed(indexed),
   m_array_base(array_base), m_array_size(array_size), m_elm_size(elm_size),
   m_buffer_offset(buffer_offset),
   m_dest_swizzle(dest_swizzle)
{
   add_remappable_src_value(&m_src);
   add_remappable_dst_value(&m_dst);
   add_remappable_src_value(&m_buffer_offset);
}
|
||||
|
||||
/* Plain vc_fetch from a buffer with a caller-chosen data and number format.
 * Mega-fetch is disabled, the element size is 1 and the fetch is flagged as
 * signed.
 */
FetchInstruction::FetchInstruction(GPRVector dst,
                                   PValue src,
                                   int buffer_id, PValue buffer_offset,
                                   EVTXDataFormat format,
                                   EVFetchNumFormat num_format):
   Instruction(vtx),
   m_vc_opcode(vc_fetch), m_fetch_type(no_index_offset),
   m_data_format(format), m_num_format(num_format),
   m_endian_swap(vtx_es_none),
   m_src(src), m_dst(dst), m_offset(0),
   m_is_mega_fetch(0), m_mega_fetch_count(0),
   m_buffer_id(buffer_id), m_semantic_id(0),
   m_buffer_index_mode(bim_none),
   m_flags(0),
   m_uncached(false), m_indexed(false),
   m_array_base(0), m_array_size(0), m_elm_size(1),
   m_buffer_offset(buffer_offset),
   m_dest_swizzle({0,1,2,3})
{
   m_flags.set(vtx_format_comp_signed);

   add_remappable_src_value(&m_src);
   add_remappable_dst_value(&m_dst);
   add_remappable_src_value(&m_buffer_offset);
}
|
||||
|
||||
|
||||
/* Resource query */
|
||||
/* Buffer resource-info query (vc_get_buf_resinfo). The buffer offset is left
 * default-constructed; the fetch is flagged as signed.
 */
FetchInstruction::FetchInstruction(GPRVector dst,
                                   PValue src,
                                   int buffer_id,
                                   EBufferIndexMode cp_rel):
   Instruction(vtx),
   m_vc_opcode(vc_get_buf_resinfo), m_fetch_type(no_index_offset),
   m_data_format(fmt_32_32_32_32), m_num_format(vtx_nf_norm),
   m_endian_swap(vtx_es_none),
   m_src(src), m_dst(dst), m_offset(0),
   m_is_mega_fetch(0), m_mega_fetch_count(16),
   m_buffer_id(buffer_id), m_semantic_id(0),
   m_buffer_index_mode(cp_rel),
   m_flags(0),
   m_uncached(false), m_indexed(false),
   m_array_base(0), m_array_size(0), m_elm_size(0),
   m_dest_swizzle({0,1,2,3})
{
   m_flags.set(vtx_format_comp_signed);

   add_remappable_src_value(&m_src);
   add_remappable_dst_value(&m_dst);
   add_remappable_src_value(&m_buffer_offset);
}
|
||||
|
||||
/* Scratch read (vc_read_scratch).
 *
 * A literal src is used directly as the array base: the read is then not
 * indexed and the source register is replaced by GPR 0, channel 0. Any other
 * src makes the read indexed, with the array size set to scratch_size - 1.
 */
FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size):
   Instruction(vtx),
   m_vc_opcode(vc_read_scratch), m_fetch_type(vertex_data),
   m_data_format(fmt_32_32_32_32), m_num_format(vtx_nf_int),
   m_endian_swap(vtx_es_none),
   m_dst(dst), m_offset(0),
   m_is_mega_fetch(0), m_mega_fetch_count(16),
   m_buffer_id(0), m_semantic_id(0),
   m_buffer_index_mode(bim_none),
   m_flags(0),
   m_uncached(true),
   m_array_base(0), m_array_size(0), m_elm_size(3),
   m_dest_swizzle({0,1,2,3})
{
   if (src->type() != Value::literal) {
      m_array_base = 0;
      m_src = src;
      m_indexed = true;
      m_array_size = scratch_size - 1;
   } else {
      const auto& literal = static_cast<const LiteralValue&>(*src);
      m_array_base = literal.value();
      m_indexed = false;
      m_src.reset(new GPRValue(0,0));
      m_array_size = 0;
   }

   add_remappable_src_value(&m_src);
   add_remappable_dst_value(&m_dst);
   add_remappable_src_value(&m_buffer_offset);
}
|
||||
|
||||
void FetchInstruction::replace_values(const ValueSet& candidates, PValue new_value)
|
||||
{
|
||||
if (!m_src)
|
||||
return;
|
||||
for (auto c: candidates) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (*c == *m_dst.reg_i(i))
|
||||
m_dst.set_reg_i(i, new_value);
|
||||
}
|
||||
if (*m_src == *c)
|
||||
m_src = new_value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool FetchInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
auto& l = static_cast<const FetchInstruction&>(lhs);
|
||||
if (m_src) {
|
||||
if (!l.m_src)
|
||||
return false;
|
||||
if (*m_src != *l.m_src)
|
||||
return false;
|
||||
} else {
|
||||
if (l.m_src)
|
||||
return false;
|
||||
}
|
||||
|
||||
return m_vc_opcode == l.m_vc_opcode &&
|
||||
m_fetch_type == l.m_fetch_type &&
|
||||
m_data_format == l.m_data_format &&
|
||||
m_num_format == l.m_num_format &&
|
||||
m_endian_swap == l.m_endian_swap &&
|
||||
m_dst == l.m_dst &&
|
||||
m_offset == l.m_offset &&
|
||||
m_buffer_id == l.m_buffer_id &&
|
||||
m_semantic_id == l.m_semantic_id &&
|
||||
m_buffer_index_mode == l.m_buffer_index_mode &&
|
||||
m_flags == l.m_flags &&
|
||||
m_indexed == l.m_indexed &&
|
||||
m_uncached == l.m_uncached;
|
||||
}
|
||||
|
||||
/* Override the data format of the fetch. */
void FetchInstruction::set_format(EVTXDataFormat fmt)
{
   this->m_data_format = fmt;
}
|
||||
|
||||
|
||||
void FetchInstruction::set_dest_swizzle(const std::array<int,4>& swz)
|
||||
{
|
||||
m_dest_swizzle = swz;
|
||||
}
|
||||
|
||||
/* Append an instruction to the prelude list. instr must not be null; it is
 * wrapped into a PInstruction before being stored.
 */
void FetchInstruction::prelude_append(Instruction *instr)
{
   assert(instr);

   PInstruction wrapped(instr);
   m_prelude.push_back(wrapped);
}
|
||||
|
||||
const std::vector<PInstruction>& FetchInstruction::prelude() const
|
||||
{
|
||||
return m_prelude;
|
||||
}
|
||||
|
||||
/* Forwards all arguments unchanged to the scratch-read FetchInstruction
 * constructor.
 */
LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size):
   FetchInstruction(dst, src, scratch_size)
{
   /* nothing else to set up */
}
|
||||
|
||||
/* Single 32-bit integer vc_fetch from the immediate image resource slot;
 * only the first destination component is written (swizzle {0,7,7,7}).
 * The SRF-mode and VPM flags are set.
 */
FetchGDSOpResult::FetchGDSOpResult(const GPRVector dst, const PValue src):
   FetchInstruction(vc_fetch,                          /* vc_opcode */
                    no_index_offset,                   /* fetch_type */
                    fmt_32,                            /* data_format */
                    vtx_nf_int,                        /* num_format */
                    vtx_es_none,                       /* endian_swap */
                    src,
                    dst,
                    0,                                 /* offset */
                    false,                             /* is_mega_fetch */
                    0xf,                               /* mega_fetch_count */
                    R600_IMAGE_IMMED_RESOURCE_OFFSET,  /* buffer_id */
                    0,                                 /* semantic_id */
                    bim_none,                          /* buffer_index_mode */
                    false,                             /* uncached */
                    false,                             /* indexed */
                    0,                                 /* array_base */
                    0,                                 /* array_size */
                    0,                                 /* elm_size */
                    PValue(),                          /* buffer_offset */
                    {0,7,7,7})                         /* dest_swizzle */
{
   set_flag(vtx_srf_mode);
   set_flag(vtx_vpm);
}
|
||||
|
||||
/* Four-component scaled vc_fetch from the LDS info constant buffer at the
 * given offset, with identity swizzle. The SRF-mode and signed flags are set.
 */
FetchTCSIOParam::FetchTCSIOParam(GPRVector dst, PValue src, int offset):
   FetchInstruction(vc_fetch,                    /* vc_opcode */
                    no_index_offset,             /* fetch_type */
                    fmt_32_32_32_32,             /* data_format */
                    vtx_nf_scaled,               /* num_format */
                    vtx_es_none,                 /* endian_swap */
                    src,
                    dst,
                    offset,
                    false,                       /* is_mega_fetch */
                    16,                          /* mega_fetch_count */
                    R600_LDS_INFO_CONST_BUFFER,  /* buffer_id */
                    0,                           /* semantic_id */
                    bim_none,                    /* buffer_index_mode */
                    false,                       /* uncached */
                    false,                       /* indexed */
                    0,                           /* array_base */
                    0,                           /* array_size */
                    0,                           /* elm_size */
                    PValue(),                    /* buffer_offset */
                    {0,1,2,3})                   /* dest_swizzle */
{
   set_flag(vtx_srf_mode);
   set_flag(vtx_format_comp_signed);
}
|
||||
|
||||
|
||||
/* Printable names of the 64 data formats, indexed by the numeric format
 * value. The leading comment on each row gives the index of its first entry.
 */
static const char *fmt_descr[64] = {
   /*  0 */ "INVALID", "8", "4_4", "3_3_2",
   /*  4 */ "RESERVED_4", "16", "16F", "8_8",
   /*  8 */ "5_6_5", "6_5_5", "1_5_5_5", "4_4_4_4",
   /* 12 */ "5_5_5_1", "32", "32F", "16_16",
   /* 16 */ "16_16F", "8_24", "8_24F", "24_8",
   /* 20 */ "24_8F", "10_11_11", "10_11_11F", "11_11_10",
   /* 24 */ "11_11_10F", "2_10_10_10", "8_8_8_8", "10_10_10_2",
   /* 28 */ "X24_8_32F", "32_32", "32_32F", "16_16_16_16",
   /* 32 */ "16_16_16_16F", "RESERVED_33", "32_32_32_32", "32_32_32_32F",
   /* 36 */ "RESERVED_36", "1", "1_REVERSED", "GB_GR",
   /* 40 */ "BG_RG", "32_AS_8", "32_AS_8_8", "5_9_9_9_SHAREDEXP",
   /* 44 */ "8_8_8", "16_16_16", "16_16_16F", "32_32_32",
   /* 48 */ "32_32_32F", "BC1", "BC2", "BC3",
   /* 52 */ "BC4", "BC5", "APC0", "APC1",
   /* 56 */ "APC2", "APC3", "APC4", "APC5",
   /* 60 */ "APC6", "APC7", "CTX1", "RESERVED_63"
};
|
||||
|
||||
|
||||
void FetchInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
static const std::string num_format_char[] = {"norm", "int", "scaled"};
|
||||
static const std::string endian_swap_code[] = {
|
||||
"noswap", "8in16", "8in32"
|
||||
};
|
||||
static const char buffer_index_mode_char[] = "_01E";
|
||||
static const char *flag_string[] = {"WQM", "CF", "signed", "no_zero",
|
||||
"nostride", "AC", "TC", "VPM"};
|
||||
switch (m_vc_opcode) {
|
||||
case vc_fetch:
|
||||
os << "Fetch " << m_dst;
|
||||
break;
|
||||
case vc_semantic:
|
||||
os << "Fetch Semantic ID:" << m_semantic_id;
|
||||
break;
|
||||
case vc_get_buf_resinfo:
|
||||
os << "Fetch BufResinfo:" << m_dst;
|
||||
break;
|
||||
case vc_read_scratch:
|
||||
os << "MEM_READ_SCRATCH:" << m_dst;
|
||||
break;
|
||||
default:
|
||||
os << "Fetch ERROR";
|
||||
return;
|
||||
}
|
||||
|
||||
os << ", " << *m_src;
|
||||
|
||||
if (m_offset)
|
||||
os << "+" << m_offset;
|
||||
|
||||
os << " BUFID:" << m_buffer_id
|
||||
<< " FMT:(" << fmt_descr[m_data_format]
|
||||
<< " " << num_format_char[m_num_format]
|
||||
<< " " << endian_swap_code[m_endian_swap]
|
||||
<< ")";
|
||||
if (m_buffer_index_mode > 0)
|
||||
os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode];
|
||||
|
||||
|
||||
if (m_is_mega_fetch)
|
||||
os << " MFC:" << m_mega_fetch_count;
|
||||
else
|
||||
os << " mfc*:" << m_mega_fetch_count;
|
||||
|
||||
if (m_flags.any()) {
|
||||
os << " Flags:";
|
||||
for( int i = 0; i < vtx_unknown; ++i) {
|
||||
if (m_flags.test(i))
|
||||
os << ' ' << flag_string[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,187 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_INSTRUCTION_FETCH_H
|
||||
#define SFN_INSTRUCTION_FETCH_H
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class FetchInstruction : public Instruction {
|
||||
public:
|
||||
|
||||
FetchInstruction(EVFetchInstr vc_opcode,
|
||||
EVFetchType fetch_type,
|
||||
EVTXDataFormat data_format,
|
||||
EVFetchNumFormat num_format,
|
||||
EVFetchEndianSwap endian_swap,
|
||||
const PValue src,
|
||||
const GPRVector dst,
|
||||
uint32_t offset,
|
||||
bool is_mega_fetch,
|
||||
uint32_t mega_fetch_count,
|
||||
uint32_t buffer_id,
|
||||
uint32_t semantic_id,
|
||||
|
||||
EBufferIndexMode buffer_index_mode,
|
||||
bool uncached,
|
||||
bool indexed,
|
||||
int array_base,
|
||||
int array_size,
|
||||
int elm_size,
|
||||
PValue buffer_offset,
|
||||
const std::array<int, 4>& dest_swizzle);
|
||||
|
||||
FetchInstruction(EVFetchInstr op,
|
||||
EVFetchType type,
|
||||
GPRVector dst,
|
||||
PValue src, int offset,
|
||||
int buffer_id, PValue buffer_offset,
|
||||
EBufferIndexMode cp_rel,
|
||||
bool use_const_field = false);
|
||||
|
||||
FetchInstruction(GPRVector dst,
|
||||
PValue src,
|
||||
int buffer_id,
|
||||
PValue buffer_offset,
|
||||
EVTXDataFormat format,
|
||||
EVFetchNumFormat num_format);
|
||||
|
||||
FetchInstruction(GPRVector dst,
|
||||
PValue src,
|
||||
int buffer_id,
|
||||
EBufferIndexMode cp_rel);
|
||||
|
||||
FetchInstruction(GPRVector dst, PValue src, int scratch_size);
|
||||
|
||||
void replace_values(const ValueSet& candidates, PValue new_value) override;
|
||||
EVFetchInstr vc_opcode() const { return m_vc_opcode;}
|
||||
EVFetchType fetch_type() const { return m_fetch_type;}
|
||||
|
||||
EVTXDataFormat data_format() const { return m_data_format;}
|
||||
EVFetchNumFormat num_format() const { return m_num_format;}
|
||||
EVFetchEndianSwap endian_swap() const { return m_endian_swap;}
|
||||
|
||||
const Value& src() const { return *m_src;}
|
||||
const GPRVector& dst() const { return m_dst;}
|
||||
uint32_t offset() const { return m_offset;}
|
||||
|
||||
bool is_mega_fetchconst() { return m_is_mega_fetch;}
|
||||
uint32_t mega_fetch_count() const { return m_mega_fetch_count;}
|
||||
|
||||
uint32_t buffer_id() const { return m_buffer_id;}
|
||||
uint32_t semantic_id() const { return m_semantic_id;}
|
||||
EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;}
|
||||
|
||||
bool is_signed() const { return m_flags.test(vtx_format_comp_signed);}
|
||||
bool use_const_fields() const { return m_flags.test(vtx_use_const_field);}
|
||||
|
||||
bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);}
|
||||
|
||||
void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);}
|
||||
|
||||
bool uncached() const {return m_uncached; }
|
||||
bool indexed() const {return m_indexed; }
|
||||
int array_base()const {return m_array_base; }
|
||||
int array_size() const {return m_array_size; }
|
||||
int elm_size() const {return m_elm_size; }
|
||||
|
||||
void set_buffer_offset(PValue buffer_offset) {
|
||||
m_buffer_offset = buffer_offset;
|
||||
add_remappable_src_value(&m_buffer_offset);
|
||||
}
|
||||
PValue buffer_offset() const { return m_buffer_offset; }
|
||||
|
||||
void set_dest_swizzle(const std::array<int,4>& swz);
|
||||
void set_format(EVTXDataFormat fmt);
|
||||
|
||||
int swz(int idx) const { return m_dest_swizzle[idx];}
|
||||
|
||||
bool use_tc() const {return m_flags.test(vtx_use_tc);}
|
||||
|
||||
bool use_vpm() const {return m_flags.test(vtx_vpm);}
|
||||
|
||||
void prelude_append(Instruction *instr);
|
||||
|
||||
const std::vector<PInstruction>& prelude() const;
|
||||
|
||||
bool has_prelude() const {return !m_prelude.empty();}
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
EVFetchInstr m_vc_opcode;
|
||||
EVFetchType m_fetch_type;
|
||||
|
||||
EVTXDataFormat m_data_format;
|
||||
EVFetchNumFormat m_num_format;
|
||||
EVFetchEndianSwap m_endian_swap;
|
||||
|
||||
PValue m_src;
|
||||
GPRVector m_dst;
|
||||
uint32_t m_offset;
|
||||
|
||||
bool m_is_mega_fetch;
|
||||
uint32_t m_mega_fetch_count;
|
||||
|
||||
uint32_t m_buffer_id;
|
||||
uint32_t m_semantic_id;
|
||||
|
||||
EBufferIndexMode m_buffer_index_mode;
|
||||
std::bitset<16> m_flags;
|
||||
bool m_uncached;
|
||||
bool m_indexed;
|
||||
int m_array_base;
|
||||
int m_array_size;
|
||||
int m_elm_size;
|
||||
PValue m_buffer_offset;
|
||||
std::array<int, 4> m_dest_swizzle;
|
||||
std::vector<PInstruction> m_prelude;
|
||||
};
|
||||
|
||||
class LoadFromScratch: public FetchInstruction {
|
||||
public:
|
||||
LoadFromScratch(GPRVector dst, PValue src, int scratch_size);
|
||||
};
|
||||
|
||||
class FetchGDSOpResult : public FetchInstruction {
|
||||
public:
|
||||
FetchGDSOpResult(const GPRVector dst, const PValue src);
|
||||
};
|
||||
|
||||
class FetchTCSIOParam : public FetchInstruction {
|
||||
public:
|
||||
FetchTCSIOParam(GPRVector dst, PValue src, int offset);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_INSTRUCTION_FETCH_H
|
@ -1,180 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_instruction_gds.h"
|
||||
#include "sfn_liverange.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* GDS operation with two value operands.
 *
 * Both values and the UAV id are registered as remappable sources and dest
 * as remappable destination. The first destination swizzle entry is then
 * set to the channel of dest's first component.
 */
GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
                   const PValue& value2, const PValue& uav_id, int uav_base):
   Instruction(gds),
   m_op(op),
   m_src(value), m_src2(value2),
   m_dest(dest),
   m_dest_swizzle({PIPE_SWIZZLE_X,7,7,7}),
   m_src_swizzle({PIPE_SWIZZLE_0, PIPE_SWIZZLE_X, PIPE_SWIZZLE_0}),
   m_buffer_index_mode(bim_none),
   m_uav_id(uav_id), m_uav_base(uav_base),
   m_flags(0)
{
   add_remappable_src_value(&m_src);
   add_remappable_src_value(&m_src2);
   add_remappable_src_value(&m_uav_id);
   add_remappable_dst_value(&m_dest);

   m_dest_swizzle[0] = m_dest.chan_i(0);
}
|
||||
|
||||
/* GDS operation with one value operand. Delegates to the two-value
 * constructor with an empty second value, then sets source swizzle entry 1
 * to the channel of value and entry 2 to PIPE_SWIZZLE_0. value must be set.
 */
GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
                   const PValue& uav_id, int uav_base):
   GDSInstr(op, dest, value, PValue(), uav_id, uav_base)
{
   assert(value);

   m_src_swizzle[1] = value->chan();
   m_src_swizzle[2] = PIPE_SWIZZLE_0;
}
|
||||
|
||||
/* GDS operation without value operands. Delegates to the two-value
 * constructor with both values empty and sets source swizzle entry 1 to
 * PIPE_SWIZZLE_0.
 */
GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,
                   const PValue& uav_id, int uav_base):
   GDSInstr(op, dest, PValue(), PValue(), uav_id, uav_base)
{
   m_src_swizzle[1] = PIPE_SWIZZLE_0;
}
|
||||
|
||||
/* GDS instructions never compare equal: the argument is ignored and the
 * result is always false.
 */
bool GDSInstr::is_equal_to(UNUSED const Instruction& lhs) const
{
   const bool never_equal = false;
   return never_equal;
}
|
||||
|
||||
void GDSInstr::do_print(std::ostream& os) const
|
||||
{
|
||||
const char *swz = "xyzw01?_";
|
||||
os << lds_ops.at(m_op).name << " R" << m_dest.sel() << ".";
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
os << swz[m_dest_swizzle[i]];
|
||||
}
|
||||
if (m_src)
|
||||
os << " " << *m_src;
|
||||
|
||||
os << " UAV:" << *m_uav_id;
|
||||
}
|
||||
|
||||
/* RAT memory operation. All arguments are stored as given; data, the RAT id
 * offset and index are registered as remappable sources.
 */
RatInstruction::RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
                               const GPRVector& data, const GPRVector& index,
                               int rat_id, const PValue& rat_id_offset,
                               int burst_count, int comp_mask, int element_size, bool ack):
   Instruction(rat),
   m_cf_opcode(cf_opcode), m_rat_op(rat_op),
   m_data(data), m_index(index),
   m_rat_id(rat_id), m_rat_id_offset(rat_id_offset),
   m_burst_count(burst_count), m_comp_mask(comp_mask),
   m_element_size(element_size),
   m_need_ack(ack)
{
   add_remappable_src_value(&m_data);
   add_remappable_src_value(&m_rat_id_offset);
   add_remappable_src_value(&m_index);
}
|
||||
|
||||
/* RAT instructions never compare equal to any other instruction;
 * the argument is ignored. */
bool RatInstruction::is_equal_to(UNUSED const Instruction& lhs) const
{
   const bool equal = false;
   return equal;
}
|
||||
|
||||
void RatInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "MEM_RAT RAT(" << m_rat_id;
|
||||
if (m_rat_id_offset)
|
||||
os << "+" << *m_rat_id_offset;
|
||||
os << ") @" << m_index;
|
||||
os << " OP:" << m_rat_op << " " << m_data;
|
||||
os << " BC:" << m_burst_count
|
||||
<< " MASK:" << m_comp_mask
|
||||
<< " ES:" << m_element_size;
|
||||
if (m_need_ack)
|
||||
os << " ACK";
|
||||
}
|
||||
|
||||
/* Map a NIR SSBO atomic intrinsic to the corresponding *_RTN RAT opcode.
 * Any intrinsic not listed here yields UNSUPPORTED. */
RatInstruction::ERatOp RatInstruction::opcode(nir_intrinsic_op opcode)
{
   ERatOp result = UNSUPPORTED;

   switch (opcode) {
   case nir_intrinsic_ssbo_atomic_add:
      result = ADD_RTN;
      break;
   case nir_intrinsic_ssbo_atomic_and:
      result = AND_RTN;
      break;
   case nir_intrinsic_ssbo_atomic_exchange:
      result = XCHG_RTN;
      break;
   case nir_intrinsic_ssbo_atomic_umax:
      result = MAX_UINT_RTN;
      break;
   case nir_intrinsic_ssbo_atomic_umin:
      result = MIN_UINT_RTN;
      break;
   case nir_intrinsic_ssbo_atomic_imax:
      result = MAX_INT_RTN;
      break;
   case nir_intrinsic_ssbo_atomic_imin:
      result = MIN_INT_RTN;
      break;
   case nir_intrinsic_ssbo_atomic_xor:
      result = XOR_RTN;
      break;
   default:
      break;
   }

   return result;
}
|
||||
|
||||
/* Create a tessellation factor write; the vector is copied and the copy
 * is registered as a remappable source value. */
GDSStoreTessFactor::GDSStoreTessFactor(GPRVector& value):
   Instruction(tf_write),
   m_value(value)
{
   add_remappable_src_value(&m_value);
}
|
||||
|
||||
void GDSStoreTessFactor::replace_values(const ValueSet& candidates, PValue new_value)
|
||||
{
|
||||
for (auto& c: candidates) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (*c == *m_value[i])
|
||||
m_value[i] = new_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool GDSStoreTessFactor::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
auto& other = static_cast<const GDSStoreTessFactor&>(lhs);
|
||||
return m_value == other.m_value;
|
||||
}
|
||||
|
||||
void GDSStoreTessFactor::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "TF_WRITE " << m_value;
|
||||
}
|
||||
|
||||
}
|
@ -1,225 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_GDSINSTR_H
|
||||
#define SFN_GDSINSTR_H
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
|
||||
#include <bitset>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class GDSInstr : public Instruction
|
||||
{
|
||||
public:
|
||||
GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
|
||||
const PValue &uav_id, int uav_base);
|
||||
GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
|
||||
const PValue& value2, const PValue &uav_id, int uav_base);
|
||||
GDSInstr(ESDOp op, const GPRVector& dest, const PValue &uav_id, int uav_base);
|
||||
|
||||
ESDOp op() const {return m_op;}
|
||||
|
||||
int src_sel() const {
|
||||
if (!m_src)
|
||||
return 0;
|
||||
|
||||
assert(m_src->type() == Value::gpr);
|
||||
return m_src->sel();
|
||||
}
|
||||
|
||||
int src2_chan() const {
|
||||
if (!m_src2)
|
||||
return 0;
|
||||
|
||||
assert(m_src->type() == Value::gpr);
|
||||
return m_src->chan();
|
||||
}
|
||||
|
||||
int src_swizzle(int idx) const {assert(idx < 3); return m_src_swizzle[idx];}
|
||||
|
||||
int dest_sel() const {
|
||||
return m_dest.sel();
|
||||
}
|
||||
|
||||
int dest_swizzle(int i) const {
|
||||
if (i < 4)
|
||||
return m_dest_swizzle[i];
|
||||
return 7;
|
||||
}
|
||||
|
||||
void set_dest_swizzle(const std::array<int,4>& swz) {
|
||||
m_dest_swizzle = swz;
|
||||
}
|
||||
|
||||
PValue uav_id() const {return m_uav_id;}
|
||||
int uav_base() const {return m_uav_base;}
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ESDOp m_op;
|
||||
|
||||
PValue m_src;
|
||||
PValue m_src2;
|
||||
GPRVector m_dest;
|
||||
std::array <int, 4> m_dest_swizzle;
|
||||
std::array <int, 3> m_src_swizzle;
|
||||
|
||||
EBufferIndexMode m_buffer_index_mode;
|
||||
PValue m_uav_id;
|
||||
int m_uav_base;
|
||||
std::bitset<8> m_flags;
|
||||
|
||||
};
|
||||
|
||||
class RatInstruction : public Instruction {
|
||||
|
||||
public:
|
||||
enum ERatOp {
|
||||
NOP,
|
||||
STORE_TYPED,
|
||||
STORE_RAW,
|
||||
STORE_RAW_FDENORM,
|
||||
CMPXCHG_INT,
|
||||
CMPXCHG_FLT,
|
||||
CMPXCHG_FDENORM,
|
||||
ADD,
|
||||
SUB,
|
||||
RSUB,
|
||||
MIN_INT,
|
||||
MIN_UINT,
|
||||
MAX_INT,
|
||||
MAX_UINT,
|
||||
AND,
|
||||
OR,
|
||||
XOR,
|
||||
MSKOR,
|
||||
INC_UINT,
|
||||
DEC_UINT,
|
||||
NOP_RTN = 32,
|
||||
XCHG_RTN = 34,
|
||||
XCHG_FDENORM_RTN,
|
||||
CMPXCHG_INT_RTN,
|
||||
CMPXCHG_FLT_RTN,
|
||||
CMPXCHG_FDENORM_RTN,
|
||||
ADD_RTN,
|
||||
SUB_RTN,
|
||||
RSUB_RTN,
|
||||
MIN_INT_RTN,
|
||||
MIN_UINT_RTN,
|
||||
MAX_INT_RTN,
|
||||
MAX_UINT_RTN,
|
||||
AND_RTN,
|
||||
OR_RTN,
|
||||
XOR_RTN,
|
||||
MSKOR_RTN,
|
||||
UINT_RTN,
|
||||
UNSUPPORTED
|
||||
};
|
||||
|
||||
RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
|
||||
const GPRVector& data, const GPRVector& index,
|
||||
int rat_id, const PValue& rat_id_offset,
|
||||
int burst_count, int comp_mask, int element_size,
|
||||
bool ack);
|
||||
|
||||
PValue rat_id_offset() const { return m_rat_id_offset;}
|
||||
int rat_id() const { return m_rat_id;}
|
||||
|
||||
ERatOp rat_op() const {return m_rat_op;}
|
||||
|
||||
int data_gpr() const {return m_data.sel();}
|
||||
int index_gpr() const {return m_index.sel();}
|
||||
int elm_size() const {return m_element_size;}
|
||||
|
||||
int comp_mask() const {return m_comp_mask;}
|
||||
|
||||
bool need_ack() const {return m_need_ack;}
|
||||
int burst_count() const {return m_burst_count;}
|
||||
|
||||
static ERatOp opcode(nir_intrinsic_op opcode);
|
||||
|
||||
int data_swz(int chan) const {return m_data.chan_i(chan);}
|
||||
|
||||
ECFOpCode cf_opcode() const { return m_cf_opcode;}
|
||||
|
||||
void set_ack() {m_need_ack = true; }
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
ECFOpCode m_cf_opcode;
|
||||
ERatOp m_rat_op;
|
||||
|
||||
GPRVector m_data;
|
||||
GPRVector m_index;
|
||||
|
||||
int m_rat_id;
|
||||
PValue m_rat_id_offset;
|
||||
int m_burst_count;
|
||||
int m_comp_mask;
|
||||
int m_element_size;
|
||||
|
||||
std::bitset<8> m_flags;
|
||||
|
||||
bool m_need_ack;
|
||||
|
||||
};
|
||||
|
||||
class GDSStoreTessFactor : public Instruction {
|
||||
public:
|
||||
GDSStoreTessFactor(GPRVector& value);
|
||||
int sel() const {return m_value.sel();}
|
||||
int chan(int i ) const {return m_value.chan_i(i);}
|
||||
|
||||
void replace_values(const ValueSet& candiates, PValue new_value) override;
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
GPRVector m_value;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_GDSINSTR_H
|
@ -1,151 +0,0 @@
|
||||
#include "sfn_instruction_lds.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
void LDSReadInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "LDS Read [";
|
||||
for (auto& v : m_dest_value)
|
||||
os << *v << " ";
|
||||
os << "], ";
|
||||
for (auto& a : m_address)
|
||||
os << *a << " ";
|
||||
}
|
||||
|
||||
/* Create an LDS read of value.size() values, one address per value.
 * Both vectors are copied; every stored address is registered as a
 * remappable source and every stored destination as a remappable
 * destination. */
LDSReadInstruction::LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value):
   Instruction(lds_read),
   m_address(address),
   m_dest_value(value)
{
   assert(address.size() == value.size());

   const unsigned count = address.size();
   for (unsigned idx = 0; idx < count; ++idx) {
      add_remappable_src_value(&m_address[idx]);
      add_remappable_dst_value(&m_dest_value[idx]);
   }
}
|
||||
|
||||
void LDSReadInstruction::replace_values(const ValueSet& candidates, PValue new_value)
|
||||
{
|
||||
for (auto& c : candidates) {
|
||||
for (auto& d: m_dest_value) {
|
||||
if (*c == *d)
|
||||
d = new_value;
|
||||
}
|
||||
|
||||
for (auto& a: m_address) {
|
||||
if (*c == *a)
|
||||
a = new_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
auto& other = static_cast<const LDSReadInstruction&>(lhs);
|
||||
return m_address == other.m_address &&
|
||||
m_dest_value == other.m_dest_value;
|
||||
}
|
||||
|
||||
/* Create an LDS atomic operation with two source operands. Both sources
 * and the address are registered as remappable sources, the destination
 * as a remappable destination. */
LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1,
                                           PValue& address, unsigned op):
   Instruction(lds_atomic),
   m_address(address),
   m_dest_value(dest),
   m_src0_value(src0),
   m_src1_value(src1),
   m_opcode(op)
{
   add_remappable_src_value(&m_src0_value);
   add_remappable_src_value(&m_src1_value);
   add_remappable_src_value(&m_address);
   add_remappable_dst_value(&m_dest_value);
}
|
||||
|
||||
/* Single-source variant: forwards to the two-source constructor with an
 * empty second source. */
LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0,
                                           PValue& address, unsigned op):
   LDSAtomicInstruction(dest, src0, PValue(), address, op)
{
}
|
||||
|
||||
|
||||
void LDSAtomicInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "LDS " << m_opcode << " " << *m_dest_value << " ";
|
||||
os << "[" << *m_address << "] " << *m_src0_value;
|
||||
if (m_src1_value)
|
||||
os << ", " << *m_src1_value;
|
||||
}
|
||||
|
||||
bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
|
||||
|
||||
return m_opcode == other.m_opcode &&
|
||||
*m_dest_value == *other.m_dest_value &&
|
||||
*m_src0_value == *other.m_src0_value &&
|
||||
*m_address == *other.m_address &&
|
||||
((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) ||
|
||||
(!m_src1_value && !other.m_src1_value));
|
||||
}
|
||||
|
||||
/* One-value variant: forwards to the two-value constructor with an empty
 * second value. */
LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
   LDSWriteInstruction(address, idx_offset, value0, PValue())
{
}
|
||||
|
||||
/* Create an LDS write of one or two values. The address and the first
 * value are always registered as remappable sources; the second value
 * only when it is set. */
LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset,
                                         PValue value0, PValue value1):
   Instruction(lds_write),
   m_address(address),
   m_value0(value0),
   m_value1(value1),
   m_idx_offset(idx_offset)
{
   add_remappable_src_value(&m_address);
   add_remappable_src_value(&m_value0);

   const bool has_second_value = static_cast<bool>(m_value1);
   if (has_second_value)
      add_remappable_src_value(&m_value1);
}
|
||||
|
||||
|
||||
void LDSWriteInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "LDS Write" << num_components()
|
||||
<< " " << address() << ", " << value0();
|
||||
if (num_components() > 1)
|
||||
os << ", " << value1();
|
||||
}
|
||||
|
||||
void LDSWriteInstruction::replace_values(const ValueSet& candidates, PValue new_value)
|
||||
{
|
||||
for (auto c: candidates) {
|
||||
if (*c == *m_address)
|
||||
m_address = new_value;
|
||||
|
||||
if (*c == *m_value0)
|
||||
m_value0 = new_value;
|
||||
|
||||
if (*c == *m_value1)
|
||||
m_value1 = new_value;
|
||||
}
|
||||
}
|
||||
|
||||
bool LDSWriteInstruction::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
auto& other = static_cast<const LDSWriteInstruction&>(lhs);
|
||||
|
||||
if (m_value1) {
|
||||
if (!other.m_value1)
|
||||
return false;
|
||||
if (*m_value1 != *other.m_value1)
|
||||
return false;
|
||||
} else {
|
||||
if (other.m_value1)
|
||||
return false;
|
||||
}
|
||||
|
||||
return (m_value0 != other.m_value0 &&
|
||||
*m_address != *other.m_address);
|
||||
}
|
||||
|
||||
} // namespace r600
|
@ -1,82 +0,0 @@
|
||||
#ifndef LDSINSTRUCTION_H
|
||||
#define LDSINSTRUCTION_H
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class LDSReadInstruction : public Instruction {
|
||||
public:
|
||||
LDSReadInstruction(std::vector<PValue>& value, std::vector<PValue>& address);
|
||||
void replace_values(const ValueSet& candidates, PValue new_value) override;
|
||||
|
||||
unsigned num_values() const { return m_dest_value.size();}
|
||||
const Value& address(unsigned i) const { return *m_address[i];}
|
||||
const Value& dest(unsigned i) const { return *m_dest_value[i];}
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_print(std::ostream& os) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
|
||||
std::vector<PValue> m_address;
|
||||
std::vector<PValue> m_dest_value;
|
||||
};
|
||||
|
||||
class LDSAtomicInstruction : public Instruction {
|
||||
public:
|
||||
LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
|
||||
LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
|
||||
|
||||
const Value& address() const { return *m_address;}
|
||||
const Value& dest() const { return *m_dest_value;}
|
||||
const Value& src0() const { return *m_src0_value;}
|
||||
const PValue& src1() const { return m_src1_value;}
|
||||
unsigned op() const {return m_opcode;}
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_print(std::ostream& os) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
|
||||
PValue m_address;
|
||||
PValue m_dest_value;
|
||||
PValue m_src0_value;
|
||||
PValue m_src1_value;
|
||||
unsigned m_opcode;
|
||||
};
|
||||
|
||||
class LDSWriteInstruction : public Instruction {
|
||||
public:
|
||||
LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0);
|
||||
LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1);
|
||||
|
||||
const Value& address() const {return *m_address;};
|
||||
const Value& value0() const { return *m_value0;}
|
||||
const Value& value1() const { return *m_value1;}
|
||||
unsigned num_components() const { return m_value1 ? 2 : 1;}
|
||||
unsigned idx_offset() const {return m_idx_offset;};
|
||||
|
||||
void replace_values(const ValueSet& candidates, PValue new_value) override;
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
void do_print(std::ostream& os) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
|
||||
PValue m_address;
|
||||
PValue m_value0;
|
||||
PValue m_value1;
|
||||
unsigned m_idx_offset;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // LDSINSTRUCTION_H
|
@ -1,68 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_instruction_misc.h"
|
||||
|
||||
namespace r600 {
|
||||
/* Create a vertex emit (cut == false) or vertex cut (cut == true)
 * for the given stream. */
EmitVertex::EmitVertex(int stream, bool cut):
   Instruction(emit_vtx),
   m_stream(stream),
   m_cut(cut)
{
}
|
||||
|
||||
bool EmitVertex::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
auto& oth = static_cast<const EmitVertex&>(lhs);
|
||||
return oth.m_stream == m_stream &&
|
||||
oth.m_cut == m_cut;
|
||||
}
|
||||
|
||||
void EmitVertex::do_print(std::ostream& os) const
|
||||
{
|
||||
os << (m_cut ? "EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream;
|
||||
}
|
||||
|
||||
/* Create a wait-for-acknowledge instruction storing the given count. */
WaitAck::WaitAck(int nack):
   Instruction(wait_ack),
   m_nack(nack)
{
}
|
||||
|
||||
bool WaitAck::is_equal_to(const Instruction& lhs) const
|
||||
{
|
||||
const auto& l = static_cast<const WaitAck&>(lhs);
|
||||
return m_nack == l.m_nack;
|
||||
}
|
||||
|
||||
void WaitAck::do_print(std::ostream& os) const
|
||||
{
|
||||
os << "WAIT_ACK @" << m_nack;
|
||||
}
|
||||
|
||||
}
|
@ -1,69 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_INSTRUCTION_MISC_H
|
||||
#define SFN_INSTRUCTION_MISC_H
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class EmitVertex : public Instruction {
|
||||
public:
|
||||
EmitVertex(int stream, bool cut);
|
||||
ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
|
||||
int stream() const { return m_stream;}
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
int m_stream;
|
||||
bool m_cut;
|
||||
};
|
||||
|
||||
class WaitAck : public Instruction {
|
||||
public:
|
||||
WaitAck(int nack);
|
||||
ECFOpCode op() const {return cf_wait_ack;}
|
||||
int n_ack() const {return m_nack;}
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
int m_nack;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_INSTRUCTION_MISC_H
|
@ -1,143 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef INSTRUCTION_TEX_H
|
||||
#define INSTRUCTION_TEX_H
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class TexInstruction : public Instruction {
|
||||
public:
|
||||
enum Opcode {
|
||||
ld = FETCH_OP_LD,
|
||||
get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
|
||||
get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES,
|
||||
get_tex_lod = FETCH_OP_GET_LOD,
|
||||
get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
|
||||
get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
|
||||
set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
|
||||
keep_gradients = FETCH_OP_KEEP_GRADIENTS,
|
||||
set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
|
||||
set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
|
||||
sample = FETCH_OP_SAMPLE,
|
||||
sample_l = FETCH_OP_SAMPLE_L,
|
||||
sample_lb = FETCH_OP_SAMPLE_LB,
|
||||
sample_lz = FETCH_OP_SAMPLE_LZ,
|
||||
sample_g = FETCH_OP_SAMPLE_G,
|
||||
sample_g_lb = FETCH_OP_SAMPLE_G_L,
|
||||
gather4 = FETCH_OP_GATHER4,
|
||||
gather4_o = FETCH_OP_GATHER4_O,
|
||||
|
||||
sample_c = FETCH_OP_SAMPLE_C,
|
||||
sample_c_l = FETCH_OP_SAMPLE_C_L,
|
||||
sample_c_lb = FETCH_OP_SAMPLE_C_LB,
|
||||
sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
|
||||
sample_c_g = FETCH_OP_SAMPLE_C_G,
|
||||
sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
|
||||
gather4_c = FETCH_OP_GATHER4_C,
|
||||
gather4_c_o = FETCH_OP_GATHER4_C_O,
|
||||
|
||||
};
|
||||
|
||||
enum Flags {
|
||||
x_unnormalized,
|
||||
y_unnormalized,
|
||||
z_unnormalized,
|
||||
w_unnormalized,
|
||||
grad_fine
|
||||
};
|
||||
|
||||
TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid,
|
||||
unsigned rid, PValue sampler_offset);
|
||||
|
||||
const GPRVector& src() const {return m_src;}
|
||||
const GPRVector& dst() const {return m_dst;}
|
||||
unsigned opcode() const {return m_opcode;}
|
||||
unsigned sampler_id() const {return m_sampler_id;}
|
||||
unsigned resource_id() const {return m_resource_id;}
|
||||
|
||||
void replace_values(const ValueSet& candidates, PValue new_value) override;
|
||||
|
||||
void set_offset(unsigned index, int32_t val);
|
||||
int get_offset(unsigned index) const;
|
||||
|
||||
void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
|
||||
|
||||
int inst_mode() const { return m_inst_mode;}
|
||||
|
||||
void set_flag(Flags flag) {
|
||||
m_flags.set(flag);
|
||||
}
|
||||
|
||||
PValue sampler_offset() const {
|
||||
return m_sampler_offset;
|
||||
}
|
||||
|
||||
bool has_flag(Flags flag) const {
|
||||
return m_flags.test(flag);
|
||||
}
|
||||
|
||||
int dest_swizzle(int i) const {
|
||||
assert(i < 4);
|
||||
return m_dest_swizzle[i];
|
||||
}
|
||||
|
||||
void set_dest_swizzle(const std::array<int,4>& swz) {
|
||||
m_dest_swizzle = swz;
|
||||
}
|
||||
|
||||
void set_gather_comp(int cmp);
|
||||
|
||||
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
|
||||
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
|
||||
|
||||
private:
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
static const char *opname(Opcode code);
|
||||
|
||||
Opcode m_opcode;
|
||||
GPRVector m_dst;
|
||||
GPRVector m_src;
|
||||
unsigned m_sampler_id;
|
||||
unsigned m_resource_id;
|
||||
std::bitset<8> m_flags;
|
||||
int m_offset[3];
|
||||
int m_inst_mode;
|
||||
std::array<int,4> m_dest_swizzle;
|
||||
PValue m_sampler_offset;
|
||||
};
|
||||
|
||||
bool r600_nir_lower_int_tg4(nir_shader *nir);
|
||||
bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
|
||||
bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
|
||||
|
||||
}
|
||||
|
||||
#endif // INSTRUCTION_TEX_H
|
@ -1,91 +0,0 @@
|
||||
#ifndef INSTRUCTIONVISITOR_H
|
||||
#define INSTRUCTIONVISITOR_H
|
||||
|
||||
namespace r600 {
|
||||
|
||||
|
||||
class AluInstruction;
|
||||
class ExportInstruction;
|
||||
class TexInstruction;
|
||||
class FetchInstruction;
|
||||
class IfInstruction;
|
||||
class ElseInstruction;
|
||||
class IfElseEndInstruction;
|
||||
class LoopBeginInstruction;
|
||||
class LoopEndInstruction;
|
||||
class LoopBreakInstruction;
|
||||
class LoopContInstruction;
|
||||
class StreamOutIntruction;
|
||||
class MemRingOutIntruction;
|
||||
class EmitVertex;
|
||||
class WaitAck;
|
||||
class WriteScratchInstruction;
|
||||
class GDSInstr;
|
||||
class RatInstruction;
|
||||
class LDSWriteInstruction;
|
||||
class LDSReadInstruction;
|
||||
class LDSAtomicInstruction;
|
||||
class GDSStoreTessFactor;
|
||||
class InstructionBlock;
|
||||
|
||||
class InstructionVisitor
|
||||
{
|
||||
public:
|
||||
virtual ~InstructionVisitor() {};
|
||||
virtual bool visit(AluInstruction& i) = 0;
|
||||
virtual bool visit(ExportInstruction& i) = 0;
|
||||
virtual bool visit(TexInstruction& i) = 0;
|
||||
virtual bool visit(FetchInstruction& i) = 0;
|
||||
virtual bool visit(IfInstruction& i) = 0;
|
||||
virtual bool visit(ElseInstruction& i) = 0;
|
||||
virtual bool visit(IfElseEndInstruction& i) = 0;
|
||||
virtual bool visit(LoopBeginInstruction& i) = 0;
|
||||
virtual bool visit(LoopEndInstruction& i) = 0;
|
||||
virtual bool visit(LoopBreakInstruction& i) = 0;
|
||||
virtual bool visit(LoopContInstruction& i) = 0;
|
||||
virtual bool visit(StreamOutIntruction& i) = 0;
|
||||
virtual bool visit(MemRingOutIntruction& i) = 0;
|
||||
virtual bool visit(EmitVertex& i) = 0;
|
||||
virtual bool visit(WaitAck& i) = 0;
|
||||
virtual bool visit(WriteScratchInstruction& i) = 0;
|
||||
virtual bool visit(GDSInstr& i) = 0;
|
||||
virtual bool visit(RatInstruction& i) = 0;
|
||||
virtual bool visit(LDSWriteInstruction& i) = 0;
|
||||
virtual bool visit(LDSReadInstruction& i) = 0;
|
||||
virtual bool visit(LDSAtomicInstruction& i) = 0;
|
||||
virtual bool visit(GDSStoreTessFactor& i) = 0;
|
||||
virtual bool visit(InstructionBlock& i) = 0;
|
||||
};
|
||||
|
||||
class ConstInstructionVisitor
|
||||
{
|
||||
public:
|
||||
virtual ~ConstInstructionVisitor() {};
|
||||
virtual bool visit(const AluInstruction& i) = 0;
|
||||
virtual bool visit(const ExportInstruction& i) = 0;
|
||||
virtual bool visit(const TexInstruction& i) = 0;
|
||||
virtual bool visit(const FetchInstruction& i) = 0;
|
||||
virtual bool visit(const IfInstruction& i) = 0;
|
||||
virtual bool visit(const ElseInstruction& i) = 0;
|
||||
virtual bool visit(const IfElseEndInstruction& i) = 0;
|
||||
virtual bool visit(const LoopBeginInstruction& i) = 0;
|
||||
virtual bool visit(const LoopEndInstruction& i) = 0;
|
||||
virtual bool visit(const LoopBreakInstruction& i) = 0;
|
||||
virtual bool visit(const LoopContInstruction& i) = 0;
|
||||
virtual bool visit(const StreamOutIntruction& i) = 0;
|
||||
virtual bool visit(const MemRingOutIntruction& i) = 0;
|
||||
virtual bool visit(const EmitVertex& i) = 0;
|
||||
virtual bool visit(const WaitAck& i) = 0;
|
||||
virtual bool visit(const WriteScratchInstruction& i) = 0;
|
||||
virtual bool visit(const GDSInstr& i) = 0;
|
||||
virtual bool visit(const RatInstruction& i) = 0;
|
||||
virtual bool visit(const LDSWriteInstruction& i) = 0;
|
||||
virtual bool visit(const LDSReadInstruction& i) = 0;
|
||||
virtual bool visit(const LDSAtomicInstruction& i) = 0;
|
||||
virtual bool visit(const GDSStoreTessFactor& i) = 0;
|
||||
virtual bool visit(const InstructionBlock& i) = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // INSTRUCTIONVISITOR_H
|
@ -1,45 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "sfn_nir.h"
|
||||
|
||||
struct r600_shader;
|
||||
union r600_shader_key;
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class AssemblyFromShaderLegacy : public AssemblyFromShader {
|
||||
public:
|
||||
AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key);
|
||||
~AssemblyFromShaderLegacy() override;
|
||||
private:
|
||||
bool do_lower(const std::vector<InstructionBlock> &ir) override ;
|
||||
|
||||
struct AssemblyFromShaderLegacyImpl *impl;
|
||||
};
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,314 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018-2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_LIVERANGE_H
|
||||
#define SFN_LIVERANGE_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
#include "sfn_nir.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/** Storage to record the required live range of a temporary register
|
||||
* begin == end == -1 indicates that the register can be reused without
|
||||
* limitations. Otherwise, "begin" indicates the first instruction in which
|
||||
* a write operation may target this temporary, and end indicates the
|
||||
* last instruction in which a value can be read from this temporary.
|
||||
* Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin.
|
||||
*/
|
||||
struct register_live_range {
   int begin;          /* first instruction that may write the temporary */
   int end;            /* last instruction that may read the temporary */
   bool is_array_elm;  /* flag carried along with the range for array elements */
};
|
||||
|
||||
/* Kinds of program scopes distinguished by the live range tracking. */
enum prog_scope_type {
   outer_scope,           /* outer program scope */
   loop_body,             /* inside a loop */
   if_branch,             /* inside an if branch */
   else_branch,           /* inside an else branch */
   switch_body,           /* inside a switch statement */
   switch_case_branch,    /* inside a switch case statement */
   switch_default_branch, /* inside a switch default statement */
   undefined_scope
};
|
||||
|
||||
class prog_scope {
|
||||
public:
|
||||
prog_scope();
|
||||
prog_scope(prog_scope *parent, prog_scope_type type, int id,
|
||||
int depth, int begin);
|
||||
|
||||
prog_scope_type type() const;
|
||||
prog_scope *parent() const;
|
||||
int nesting_depth() const;
|
||||
int id() const;
|
||||
int end() const;
|
||||
int begin() const;
|
||||
int loop_break_line() const;
|
||||
|
||||
const prog_scope *in_else_scope() const;
|
||||
const prog_scope *in_ifelse_scope() const;
|
||||
const prog_scope *in_parent_ifelse_scope() const;
|
||||
const prog_scope *innermost_loop() const;
|
||||
const prog_scope *outermost_loop() const;
|
||||
const prog_scope *enclosing_conditional() const;
|
||||
|
||||
bool is_loop() const;
|
||||
bool is_in_loop() const;
|
||||
bool is_switchcase_scope_in_loop() const;
|
||||
bool is_conditional() const;
|
||||
bool is_child_of(const prog_scope *scope) const;
|
||||
bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const;
|
||||
|
||||
bool break_is_for_switchcase() const;
|
||||
bool contains_range_of(const prog_scope& other) const;
|
||||
|
||||
void set_end(int end);
|
||||
void set_loop_break_line(int line);
|
||||
|
||||
private:
|
||||
prog_scope_type scope_type;
|
||||
int scope_id;
|
||||
int scope_nesting_depth;
|
||||
int scope_begin;
|
||||
int scope_end;
|
||||
int break_loop_line;
|
||||
prog_scope *parent_scope;
|
||||
};
|
||||
|
||||
/* Some storage class to encapsulate the prog_scope (de-)allocations */
|
||||
class prog_scope_storage {
|
||||
public:
|
||||
prog_scope_storage(int n);
|
||||
~prog_scope_storage();
|
||||
prog_scope * create(prog_scope *p, prog_scope_type type, int id,
|
||||
int lvl, int s_begin);
|
||||
private:
|
||||
int current_slot;
|
||||
std::vector<prog_scope> storage;
|
||||
};
|
||||
|
||||
/* Class to track the access to a component of a temporary register. */
|
||||
|
||||
class temp_comp_access {
|
||||
public:
|
||||
temp_comp_access();
|
||||
|
||||
void record_read(int line, prog_scope *scope);
|
||||
void record_write(int line, prog_scope *scope);
|
||||
register_live_range get_required_live_range();
|
||||
private:
|
||||
void propagate_live_range_to_dominant_write_scope();
|
||||
bool conditional_ifelse_write_in_loop() const;
|
||||
|
||||
void record_ifelse_write(const prog_scope& scope);
|
||||
void record_if_write(const prog_scope& scope);
|
||||
void record_else_write(const prog_scope& scope);
|
||||
|
||||
prog_scope *last_read_scope;
|
||||
prog_scope *first_read_scope;
|
||||
prog_scope *first_write_scope;
|
||||
|
||||
int first_write;
|
||||
int last_read;
|
||||
int last_write;
|
||||
int first_read;
|
||||
|
||||
/* This member variable tracks the current resolution of conditional writing
|
||||
* to this temporary in IF/ELSE clauses.
|
||||
*
|
||||
* The initial value "conditionality_untouched" indicates that this
|
||||
* temporary has not yet been written to within an if clause.
|
||||
*
|
||||
* A positive (other than "conditionality_untouched") number refers to the
|
||||
* last loop id for which the write was resolved as unconditional. With each
|
||||
* new loop this value will be overwitten by "conditionality_unresolved"
|
||||
* on entering the first IF clause writing this temporary.
|
||||
*
|
||||
* The value "conditionality_unresolved" indicates that no resolution has
|
||||
* been achieved so far. If the variable is set to this value at the end of
|
||||
* the processing of the whole shader it also indicates a conditional write.
|
||||
*
|
||||
* The value "write_is_conditional" marks that the variable is written
|
||||
* conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
|
||||
* least one loop.
|
||||
*/
|
||||
int conditionality_in_loop_id;
|
||||
|
||||
/* Helper constants to make the tracking code more readable. */
|
||||
static const int write_is_conditional = -1;
|
||||
static const int conditionality_unresolved = 0;
|
||||
static const int conditionality_untouched;
|
||||
static const int write_is_unconditional;
|
||||
|
||||
/* A bit field tracking the nexting levels of if-else clauses where the
|
||||
* temporary has (so far) been written to in the if branch, but not in the
|
||||
* else branch.
|
||||
*/
|
||||
unsigned int if_scope_write_flags;
|
||||
|
||||
int next_ifelse_nesting_depth;
|
||||
static const int supported_ifelse_nesting_depth = 32;
|
||||
|
||||
/* Tracks the last if scope in which the temporary was written to
|
||||
* without a write in the corresponding else branch. Is also used
|
||||
* to track read-before-write in the according scope.
|
||||
*/
|
||||
const prog_scope *current_unpaired_if_write_scope;
|
||||
|
||||
/* Flag to resolve read-before-write in the else scope. */
|
||||
bool was_written_in_current_else_scope;
|
||||
};
|
||||
|
||||
/* Class to track the access to all components of a temporary register. */
|
||||
class temp_access {
|
||||
public:
|
||||
temp_access();
|
||||
void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm);
|
||||
void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm);
|
||||
register_live_range get_required_live_range();
|
||||
private:
|
||||
void update_access_mask(int mask);
|
||||
|
||||
temp_comp_access comp[4];
|
||||
int access_mask;
|
||||
bool needs_component_tracking;
|
||||
bool is_array_element;
|
||||
};
|
||||
|
||||
/* Helper class to merge the live ranges of arrays.
|
||||
*
|
||||
* For arrays the array length, live range, and component access needs to
|
||||
* be kept, because when live ranges are merged or arrays are interleaved
|
||||
* one can only merge or interleave an array into another with equal or more
|
||||
* elements. For interleaving it is also required that the sum of used swizzles
|
||||
* is at most four.
|
||||
*/
|
||||
|
||||
/* Live range, length and component usage of one array, plus the optional
 * target array it has been mapped to by merge() or interleave(). */
class array_live_range {
public:
   array_live_range();
   array_live_range(unsigned aid, unsigned alength);
   array_live_range(unsigned aid, unsigned alength, int first_access,
                    int last_access, int mask);

   void set_live_range(int first_access, int last_access);

   void set_begin(int _begin)
   {
      first_access = _begin;
   }

   void set_end(int _end)
   {
      last_access = _end;
   }

   void set_access_mask(int s);

   static void merge(array_live_range *a, array_live_range *b);
   static void interleave(array_live_range *a, array_live_range *b);

   int array_id() const
   {
      return id;
   }

   /* Id of the array this one is mapped to, or 0 when it is not mapped. */
   int target_array_id() const
   {
      if (!target_array)
         return 0;
      return target_array->id;
   }

   /* Follows the chain of target arrays and returns its last element;
    * an unmapped array is its own final target. */
   const array_live_range *final_target() const
   {
      if (!target_array)
         return this;
      return target_array->final_target();
   }

   unsigned array_length() const
   {
      return length;
   }

   int begin() const
   {
      return first_access;
   }

   int end() const
   {
      return last_access;
   }

   int access_mask() const
   {
      return component_access_mask;
   }

   int used_components() const
   {
      return used_component_count;
   }

   bool time_doesnt_overlap(const array_live_range& other) const;

   void print(std::ostream& os) const;

   bool is_mapped() const
   {
      return target_array != nullptr;
   }

   int8_t remap_one_swizzle(int8_t idx) const;

private:
   void init_swizzles();
   void set_target(array_live_range *target);
   void merge_live_range_from(array_live_range *other);
   void interleave_into(array_live_range *other);

   unsigned id;
   unsigned length;
   int first_access;
   int last_access;
   uint8_t component_access_mask;
   uint8_t used_component_count;
   array_live_range *target_array;
   int8_t swizzle_map[4];
};
|
||||
|
||||
|
||||
|
||||
class LiverangeEvaluator {
|
||||
public:
|
||||
LiverangeEvaluator();
|
||||
|
||||
void run(const Shader& shader,
|
||||
std::vector<register_live_range> ®ister_live_ranges);
|
||||
|
||||
void scope_if();
|
||||
void scope_else();
|
||||
void scope_endif();
|
||||
void scope_loop_begin();
|
||||
void scope_loop_end();
|
||||
void scope_loop_break();
|
||||
|
||||
void record_read(const Value& src, bool is_array_elm = false);
|
||||
void record_write(const Value& dst, bool is_array_elm = false);
|
||||
|
||||
void record_read(const GPRVector& src);
|
||||
void record_write(const GPRVector& dst);
|
||||
|
||||
private:
|
||||
|
||||
prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id,
|
||||
int lvl, int s_begin);
|
||||
|
||||
|
||||
void get_required_live_ranges(std::vector<register_live_range>& register_live_ranges);
|
||||
|
||||
int line;
|
||||
int loop_id;
|
||||
int if_id;
|
||||
int switch_id;
|
||||
bool is_at_end;
|
||||
int n_scopes;
|
||||
std::unique_ptr<prog_scope_storage> scopes;
|
||||
prog_scope *cur_scope;
|
||||
|
||||
std::vector<temp_access> temp_acc;
|
||||
|
||||
};
|
||||
|
||||
std::vector<rename_reg_pair>
|
||||
get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges);
|
||||
|
||||
} // end namespace r600
|
||||
|
||||
#endif
|
438
src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp
Normal file
438
src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp
Normal file
@ -0,0 +1,438 @@
|
||||
#include "sfn_liverangeevaluator.h"
|
||||
#include "sfn_liverangeevaluator_helpers.h"
|
||||
|
||||
#include "sfn_instr_alugroup.h"
|
||||
#include "sfn_instr_controlflow.h"
|
||||
#include "sfn_instr_export.h"
|
||||
#include "sfn_instr_fetch.h"
|
||||
#include "sfn_instr_mem.h"
|
||||
#include "sfn_instr_tex.h"
|
||||
#include "sfn_shader.h"
|
||||
#include "sfn_debug.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class LiveRangeInstrVisitor : public InstrVisitor {
|
||||
public:
|
||||
LiveRangeInstrVisitor(LiveRangeMap& live_range_map);
|
||||
|
||||
void visit(AluInstr *instr) override;
|
||||
void visit(AluGroup *instr) override;
|
||||
void visit(TexInstr *instr) override;
|
||||
void visit(ExportInstr *instr) override;
|
||||
void visit(FetchInstr *instr) override;
|
||||
void visit(Block *instr) override;
|
||||
void visit(ControlFlowInstr *instr) override;
|
||||
void visit(IfInstr *instr) override;
|
||||
void visit(WriteScratchInstr *instr) override;
|
||||
void visit(StreamOutInstr *instr) override;
|
||||
void visit(MemRingOutInstr *instr) override;
|
||||
void visit(EmitVertexInstr *instr) override {(void)instr;}
|
||||
void visit(GDSInstr *instr) override;
|
||||
void visit(WriteTFInstr *instr) override;
|
||||
void visit(LDSAtomicInstr *instr) override;
|
||||
void visit(LDSReadInstr *instr) override;
|
||||
void visit(RatInstr *instr) override;
|
||||
|
||||
void finalize();
|
||||
private:
|
||||
|
||||
void record_write(const Register *reg);
|
||||
void record_read(const Register *reg, LiveRangeEntry::EUse use);
|
||||
|
||||
void record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle& swizzle);
|
||||
void record_read(const RegisterVec4 ®, LiveRangeEntry::EUse use);
|
||||
|
||||
void scope_if();
|
||||
void scope_else();
|
||||
void scope_endif();
|
||||
void scope_loop_begin();
|
||||
void scope_loop_end();
|
||||
void scope_loop_break();
|
||||
ProgramScope *create_scope(ProgramScope *parent, ProgramScopeType type,
|
||||
int id, int nesting_depth, int line);
|
||||
|
||||
std::vector<std::unique_ptr<ProgramScope>> m_scopes;
|
||||
ProgramScope *m_current_scope;
|
||||
LiveRangeMap& m_live_range_map;
|
||||
RegisterAccess m_register_access;
|
||||
|
||||
int m_line{0};
|
||||
int m_if_id{1};
|
||||
int m_loop_id{1};
|
||||
};
|
||||
|
||||
/* Nothing to set up: the evaluator keeps no state of its own. */
LiveRangeEvaluator::LiveRangeEvaluator() = default;
|
||||
|
||||
|
||||
/* Let the shader prepare the live range map, run a LiveRangeInstrVisitor
 * over every block returned by sh.func(), finalize it and return the map. */
LiveRangeMap
LiveRangeEvaluator::run(Shader& sh)
{
   LiveRangeMap range_map = sh.prepare_live_range_map();

   LiveRangeInstrVisitor visitor(range_map);
   for (auto& block : sh.func()) {
      block->accept(visitor);
   }
   visitor.finalize();

   return range_map;
}
|
||||
|
||||
void LiveRangeInstrVisitor::finalize()
|
||||
{
|
||||
m_current_scope->set_end(m_line);
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
|
||||
auto& live_ranges = m_live_range_map.component(i);
|
||||
for(const auto& r : live_ranges) {
|
||||
if (r.m_register->live_end_pinned())
|
||||
record_read(r.m_register, LiveRangeEntry::use_unspecified);
|
||||
}
|
||||
|
||||
auto& comp_access = m_register_access.component(i);
|
||||
|
||||
for (size_t i = 0; i < comp_access.size(); ++i) {
|
||||
sfn_log << SfnLog::merge << "Evaluae access for " << *live_ranges[i].m_register << "\n";
|
||||
|
||||
auto& rca = comp_access[i];
|
||||
rca.update_required_live_range();
|
||||
live_ranges[i].m_start = rca.range().start;
|
||||
live_ranges[i].m_end = rca.range().end;
|
||||
live_ranges[i].m_use = rca.use_type();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Set up the register access tracking with the per-component sizes of the
 * map, create the outer scope, and record a write for every register whose
 * live start is pinned while the line counter is still at its initial
 * value. Regular instructions are then counted starting at line 1. */
LiveRangeInstrVisitor::LiveRangeInstrVisitor(LiveRangeMap& live_range_map):
   m_live_range_map(live_range_map),
   m_register_access(live_range_map.sizes())
{
   if (sfn_log.has_debug_flag(SfnLog::merge)) {
      sfn_log << SfnLog::merge << "Have component register numbers: ";
      for (auto count : live_range_map.sizes()) {
         sfn_log << count << " ";
      }
      sfn_log << "\n";
   }

   /* The outer scope is the first (and so far only) entry. */
   m_scopes.push_back(std::make_unique<ProgramScope>(nullptr, outer_scope, 0, 0, 0));
   m_current_scope = m_scopes.back().get();

   for (int chan = 0; chan < 4; ++chan) {
      for (const auto& entry : live_range_map.component(chan)) {
         if (entry.m_register->live_start_pinned())
            record_write(entry.m_register);
      }
   }

   m_line = 1;
}
|
||||
|
||||
/* Record a write for each of the four vector elements whose swizzle value
 * is below 6 and whose register channel is below 4. */
void
LiveRangeInstrVisitor::record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle &swizzle)
{
   for (int c = 0; c < 4; ++c) {
      if (!(swizzle[c] < 6))
         continue;
      if (!(reg[c]->chan() < 4))
         continue;
      record_write(reg[c]);
   }
}
|
||||
|
||||
void LiveRangeInstrVisitor::record_read(const RegisterVec4& reg, LiveRangeEntry::EUse use)
|
||||
{
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (reg[i]->chan() < 4)
|
||||
record_read(reg[i], use);
|
||||
}
|
||||
}
|
||||
|
||||
void LiveRangeInstrVisitor::scope_if()
|
||||
{
|
||||
m_current_scope = create_scope(m_current_scope, if_branch, m_if_id++,
|
||||
m_current_scope->nesting_depth() + 1, m_line + 1);
|
||||
}
|
||||
|
||||
void LiveRangeInstrVisitor::scope_else()
|
||||
{
|
||||
assert(m_current_scope->type() == if_branch);
|
||||
m_current_scope->set_end(m_line - 1);
|
||||
|
||||
m_current_scope = create_scope(m_current_scope->parent(), else_branch, m_current_scope->id(),
|
||||
m_current_scope->nesting_depth() + 1, m_line + 1);
|
||||
}
|
||||
|
||||
void LiveRangeInstrVisitor::scope_endif()
|
||||
{
|
||||
m_current_scope->set_end(m_line - 1);
|
||||
m_current_scope = m_current_scope->parent();
|
||||
assert(m_current_scope);
|
||||
}
|
||||
|
||||
void LiveRangeInstrVisitor::scope_loop_begin()
|
||||
{
|
||||
m_current_scope = create_scope(m_current_scope, loop_body, m_loop_id++,
|
||||
m_current_scope->nesting_depth() + 1, m_line);
|
||||
}
|
||||
|
||||
void LiveRangeInstrVisitor::scope_loop_end()
|
||||
{
|
||||
m_current_scope->set_end(m_line);
|
||||
m_current_scope = m_current_scope->parent();
|
||||
assert(m_current_scope);
|
||||
}
|
||||
|
||||
void LiveRangeInstrVisitor::scope_loop_break()
|
||||
{
|
||||
m_current_scope->set_loop_break_line(m_line);
|
||||
}
|
||||
|
||||
/* Create a new scope owned by m_scopes and return a non-owning pointer
 * to it. */
ProgramScope *
LiveRangeInstrVisitor::create_scope(ProgramScope *parent, ProgramScopeType type,
                                    int id, int nesting_depth, int line)
{
   m_scopes.emplace_back(std::make_unique<ProgramScope>(parent, type, id, nesting_depth, line));
   return m_scopes.back().get();
}
|
||||
|
||||
/* ALU instruction: record the destination write if the instruction has
 * the alu_write flag, then a read for every source register and for the
 * buffer address register of uniform sources that have one. */
void
LiveRangeInstrVisitor::visit(AluInstr *instr)
{
   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";

   if (instr->has_alu_flag(alu_write)) {
      record_write(instr->dest());
   }

   const unsigned nsrc = instr->n_sources();
   for (unsigned s = 0; s < nsrc; ++s) {
      /* as_register() may yield a null pointer; record_read ignores it. */
      record_read(instr->src(s).as_register(), LiveRangeEntry::use_unspecified);

      auto uniform = instr->src(s).as_uniform();
      if (!uniform || !uniform->buf_addr())
         continue;

      record_read(uniform->buf_addr()->as_register(), LiveRangeEntry::use_unspecified);
   }
}
|
||||
|
||||
/* ALU group: forward the visitor to every non-null entry of the group. */
void
LiveRangeInstrVisitor::visit(AluGroup *group)
{
   for (auto slot : *group) {
      if (!slot)
         continue;
      slot->accept(*this);
   }
}
|
||||
|
||||
/* Texture instruction: record the destination write (filtered by the
 * destination swizzle), the read of the source vector and, if the sampler
 * offset is a register, a read of that register. */
void
LiveRangeInstrVisitor::visit(TexInstr *instr)
{
   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";

   record_write(instr->dst(), instr->all_dest_swizzle());

   auto coord = instr->src();
   record_read(coord, LiveRangeEntry::use_unspecified);

   if (instr->sampler_offset() && instr->sampler_offset()->as_register()) {
      record_read(instr->sampler_offset()->as_register(), LiveRangeEntry::use_unspecified);
   }
}
|
||||
|
||||
/* Export instruction: the exported value is read with use type
 * use_export. */
void
LiveRangeInstrVisitor::visit(ExportInstr *instr)
{
   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";

   auto exported = instr->value();
   record_read(exported, LiveRangeEntry::use_export);
}
|
||||
|
||||
/* Fetch instruction: record the destination write (filtered by the
 * destination swizzle) and the source read, unless the source is
 * disabled. */
void
LiveRangeInstrVisitor::visit(FetchInstr *instr)
{
   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";

   record_write(instr->dst(), instr->all_dest_swizzle());

   auto& src = instr->src();
   /* Channel can be 7 to disable source */
   const bool source_enabled = src.chan() < 4;
   if (source_enabled) {
      record_read(&src, LiveRangeEntry::use_unspecified);
   }
}
|
||||
|
||||
/* Block: visit every contained instruction in order; the line counter
 * advances after each instruction that reports end_group(). */
void
LiveRangeInstrVisitor::visit(Block *instr)
{
   sfn_log << SfnLog::merge << "Visit block\n";

   for (auto child : *instr) {
      child->accept(*this);
      if (!child->end_group())
         continue;
      ++m_line;
   }

   sfn_log << SfnLog::merge << "End block\n";
}
|
||||
|
||||
/* Scratch write: record a read for each value component selected by the
 * write mask and, if present, for the address register. */
void
LiveRangeInstrVisitor::visit(WriteScratchInstr *instr)
{
   auto& value = instr->value();
   for (int c = 0; c < 4; ++c) {
      const bool selected = (1 << c) & instr->write_mask();
      if (selected)
         record_read(value[c], LiveRangeEntry::use_unspecified);
   }

   auto address = instr->address();
   if (address) {
      record_read(address, LiveRangeEntry::use_unspecified);
   }
}
|
||||
|
||||
/* Stream out: the written value is read with use type use_export. */
void
LiveRangeInstrVisitor::visit(StreamOutInstr *instr)
{
   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";

   auto streamed = instr->value();
   record_read(streamed, LiveRangeEntry::use_export);
}
|
||||
|
||||
/* Memory ring output: the value is read with use type use_export; an
 * export index that is a register is read as well. */
void
LiveRangeInstrVisitor::visit(MemRingOutInstr *instr)
{
   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";

   auto written = instr->value();
   record_read(written, LiveRangeEntry::use_export);

   auto index = instr->export_index();
   if (!index || !index->as_register())
      return;

   record_read(index->as_register(), LiveRangeEntry::use_unspecified);
}
|
||||
|
||||
/* Control flow: translate else/endif/loop instructions into the matching
 * scope change. Loop continue and wait-ack have no effect here. */
void
LiveRangeInstrVisitor::visit(ControlFlowInstr *instr)
{
   switch (instr->cf_type()) {
   case ControlFlowInstr::cf_else:
      scope_else();
      break;
   case ControlFlowInstr::cf_endif:
      scope_endif();
      break;
   case ControlFlowInstr::cf_loop_begin:
      scope_loop_begin();
      break;
   case ControlFlowInstr::cf_loop_end:
      scope_loop_end();
      break;
   case ControlFlowInstr::cf_loop_break:
      scope_loop_break();
      break;
   case ControlFlowInstr::cf_loop_continue:
   case ControlFlowInstr::cf_wait_ack:
      /* nothing to track */
      break;
   default:
      unreachable("Flow control unreachanble");
   }
}
|
||||
|
||||
/* If instruction: the predicate is visited first, i.e. in the enclosing
 * scope, and only then is the if scope opened. */
void
LiveRangeInstrVisitor::visit(IfInstr *instr)
{
   auto predicate = instr->predicate();
   predicate->accept(*this);

   scope_if();
}
|
||||
|
||||
/* GDS instruction: read the source, read the UAV id when one is set,
 * then record the destination write. */
void
LiveRangeInstrVisitor::visit(GDSInstr *instr)
{
   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";

   record_read(instr->src(), LiveRangeEntry::use_unspecified);

   if (instr->uav_id()) {
      record_read(instr->uav_id(), LiveRangeEntry::use_unspecified);
   }

   record_write(instr->dest());
}
|
||||
|
||||
/* RAT instruction: read value and address, plus the RAT id offset when
 * one is set. */
void
LiveRangeInstrVisitor::visit(RatInstr *instr)
{
   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";

   record_read(instr->value(), LiveRangeEntry::use_unspecified);
   record_read(instr->addr(), LiveRangeEntry::use_unspecified);

   auto id_offset = instr->rat_id_offset();
   if (!id_offset)
      return;

   record_read(id_offset, LiveRangeEntry::use_unspecified);
}
|
||||
|
||||
|
||||
/* Tess factor write: the written value is read with use type use_export. */
void
LiveRangeInstrVisitor::visit(WriteTFInstr *instr)
{
   const auto use = LiveRangeEntry::use_export;
   record_read(instr->value(), use);
}
|
||||
|
||||
/* Must never be reached: this instruction type is expected to be gone by
 * the time live ranges are evaluated. */
void
LiveRangeInstrVisitor::visit(UNUSED LDSAtomicInstr *instr)
{
   unreachable("LDSAtomicInstr must be lowered before scheduling and live range evaluation");
}
|
||||
|
||||
/* Must never be reached: this instruction type is expected to be gone by
 * the time live ranges are evaluated. */
void
LiveRangeInstrVisitor::visit(UNUSED LDSReadInstr *instr)
{
   unreachable("LDSReadInstr must be lowered before scheduling and live range evaluation");
}
|
||||
|
||||
void LiveRangeInstrVisitor::record_write(const Register *reg)
|
||||
{
|
||||
auto addr = reg->get_addr();
|
||||
if (addr && addr->as_register()) {
|
||||
record_read(addr->as_register(), LiveRangeEntry::use_unspecified);
|
||||
|
||||
const auto av = static_cast<const LocalArrayValue *>(reg);
|
||||
auto& array = av->array();
|
||||
|
||||
sfn_log << SfnLog::merge << array << " write:" << m_line << "\n";
|
||||
|
||||
for (auto i = 0u; i < array.size(); ++i) {
|
||||
auto& rav = m_register_access(array(i, reg->chan()));
|
||||
rav.record_write(m_line, m_current_scope);
|
||||
}
|
||||
} else {
|
||||
auto& ra = m_register_access(*reg);
|
||||
sfn_log << SfnLog::merge << *reg << " write:" << m_line << "\n";
|
||||
ra.record_write(m_line, m_current_scope);
|
||||
}
|
||||
}
|
||||
|
||||
void LiveRangeInstrVisitor::record_read(const Register *reg, LiveRangeEntry::EUse use)
|
||||
{
|
||||
if (!reg)
|
||||
return;
|
||||
|
||||
auto addr = reg->get_addr();
|
||||
if (addr && addr->as_register()) {
|
||||
sfn_log << SfnLog::merge << "Record reading address register " << *addr << "\n";
|
||||
|
||||
auto& ra = m_register_access(*addr->as_register());
|
||||
ra.record_read(m_line, m_current_scope, use);
|
||||
|
||||
const auto av = static_cast<const LocalArrayValue *>(reg);
|
||||
auto& array = av->array();
|
||||
sfn_log << SfnLog::merge << array << " read:" << m_line << "\n";
|
||||
|
||||
for (auto i = 0u; i < array.size(); ++i) {
|
||||
auto& rav = m_register_access(array(i, reg->chan()));
|
||||
rav.record_read(m_line, m_current_scope, use);
|
||||
}
|
||||
} else {
|
||||
sfn_log << SfnLog::merge << *reg << " read:" << m_line << "\n";
|
||||
auto& ra = m_register_access(*reg);
|
||||
ra.record_read(m_line, m_current_scope, use);
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator << (std::ostream& os, const LiveRangeMap& lrm)
|
||||
{
|
||||
os << "Live ranges\n";
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
const auto& comp = lrm.component(i);
|
||||
for (auto& range : comp)
|
||||
os << " " << range << "\n";
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
bool operator == (const LiveRangeMap& lhs, const LiveRangeMap& rhs)
|
||||
{
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
const auto& lc = lhs.component(i);
|
||||
const auto& rc = rhs.component(i);
|
||||
if (lc.size() != rc.size())
|
||||
return false;
|
||||
|
||||
for (auto j = 0u; j < lc.size(); ++j) {
|
||||
const auto& lv = lc[j];
|
||||
const auto& rv = rc[j];
|
||||
|
||||
if (lv.m_start != rv.m_start ||
|
||||
lv.m_end != rv.m_end ||
|
||||
lv.m_color != rv.m_color ||
|
||||
!lv.m_register->equal_to(*rv.m_register))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
23
src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h
Normal file
23
src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef LIFERANGEEVALUATOR_H
|
||||
#define LIFERANGEEVALUATOR_H
|
||||
|
||||
#include "sfn_valuefactory.h"
|
||||
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class Shader;
|
||||
|
||||
class LiveRangeEvaluator {
|
||||
public:
|
||||
|
||||
LiveRangeEvaluator();
|
||||
|
||||
LiveRangeMap run(Shader &sh);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // LIFERANGEEVALUATOR_H
|
623
src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp
Normal file
623
src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp
Normal file
@ -0,0 +1,623 @@
|
||||
#include "sfn_liverangeevaluator_helpers.h"
|
||||
|
||||
#include "sfn_virtualvalues.h"
|
||||
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include <limits>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
ProgramScope::ProgramScope(ProgramScope *parent, ProgramScopeType type, int id,
|
||||
int depth, int scope_begin):
|
||||
scope_type(type),
|
||||
scope_id(id),
|
||||
scope_nesting_depth(depth),
|
||||
scope_begin(scope_begin),
|
||||
scope_end(-1),
|
||||
break_loop_line(std::numeric_limits<int>::max()),
|
||||
parent_scope(parent)
|
||||
{
|
||||
}
|
||||
|
||||
ProgramScope::ProgramScope():
|
||||
ProgramScope(nullptr, undefined_scope, -1, -1, -1)
|
||||
{
|
||||
}
|
||||
|
||||
/* Type this scope was created with. */
ProgramScopeType
ProgramScope::type() const
{
   return scope_type;
}
|
||||
|
||||
/* Enclosing scope, or nullptr if none was given at construction. */
ProgramScope *
ProgramScope::parent() const
{
   return parent_scope;
}
|
||||
|
||||
int ProgramScope::nesting_depth() const
|
||||
{
|
||||
return scope_nesting_depth;
|
||||
}
|
||||
|
||||
bool ProgramScope::is_loop() const
|
||||
{
|
||||
return (scope_type == loop_body);
|
||||
}
|
||||
|
||||
bool ProgramScope::is_in_loop() const
|
||||
{
|
||||
if (scope_type == loop_body)
|
||||
return true;
|
||||
|
||||
if (parent_scope)
|
||||
return parent_scope->is_in_loop();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Closest loop body on the path from this scope to the root (this scope
 * included), or nullptr if there is none. */
const ProgramScope *
ProgramScope::innermost_loop() const
{
   for (const ProgramScope *s = this; s; s = s->parent_scope) {
      if (s->scope_type == loop_body)
         return s;
   }
   return nullptr;
}
|
||||
|
||||
/* Loop body closest to the root on the path from this scope to the root
 * (this scope included), or nullptr if there is none. */
const ProgramScope *
ProgramScope::outermost_loop() const
{
   const ProgramScope *found = nullptr;

   /* Walk all the way up; the last loop seen is the outermost one. */
   for (const ProgramScope *s = this; s; s = s->parent()) {
      if (s->type() == loop_body)
         found = s;
   }

   return found;
}
|
||||
|
||||
bool ProgramScope::is_child_of_ifelse_id_sibling(const ProgramScope *scope) const
|
||||
{
|
||||
const ProgramScope *my_parent = in_parent_ifelse_scope();
|
||||
while (my_parent) {
|
||||
/* is a direct child? */
|
||||
if (my_parent == scope)
|
||||
return false;
|
||||
/* is a child of the conditions sibling? */
|
||||
if (my_parent->id() == scope->id())
|
||||
return true;
|
||||
my_parent = my_parent->in_parent_ifelse_scope();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ProgramScope::is_child_of(const ProgramScope *scope) const
|
||||
{
|
||||
const ProgramScope *my_parent = parent();
|
||||
while (my_parent) {
|
||||
if (my_parent == scope)
|
||||
return true;
|
||||
my_parent = my_parent->parent();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Closest scope (this one included) for which is_conditional() holds,
 * or nullptr if there is none up to the root.
 */
const ProgramScope *ProgramScope::enclosing_conditional() const
{
   for (const ProgramScope *s = this; s; s = s->parent_scope) {
      if (s->is_conditional())
         return s;
   }
   return nullptr;
}
|
||||
|
||||
bool ProgramScope::contains_range_of(const ProgramScope& other) const
|
||||
{
|
||||
return (begin() <= other.begin()) && (end() >= other.end());
|
||||
}
|
||||
|
||||
bool ProgramScope::is_conditional() const
|
||||
{
|
||||
return scope_type == if_branch ||
|
||||
scope_type == else_branch ||
|
||||
scope_type == switch_case_branch ||
|
||||
scope_type == switch_default_branch;
|
||||
}
|
||||
|
||||
/* Closest ELSE branch enclosing this scope (this scope included), or
 * nullptr if there is none.
 */
const ProgramScope *ProgramScope::in_else_scope() const
{
   for (const ProgramScope *s = this; s; s = s->parent_scope) {
      if (s->scope_type == else_branch)
         return s;
   }
   return nullptr;
}
|
||||
|
||||
/* Like in_ifelse_scope(), but the search starts at the parent, so this
 * scope itself is never returned.
 */
const ProgramScope *ProgramScope::in_parent_ifelse_scope() const
{
   return parent_scope ? parent_scope->in_ifelse_scope() : nullptr;
}
|
||||
|
||||
/* Closest IF or ELSE branch enclosing this scope (this scope included),
 * or nullptr if there is none.
 */
const ProgramScope *ProgramScope::in_ifelse_scope() const
{
   for (const ProgramScope *s = this; s; s = s->parent_scope) {
      const ProgramScopeType t = s->scope_type;
      if (t == if_branch || t == else_branch)
         return s;
   }
   return nullptr;
}
|
||||
|
||||
bool ProgramScope::is_switchcase_scope_in_loop() const
|
||||
{
|
||||
return (scope_type == switch_case_branch ||
|
||||
scope_type == switch_default_branch) &&
|
||||
is_in_loop();
|
||||
}
|
||||
|
||||
bool ProgramScope::break_is_for_switchcase() const
|
||||
{
|
||||
if (scope_type == loop_body)
|
||||
return false;
|
||||
|
||||
if (scope_type == switch_case_branch ||
|
||||
scope_type == switch_default_branch ||
|
||||
scope_type == switch_body)
|
||||
return true;
|
||||
|
||||
if (parent_scope)
|
||||
return parent_scope->break_is_for_switchcase();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int ProgramScope::id() const
|
||||
{
|
||||
return scope_id;
|
||||
}
|
||||
|
||||
int ProgramScope::begin() const
|
||||
{
|
||||
return scope_begin;
|
||||
}
|
||||
|
||||
int ProgramScope::end() const
|
||||
{
|
||||
return scope_end;
|
||||
}
|
||||
|
||||
void ProgramScope::set_end(int end)
|
||||
{
|
||||
if (scope_end == -1)
|
||||
scope_end = end;
|
||||
}
|
||||
|
||||
void ProgramScope::set_loop_break_line(int line)
|
||||
{
|
||||
if (scope_type == loop_body) {
|
||||
break_loop_line = MIN2(break_loop_line, line);
|
||||
} else {
|
||||
if (parent_scope)
|
||||
parent()->set_loop_break_line(line);
|
||||
}
|
||||
}
|
||||
|
||||
int ProgramScope::loop_break_line() const
|
||||
{
|
||||
return break_loop_line;
|
||||
}
|
||||
|
||||
/* Start tracking a register component with a pre-set live range.
 * The first/last write lines are seeded from range.start and the last
 * read from range.end; no scope has been seen yet and the write
 * conditionality is untouched.
 */
RegisterCompAccess::RegisterCompAccess(LiveRange range)
   : last_read_scope(nullptr),
     first_read_scope(nullptr),
     first_write_scope(nullptr),
     first_write(range.start),
     last_read(range.end),
     last_write(range.start),
     first_read(std::numeric_limits<int>::max()),
     conditionality_in_loop_id(conditionality_untouched),
     if_scope_write_flags(0),
     next_ifelse_nesting_depth(0),
     current_unpaired_if_write_scope(nullptr),
     was_written_in_current_else_scope(false),
     m_range(range)
{
}
|
||||
|
||||
/* Default: no pre-set live range, i.e. start and end are both -1. */
RegisterCompAccess::RegisterCompAccess()
   : RegisterCompAccess(LiveRange(-1, -1))
{
}
|
||||
|
||||
|
||||
void RegisterCompAccess::record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use)
|
||||
{
|
||||
last_read_scope = scope;
|
||||
if (use != LiveRangeEntry::use_unspecified)
|
||||
m_use_type.set(use);
|
||||
if (last_read < line)
|
||||
last_read = line;
|
||||
|
||||
if (first_read > line) {
|
||||
first_read = line;
|
||||
first_read_scope = scope;
|
||||
}
|
||||
|
||||
/* If the conditionality of the first write is already resolved then
|
||||
* no further checks are required.
|
||||
*/
|
||||
if (conditionality_in_loop_id == write_is_unconditional ||
|
||||
conditionality_in_loop_id == write_is_conditional)
|
||||
return;
|
||||
|
||||
/* Check whether we are in a condition within a loop */
|
||||
const ProgramScope *ifelse_scope = scope->in_ifelse_scope();
|
||||
const ProgramScope *enclosing_loop;
|
||||
if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) {
|
||||
|
||||
/* If we have either not yet written to this register nor writes are
|
||||
* resolved as unconditional in the enclosing loop then check whether
|
||||
* we read before write in an IF/ELSE branch.
|
||||
*/
|
||||
if ((conditionality_in_loop_id != write_is_conditional) &&
|
||||
(conditionality_in_loop_id != enclosing_loop->id())) {
|
||||
|
||||
if (current_unpaired_if_write_scope) {
|
||||
|
||||
/* Has been written in this or a parent scope? - this makes the temporary
|
||||
* unconditionally set at this point.
|
||||
*/
|
||||
if (scope->is_child_of(current_unpaired_if_write_scope))
|
||||
return;
|
||||
|
||||
/* Has been written in the same scope before it was read? */
|
||||
if (ifelse_scope->type() == if_branch) {
|
||||
if (current_unpaired_if_write_scope->id() == scope->id())
|
||||
return;
|
||||
} else {
|
||||
if (was_written_in_current_else_scope)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* The temporary was read (conditionally) before it is written, hence
|
||||
* it should survive a loop. This can be signaled like if it were
|
||||
* conditionally written.
|
||||
*/
|
||||
conditionality_in_loop_id = write_is_conditional;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterCompAccess::record_write(int line, ProgramScope *scope)
|
||||
{
|
||||
last_write = line;
|
||||
|
||||
if (first_write < 0) {
|
||||
first_write = line;
|
||||
first_write_scope = scope;
|
||||
|
||||
/* If the first write we encounter is not in a conditional branch, or
|
||||
* the conditional write is not within a loop, then this is to be
|
||||
* considered an unconditional dominant write.
|
||||
*/
|
||||
const ProgramScope *conditional = scope->enclosing_conditional();
|
||||
if (!conditional || !conditional->innermost_loop()) {
|
||||
conditionality_in_loop_id = write_is_unconditional;
|
||||
}
|
||||
}
|
||||
|
||||
/* The conditionality of the first write is already resolved. */
|
||||
if (conditionality_in_loop_id == write_is_unconditional ||
|
||||
conditionality_in_loop_id == write_is_conditional)
|
||||
return;
|
||||
|
||||
/* If the nesting depth is larger than the supported level,
|
||||
* then we assume conditional writes.
|
||||
*/
|
||||
if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) {
|
||||
conditionality_in_loop_id = write_is_conditional;
|
||||
return;
|
||||
}
|
||||
|
||||
/* If we are in an IF/ELSE scope within a loop and the loop has not
|
||||
* been resolved already, then record this write.
|
||||
*/
|
||||
const ProgramScope *ifelse_scope = scope->in_ifelse_scope();
|
||||
if (ifelse_scope && ifelse_scope->innermost_loop() &&
|
||||
ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id)
|
||||
record_ifelse_write(*ifelse_scope);
|
||||
}
|
||||
|
||||
void RegisterCompAccess::record_ifelse_write(const ProgramScope& scope)
|
||||
{
|
||||
if (scope.type() == if_branch) {
|
||||
/* The first write in an IF branch within a loop implies unresolved
|
||||
* conditionality (if it was untouched or unconditional before).
|
||||
*/
|
||||
conditionality_in_loop_id = conditionality_unresolved;
|
||||
was_written_in_current_else_scope = false;
|
||||
record_if_write(scope);
|
||||
} else {
|
||||
was_written_in_current_else_scope = true;
|
||||
record_else_write(scope);
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterCompAccess::record_if_write(const ProgramScope& scope)
|
||||
{
|
||||
/* Don't record write if this IF scope if it ...
|
||||
* - is not the first write in this IF scope,
|
||||
* - has already been written in a parent IF scope.
|
||||
* In both cases this write is a secondary write that doesn't contribute
|
||||
* to resolve conditionality.
|
||||
*
|
||||
* Record the write if it
|
||||
* - is the first one (obviously),
|
||||
* - happens in an IF branch that is a child of the ELSE branch of the
|
||||
* last active IF/ELSE pair. In this case recording this write is used to
|
||||
* established whether the write is (un-)conditional in the scope enclosing
|
||||
* this outer IF/ELSE pair.
|
||||
*/
|
||||
if (!current_unpaired_if_write_scope ||
|
||||
(current_unpaired_if_write_scope->id() != scope.id() &&
|
||||
scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) {
|
||||
if_scope_write_flags |= 1 << next_ifelse_nesting_depth;
|
||||
current_unpaired_if_write_scope = &scope;
|
||||
next_ifelse_nesting_depth++;
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterCompAccess::record_else_write(const ProgramScope& scope)
|
||||
{
|
||||
int mask = 1 << (next_ifelse_nesting_depth - 1);
|
||||
|
||||
/* If the temporary was written in an IF branch on the same scope level
|
||||
* and this branch is the sibling of this ELSE branch, then we have a
|
||||
* pair of writes that makes write access to this temporary unconditional
|
||||
* in the enclosing scope.
|
||||
*/
|
||||
|
||||
if ((if_scope_write_flags & mask) &&
|
||||
(scope.id() == current_unpaired_if_write_scope->id())) {
|
||||
--next_ifelse_nesting_depth;
|
||||
if_scope_write_flags &= ~mask;
|
||||
|
||||
/* The following code deals with propagating unconditionality from
|
||||
* inner levels of nested IF/ELSE to the outer levels like in
|
||||
*
|
||||
* 1: var t;
|
||||
* 2: if (a) { <- start scope A
|
||||
* 3: if (b)
|
||||
* 4: t = ...
|
||||
* 5: else
|
||||
* 6: t = ...
|
||||
* 7: } else { <- start scope B
|
||||
* 8: if (c)
|
||||
* 9: t = ...
|
||||
* A: else <- start scope C
|
||||
* B: t = ...
|
||||
* C: }
|
||||
*
|
||||
*/
|
||||
|
||||
const ProgramScope *parent_ifelse = scope.parent()->in_ifelse_scope();
|
||||
|
||||
if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) {
|
||||
/* We are at the end of scope C and already recorded a write
|
||||
* within an IF scope (A), the sibling of the parent ELSE scope B,
|
||||
* and it is not yet resolved. Mark that as the last relevant
|
||||
* IF scope. Below the write will be resolved for the A/B
|
||||
* scope pair.
|
||||
*/
|
||||
current_unpaired_if_write_scope = parent_ifelse;
|
||||
} else {
|
||||
current_unpaired_if_write_scope = nullptr;
|
||||
}
|
||||
/* Promote the first write scope to the enclosing scope because
|
||||
* the current IF/ELSE pair is now irrelevant for the analysis.
|
||||
* This is also required to evaluate the minimum life time for t in
|
||||
* {
|
||||
* var t;
|
||||
* if (a)
|
||||
* t = ...
|
||||
* else
|
||||
* t = ...
|
||||
* x = t;
|
||||
* ...
|
||||
* }
|
||||
*/
|
||||
first_write_scope = scope.parent();
|
||||
|
||||
/* If some parent is IF/ELSE and in a loop then propagate the
|
||||
* write to that scope. Otherwise the write is unconditional
|
||||
* because it happens in both corresponding IF/ELSE branches
|
||||
* in this loop, and hence, record the loop id to signal the
|
||||
* resolution.
|
||||
*/
|
||||
if (parent_ifelse && parent_ifelse->is_in_loop()) {
|
||||
record_ifelse_write(*parent_ifelse);
|
||||
} else {
|
||||
conditionality_in_loop_id = scope.innermost_loop()->id();
|
||||
}
|
||||
} else {
|
||||
/* The temporary was not written in the IF branch corresponding
|
||||
* to this ELSE branch, hence the write is conditional.
|
||||
*/
|
||||
conditionality_in_loop_id = write_is_conditional;
|
||||
}
|
||||
}
|
||||
|
||||
bool RegisterCompAccess::conditional_ifelse_write_in_loop() const
|
||||
{
|
||||
return conditionality_in_loop_id <= conditionality_unresolved;
|
||||
}
|
||||
|
||||
void RegisterCompAccess::propagate_live_range_to_dominant_write_scope()
|
||||
{
|
||||
first_write = first_write_scope->begin();
|
||||
int lr = first_write_scope->end();
|
||||
|
||||
if (last_read < lr)
|
||||
last_read = lr;
|
||||
}
|
||||
|
||||
void RegisterCompAccess::update_required_live_range()
|
||||
{
|
||||
bool keep_for_full_loop = false;
|
||||
|
||||
/* This register component is not used at all, or only read,
|
||||
* mark it as unused and ignore it when renaming.
|
||||
* glsl_to_tgsi_visitor::renumber_registers will take care of
|
||||
* eliminating registers that are not written to.
|
||||
*/
|
||||
if (last_write < 0) {
|
||||
m_range.start = -1;
|
||||
m_range.end = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Only written to, just make sure the register component is not
|
||||
* reused in the range it is used to write to
|
||||
*/
|
||||
if (!last_read_scope) {
|
||||
m_range.start = first_write;
|
||||
m_range.end = last_write + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
assert(first_write_scope || m_range.start >= 0);
|
||||
|
||||
/* The register was pre-defines, so th first write scope is the outerpost scopw */
|
||||
if (!first_write_scope) {
|
||||
first_write_scope = first_read_scope;
|
||||
while (first_write_scope->parent())
|
||||
first_write_scope = first_write_scope->parent();
|
||||
}
|
||||
|
||||
const ProgramScope *enclosing_scope_first_read = first_read_scope;
|
||||
const ProgramScope *enclosing_scope_first_write = first_write_scope;
|
||||
|
||||
/* We read before writing in a loop
|
||||
* hence the value must survive the loops
|
||||
*/
|
||||
if ((first_read <= first_write) &&
|
||||
first_read_scope->is_in_loop()) {
|
||||
keep_for_full_loop = true;
|
||||
enclosing_scope_first_read = first_read_scope->outermost_loop();
|
||||
}
|
||||
|
||||
/* A conditional write within a (nested) loop must survive the outermost
|
||||
* loop if the last read was not within the same scope.
|
||||
*/
|
||||
const ProgramScope *conditional = enclosing_scope_first_write->enclosing_conditional();
|
||||
if (conditional && !conditional->contains_range_of(*last_read_scope) &&
|
||||
(conditional->is_switchcase_scope_in_loop() ||
|
||||
conditional_ifelse_write_in_loop())) {
|
||||
keep_for_full_loop = true;
|
||||
enclosing_scope_first_write = conditional->outermost_loop();
|
||||
}
|
||||
|
||||
/* Evaluate the scope that is shared by all: required first write scope,
|
||||
* required first read before write scope, and last read scope.
|
||||
*/
|
||||
const ProgramScope *enclosing_scope = enclosing_scope_first_read;
|
||||
if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
|
||||
enclosing_scope = enclosing_scope_first_write;
|
||||
|
||||
if (last_read_scope->contains_range_of(*enclosing_scope))
|
||||
enclosing_scope = last_read_scope;
|
||||
|
||||
while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
|
||||
!enclosing_scope->contains_range_of(*last_read_scope)) {
|
||||
enclosing_scope = enclosing_scope->parent();
|
||||
assert(enclosing_scope);
|
||||
}
|
||||
|
||||
/* Propagate the last read scope to the target scope */
|
||||
while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
|
||||
/* If the read is in a loop and we have to move up the scope we need to
|
||||
* extend the live range to the end of this current loop because at this
|
||||
* point we don't know whether the component was written before
|
||||
* un-conditionally in the same loop.
|
||||
*/
|
||||
if (last_read_scope->is_loop())
|
||||
last_read = last_read_scope->end();
|
||||
|
||||
last_read_scope = last_read_scope->parent();
|
||||
}
|
||||
|
||||
/* If the variable has to be kept for the whole loop, and we
|
||||
* are currently in a loop, then propagate the live range.
|
||||
*/
|
||||
if (keep_for_full_loop && first_write_scope->is_loop())
|
||||
propagate_live_range_to_dominant_write_scope();
|
||||
|
||||
/* Propagate the first_dominant_write scope to the target scope */
|
||||
while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
|
||||
/* Propagate live_range if there was a break in a loop and the write was
|
||||
* after the break inside that loop. Note, that this is only needed if
|
||||
* we move up in the scopes.
|
||||
*/
|
||||
if (first_write_scope->loop_break_line() < first_write) {
|
||||
keep_for_full_loop = true;
|
||||
propagate_live_range_to_dominant_write_scope();
|
||||
}
|
||||
|
||||
first_write_scope = first_write_scope->parent();
|
||||
|
||||
/* Propagate live_range if we are now in a loop */
|
||||
if (keep_for_full_loop && first_write_scope->is_loop())
|
||||
propagate_live_range_to_dominant_write_scope();
|
||||
}
|
||||
|
||||
/* The last write past the last read is dead code, but we have to
|
||||
* ensure that the component is not reused too early, hence extend the
|
||||
* live_range past the last write.
|
||||
*/
|
||||
if (last_write >= last_read)
|
||||
last_read = last_write + 1;
|
||||
|
||||
/* Here we are at the same scope, all is resolved */
|
||||
m_range.start = first_write;
|
||||
m_range.end = last_read;
|
||||
}
|
||||
|
||||
const int
|
||||
RegisterCompAccess::conditionality_untouched = std::numeric_limits<int>::max();
|
||||
|
||||
const int
|
||||
RegisterCompAccess::write_is_unconditional = std::numeric_limits<int>::max() - 1;
|
||||
|
||||
|
||||
/* Create the access records: for each of the four channels, sizes[chan]
 * default-constructed entries.
 */
RegisterAccess::RegisterAccess(const std::array<size_t, 4>& sizes)
{
   for (size_t chan = 0; chan < sizes.size(); ++chan)
      m_access_record[chan].resize(sizes[chan]);
}
|
||||
|
||||
/* Access record of the register component addressed by the channel and
 * index of `reg`. Both are only range-checked by assertions.
 */
RegisterCompAccess& RegisterAccess::operator() (const Register& reg)
{
   assert(reg.chan() < 4);

   auto& channel_records = m_access_record[reg.chan()];
   assert(channel_records.size() > (size_t)reg.index());

   return channel_records[reg.index()];
}
|
||||
|
||||
}
|
162
src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h
Normal file
162
src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h
Normal file
@ -0,0 +1,162 @@
|
||||
#ifndef SFN_LIFERANGEEVALUATOR_HELPERS_H
|
||||
#define SFN_LIFERANGEEVALUATOR_HELPERS_H
|
||||
|
||||
#include "sfn_valuefactory.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Kinds of control-flow scopes distinguished by the live range
 * evaluation. The enumerators keep their implicit values 0..7.
 */
enum ProgramScopeType {
   outer_scope,           /* outermost program scope */
   loop_body,             /* body of a loop */
   if_branch,             /* IF branch of a conditional */
   else_branch,           /* ELSE branch of a conditional */
   switch_body,           /* body of a switch statement */
   switch_case_branch,    /* case branch of a switch */
   switch_default_branch, /* default branch of a switch */
   undefined_scope
};
|
||||
|
||||
class ProgramScope {
|
||||
public:
|
||||
ProgramScope();
|
||||
ProgramScope(ProgramScope *parent, ProgramScopeType type, int id,
|
||||
int depth, int begin);
|
||||
|
||||
ProgramScopeType type() const;
|
||||
ProgramScope *parent() const;
|
||||
int nesting_depth() const;
|
||||
int id() const;
|
||||
int end() const;
|
||||
int begin() const;
|
||||
int loop_break_line() const;
|
||||
|
||||
const ProgramScope *in_else_scope() const;
|
||||
const ProgramScope *in_ifelse_scope() const;
|
||||
const ProgramScope *in_parent_ifelse_scope() const;
|
||||
const ProgramScope *innermost_loop() const;
|
||||
const ProgramScope *outermost_loop() const;
|
||||
const ProgramScope *enclosing_conditional() const;
|
||||
|
||||
bool is_loop() const;
|
||||
bool is_in_loop() const;
|
||||
bool is_switchcase_scope_in_loop() const;
|
||||
bool is_conditional() const;
|
||||
bool is_child_of(const ProgramScope *scope) const;
|
||||
bool is_child_of_ifelse_id_sibling(const ProgramScope *scope) const;
|
||||
|
||||
bool break_is_for_switchcase() const;
|
||||
bool contains_range_of(const ProgramScope& other) const;
|
||||
|
||||
void set_end(int end);
|
||||
void set_loop_break_line(int line);
|
||||
|
||||
private:
|
||||
ProgramScopeType scope_type;
|
||||
int scope_id;
|
||||
int scope_nesting_depth;
|
||||
int scope_begin;
|
||||
int scope_end;
|
||||
int break_loop_line;
|
||||
ProgramScope *parent_scope;
|
||||
};
|
||||
|
||||
/* Class to track the access to a component of a temporary register. */
|
||||
|
||||
struct LiveRange;
|
||||
|
||||
class RegisterCompAccess {
|
||||
public:
|
||||
RegisterCompAccess();
|
||||
RegisterCompAccess(LiveRange range);
|
||||
|
||||
void record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use);
|
||||
void record_write(int line, ProgramScope *scope);
|
||||
|
||||
void update_required_live_range();
|
||||
|
||||
const auto& range() { return m_range;}
|
||||
|
||||
const auto& use_type() { return m_use_type; }
|
||||
private:
|
||||
void propagate_live_range_to_dominant_write_scope();
|
||||
bool conditional_ifelse_write_in_loop() const;
|
||||
|
||||
void record_ifelse_write(const ProgramScope& scope);
|
||||
void record_if_write(const ProgramScope& scope);
|
||||
void record_else_write(const ProgramScope& scope);
|
||||
|
||||
ProgramScope *last_read_scope;
|
||||
ProgramScope *first_read_scope;
|
||||
ProgramScope *first_write_scope;
|
||||
|
||||
int first_write;
|
||||
int last_read;
|
||||
int last_write;
|
||||
int first_read;
|
||||
|
||||
/* This member variable tracks the current resolution of conditional writing
|
||||
* to this temporary in IF/ELSE clauses.
|
||||
*
|
||||
* The initial value "conditionality_untouched" indicates that this
|
||||
* temporary has not yet been written to within an if clause.
|
||||
*
|
||||
* A positive (other than "conditionality_untouched") number refers to the
|
||||
* last loop id for which the write was resolved as unconditional. With each
|
||||
* new loop this value will be overwitten by "conditionality_unresolved"
|
||||
* on entering the first IF clause writing this temporary.
|
||||
*
|
||||
* The value "conditionality_unresolved" indicates that no resolution has
|
||||
* been achieved so far. If the variable is set to this value at the end of
|
||||
* the processing of the whole shader it also indicates a conditional write.
|
||||
*
|
||||
* The value "write_is_conditional" marks that the variable is written
|
||||
* conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
|
||||
* least one loop.
|
||||
*/
|
||||
int conditionality_in_loop_id;
|
||||
|
||||
/* Helper constants to make the tracking code more readable. */
|
||||
static const int write_is_conditional = -1;
|
||||
static const int conditionality_unresolved = 0;
|
||||
static const int conditionality_untouched;
|
||||
static const int write_is_unconditional;
|
||||
|
||||
/* A bit field tracking the nexting levels of if-else clauses where the
|
||||
* temporary has (so far) been written to in the if branch, but not in the
|
||||
* else branch.
|
||||
*/
|
||||
unsigned int if_scope_write_flags;
|
||||
|
||||
int next_ifelse_nesting_depth;
|
||||
static const int supported_ifelse_nesting_depth = 32;
|
||||
|
||||
/* Tracks the last if scope in which the temporary was written to
|
||||
* without a write in the corresponding else branch. Is also used
|
||||
* to track read-before-write in the according scope.
|
||||
*/
|
||||
const ProgramScope *current_unpaired_if_write_scope;
|
||||
|
||||
/* Flag to resolve read-before-write in the else scope. */
|
||||
bool was_written_in_current_else_scope;
|
||||
|
||||
LiveRange m_range;
|
||||
|
||||
std::bitset<LiveRangeEntry::use_unspecified> m_use_type;
|
||||
};
|
||||
|
||||
class RegisterAccess {
|
||||
public:
|
||||
using RegisterCompAccessVector = std::vector<RegisterCompAccess>;
|
||||
|
||||
RegisterAccess(const std::array<size_t, 4>& sizes);
|
||||
|
||||
RegisterCompAccess& operator() (const Register& reg);
|
||||
|
||||
auto& component(int i) { return m_access_record[i]; }
|
||||
|
||||
private:
|
||||
std::array<RegisterCompAccessVector, 4> m_access_record;
|
||||
};
|
||||
|
||||
}
|
||||
#endif // SFN_LIFERANGEEVALUATOR_HELPERS_H
|
86
src/gallium/drivers/r600/sfn/sfn_memorypool.cpp
Normal file
86
src/gallium/drivers/r600/sfn/sfn_memorypool.cpp
Normal file
@ -0,0 +1,86 @@
|
||||
#include "sfn_memorypool.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Private state of MemoryPool: owns the monotonic buffer resource that
 * backs all pool allocations. The resource is created in the
 * constructor and deleted in the destructor.
 */
struct MemoryPoolImpl {
   using MemoryBacking = ::std::pmr::monotonic_buffer_resource;

   MemoryPoolImpl();
   ~MemoryPoolImpl();

   MemoryBacking *pool;
};
|
||||
|
||||
/* The pool starts without backing storage; initialize() creates it. */
MemoryPool::MemoryPool() noexcept
   : impl(nullptr)
{
}
|
||||
|
||||
/* Per-thread pool singleton. The backing storage is (re-)created on
 * demand, so the returned pool is always usable for allocations.
 */
MemoryPool& MemoryPool::instance()
{
   static thread_local MemoryPool thread_pool;

   thread_pool.initialize();
   return thread_pool;
}
|
||||
|
||||
void MemoryPool::free()
|
||||
{
|
||||
delete impl;
|
||||
impl = nullptr;
|
||||
}
|
||||
|
||||
void MemoryPool::initialize()
|
||||
{
|
||||
if (!impl)
|
||||
impl = new MemoryPoolImpl();
|
||||
}
|
||||
|
||||
void *MemoryPool::allocate(size_t size)
|
||||
{
|
||||
return impl->pool->allocate(size);
|
||||
}
|
||||
|
||||
void *MemoryPool::allocate(size_t size, size_t align)
|
||||
{
|
||||
return impl->pool->allocate(size, align);
|
||||
}
|
||||
|
||||
void MemoryPool::release_all()
|
||||
{
|
||||
instance().free();
|
||||
}
|
||||
|
||||
void init_pool()
|
||||
{
|
||||
MemoryPool::instance();
|
||||
}
|
||||
|
||||
void release_pool()
|
||||
{
|
||||
MemoryPool::release_all();
|
||||
}
|
||||
|
||||
void *Allocate::operator new(size_t size)
|
||||
{
|
||||
return MemoryPool::instance().allocate(size);
|
||||
}
|
||||
|
||||
/* Intentionally a no-op: nothing is returned to the pool per object;
 * the pool's backing storage is deleted as a whole by
 * MemoryPool::free().
 */
void Allocate::operator delete (void *p, size_t size)
{
   (void)p;
   (void)size;
}
|
||||
|
||||
/* Create the monotonic buffer resource backing the pool. */
MemoryPoolImpl::MemoryPoolImpl()
   : pool(new MemoryBacking())
{
}
|
||||
|
||||
/* Delete the backing resource created by the constructor. */
MemoryPoolImpl::~MemoryPoolImpl()
{
   MemoryBacking *backing = pool;
   delete backing;
}
|
||||
|
||||
}
|
69
src/gallium/drivers/r600/sfn/sfn_memorypool.h
Normal file
69
src/gallium/drivers/r600/sfn/sfn_memorypool.h
Normal file
@ -0,0 +1,69 @@
|
||||
#ifndef MEMORYPOOL_H
|
||||
#define MEMORYPOOL_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <stack>
|
||||
|
||||
/* <memory_resource> requires C++17, for which __cplusplus is 201703L. */
#if __cplusplus >= 201703L
#include <memory_resource>
#define R600_POINTER_TYPE(X) X *
#else
#error Need C++17
#endif
|
||||
|
||||
namespace r600 {
|
||||
|
||||
void init_pool();
|
||||
void release_pool();
|
||||
|
||||
/* Base class that replaces operator new/delete for derived types.
 * Only the declarations live here; the definitions are out of line.
 */
class Allocate {
public:
   void *operator new(size_t size);
   void operator delete(void *p, size_t size);
};
|
||||
|
||||
/* Memory pool handing out raw storage. Instances are obtained through
 * instance(); the constructor is private.
 */
class MemoryPool {
public:
   /* Singleton access and release of its storage. */
   static MemoryPool& instance();
   static void release_all();

   /* Destroy / create the backing storage. */
   void free();
   void initialize();

   /* Raw allocations, without and with an explicit alignment. */
   void *allocate(size_t size);
   void *allocate(size_t size, size_t align);

private:
   MemoryPool() noexcept;

   struct MemoryPoolImpl *impl;
};
|
||||
|
||||
template <typename T>
|
||||
struct Allocator {
|
||||
using value_type = T;
|
||||
|
||||
Allocator() = default;
|
||||
Allocator(const Allocator& other) = default;
|
||||
|
||||
template <typename U>
|
||||
Allocator(const Allocator<U>& other) {(void)other;}
|
||||
|
||||
T *allocate(size_t n) {
|
||||
return (T *)MemoryPool::instance().allocate(n * sizeof(T), alignof(T));
|
||||
}
|
||||
|
||||
void deallocate(void *p, size_t n) {
|
||||
(void)p; (void)n;
|
||||
//MemoryPool::instance().deallocate(p, n * sizeof(T), alignof(T));
|
||||
}
|
||||
|
||||
friend bool operator == (const Allocator<T>& lhs, const Allocator<T>& rhs) {
|
||||
(void)lhs; (void)rhs; return true;}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // MEMORYPOOL_H
|
@ -30,19 +30,19 @@
|
||||
#include "../r600_pipe.h"
|
||||
#include "../r600_shader.h"
|
||||
|
||||
|
||||
#include "util/u_prim.h"
|
||||
|
||||
#include "sfn_instruction_tex.h"
|
||||
|
||||
#include "sfn_shader_vertex.h"
|
||||
#include "sfn_shader_fragment.h"
|
||||
#include "sfn_shader_geometry.h"
|
||||
#include "sfn_shader_compute.h"
|
||||
#include "sfn_shader_tcs.h"
|
||||
#include "sfn_shader_tess_eval.h"
|
||||
#include "sfn_shader.h"
|
||||
#include "sfn_assembler.h"
|
||||
#include "sfn_debug.h"
|
||||
#include "sfn_liverangeevaluator.h"
|
||||
#include "sfn_nir_lower_fs_out_to_vector.h"
|
||||
#include "sfn_ir_to_assembly.h"
|
||||
#include "sfn_nir_lower_alu.h"
|
||||
#include "sfn_nir_lower_tex.h"
|
||||
#include "sfn_optimizer.h"
|
||||
#include "sfn_ra.h"
|
||||
#include "sfn_scheduler.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -78,264 +78,11 @@ bool NirLowerInstruction::run(nir_shader *shader)
|
||||
(void *)this);
|
||||
}
|
||||
|
||||
|
||||
ShaderFromNir::ShaderFromNir():sh(nullptr),
|
||||
gfx_level(CLASS_UNKNOWN),
|
||||
m_current_if_id(0),
|
||||
m_current_loop_id(0),
|
||||
scratch_size(0)
|
||||
{
|
||||
}
|
||||
|
||||
bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
|
||||
r600_pipe_shader_selector *sel, r600_shader_key& key,
|
||||
struct r600_shader* gs_shader, enum amd_gfx_level _chip_class)
|
||||
{
|
||||
sh = shader;
|
||||
gfx_level = _chip_class;
|
||||
assert(sh);
|
||||
|
||||
switch (shader->info.stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level));
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
sfn_log << SfnLog::trans << "Start TCS\n";
|
||||
impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, gfx_level));
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
|
||||
impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level));
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
sfn_log << SfnLog::trans << "Start GS\n";
|
||||
impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, gfx_level));
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
sfn_log << SfnLog::trans << "Start FS\n";
|
||||
impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, gfx_level));
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
sfn_log << SfnLog::trans << "Start CS\n";
|
||||
impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, gfx_level));
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
sfn_log << SfnLog::trans << "Process declarations\n";
|
||||
if (!process_declaration())
|
||||
return false;
|
||||
|
||||
// at this point all functions should be inlined
|
||||
const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
|
||||
|
||||
sfn_log << SfnLog::trans << "Scan shader\n";
|
||||
|
||||
if (sfn_log.has_debug_flag(SfnLog::instr))
|
||||
nir_print_shader(const_cast<nir_shader *>(shader), stderr);
|
||||
|
||||
nir_foreach_block(block, func->impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (!impl->scan_instruction(instr)) {
|
||||
fprintf(stderr, "Unhandled sysvalue access ");
|
||||
nir_print_instr(instr, stderr);
|
||||
fprintf(stderr, "\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sfn_log << SfnLog::trans << "Reserve registers\n";
|
||||
if (!impl->allocate_reserved_registers()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ValuePool::array_list arrays;
|
||||
sfn_log << SfnLog::trans << "Allocate local registers\n";
|
||||
foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
|
||||
impl->allocate_local_register(*reg, arrays);
|
||||
}
|
||||
|
||||
sfn_log << SfnLog::trans << "Emit shader start\n";
|
||||
impl->allocate_arrays(arrays);
|
||||
|
||||
impl->emit_shader_start();
|
||||
|
||||
sfn_log << SfnLog::trans << "Process shader \n";
|
||||
foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
|
||||
if (!process_cf_node(node))
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add optimizations here
|
||||
sfn_log << SfnLog::trans << "Finalize\n";
|
||||
impl->finalize();
|
||||
|
||||
impl->get_array_info(pipe_shader->shader);
|
||||
|
||||
if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
|
||||
sfn_log << SfnLog::trans << "Merge registers\n";
|
||||
impl->remap_registers();
|
||||
}
|
||||
|
||||
sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
Shader ShaderFromNir::shader() const
|
||||
{
|
||||
return Shader{impl->m_output, impl->get_temp_registers()};
|
||||
}
|
||||
|
||||
|
||||
/* Dispatch one control-flow node to the handler matching its type.
 * Returns the handler's result; any node type other than block, if or
 * loop is rejected with false. */
bool ShaderFromNir::process_cf_node(nir_cf_node *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "CF");

   if (node->type == nir_cf_node_block)
      return process_block(nir_cf_node_as_block(node));

   if (node->type == nir_cf_node_if)
      return process_if(nir_cf_node_as_if(node));

   if (node->type == nir_cf_node_loop)
      return process_loop(nir_cf_node_as_loop(node));

   return false;
}
|
||||
|
||||
/* Translate a NIR if-statement: emit the IF start, walk the then-list,
 * emit ELSE and walk the else-list, and finally close the construct.
 * Returns false as soon as any emission or nested node fails; in that case
 * the id pushed onto m_if_stack is not popped. */
bool ShaderFromNir::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   const bool started = impl->emit_if_start(m_current_if_id, if_stmt);
   if (!started)
      return false;

   /* The id is only consumed once the IF start was emitted successfully. */
   const int this_if = m_current_if_id;
   ++m_current_if_id;
   m_if_stack.push(this_if);

   foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) {
      if (!process_cf_node(child))
         return false;
   }

   /* NOTE(review): the else part is guarded by the emptiness of then_list,
    * not else_list - confirm whether that is intended. */
   const bool emit_else = !if_stmt->then_list.is_empty();
   if (emit_else) {
      if (!impl->emit_else_start(this_if))
         return false;

      foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) {
         if (!process_cf_node(child))
            return false;
      }
   }

   const bool closed = impl->emit_ifelse_end(this_if);
   if (!closed)
      return false;

   m_if_stack.pop();
   return true;
}
|
||||
|
||||
/* Translate a NIR loop: allocate a fresh loop id, emit the loop start,
 * walk every node of the loop body, then emit the loop end.
 * Returns false on the first failing step. */
bool ShaderFromNir::process_loop(nir_loop *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");

   /* The id is consumed even when emitting the loop start fails. */
   const int this_loop = m_current_loop_id;
   ++m_current_loop_id;

   const bool started = impl->emit_loop_start(this_loop);
   if (!started)
      return false;

   foreach_list_typed(nir_cf_node, child, node, &node->body) {
      if (!process_cf_node(child))
         return false;
   }

   return impl->emit_loop_end(this_loop) ? true : false;
}
|
||||
|
||||
/* Emit every instruction of a NIR block in order. The first instruction
 * that cannot be emitted is logged to the error channel and aborts the
 * translation with false. */
bool ShaderFromNir::process_block(nir_block *block)
{
   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");

   nir_foreach_instr(instr, block) {
      if (emit_instruction(instr))
         continue;

      sfn_log << SfnLog::err << "R600: Unsupported instruction: "
              << *instr << "\n";
      return false;
   }

   return true;
}
|
||||
|
||||
|
||||
/* Out-of-line destructor with no custom work; members clean up themselves. */
ShaderFromNir::~ShaderFromNir() = default;
|
||||
|
||||
/* Report the processor type recorded by the stage-specific implementation. */
pipe_shader_type ShaderFromNir::processor_type() const
{
   const pipe_shader_type type = impl->m_processor_type;
   return type;
}
|
||||
|
||||
|
||||
/* Forward a single NIR instruction to the matching emitter of the
 * stage-specific implementation.
 *
 * Returns the emitter's result. Instruction types without an emitter are
 * printed to stderr and rejected with false. */
bool ShaderFromNir::emit_instruction(nir_instr *instr)
{
   assert(impl);

   sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";

   switch (instr->type) {
   case nir_instr_type_load_const:
      /* const values are loaded when needed, nothing to emit here */
      return true;
   case nir_instr_type_alu:
      return impl->emit_alu_instruction(instr);
   case nir_instr_type_deref:
      return impl->emit_deref_instruction(nir_instr_as_deref(instr));
   case nir_instr_type_intrinsic:
      return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
   case nir_instr_type_tex:
      return impl->emit_tex_instruction(instr);
   case nir_instr_type_jump:
      return impl->emit_jump_instruction(nir_instr_as_jump(instr));
   case nir_instr_type_ssa_undef:
      return impl->create_undef(nir_instr_as_ssa_undef(instr));
   default:
      break;
   }

   fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
   nir_print_instr(instr, stderr);
   fprintf(stderr, "'\n");
   return false;
}
|
||||
|
||||
bool ShaderFromNir::process_declaration()
|
||||
{
|
||||
impl->set_shader_info(sh);
|
||||
|
||||
if (!impl->scan_inputs_read(sh))
|
||||
return false;
|
||||
|
||||
// scan declarations
|
||||
nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
|
||||
nir_var_mem_ubo |
|
||||
nir_var_mem_ssbo) {
|
||||
if (!impl->process_uniforms(variable)) {
|
||||
fprintf(stderr, "R600: error parsing outputs variable %s\n", variable->name);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
|
||||
{
|
||||
assert(impl);
|
||||
return impl->m_output;
|
||||
}
|
||||
|
||||
|
||||
/* Out-of-line definition of the virtual destructor; nothing to release. */
AssemblyFromShader::~AssemblyFromShader() = default;
|
||||
|
||||
bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
|
||||
bool AssemblyFromShader::lower(const Shader& ir)
|
||||
{
|
||||
return do_lower(ir);
|
||||
}
|
||||
@ -557,7 +304,6 @@ r600_nir_lower_atomics(nir_shader *shader)
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
}
|
||||
using r600::r600_nir_lower_int_tg4;
|
||||
using r600::r600_lower_scratch_addresses;
|
||||
using r600::r600_lower_fs_out_to_vector;
|
||||
using r600::r600_lower_ubo_to_align16;
|
||||
@ -676,6 +422,7 @@ r600_lower_shared_io(nir_shader *nir)
|
||||
static nir_ssa_def *
|
||||
r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
|
||||
{
|
||||
(void)_options;
|
||||
auto old_ir = nir_instr_as_intrinsic(instr);
|
||||
auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||
@ -693,6 +440,8 @@ r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
|
||||
|
||||
bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
|
||||
{
|
||||
(void)_options;
|
||||
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
@ -713,7 +462,7 @@ bool r600_lower_fs_pos_input(nir_shader *shader)
|
||||
};
|
||||
|
||||
static bool
|
||||
optimize_once(nir_shader *shader, bool vectorize)
|
||||
optimize_once(nir_shader *shader)
|
||||
{
|
||||
bool progress = false;
|
||||
NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
|
||||
@ -722,9 +471,6 @@ optimize_once(nir_shader *shader, bool vectorize)
|
||||
NIR_PASS(progress, shader, nir_opt_algebraic);
|
||||
NIR_PASS(progress, shader, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
|
||||
if (vectorize)
|
||||
NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
|
||||
|
||||
NIR_PASS(progress, shader, nir_opt_remove_phis);
|
||||
|
||||
if (nir_opt_trivial_continues(shader)) {
|
||||
@ -777,13 +523,9 @@ bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
|
||||
case nir_op_fdot2:
|
||||
case nir_op_fdot3:
|
||||
case nir_op_fdot4:
|
||||
return nir_src_bit_size(alu->src[0].src) == 64;
|
||||
case nir_op_cube_r600:
|
||||
return false;
|
||||
case nir_op_bany_fnequal2:
|
||||
case nir_op_ball_fequal2:
|
||||
case nir_op_bany_inequal2:
|
||||
case nir_op_ball_iequal2:
|
||||
return nir_src_bit_size(alu->src[0].src) != 64;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
@ -793,15 +535,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
struct r600_pipe_shader *pipeshader,
|
||||
r600_shader_key *key)
|
||||
{
|
||||
char filename[4000];
|
||||
struct r600_pipe_shader_selector *sel = pipeshader->selector;
|
||||
|
||||
bool lower_64bit = ((sel->nir->options->lower_int64_options ||
|
||||
bool lower_64bit = (rctx->b.gfx_level < CAYMAN &&
|
||||
(sel->nir->options->lower_int64_options ||
|
||||
sel->nir->options->lower_doubles_options) &&
|
||||
(sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64);
|
||||
|
||||
r600::ShaderFromNir convert;
|
||||
|
||||
if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
|
||||
fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
|
||||
nir_print_shader(sel->nir, stderr);
|
||||
@ -813,10 +553,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
/* Cayman seems very crashy about accessing images that don't exist or are
|
||||
* accessed out of range, this lowering seems to help (but it can also be
|
||||
* another problem) */
|
||||
if (sel->nir->info.num_images > 0 && rctx->b.gfx_level == CAYMAN)
|
||||
NIR_PASS_V(sel->nir, r600_legalize_image_load_store);
|
||||
|
||||
NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
|
||||
nir_lower_idiv_options idiv_options = {0};
|
||||
idiv_options.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE;
|
||||
@ -828,7 +565,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
|
||||
if (lower_64bit)
|
||||
NIR_PASS_V(sel->nir, nir_lower_int64);
|
||||
while(optimize_once(sel->nir, false));
|
||||
while(optimize_once(sel->nir));
|
||||
|
||||
NIR_PASS_V(sel->nir, r600_lower_shared_io);
|
||||
NIR_PASS_V(sel->nir, r600_nir_lower_atomics);
|
||||
@ -839,8 +576,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
lower_tex_options.lower_invalid_implicit_lod = true;
|
||||
|
||||
NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
|
||||
NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
|
||||
NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray);
|
||||
NIR_PASS_V(sel->nir, r600_nir_lower_txl_txf_array_or_cube);
|
||||
NIR_PASS_V(sel->nir, r600_nir_lower_cube_to_2darray);
|
||||
|
||||
NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
|
||||
|
||||
@ -851,30 +588,11 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans);
|
||||
NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
|
||||
}
|
||||
nir_variable_mode io_modes = nir_var_uniform |
|
||||
nir_var_shader_in |
|
||||
nir_var_shader_out;
|
||||
|
||||
nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in;
|
||||
|
||||
//if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
|
||||
io_modes |= nir_var_shader_out;
|
||||
|
||||
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
|
||||
/* Lower IO to temporaries late, because otherwise we get into trouble
|
||||
* with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug
|
||||
* somewhere that results in the input always reading from the same temp
|
||||
* regardless of interpolation when the lowering is done early */
|
||||
NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
|
||||
true, true);
|
||||
|
||||
/* Since we're doing nir_lower_io_to_temporaries late, we need
|
||||
* to lower all the copy_deref's introduced by
|
||||
* lower_io_to_temporaries before calling nir_lower_io.
|
||||
*/
|
||||
NIR_PASS_V(sel->nir, nir_split_var_copies);
|
||||
NIR_PASS_V(sel->nir, nir_lower_var_copies);
|
||||
NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
|
||||
}
|
||||
|
||||
NIR_PASS_V(sel->nir, nir_opt_combine_stores, nir_var_shader_out);
|
||||
NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
|
||||
nir_lower_io_lower_64bit_to_32);
|
||||
|
||||
@ -916,14 +634,27 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
NIR_PASS_V(sh, r600_lower_tess_coord, u_tess_prim_from_shader(sh->info.tess._primitive_mode));
|
||||
}
|
||||
|
||||
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
|
||||
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
|
||||
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
|
||||
|
||||
NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
|
||||
NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
|
||||
NIR_PASS_V(sh, nir_split_64bit_vec3_and_vec4);
|
||||
NIR_PASS_V(sh, nir_lower_int64);
|
||||
|
||||
NIR_PASS_V(sh, nir_lower_ubo_vec4);
|
||||
|
||||
|
||||
if (lower_64bit)
|
||||
NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);
|
||||
|
||||
NIR_PASS_V(sh, r600::r600_split_64bit_uniforms_and_ubo);
|
||||
/* Lower to scalar to let some optimization work out better */
|
||||
while(optimize_once(sh, false));
|
||||
while(optimize_once(sh));
|
||||
|
||||
NIR_PASS_V(sh, r600::r600_merge_vec2_stores);
|
||||
if (lower_64bit)
|
||||
NIR_PASS_V(sh, r600::r600_merge_vec2_stores);
|
||||
|
||||
NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
|
||||
NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL);
|
||||
@ -934,7 +665,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
40,
|
||||
r600_get_natural_size_align_bytes);
|
||||
|
||||
while (optimize_once(sh, true));
|
||||
while (optimize_once(sh));
|
||||
|
||||
NIR_PASS_V(sh, nir_lower_bool_to_int32);
|
||||
NIR_PASS_V(sh, r600_nir_lower_int_tg4);
|
||||
@ -945,8 +676,6 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
|
||||
NIR_PASS_V(sh, nir_lower_locals_to_regs);
|
||||
|
||||
//NIR_PASS_V(sh, nir_opt_algebraic);
|
||||
//NIR_PASS_V(sh, nir_copy_prop);
|
||||
NIR_PASS_V(sh, nir_lower_to_source_mods,
|
||||
(nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
|
||||
nir_lower_64bit_source_mods));
|
||||
@ -974,33 +703,66 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size +
|
||||
sh->info.clip_distance_array_size)) - 1;
|
||||
}
|
||||
|
||||
struct r600_shader* gs_shader = nullptr;
|
||||
struct r600_shader* gs_shader = nullptr;
|
||||
if (rctx->gs_shader)
|
||||
gs_shader = &rctx->gs_shader->current->shader;
|
||||
r600_screen *rscreen = rctx->screen;
|
||||
|
||||
bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.gfx_level);
|
||||
if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
|
||||
static int shnr = 0;
|
||||
r600::Shader *shader = r600::Shader::translate_from_nir(sh, &sel->so, gs_shader,
|
||||
*key, rctx->isa->hw_class);
|
||||
|
||||
snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++);
|
||||
assert(shader);
|
||||
if (!shader)
|
||||
return -2;
|
||||
|
||||
if (access(filename, F_OK) == -1) {
|
||||
FILE *f = fopen(filename, "w");
|
||||
pipeshader->enabled_stream_buffers_mask = shader->enabled_stream_buffers_mask();
|
||||
pipeshader->selector->info.file_count[TGSI_FILE_HW_ATOMIC] += shader->atomic_file_count();
|
||||
pipeshader->selector->info.writes_memory = shader->has_flag(r600::Shader::sh_writes_memory);
|
||||
|
||||
if (f) {
|
||||
fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name);
|
||||
nir_print_shader(sh, f);
|
||||
fprintf(f, ")\";\n");
|
||||
fclose(f);
|
||||
}
|
||||
}
|
||||
if (!r)
|
||||
return -2;
|
||||
if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
|
||||
std::cerr << "Shader after conversion from nir\n";
|
||||
shader->print(std::cerr);
|
||||
}
|
||||
|
||||
auto shader = convert.shader();
|
||||
if (!r600::sfn_log.has_debug_flag(r600::SfnLog::noopt)) {
|
||||
optimize(*shader);
|
||||
|
||||
if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
|
||||
std::cerr << "Shader after optimization\n";
|
||||
shader->print(std::cerr);
|
||||
}
|
||||
}
|
||||
|
||||
auto scheduled_shader = r600::schedule(shader);
|
||||
if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
|
||||
std::cerr << "Shader after scheduling\n";
|
||||
shader->print(std::cerr);
|
||||
}
|
||||
|
||||
if (!r600::sfn_log.has_debug_flag(r600::SfnLog::nomerge)) {
|
||||
|
||||
if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge)) {
|
||||
r600::sfn_log << r600::SfnLog::merge << "Shader before RA\n";
|
||||
scheduled_shader->print(std::cerr);
|
||||
}
|
||||
|
||||
r600::sfn_log << r600::SfnLog::trans << "Merge registers\n";
|
||||
auto lrm = r600::LiveRangeEvaluator().run(*scheduled_shader);
|
||||
|
||||
if (!r600::register_allocation(lrm)) {
|
||||
R600_ERR("%s: Register allocation failed\n", __func__);
|
||||
/* For now crash if the shader could not be generated */
|
||||
assert(0);
|
||||
return -1;
|
||||
} else if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge) ||
|
||||
r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
|
||||
r600::sfn_log << "Shader after RA\n";
|
||||
scheduled_shader->print(std::cerr);
|
||||
}
|
||||
}
|
||||
|
||||
scheduled_shader->get_shader_info(&pipeshader->shader);
|
||||
pipeshader->shader.uses_doubles = sh->info.bit_sizes_float & 64 ? 1 : 0;
|
||||
|
||||
r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.gfx_level, rscreen->b.family,
|
||||
rscreen->has_compressed_msaa_texturing);
|
||||
@ -1012,9 +774,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
pipeshader->shader.bc.type = pipeshader->shader.processor_type;
|
||||
pipeshader->shader.bc.isa = rctx->isa;
|
||||
|
||||
r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
|
||||
if (!afs.lower(shader.m_ir)) {
|
||||
r600::Assembler afs(&pipeshader->shader, *key);
|
||||
if (!afs.lower(scheduled_shader)) {
|
||||
R600_ERR("%s: Lowering to assembly failed\n", __func__);
|
||||
|
||||
scheduled_shader->print(std::cerr);
|
||||
/* For now crash if the shader could not be generated */
|
||||
assert(0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -1025,8 +791,5 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
||||
} else {
|
||||
r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
|
||||
}
|
||||
if (pipeshader->shader.bc.ngpr < 6)
|
||||
pipeshader->shader.bc.ngpr = 6;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -31,7 +31,7 @@
|
||||
#include "nir_builder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include "sfn_shader_base.h"
|
||||
#include "sfn_shader.h"
|
||||
#include <vector>
|
||||
|
||||
namespace r600 {
|
||||
@ -64,56 +64,16 @@ bool r600_nir_64_to_vec2(nir_shader *sh);
|
||||
|
||||
bool r600_merge_vec2_stores(nir_shader *shader);
|
||||
|
||||
class Shader {
|
||||
public:
|
||||
std::vector<InstructionBlock>& m_ir;
|
||||
ValueMap m_temp;
|
||||
};
|
||||
|
||||
class ShaderFromNir {
|
||||
public:
|
||||
ShaderFromNir();
|
||||
~ShaderFromNir();
|
||||
|
||||
unsigned ninputs() const;
|
||||
|
||||
bool lower(const nir_shader *shader, r600_pipe_shader *sh,
|
||||
r600_pipe_shader_selector *sel, r600_shader_key &key,
|
||||
r600_shader *gs_shader, enum amd_gfx_level gfx_level);
|
||||
|
||||
bool process_declaration();
|
||||
|
||||
pipe_shader_type processor_type() const;
|
||||
|
||||
bool emit_instruction(nir_instr *instr);
|
||||
|
||||
const std::vector<InstructionBlock> &shader_ir() const;
|
||||
|
||||
Shader shader() const;
|
||||
private:
|
||||
|
||||
bool process_block();
|
||||
bool process_cf_node(nir_cf_node *node);
|
||||
bool process_if(nir_if *node);
|
||||
bool process_loop(nir_loop *node);
|
||||
bool process_block(nir_block *node);
|
||||
|
||||
std::unique_ptr<ShaderFromNirProcessor> impl;
|
||||
const nir_shader *sh;
|
||||
|
||||
enum amd_gfx_level gfx_level;
|
||||
int m_current_if_id;
|
||||
int m_current_loop_id;
|
||||
std::stack<int> m_if_stack;
|
||||
int scratch_size;
|
||||
};
|
||||
bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh);
|
||||
bool r600_lower_64bit_to_vec2(nir_shader *sh);
|
||||
bool r600_split_64bit_alu_and_phi(nir_shader *sh);
|
||||
|
||||
class AssemblyFromShader {
|
||||
public:
|
||||
virtual ~AssemblyFromShader();
|
||||
bool lower(const std::vector<InstructionBlock> &ir);
|
||||
bool lower(const Shader& s);
|
||||
private:
|
||||
virtual bool do_lower(const std::vector<InstructionBlock>& ir) = 0 ;
|
||||
virtual bool do_lower(const Shader& s) = 0 ;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -32,7 +32,8 @@
|
||||
|
||||
|
||||
static nir_ssa_def *
|
||||
r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_options)
|
||||
r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr,
|
||||
UNUSED void *_options)
|
||||
{
|
||||
b->cursor = nir_before_instr(instr);
|
||||
auto ir = nir_instr_as_intrinsic(instr);
|
||||
@ -143,7 +144,8 @@ r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_opt
|
||||
}
|
||||
|
||||
static bool
|
||||
r600_legalize_image_load_store_filter(const nir_instr *instr, const void *_options)
|
||||
r600_legalize_image_load_store_filter(const nir_instr *instr,
|
||||
UNUSED const void *_options)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
@ -108,6 +108,193 @@ private:
|
||||
};
|
||||
|
||||
|
||||
class LowerLoad64Uniform : public NirLowerInstruction {
|
||||
bool filter(const nir_instr *instr) const override;
|
||||
nir_ssa_def *lower(nir_instr *instr) override;
|
||||
};
|
||||
|
||||
bool LowerLoad64Uniform::filter(const nir_instr *instr) const
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
auto intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_load_uniform &&
|
||||
intr->intrinsic != nir_intrinsic_load_ubo &&
|
||||
intr->intrinsic != nir_intrinsic_load_ubo_vec4)
|
||||
return false;
|
||||
|
||||
return nir_dest_bit_size(intr->dest) == 64;
|
||||
}
|
||||
|
||||
|
||||
/* Turn a 64-bit load of at most two components into a 32-bit load of twice
 * as many components (modifying the intrinsic in place), then pack each
 * pair of 32-bit channels back into one 64-bit value.
 *
 * Returns the single packed value, or a vec2 of both packed values. */
nir_ssa_def *LowerLoad64Uniform::lower(nir_instr *instr)
{
   auto load = nir_instr_as_intrinsic(instr);

   /* Component count before the destination is widened below. */
   const int n64 = nir_dest_num_components(load->dest);
   assert(n64 <= 2);
   assert(load->dest.is_ssa);

   load->dest.ssa.num_components *= 2;
   load->dest.ssa.bit_size = 32;
   load->num_components *= 2;

   /* For the UBO loads the start component is doubled as well. */
   switch (load->intrinsic) {
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
      nir_intrinsic_set_component(load, 2 * nir_intrinsic_component(load));
      break;
   default:
      break;
   }

   nir_ssa_def *packed[2] = {nullptr, nullptr};

   for (int c = 0; c < n64; ++c) {
      packed[c] = nir_pack_64_2x32_split(b,
                                         nir_channel(b, &load->dest.ssa, 2 * c),
                                         nir_channel(b, &load->dest.ssa, 2 * c + 1));
   }

   return n64 == 1 ? packed[0] : nir_vec2(b, packed[0], packed[1]);
}
|
||||
|
||||
/* Run the LowerLoad64Uniform pass on the shader and return its result. */
bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh)
{
   LowerLoad64Uniform pass;
   return pass.run(sh);
}
|
||||
|
||||
class LowerSplit64op : public NirLowerInstruction {
|
||||
bool filter(const nir_instr *instr) const override {
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_alu: {
|
||||
auto alu = nir_instr_as_alu(instr);
|
||||
switch (alu->op) {
|
||||
case nir_op_bcsel:
|
||||
return nir_dest_bit_size(alu->dest.dest) == 64;
|
||||
case nir_op_f2b1:
|
||||
case nir_op_f2i32:
|
||||
case nir_op_f2u32:
|
||||
case nir_op_f2i64:
|
||||
case nir_op_f2u64:
|
||||
case nir_op_u2f64:
|
||||
case nir_op_i2f64:
|
||||
return nir_src_bit_size(alu->src[0].src) == 64;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
case nir_instr_type_phi: {
|
||||
auto phi = nir_instr_as_phi(instr);
|
||||
return nir_dest_num_components(phi->dest) == 64;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
nir_ssa_def *lower(nir_instr *instr) override {
|
||||
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_alu: {
|
||||
auto alu = nir_instr_as_alu(instr);
|
||||
switch (alu->op) {
|
||||
|
||||
case nir_op_bcsel: {
|
||||
auto lo = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1),
|
||||
nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 1)),
|
||||
nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 2)));
|
||||
auto hi = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1),
|
||||
nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 1)),
|
||||
nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 2)));
|
||||
return nir_pack_64_2x32_split(b, lo, hi);
|
||||
}
|
||||
case nir_op_f2b1: {
|
||||
auto mask = nir_component_mask(nir_dest_num_components(alu->dest.dest));
|
||||
return nir_fneu(b, nir_channels(b, nir_ssa_for_alu_src(b, alu, 0), mask),
|
||||
nir_imm_zero(b, nir_dest_num_components(alu->dest.dest), 64));
|
||||
}
|
||||
case nir_op_f2i32: {
|
||||
auto src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
|
||||
auto abs_src = nir_fabs(b, src);
|
||||
auto value = nir_f2u32(b, abs_src);
|
||||
return nir_bcsel(b, gt0, value, nir_ineg(b, value));
|
||||
}
|
||||
case nir_op_f2u32: {
|
||||
/* fp32 doesn't hold suffient bits to represent the full range of
|
||||
* u32, therefore we have to split the values, and because f2f32
|
||||
* rounds, we have to remove the fractional part in the hi bits
|
||||
* For values > UINT_MAX the result is undefined */
|
||||
auto src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
|
||||
auto highval = nir_fmul_imm(b, src, 1.0/65536.0);
|
||||
auto fract = nir_ffract(b, highval);
|
||||
auto high = nir_f2u32(b, nir_f2f32(b, nir_fsub(b, highval, fract)));
|
||||
auto lowval = nir_fmul_imm(b, fract, 65536.0);
|
||||
auto low = nir_f2u32(b, nir_f2f32(b, lowval));
|
||||
return nir_bcsel(b, gt0, nir_ior(b, nir_ishl_imm(b, high, 16), low),
|
||||
nir_imm_int(b, 0));
|
||||
}
|
||||
case nir_op_f2i64: {
|
||||
auto src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
|
||||
auto abs_src = nir_fabs(b, src);
|
||||
auto value = nir_f2u64(b, abs_src);
|
||||
return nir_bcsel(b, gt0, value, nir_isub(b, nir_imm_zero(b, 1, 64), value));
|
||||
}
|
||||
case nir_op_f2u64: {
|
||||
auto src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
|
||||
auto highval = nir_fmul_imm(b, src, 1.0/(65536.0 * 65536.0));
|
||||
auto fract = nir_ffract(b, highval);
|
||||
auto high = nir_f2u32(b, nir_fsub(b, highval, fract));
|
||||
auto low = nir_f2u32(b, nir_fmul_imm(b, fract, 65536.0 * 65536.0));
|
||||
return nir_bcsel(b, gt0, nir_pack_64_2x32_split(b, low, high),
|
||||
nir_imm_zero(b, 1, 64));
|
||||
}
|
||||
case nir_op_u2f64: {
|
||||
auto src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
auto low = nir_unpack_64_2x32_split_x(b, src);
|
||||
auto high = nir_unpack_64_2x32_split_y(b, src);
|
||||
auto flow = nir_u2f64(b, low);
|
||||
auto fhigh = nir_u2f64(b, high);
|
||||
return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
|
||||
}
|
||||
case nir_op_i2f64: {
|
||||
auto src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
auto low = nir_unpack_64_2x32_split_x(b, src);
|
||||
auto high = nir_unpack_64_2x32_split_y(b, src);
|
||||
auto flow = nir_u2f64(b, low);
|
||||
auto fhigh = nir_i2f64(b, high);
|
||||
return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
|
||||
}
|
||||
default:
|
||||
unreachable("trying to lower instruction that was not in filter");
|
||||
}
|
||||
}
|
||||
case nir_instr_type_phi: {
|
||||
auto phi = nir_instr_as_phi(instr);
|
||||
auto phi_lo = nir_phi_instr_create(b->shader);
|
||||
auto phi_hi = nir_phi_instr_create(b->shader);
|
||||
nir_ssa_dest_init(&phi_lo->instr, &phi_lo->dest, phi->dest.ssa.num_components * 2, 32, "");
|
||||
nir_ssa_dest_init(&phi_hi->instr, &phi_hi->dest, phi->dest.ssa.num_components * 2, 32, "");
|
||||
nir_foreach_phi_src(s, phi) {
|
||||
auto lo = nir_unpack_32_2x16_split_x(b, nir_ssa_for_src(b, s->src, 1));
|
||||
auto hi = nir_unpack_32_2x16_split_x(b, nir_ssa_for_src(b, s->src, 1));
|
||||
nir_phi_instr_add_src(phi_lo, s->pred, nir_src_for_ssa(lo));
|
||||
nir_phi_instr_add_src(phi_hi, s->pred, nir_src_for_ssa(hi));
|
||||
}
|
||||
return nir_pack_64_2x32_split(b, &phi_lo->dest.ssa, &phi_hi->dest.ssa);
|
||||
}
|
||||
default:
|
||||
unreachable("Trying to lower instruction that was not in filter");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* Run the LowerSplit64op pass on the shader and return its result. */
bool r600_split_64bit_alu_and_phi(nir_shader *sh)
{
   LowerSplit64op pass;
   return pass.run(sh);
}
|
||||
|
||||
|
||||
bool
|
||||
LowerSplit64BitVar::filter(const nir_instr *instr) const
|
||||
{
|
||||
@ -271,7 +458,7 @@ LowerSplit64BitVar::split_store_deref_array(nir_intrinsic_instr *intr, nir_deref
|
||||
}
|
||||
|
||||
nir_ssa_def *
|
||||
LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, nir_deref_instr *deref)
|
||||
LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, UNUSED nir_deref_instr *deref)
|
||||
{
|
||||
auto old_var = nir_intrinsic_get_var(intr, 0);
|
||||
unsigned old_components = old_var->type->without_array()->components();
|
||||
@ -556,8 +743,6 @@ LowerSplit64BitVar::lower(nir_instr *instr)
|
||||
}
|
||||
case nir_instr_type_alu: {
|
||||
auto alu = nir_instr_as_alu(instr);
|
||||
nir_print_instr(instr, stderr);
|
||||
fprintf(stderr, "\n");
|
||||
switch (alu->op) {
|
||||
case nir_op_bany_fnequal3:
|
||||
return split_reduction3(alu, nir_op_bany_fnequal2, nir_op_fneu, nir_op_ior);
|
||||
@ -845,7 +1030,7 @@ static bool store_64bit_intr(nir_src *src, void *state)
|
||||
return !*s;
|
||||
}
|
||||
|
||||
static bool double2vec2(nir_src *src, void *state)
|
||||
static bool double2vec2(nir_src *src, UNUSED void *state)
|
||||
{
|
||||
if (nir_src_bit_size(*src) != 64)
|
||||
return true;
|
||||
@ -1058,6 +1243,206 @@ bool r600_merge_vec2_stores(nir_shader *shader)
|
||||
return merger.combine();
|
||||
}
|
||||
|
||||
static bool
|
||||
r600_lower_64bit_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
b->cursor = nir_after_instr(&instr->instr);
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_ubo_vec4:
|
||||
case nir_intrinsic_load_uniform:
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
case nir_intrinsic_load_per_vertex_input:
|
||||
case nir_intrinsic_store_output:
|
||||
case nir_intrinsic_store_per_vertex_output:
|
||||
case nir_intrinsic_store_ssbo:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
if (instr->num_components <= 2)
|
||||
return false;
|
||||
|
||||
bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest;
|
||||
if (has_dest) {
|
||||
if (nir_dest_bit_size(instr->dest) != 64)
|
||||
return false;
|
||||
} else {
|
||||
if (nir_src_bit_size(instr->src[0]) != 64)
|
||||
return false;
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *first =
|
||||
nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
|
||||
nir_intrinsic_instr *second =
|
||||
nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_ubo_vec4:
|
||||
case nir_intrinsic_load_uniform:
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_store_ssbo:
|
||||
break;
|
||||
|
||||
default: {
|
||||
nir_io_semantics semantics = nir_intrinsic_io_semantics(second);
|
||||
semantics.location++;
|
||||
semantics.num_slots--;
|
||||
nir_intrinsic_set_io_semantics(second, semantics);
|
||||
|
||||
nir_intrinsic_set_base(second, nir_intrinsic_base(second) + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
first->num_components = 2;
|
||||
second->num_components -= 2;
|
||||
if (has_dest) {
|
||||
first->dest.ssa.num_components = 2;
|
||||
second->dest.ssa.num_components -= 2;
|
||||
}
|
||||
|
||||
nir_builder_instr_insert(b, &first->instr);
|
||||
nir_builder_instr_insert(b, &second->instr);
|
||||
|
||||
if (has_dest) {
|
||||
/* Merge the two loads' results back into a vector. */
|
||||
nir_ssa_scalar channels[4] = {
|
||||
nir_get_ssa_scalar(&first->dest.ssa, 0),
|
||||
nir_get_ssa_scalar(&first->dest.ssa, 1),
|
||||
nir_get_ssa_scalar(&second->dest.ssa, 0),
|
||||
nir_get_ssa_scalar(&second->dest.ssa, second->num_components > 1 ? 1 : 0),
|
||||
};
|
||||
nir_ssa_def *new_ir = nir_vec_scalars(b, channels, instr->num_components);
|
||||
nir_ssa_def_rewrite_uses(&instr->dest.ssa, new_ir);
|
||||
} else {
|
||||
/* Split the src value across the two stores. */
|
||||
b->cursor = nir_before_instr(&instr->instr);
|
||||
|
||||
nir_ssa_def *src0 = instr->src[0].ssa;
|
||||
nir_ssa_scalar channels[4] = { 0 };
|
||||
for (int i = 0; i < instr->num_components; i++)
|
||||
channels[i] = nir_get_ssa_scalar(src0, i);
|
||||
|
||||
nir_intrinsic_set_write_mask(first, nir_intrinsic_write_mask(instr) & 3);
|
||||
nir_intrinsic_set_write_mask(second, nir_intrinsic_write_mask(instr) >> 2);
|
||||
|
||||
nir_instr_rewrite_src(&first->instr, &first->src[0],
|
||||
nir_src_for_ssa(nir_vec_scalars(b, channels, 2)));
|
||||
nir_instr_rewrite_src(&second->instr, &second->src[0],
|
||||
nir_src_for_ssa(nir_vec_scalars(b, &channels[2],
|
||||
second->num_components)));
|
||||
}
|
||||
|
||||
int offset_src = -1;
|
||||
uint32_t offset_amount = 16;
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_ubo:
|
||||
offset_src = 1;
|
||||
break;
|
||||
case nir_intrinsic_load_ubo_vec4:
|
||||
case nir_intrinsic_load_uniform:
|
||||
offset_src = 0;
|
||||
offset_amount = 1;
|
||||
break;
|
||||
case nir_intrinsic_store_ssbo:
|
||||
offset_src = 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (offset_src != -1) {
|
||||
b->cursor = nir_before_instr(&second->instr);
|
||||
nir_ssa_def *second_offset =
|
||||
nir_iadd_imm(b, second->src[offset_src].ssa, offset_amount);
|
||||
nir_instr_rewrite_src(&second->instr, &second->src[offset_src],
|
||||
nir_src_for_ssa(second_offset));
|
||||
}
|
||||
|
||||
/* DCE stores we generated with no writemask (nothing else does this
|
||||
* currently).
|
||||
*/
|
||||
if (!has_dest) {
|
||||
if (nir_intrinsic_write_mask(first) == 0)
|
||||
nir_instr_remove(&first->instr);
|
||||
if (nir_intrinsic_write_mask(second) == 0)
|
||||
nir_instr_remove(&second->instr);
|
||||
}
|
||||
|
||||
nir_instr_remove(&instr->instr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
r600_lower_64bit_load_const(nir_builder *b, nir_load_const_instr *instr)
|
||||
{
|
||||
int num_components = instr->def.num_components;
|
||||
|
||||
if (instr->def.bit_size != 64 || num_components <= 2)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(&instr->instr);
|
||||
|
||||
nir_load_const_instr *first =
|
||||
nir_load_const_instr_create(b->shader, 2, 64);
|
||||
nir_load_const_instr *second =
|
||||
nir_load_const_instr_create(b->shader, num_components - 2, 64);
|
||||
|
||||
first->value[0] = instr->value[0];
|
||||
first->value[1] = instr->value[1];
|
||||
second->value[0] = instr->value[2];
|
||||
if (num_components == 4)
|
||||
second->value[1] = instr->value[3];
|
||||
|
||||
nir_builder_instr_insert(b, &first->instr);
|
||||
nir_builder_instr_insert(b, &second->instr);
|
||||
|
||||
nir_ssa_def *channels[4] = {
|
||||
nir_channel(b, &first->def, 0),
|
||||
nir_channel(b, &first->def, 1),
|
||||
nir_channel(b, &second->def, 0),
|
||||
num_components == 4 ? nir_channel(b, &second->def, 1) : NULL,
|
||||
};
|
||||
nir_ssa_def *new_ir = nir_vec(b, channels, num_components);
|
||||
nir_ssa_def_rewrite_uses(&instr->def, new_ir);
|
||||
nir_instr_remove(&instr->instr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
r600_lower_64bit_to_vec2_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_load_const:
|
||||
return r600_lower_64bit_load_const(b, nir_instr_as_load_const(instr));
|
||||
|
||||
case nir_instr_type_intrinsic:
|
||||
return r600_lower_64bit_intrinsic(b, nir_instr_as_intrinsic(instr));
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
r600_lower_64bit_to_vec2(nir_shader *s)
|
||||
{
|
||||
return nir_shader_instructions_pass(s,
|
||||
r600_lower_64bit_to_vec2_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
} // end namespace r600
|
||||
|
||||
|
||||
|
@ -95,13 +95,14 @@ emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_i
|
||||
|
||||
auto idx2 = nir_src_as_const_value(op->src[1]);
|
||||
if (!idx2 || idx2->u32 != 0)
|
||||
offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));
|
||||
offset = nir_iadd(b, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)), offset);
|
||||
|
||||
return nir_iadd(b, addr, offset);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
|
||||
emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op,
|
||||
UNUSED nir_variable_mode mode, int src_offset)
|
||||
{
|
||||
|
||||
nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
|
||||
@ -552,7 +553,7 @@ r600_lower_tess_coord_filter(const nir_instr *instr, UNUSED const void *_options
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
r600_lower_tess_coord_impl(nir_builder *b, nir_instr *instr, void *_options)
|
||||
r600_lower_tess_coord_impl(nir_builder *b, UNUSED nir_instr *instr, void *_options)
|
||||
{
|
||||
pipe_prim_type prim_type = *(pipe_prim_type *)_options;
|
||||
|
||||
|
@ -1,142 +1,9 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "sfn_nir_lower_tex.h"
|
||||
|
||||
#include "sfn_instruction_tex.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_builtin_builder.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
|
||||
unsigned sid, unsigned rid, PValue sampler_offset):
|
||||
Instruction(tex),
|
||||
m_opcode(op),
|
||||
m_dst(dest),
|
||||
m_src(src),
|
||||
m_sampler_id(sid),
|
||||
m_resource_id(rid),
|
||||
m_flags(0),
|
||||
m_inst_mode(0),
|
||||
m_dest_swizzle{0,1,2,3},
|
||||
m_sampler_offset(sampler_offset)
|
||||
|
||||
{
|
||||
memset(m_offset, 0, sizeof (m_offset));
|
||||
|
||||
add_remappable_src_value(&m_src);
|
||||
add_remappable_src_value(&m_sampler_offset);
|
||||
add_remappable_dst_value(&m_dst);
|
||||
}
|
||||
|
||||
void TexInstruction::set_gather_comp(int cmp)
|
||||
{
|
||||
m_inst_mode = cmp;
|
||||
}
|
||||
|
||||
void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value)
|
||||
{
|
||||
// I wonder whether we can actually end up here ...
|
||||
for (auto c: candidates) {
|
||||
if (*c == *m_src.reg_i(c->chan()))
|
||||
m_src.set_reg_i(c->chan(), new_value);
|
||||
if (*c == *m_dst.reg_i(c->chan()))
|
||||
m_dst.set_reg_i(c->chan(), new_value);
|
||||
}
|
||||
}
|
||||
|
||||
void TexInstruction::set_offset(unsigned index, int32_t val)
|
||||
{
|
||||
assert(index < 3);
|
||||
m_offset[index] = val;
|
||||
}
|
||||
|
||||
int TexInstruction::get_offset(unsigned index) const
|
||||
{
|
||||
assert(index < 3);
|
||||
return (m_offset[index] << 1 & 0x1f);
|
||||
}
|
||||
|
||||
bool TexInstruction::is_equal_to(const Instruction& rhs) const
|
||||
{
|
||||
assert(rhs.type() == tex);
|
||||
const auto& r = static_cast<const TexInstruction&>(rhs);
|
||||
return (m_opcode == r.m_opcode &&
|
||||
m_dst == r.m_dst &&
|
||||
m_src == r.m_src &&
|
||||
m_sampler_id == r.m_sampler_id &&
|
||||
m_resource_id == r.m_resource_id);
|
||||
}
|
||||
|
||||
void TexInstruction::do_print(std::ostream& os) const
|
||||
{
|
||||
const char *map_swz = "xyzw01?_";
|
||||
os << opname(m_opcode) << " R" << m_dst.sel() << ".";
|
||||
for (int i = 0; i < 4; ++i)
|
||||
os << map_swz[m_dest_swizzle[i]];
|
||||
|
||||
os << " " << m_src
|
||||
<< " RESID:" << m_resource_id << " SAMPLER:"
|
||||
<< m_sampler_id;
|
||||
}
|
||||
|
||||
/* Map a texture opcode to the name used when printing; opcodes that are
 * not listed yield "ERROR". */
const char *TexInstruction::opname(Opcode op)
{
   const char *name = "ERROR";

   switch (op) {
   case ld:             name = "LD"; break;
   case get_resinfo:    name = "GET_TEXTURE_RESINFO"; break;
   case get_nsampled:   name = "GET_NUMBER_OF_SAMPLES"; break;
   case get_tex_lod:    name = "GET_LOD"; break;
   case get_gradient_h: name = "GET_GRADIENTS_H"; break;
   case get_gradient_v: name = "GET_GRADIENTS_V"; break;
   case set_offsets:    name = "SET_TEXTURE_OFFSETS"; break;
   case keep_gradients: name = "KEEP_GRADIENTS"; break;
   case set_gradient_h: name = "SET_GRADIENTS_H"; break;
   case set_gradient_v: name = "SET_GRADIENTS_V"; break;
   case sample:         name = "SAMPLE"; break;
   case sample_l:       name = "SAMPLE_L"; break;
   case sample_lb:      name = "SAMPLE_LB"; break;
   case sample_lz:      name = "SAMPLE_LZ"; break;
   case sample_g:       name = "SAMPLE_G"; break;
   case sample_g_lb:    name = "SAMPLE_G_L"; break;
   case gather4:        name = "GATHER4"; break;
   case gather4_o:      name = "GATHER4_O"; break;
   case sample_c:       name = "SAMPLE_C"; break;
   case sample_c_l:     name = "SAMPLE_C_L"; break;
   case sample_c_lb:    name = "SAMPLE_C_LB"; break;
   case sample_c_lz:    name = "SAMPLE_C_LZ"; break;
   case sample_c_g:     name = "SAMPLE_C_G"; break;
   case sample_c_g_lb:  name = "SAMPLE_C_G_L"; break;
   case gather4_c:      name = "GATHER4_C"; break;
   case gather4_c_o:    name = "OP_GATHER4_C_O"; break;
   }

   return name;
}
|
||||
|
||||
|
||||
|
||||
static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
@ -273,14 +140,14 @@ bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
|
||||
nir_ssa_def *lambda_exp = nir_fexp2(b, lod);
|
||||
nir_ssa_def *scale = NULL;
|
||||
|
||||
if (tex->is_array) {
|
||||
if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
|
||||
unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
|
||||
scale = nir_frcp(b, nir_channels(b, size, 1));
|
||||
scale = nir_swizzle(b, scale, swizzle, 3);
|
||||
} else if (tex->is_array) {
|
||||
int cmp_mask = (1 << (size->num_components - 1)) - 1;
|
||||
scale = nir_frcp(b, nir_channels(b, size,
|
||||
(nir_component_mask_t)cmp_mask));
|
||||
} else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
|
||||
unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
|
||||
scale = nir_frcp(b, nir_channels(b, size, 1));
|
||||
scale = nir_swizzle(b, scale, swizzle, 3);
|
||||
}
|
||||
|
||||
nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);
|
||||
@ -408,7 +275,3 @@ r600_nir_lower_cube_to_2darray(nir_shader *shader)
|
||||
r600_nir_lower_cube_to_2darray_filer,
|
||||
r600_nir_lower_cube_to_2darray_impl, nullptr);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
10
src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h
Normal file
10
src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef SFN_NIR_LOWER_TEX_H
|
||||
#define SFN_NIR_LOWER_TEX_H
|
||||
|
||||
struct nir_shader;
|
||||
|
||||
bool r600_nir_lower_int_tg4(nir_shader *nir);
|
||||
bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
|
||||
bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
|
||||
|
||||
#endif // SFN_NIR_LOWER_TEX_H
|
627
src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
Normal file
627
src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
Normal file
@ -0,0 +1,627 @@
|
||||
#include "sfn_optimizer.h"
|
||||
|
||||
#include "sfn_instr_alugroup.h"
|
||||
#include "sfn_instr_controlflow.h"
|
||||
#include "sfn_instr_export.h"
|
||||
#include "sfn_instr_tex.h"
|
||||
#include "sfn_instr_fetch.h"
|
||||
#include "sfn_instr_lds.h"
|
||||
#include "sfn_peephole.h"
|
||||
#include "sfn_debug.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Run the optimization passes on the shader until a complete round reports
 * no further change.
 *
 * Returns true if any round reported progress. The per-round flag alone
 * cannot be returned: it is always false once the loop has terminated. */
bool optimize(Shader& shader)
{
   sfn_log << SfnLog::opt << "Shader before optimization\n";
   if (sfn_log.has_debug_flag(SfnLog::opt)) {
      std::stringstream ss;
      shader.print(ss);
      sfn_log << ss.str() << "\n\n";
   }

   bool any_progress = false;
   bool progress;

   do {
      progress = false;
      progress |= copy_propagation_fwd(shader);
      progress |= dead_code_elimination(shader);
      progress |= copy_propagation_backward(shader);
      progress |= dead_code_elimination(shader);
      progress |= simplify_source_vectors(shader);
      progress |= peephole(shader);
      progress |= dead_code_elimination(shader);

      /* Remember that at least one round changed something. */
      any_progress |= progress;
   } while (progress);

   return any_progress;
}
|
||||
|
||||
class DCEVisitor : public InstrVisitor {
|
||||
public:
|
||||
DCEVisitor();
|
||||
|
||||
void visit(AluInstr *instr) override;
|
||||
void visit(AluGroup *instr) override;
|
||||
void visit(TexInstr *instr) override;
|
||||
void visit(ExportInstr *instr) override {(void)instr;};
|
||||
void visit(FetchInstr *instr) override;
|
||||
void visit(Block *instr) override;
|
||||
|
||||
void visit(ControlFlowInstr *instr) override {(void)instr;};
|
||||
void visit(IfInstr *instr) override {(void)instr;};
|
||||
void visit(WriteScratchInstr *instr) override {(void)instr;};
|
||||
void visit(StreamOutInstr *instr) override {(void)instr;};
|
||||
void visit(MemRingOutInstr *instr) override {(void)instr;};
|
||||
void visit(EmitVertexInstr *instr) override {(void)instr;};
|
||||
void visit(GDSInstr *instr) override {(void)instr;};
|
||||
void visit(WriteTFInstr *instr) override {(void)instr;};
|
||||
void visit(LDSAtomicInstr *instr) override {(void)instr;};
|
||||
void visit(LDSReadInstr *instr) override;
|
||||
void visit(RatInstr *instr) override {(void)instr;};
|
||||
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
/* Repeatedly run the DCE visitor over all blocks of the shader until a run
 * makes no more progress.
 *
 * Returns true if any run removed or reduced something. The visitor's own
 * flag is always false when the loop exits, so it is accumulated
 * separately. */
bool dead_code_elimination(Shader& shader)
{
   DCEVisitor dce;
   bool any_progress = false;

   do {

      sfn_log << SfnLog::opt << "start dce run\n";

      dce.progress = false;
      for (auto& b : shader.func())
         b->accept(dce);

      any_progress |= dce.progress;

      sfn_log << SfnLog::opt << "finished dce run\n\n";

   } while (dce.progress);

   sfn_log << SfnLog::opt << "Shader after DCE\n";
   if (sfn_log.has_debug_flag(SfnLog::opt)) {
      std::stringstream ss;
      shader.print(ss);
      sfn_log << ss.str() << "\n\n";
   }

   return any_progress;
}
|
||||
|
||||
DCEVisitor::DCEVisitor():progress(false)
|
||||
{
|
||||
}
|
||||
|
||||
/* Try to mark an ALU instruction as dead. Instructions that are already
 * dead, whose destination still has uses, or that are kill/barrier ops are
 * left alone. */
void DCEVisitor::visit(AluInstr *instr)
{
   sfn_log << SfnLog::opt << "DCE: visit '" << *instr;

   if (instr->has_instr_flag(Instr::dead))
      return;

   auto dest = instr->dest();
   if (dest && dest->has_uses()) {
      sfn_log << SfnLog::opt << " dest used\n";
      return;
   }

   /* Kill ops and the group barrier are kept even though no value they
    * write is used. */
   auto must_be_kept = [](auto opcode) {
      switch (opcode) {
      case op2_kille:
      case op2_killne:
      case op2_kille_int:
      case op2_killne_int:
      case op2_killge:
      case op2_killge_int:
      case op2_killge_uint:
      case op2_killgt:
      case op2_killgt_int:
      case op2_killgt_uint:
      case op0_group_barrier:
         return true;
      default:
         return false;
      }
   };

   if (must_be_kept(instr->opcode())) {
      sfn_log << SfnLog::opt << " never kill\n";
      return;
   }

   const bool now_dead = instr->set_dead();
   sfn_log << SfnLog::opt << (now_dead ? "dead" : "alive") << "\n";
   if (now_dead)
      progress = true;
}
|
||||
|
||||
/* Let the LDS read drop components that are not used and record whether
 * it reported a change. */
void DCEVisitor::visit(LDSReadInstr *instr)
{
   sfn_log << SfnLog::opt << "visit " << *instr << "\n";

   const bool changed = instr->remove_unused_components();
   progress |= changed;
}
|
||||
|
||||
/* ALU groups are intentionally skipped: the instructions were grouped
 * because they are used together, so nothing is eliminated inside a
 * group. */
void DCEVisitor::visit(AluGroup *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Set the destination swizzle of every unused destination channel to 7
 * and, if no channel is used at all, try to mark the texture instruction
 * as dead. */
void DCEVisitor::visit(TexInstr *instr)
{
   auto& dest = instr->dst();
   RegisterVec4::Swizzle swz = instr->all_dest_swizzle();

   bool any_channel_used = false;
   for (int chan = 0; chan < 4; ++chan) {
      if (dest[chan]->has_uses())
         any_channel_used = true;
      else
         swz[chan] = 7; /* presumably "channel not written" - TODO confirm */
   }
   instr->set_dest_swizzle(swz);

   if (!any_channel_used)
      progress |= instr->set_dead();
}
|
||||
|
||||
/* Set the destination swizzle of every unused destination channel to 7
 * and, if no channel is used at all, try to mark the fetch instruction as
 * dead. */
void DCEVisitor::visit(FetchInstr *instr)
{
   auto& dest = instr->dst();
   RegisterVec4::Swizzle swz = instr->all_dest_swizzle();

   bool any_channel_used = false;
   for (int chan = 0; chan < 4; ++chan) {
      if (dest[chan]->has_uses())
         any_channel_used = true;
      else
         swz[chan] = 7; /* presumably "channel not written" - TODO confirm */
   }
   instr->set_dest_swizzle(swz);

   if (any_channel_used)
      return;

   sfn_log << SfnLog::opt << "set dead: " << *instr << "\n";

   const bool now_dead = instr->set_dead();
   progress |= now_dead;
}
|
||||
|
||||
/* Visit all instructions of a block that are not flagged "keep" and erase
 * those that are dead afterwards. The iterator is advanced before the
 * current element is possibly erased. */
void DCEVisitor::visit(Block *block)
{
   auto next = block->begin();
   const auto last = block->end();

   while (next != last) {
      auto current = next;
      ++next;

      if ((*current)->keep())
         continue;

      (*current)->accept(*this);

      if ((*current)->is_dead())
         block->erase(current);
   }
}
|
||||
|
||||
/* Free function (not a member of any visitor class) that ignores its
 * argument. */
void visit(ControlFlowInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Free function (not a member of any visitor class) that ignores its
 * argument. */
void visit(IfInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
class CopyPropFwdVisitor : public InstrVisitor {
|
||||
public:
|
||||
CopyPropFwdVisitor();
|
||||
|
||||
void visit(AluInstr *instr) override;
|
||||
void visit(AluGroup *instr) override;
|
||||
void visit(TexInstr *instr) override;
|
||||
void visit(ExportInstr *instr) override {(void)instr;}
|
||||
void visit(FetchInstr *instr) override;
|
||||
void visit(Block *instr) override;
|
||||
void visit(ControlFlowInstr *instr) override {(void)instr;}
|
||||
void visit(IfInstr *instr) override {(void)instr;}
|
||||
void visit(WriteScratchInstr *instr) override {(void)instr;}
|
||||
void visit(StreamOutInstr *instr) override {(void)instr;}
|
||||
void visit(MemRingOutInstr *instr) override {(void)instr;}
|
||||
void visit(EmitVertexInstr *instr) override {(void)instr;}
|
||||
void visit(GDSInstr *instr) override {(void)instr;};
|
||||
void visit(WriteTFInstr *instr) override {(void)instr;};
|
||||
void visit(RatInstr *instr) override {(void)instr;};
|
||||
|
||||
// TODO: these two should use copy propagation
|
||||
void visit(LDSAtomicInstr *instr) override {(void)instr;};
|
||||
void visit(LDSReadInstr *instr) override {(void)instr;};
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
|
||||
class CopyPropBackVisitor : public InstrVisitor {
|
||||
public:
|
||||
CopyPropBackVisitor();
|
||||
|
||||
void visit(AluInstr *instr) override;
|
||||
void visit(AluGroup *instr) override;
|
||||
void visit(TexInstr *instr) override;
|
||||
void visit(ExportInstr *instr) override {(void)instr;}
|
||||
void visit(FetchInstr *instr) override;
|
||||
void visit(Block *instr) override;
|
||||
void visit(ControlFlowInstr *instr) override {(void)instr;}
|
||||
void visit(IfInstr *instr) override {(void)instr;}
|
||||
void visit(WriteScratchInstr *instr) override {(void)instr;}
|
||||
void visit(StreamOutInstr *instr) override {(void)instr;}
|
||||
void visit(MemRingOutInstr *instr) override {(void)instr;}
|
||||
void visit(EmitVertexInstr *instr) override {(void)instr;}
|
||||
void visit(GDSInstr *instr) override {(void)instr;};
|
||||
void visit(WriteTFInstr *instr) override {(void)instr;};
|
||||
void visit(LDSAtomicInstr *instr) override {(void)instr;};
|
||||
void visit(LDSReadInstr *instr) override {(void)instr;};
|
||||
void visit(RatInstr *instr) override {(void)instr;};
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
/* Run forward copy propagation over all blocks of the shader until a run
 * makes no more progress.
 *
 * Returns true if any run replaced a source. The visitor's own flag is
 * always false when the loop exits, so it is accumulated separately. */
bool copy_propagation_fwd(Shader& shader)
{
   auto& root = shader.func();
   CopyPropFwdVisitor copy_prop;
   bool any_progress = false;

   do {
      copy_prop.progress = false;
      for (auto b : root)
         b->accept(copy_prop);

      any_progress |= copy_prop.progress;
   } while (copy_prop.progress);

   sfn_log << SfnLog::opt << "Shader after Copy Prop forward\n";
   if (sfn_log.has_debug_flag(SfnLog::opt)) {
      std::stringstream ss;
      shader.print(ss);
      sfn_log << ss.str() << "\n\n";
   }

   return any_progress;
}
|
||||
|
||||
/* Run backward copy propagation over all blocks of the shader until a run
 * makes no more progress.
 *
 * Returns true if any run replaced a destination. The visitor's own flag
 * is always false when the loop exits, so it is accumulated separately. */
bool copy_propagation_backward(Shader& shader)
{
   CopyPropBackVisitor copy_prop;
   bool any_progress = false;

   do {
      copy_prop.progress = false;
      for (auto b: shader.func())
         b->accept(copy_prop);

      any_progress |= copy_prop.progress;
   } while (copy_prop.progress);

   sfn_log << SfnLog::opt << "Shader after Copy Prop backwards\n";
   if (sfn_log.has_debug_flag(SfnLog::opt)) {
      std::stringstream ss;
      shader.print(ss);
      sfn_log << ss.str() << "\n\n";
   }

   return any_progress;
}
|
||||
|
||||
CopyPropFwdVisitor::CopyPropFwdVisitor():
|
||||
progress(false)
|
||||
{}
|
||||
|
||||
/* Forward copy propagation for one ALU instruction: if the instruction
 * allows its source to be propagated, try to replace its destination by
 * its first source in every instruction that uses the destination. */
void CopyPropFwdVisitor::visit(AluInstr *instr)
{
   sfn_log << SfnLog::opt << "CopyPropFwdVisitor:["
           << instr->block_id() << ":" << instr->index() << "] " << *instr
           << " dset=" << instr->dest() << " ";

   if (instr->dest()) {
      sfn_log << SfnLog::opt << "has uses; "
              << instr->dest()->uses().size();
   }

   sfn_log << SfnLog::opt << "\n";

   if (!instr->can_propagate_src()) {
      return;
   }

   auto src = instr->psrc(0);
   auto dest = instr->dest();

   /* Iterate over a snapshot of the use list. replace_source() is expected
    * to remove the user from dest's uses when it succeeds (its body is not
    * visible here - TODO confirm); erasing from the container that is being
    * range-iterated would invalidate the loop iterator. */
   const auto users = dest->uses();

   for (auto& i : users) {
      /* SSA can always be propagated, registers only in the same block
       * and only if they are not assigned to more than once. The size check
       * deliberately looks at the live use list, not at the snapshot. */
      if (dest->is_ssa() ||
          (instr->block_id() == i->block_id() &&
           instr->index() < i->index() &&
           dest->uses().size() == 1)) {
         sfn_log << SfnLog::opt << "   Try replace in "
                 << i->block_id() << ":" << i->index()
                 << *i<< "\n";
         progress |= i->replace_source(dest, src);
      }
   }

   if (instr->dest()) {
      sfn_log << SfnLog::opt << "has uses; "
              << instr->dest()->uses().size();
   }
   sfn_log << SfnLog::opt << "  done\n";
}
|
||||
|
||||
|
||||
/* ALU groups are not touched by forward copy propagation. */
void CopyPropFwdVisitor::visit(AluGroup *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Texture instructions are not touched by forward copy propagation. */
void CopyPropFwdVisitor::visit(TexInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Fetch instructions are not touched by forward copy propagation. */
void CopyPropFwdVisitor::visit(FetchInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Visit every instruction of the block in order. */
void CopyPropFwdVisitor::visit(Block *instr)
{
   auto& block = *instr;
   for (auto& member: block) {
      member->accept(*this);
   }
}
|
||||
|
||||
CopyPropBackVisitor::CopyPropBackVisitor():
|
||||
progress(false)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/* Backward copy propagation for one ALU instruction: if the instruction
 * allows it, let the instructions that write its source register write the
 * destination of this instruction directly, and mark this instruction dead
 * when at least one such replacement succeeded. */
void CopyPropBackVisitor::visit(AluInstr *instr)
{
   sfn_log << SfnLog::opt << "CopyPropBackVisitor:["
           << instr->block_id() << ":" << instr->index() << "] " << *instr << "\n";

   if (!instr->can_propagate_dest())
      return;

   /* The source must be a register that has at most one use. */
   auto src_reg = instr->psrc(0)->as_register();
   if (!src_reg || src_reg->uses().size() > 1)
      return;

   /* The instruction must actually write a destination. */
   auto dest = instr->dest();
   if (!dest || !instr->has_alu_flag(alu_write))
      return;

   /* Non-SSA destinations with more than one parent are skipped. */
   const bool multi_parent_register =
      !dest->is_ssa() && dest->parents().size() > 1;
   if (multi_parent_register)
      return;

   bool replaced_any = false;

   for (auto& parent: src_reg->parents()) {
      sfn_log << SfnLog::opt << "Try replace dest in "
              << parent->block_id() << ":" << parent->index()
              << *parent<< "\n";

      if (!parent->replace_dest(dest, instr))
         continue;

      /* The parent now writes dest instead of this instruction. */
      dest->del_parent(instr);
      dest->add_parent(parent);
      for (auto dependent : instr->dependend_instr())
         dependent->add_required_instr(parent);

      replaced_any = true;
   }

   if (replaced_any) {
      instr->set_dead();
      progress = true;
   }
}
|
||||
|
||||
/* Visit every occupied slot of the ALU group; empty slots are skipped. */
void CopyPropBackVisitor::visit(AluGroup *instr)
{
   auto& group = *instr;
   for (auto& slot: group) {
      if (!slot)
         continue;
      slot->accept(*this);
   }
}
|
||||
|
||||
/* Texture instructions are not touched by backward copy propagation. */
void CopyPropBackVisitor::visit(TexInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Fetch instructions are not touched by backward copy propagation. */
void CopyPropBackVisitor::visit(FetchInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Walk the block from its last instruction to its first and visit every
 * instruction that is not dead. */
void CopyPropBackVisitor::visit(Block *instr)
{
   const auto stop = instr->rend();
   for (auto it = instr->rbegin(); it != stop; ++it) {
      if ((*it)->is_dead())
         continue;
      (*it)->accept(*this);
   }
}
|
||||
|
||||
class SimplifySourceVecVisitor : public InstrVisitor {
|
||||
public:
|
||||
SimplifySourceVecVisitor():progress(false) {}
|
||||
|
||||
void visit(AluInstr *instr) override{(void)instr;}
|
||||
void visit(AluGroup *instr) override{(void)instr;}
|
||||
void visit(TexInstr *instr) override;
|
||||
void visit(ExportInstr *instr) override;
|
||||
void visit(FetchInstr *instr) override;
|
||||
void visit(Block *instr) override;
|
||||
void visit(ControlFlowInstr *instr) override;
|
||||
void visit(IfInstr *instr) override;
|
||||
void visit(WriteScratchInstr *instr) override;
|
||||
void visit(StreamOutInstr *instr) override;
|
||||
void visit(MemRingOutInstr *instr) override;
|
||||
void visit(EmitVertexInstr *instr) override {(void)instr;}
|
||||
void visit(GDSInstr *instr) override {(void)instr;};
|
||||
void visit(WriteTFInstr *instr) override {(void)instr;};
|
||||
void visit(LDSAtomicInstr *instr) override {(void)instr;};
|
||||
void visit(LDSReadInstr *instr) override {(void)instr;};
|
||||
void visit(RatInstr *instr) override {(void)instr;};
|
||||
|
||||
void replace_src(Instr *instr, RegisterVec4& reg4);
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
/* Run the source vector simplification once over all blocks of the shader
 * and report whether any source vector was changed. */
bool simplify_source_vectors(Shader& sh)
{
   SimplifySourceVecVisitor simplifier;

   for (auto block: sh.func()) {
      block->accept(simplifier);
   }

   const bool changed = simplifier.progress;
   return changed;
}
|
||||
|
||||
/* Simplify the source vector of a texture instruction, except for
 * get_resinfo, which is left untouched. */
void SimplifySourceVecVisitor::visit(TexInstr *instr)
{
   if (instr->opcode() == TexInstr::get_resinfo)
      return;

   replace_src(instr, instr->src());
}
|
||||
|
||||
/* Scratch writes are not simplified. */
void SimplifySourceVecVisitor::visit(WriteScratchInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
class ReplaceConstSource : public AluInstrVisitor {
|
||||
public:
|
||||
ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i):
|
||||
old_use(old_use_), vreg(vreg_), index(i),success(false) {}
|
||||
|
||||
using AluInstrVisitor::visit;
|
||||
|
||||
void visit(AluInstr *alu) override;
|
||||
|
||||
Instr *old_use;
|
||||
RegisterVec4& vreg;
|
||||
int index;
|
||||
bool success;
|
||||
};
|
||||
|
||||
/* Simplify the value vector that is written by an export instruction. */
void SimplifySourceVecVisitor::visit(ExportInstr *instr)
{
   auto& exported = instr->value();
   replace_src(instr, exported);
}
|
||||
|
||||
/* For each of the four channels of reg4 that is an SSA value on channel
 * 0..3 with exactly one parent instruction, let a ReplaceConstSource
 * visitor inspect that parent and possibly replace the channel. */
void SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4)
{
   for (int chan = 0; chan < 4; ++chan) {
      auto value = reg4[chan];

      /* Cayman trans ops have more than one parent for one dest, hence
       * the requirement for exactly one parent. */
      const bool is_candidate = value->chan() <= 3 &&
                                value->is_ssa() &&
                                value->parents().size() == 1;
      if (!is_candidate)
         continue;

      auto& parent = *value->parents().begin();

      ReplaceConstSource replacer(instr, reg4, chan);
      parent->accept(replacer);

      if (replacer.success)
         progress = true;
   }
}
|
||||
|
||||
/* Stream out instructions are not simplified. */
void SimplifySourceVecVisitor::visit(StreamOutInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Memory ring writes are not simplified. */
void SimplifySourceVecVisitor::visit(MemRingOutInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* If alu is a mov without abs/neg modifier on its source and the source is
 * the inline constant 0 or 1, or a literal with the value 0 or 0x3F800000,
 * replace channel `index` of the vector by a new register on channel 4
 * (zero) or 5 (one) and remove the old use from the replaced value. */
void ReplaceConstSource::visit(AluInstr *alu)
{
   if (alu->opcode() != op1_mov)
      return;

   const bool has_src_modifier = alu->has_alu_flag(alu_src0_abs) ||
                                 alu->has_alu_flag(alu_src0_neg);
   if (has_src_modifier)
      return;

   auto src = alu->psrc(0);
   assert(src);

   int override_chan = -1;

   if (auto ic = src->as_inline_const()) {
      if (ic->sel() == ALU_SRC_0)
         override_chan = 4;
      else if (ic->sel() == ALU_SRC_1)
         override_chan = 5;
   }

   if (auto literal = src->as_literal()) {
      if (literal->value() == 0)
         override_chan = 4;
      else if (literal->value() == 0x3F800000)
         override_chan = 5;
   }

   if (override_chan < 0)
      return;

   auto old_value = vreg[index];
   old_value->del_use(old_use);

   auto replacement = new Register(vreg.sel(), override_chan, vreg[index]->pin());
   vreg.set_value(index, replacement);
   success = true;
}
|
||||
|
||||
/* Fetch instructions are not simplified. */
void SimplifySourceVecVisitor::visit(FetchInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* Walk the block from its last instruction to its first and visit every
 * instruction that is not dead. */
void SimplifySourceVecVisitor::visit(Block *instr)
{
   const auto stop = instr->rend();
   for (auto it = instr->rbegin(); it != stop; ++it) {
      if ((*it)->is_dead())
         continue;
      (*it)->accept(*this);
   }
}
|
||||
|
||||
/* Control flow instructions are not simplified. */
void SimplifySourceVecVisitor::visit(ControlFlowInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
/* If instructions are not simplified. */
void SimplifySourceVecVisitor::visit(IfInstr *instr)
{
   static_cast<void>(instr);
}
|
||||
|
||||
|
||||
|
||||
}
|
17
src/gallium/drivers/r600/sfn/sfn_optimizer.h
Normal file
17
src/gallium/drivers/r600/sfn/sfn_optimizer.h
Normal file
@ -0,0 +1,17 @@
|
||||
#ifndef OPTIMIZER_H
|
||||
#define OPTIMIZER_H
|
||||
|
||||
#include "sfn_shader.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
bool dead_code_elimination(Shader& shader);
|
||||
bool copy_propagation_fwd(Shader& shader);
|
||||
bool copy_propagation_backward(Shader& shader);
|
||||
bool simplify_source_vectors(Shader& sh);
|
||||
|
||||
bool optimize(Shader& shader);
|
||||
|
||||
}
|
||||
|
||||
#endif // OPTIMIZER_H
|
@ -1,12 +0,0 @@
|
||||
#include "sfn_optimizers.h"
|
||||
#include "sfn_instruction_block.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
std::vector<PInstruction>
|
||||
flatten_shader(const std::vector<InstructionBlock> &ir)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
#ifndef SFN_OPTIMIZERS_H
|
||||
#define SFN_OPTIMIZERS_H
|
||||
|
||||
#include "sfn_instruction_base.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
std::vector<PInstruction>
|
||||
flatten_alu_ops(const std::vector<InstructionBlock> &ir);
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_OPTIMIZERS_H
|
212
src/gallium/drivers/r600/sfn/sfn_peephole.cpp
Normal file
212
src/gallium/drivers/r600/sfn/sfn_peephole.cpp
Normal file
@ -0,0 +1,212 @@
|
||||
#include "sfn_peephole.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
|
||||
class PeepholeVisitor : public InstrVisitor {
|
||||
public:
|
||||
void visit(AluInstr *instr) override;
|
||||
void visit(AluGroup *instr) override;
|
||||
void visit(TexInstr *instr) override {(void)instr;};
|
||||
void visit(ExportInstr *instr) override {(void)instr;}
|
||||
void visit(FetchInstr *instr) override {(void)instr;}
|
||||
void visit(Block *instr) override;
|
||||
void visit(ControlFlowInstr *instr) override {(void)instr;}
|
||||
void visit(IfInstr *instr) override;
|
||||
void visit(WriteScratchInstr *instr) override {(void)instr;}
|
||||
void visit(StreamOutInstr *instr) override {(void)instr;}
|
||||
void visit(MemRingOutInstr *instr) override {(void)instr;}
|
||||
void visit(EmitVertexInstr *instr) override {(void)instr;}
|
||||
void visit(GDSInstr *instr) override {(void)instr;};
|
||||
void visit(WriteTFInstr *instr) override {(void)instr;};
|
||||
void visit(LDSAtomicInstr *instr) override {(void)instr;};
|
||||
void visit(LDSReadInstr *instr) override {(void)instr;};
|
||||
void visit(RatInstr *instr) override {(void)instr;};
|
||||
|
||||
bool src_is_zero(PVirtualValue value);
|
||||
bool src_is_one(PVirtualValue value);
|
||||
|
||||
void convert_to_mov(AluInstr *alu, int src_idx);
|
||||
|
||||
|
||||
bool progress{false};
|
||||
};
|
||||
|
||||
|
||||
/* Run the peephole visitor once over all blocks of the shader and report
 * whether any instruction was rewritten. */
bool peephole(Shader& sh)
{
   PeepholeVisitor visitor;

   for (auto block : sh.func()) {
      block->accept(visitor);
   }

   const bool changed = visitor.progress;
   return changed;
}
|
||||
|
||||
/* Replace ALU operations with a neutral constant operand by a mov:
 *   add / add_int   with a zero operand  -> mov of the other operand
 *   mul / mul_ieee  with a one operand   -> mov of the other operand
 *   muladd(_ieee)   with a zero factor   -> mov of the addend (src 2)
 * All other opcodes are left unchanged. */
void PeepholeVisitor::visit(AluInstr *instr)
{
   const auto opcode = instr->opcode();

   if (opcode == op2_add || opcode == op2_add_int) {
      if (src_is_zero(instr->psrc(0)))
         convert_to_mov(instr, 1);
      else if (src_is_zero(instr->psrc(1)))
         convert_to_mov(instr, 0);
      return;
   }

   if (opcode == op2_mul || opcode == op2_mul_ieee) {
      if (src_is_one(instr->psrc(0)))
         convert_to_mov(instr, 1);
      else if (src_is_one(instr->psrc(1)))
         convert_to_mov(instr, 0);
      return;
   }

   if (opcode == op3_muladd || opcode == op3_muladd_ieee) {
      const bool factor_is_zero = src_is_zero(instr->psrc(0)) ||
                                  src_is_zero(instr->psrc(1));
      if (factor_is_zero)
         convert_to_mov(instr, 2);
   }
}
|
||||
|
||||
/* True if `value` is the inline constant ALU_SRC_0 or a literal whose
 * value is 0. */
bool PeepholeVisitor::src_is_zero(PVirtualValue value)
{
   if (auto inline_const = value->as_inline_const()) {
      if (inline_const->sel() == ALU_SRC_0)
         return true;
   }

   if (auto literal = value->as_literal())
      return literal->value() == 0;

   return false;
}
|
||||
|
||||
/* True if `value` is the inline constant ALU_SRC_1 or a literal with the
 * bit pattern 0x3f800000 (the IEEE-754 single precision encoding of 1.0). */
bool PeepholeVisitor::src_is_one(PVirtualValue value)
{
   if (auto inline_const = value->as_inline_const()) {
      if (inline_const->sel() == ALU_SRC_1)
         return true;
   }

   if (auto literal = value->as_literal())
      return literal->value() == 0x3f800000;

   return false;
}
|
||||
|
||||
/* Turn `alu` into `mov <source src_idx>`: the source list is replaced by
 * the single kept source, the opcode becomes op1_mov, and progress is
 * recorded. */
void PeepholeVisitor::convert_to_mov(AluInstr *alu, int src_idx)
{
   auto kept_source = alu->psrc(src_idx);
   AluInstr::SrcValues mov_sources{kept_source};

   alu->set_sources(mov_sources);
   alu->set_op(op1_mov);

   progress = true;
}
|
||||
|
||||
|
||||
/* ALU groups are not inspected by the peephole pass. */
void PeepholeVisitor::visit(AluGroup *instr)
{
   (void)instr;
}
|
||||
|
||||
/* Forward the visitor to every instruction contained in the block. */
void PeepholeVisitor::visit(Block *instr)
{
   for (auto& child : *instr) {
      child->accept(*this);
   }
}
|
||||
|
||||
class ReplaceIfPredicate : public AluInstrVisitor {
|
||||
public:
|
||||
ReplaceIfPredicate(AluInstr *pred):
|
||||
m_pred(pred) {}
|
||||
|
||||
using AluInstrVisitor::visit;
|
||||
|
||||
void visit(AluInstr *alu) override;
|
||||
|
||||
AluInstr *m_pred;
|
||||
bool success{false};
|
||||
};
|
||||
|
||||
/* If the predicate of the if compares an SSA register against the inline
 * constant ALU_SRC_0, try to fold the instruction that defines this
 * register (its first recorded parent) directly into the predicate. */
void PeepholeVisitor::visit(IfInstr *instr)
{
   auto pred = instr->predicate();

   auto& compare_value = pred->src(1);
   auto inline_const = compare_value.as_inline_const();
   if (!inline_const || inline_const->sel() != ALU_SRC_0)
      return;

   auto tested_reg = pred->src(0).as_register();
   if (!tested_reg || !tested_reg->is_ssa())
      return;

   assert(!tested_reg->parents().empty());
   auto defining_instr = *tested_reg->parents().begin();

   ReplaceIfPredicate visitor(pred);
   defining_instr->accept(visitor);
   progress |= visitor.success;
}
|
||||
|
||||
/* Map the pair (predicate opcode, comparison opcode) to the predicate
 * opcode that evaluates the comparison directly.
 *
 * For op2_pred_setne_int the comparison is translated as is, for
 * op2_prede_int only sete_int/setne_int are supported and the test is
 * inverted. op0_nop is returned when no replacement is available. */
static EAluOp pred_from_op(EAluOp pred_op, EAluOp op)
{
   if (pred_op == op2_pred_setne_int) {
      switch (op) {
      /*
      case op2_setge_dx10 : return op2_pred_setge_int;
      case op2_setgt_dx10 : return op2_pred_setgt_int;
      case op2_sete_dx10 : return op2_prede_int;
      case op2_setne_dx10 : return op2_pred_setne_int;
      */
      case op2_setge_int:
         return op2_pred_setge_int;
      case op2_setgt_int:
         return op2_pred_setgt_int;
      case op2_setge_uint:
         return op2_pred_setge_uint;
      case op2_setgt_uint:
         return op2_pred_setgt_uint;
      case op2_sete_int:
         return op2_prede_int;
      case op2_setne_int:
         return op2_pred_setne_int;
      default:
         return op0_nop;
      }
   }

   if (pred_op == op2_prede_int) {
      switch (op) {
      case op2_sete_int:
         return op2_pred_setne_int;
      case op2_setne_int:
         return op2_prede_int;
      default:
         return op0_nop;
      }
   }

   return op0_nop;
}
|
||||
|
||||
/* Try to replace the predicate by the comparison `alu`.
 *
 * When pred_from_op() knows a replacement, the predicate takes over the
 * new opcode, the sources of `alu`, and the abs/neg modifier flags that
 * are set on `alu`. Otherwise nothing is changed. */
void ReplaceIfPredicate::visit(AluInstr *alu)
{
   const auto fused_op = pred_from_op(m_pred->opcode(), alu->opcode());
   if (fused_op == op0_nop)
      return;

   /* Have to figure out how to pass the dependency correctly */
   /*for (auto& s : alu->sources()) {
      if (s->as_register() && s->as_register()->addr())
         return;
   }*/

   m_pred->set_op(fused_op);
   m_pred->set_sources(alu->sources());

   /* Flags are only ever added to the predicate, never cleared */
   auto carry_over = [this, alu](auto flag) {
      if (alu->has_alu_flag(flag))
         m_pred->set_alu_flag(flag);
   };

   carry_over(alu_src0_abs);
   carry_over(alu_src1_abs);
   carry_over(alu_src0_neg);
   carry_over(alu_src1_neg);

   success = true;
}
|
||||
|
||||
}
|
13
src/gallium/drivers/r600/sfn/sfn_peephole.h
Normal file
13
src/gallium/drivers/r600/sfn/sfn_peephole.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef PEEPHOLE_H
|
||||
#define PEEPHOLE_H
|
||||
|
||||
#include "sfn_shader.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
bool peephole(Shader& sh);
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif // PEEPHOLE_H
|
268
src/gallium/drivers/r600/sfn/sfn_ra.cpp
Normal file
268
src/gallium/drivers/r600/sfn/sfn_ra.cpp
Normal file
@ -0,0 +1,268 @@
|
||||
#include "sfn_debug.h"
|
||||
#include "sfn_ra.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <queue>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
void ComponentInterference::prepare_row(int row)
|
||||
{
|
||||
m_rows.resize(row + 1);
|
||||
|
||||
}
|
||||
|
||||
void ComponentInterference::add(size_t idx1, size_t idx2)
|
||||
{
|
||||
assert(idx1 > idx2);
|
||||
assert(m_rows.size() > idx1);
|
||||
m_rows[idx1].push_back(idx2);
|
||||
m_rows[idx2].push_back(idx1);
|
||||
}
|
||||
|
||||
|
||||
/* Keep a reference to the live range map and immediately build the
 * interference information for all channels from it. */
Interference::Interference(LiveRangeMap& map): m_map(map)
{
   initialize();
}
|
||||
|
||||
void Interference::initialize()
|
||||
{
|
||||
for(int i = 0; i < 4; ++i) {
|
||||
initialize(m_components_maps[i], m_map.component(i));
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill the interference information of one channel.
 *
 * Every live range is compared against all live ranges with a smaller
 * index; two ranges interfere when their closed intervals
 * [m_start, m_end] overlap. */
void Interference::initialize(ComponentInterference& comp_interference,
                              LiveRangeMap::ChannelLiveRange& clr)
{
   const size_t num_ranges = clr.size();

   for (size_t current = 0; current < num_ranges; ++current) {
      auto& current_range = clr[current];
      comp_interference.prepare_row(current);

      for (size_t earlier = 0; earlier < current; ++earlier) {
         auto& earlier_range = clr[earlier];

         const bool overlap = current_range.m_end >= earlier_range.m_start &&
                              current_range.m_start <= earlier_range.m_end;
         if (overlap)
            comp_interference.add(current, earlier);
      }
   }
}
|
||||
|
||||
struct Group {
|
||||
int priority;
|
||||
std::array<PRegister, 4> channels;
|
||||
};
|
||||
|
||||
static inline bool operator < (const Group& lhs, const Group& rhs)
|
||||
{
|
||||
return lhs.priority < rhs.priority;
|
||||
}
|
||||
|
||||
using GroupRegisters = std::priority_queue<Group>;
|
||||
|
||||
static bool
|
||||
group_allocation (LiveRangeMap& lrm, const Interference& interference, GroupRegisters& groups)
|
||||
{
|
||||
int color = 0;
|
||||
// allocate grouped registers
|
||||
while (!groups.empty()) {
|
||||
auto group = groups.top();
|
||||
groups.pop();
|
||||
|
||||
int start_comp = 0;
|
||||
while (!group.channels[start_comp])
|
||||
++start_comp;
|
||||
|
||||
sfn_log << SfnLog::merge << "Color group with " << *group.channels[start_comp] << "\n";
|
||||
|
||||
// don't restart registers for exports, we may be able tp merge the
|
||||
// export calls, is fthe registers are consecutive
|
||||
if (group.priority > 0)
|
||||
color = 0;
|
||||
|
||||
while (color < 124) {
|
||||
/* Find the coloring for the first channel */
|
||||
bool color_in_use = false;
|
||||
int comp = start_comp;
|
||||
|
||||
auto& adjecency = interference.row(start_comp, group.channels[comp]->index());
|
||||
auto& regs = lrm.component(comp);
|
||||
|
||||
sfn_log << SfnLog::merge << "Try color "<< color;
|
||||
|
||||
for (auto adj : adjecency) {
|
||||
if (regs[adj].m_color == color) {
|
||||
color_in_use = true;
|
||||
sfn_log << SfnLog::merge << " in use\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (color_in_use) {
|
||||
++color;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* First channel color found, check whether it can be used for all channels */
|
||||
while (comp < 4) {
|
||||
sfn_log << SfnLog::merge << " interference: ";
|
||||
if (group.channels[comp]) {
|
||||
auto& component_life_ranges = lrm.component(comp);
|
||||
auto& adjecencies = interference.row(comp, group.channels[comp]->index());
|
||||
|
||||
for (auto adj_index : adjecencies) {
|
||||
sfn_log << SfnLog::merge << *component_life_ranges[adj_index].m_register << " ";
|
||||
if (component_life_ranges[adj_index].m_color == color) {
|
||||
color_in_use = true;
|
||||
sfn_log << SfnLog::merge << "used";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (color_in_use)
|
||||
break;
|
||||
}
|
||||
++comp;
|
||||
}
|
||||
|
||||
/* We couldn't allocate all channels with this color, so try next */
|
||||
if (color_in_use) {
|
||||
++color;
|
||||
sfn_log << SfnLog::merge << "\n";
|
||||
continue;
|
||||
}
|
||||
sfn_log << SfnLog::merge << " success\n";
|
||||
|
||||
/* Coloring successful */
|
||||
for (auto reg : group.channels) {
|
||||
if (reg) {
|
||||
auto& vregs = lrm.component(reg->chan());
|
||||
auto& vreg_cmp = vregs[reg->index()];
|
||||
assert(vreg_cmp.m_start != -1 || vreg_cmp.m_end != -1);
|
||||
vreg_cmp.m_color = color;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (color == 124)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
scalar_allocation (LiveRangeMap& lrm, const Interference& interference)
|
||||
{
|
||||
for (int comp = 0; comp < 4; ++comp) {
|
||||
auto& live_ranges = lrm.component(comp);
|
||||
for (auto& r : live_ranges) {
|
||||
if (r.m_color != -1)
|
||||
continue;
|
||||
|
||||
if (r.m_start == -1 &&
|
||||
r.m_end == -1)
|
||||
continue;
|
||||
|
||||
sfn_log << SfnLog::merge << "Color " << *r.m_register << "\n";
|
||||
|
||||
auto& adjecency = interference.row(comp, r.m_register->index());
|
||||
|
||||
int color = 0;
|
||||
|
||||
while (color < 124) {
|
||||
bool color_in_use = false;
|
||||
for (auto adj : adjecency) {
|
||||
if (live_ranges[adj].m_color == color) {
|
||||
color_in_use = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (color_in_use) {
|
||||
++color;
|
||||
continue;
|
||||
}
|
||||
|
||||
r.m_color = color;
|
||||
break;
|
||||
}
|
||||
if (color == 124)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Assign a register index to every register tracked in the live range map.
 *
 * 1. Unused entries (m_start == m_end == -1) are skipped; unused group
 *    registers get their channel set to 7.
 * 2. Registers pinned with pin_fully or pin_array keep their sel() as color.
 * 3. Registers pinned with pin_group or pin_chgr are collected into groups
 *    keyed by sel() and colored by group_allocation().
 * 4. Everything else is colored by scalar_allocation().
 * 5. Finally the color is written back as sel() and the pin is reset to
 *    pin_none for every entry.
 *
 * Returns false if either allocation step fails. */
bool register_allocation(LiveRangeMap& lrm)
{
   Interference interference(lrm);

   std::map<int, Group> groups;

   // setup fixed colors and group relationships
   for (int i = 0; i < 4; ++i) {
      auto& comp = lrm.component(i);
      for (auto& entry : comp) {
         sfn_log << SfnLog::merge << "Prepare RA for "
                 << *entry.m_register
                 << " [" << entry.m_start << ", " << entry.m_end << "]\n";

         auto pin = entry.m_register->pin();
         const bool is_grouped = pin == pin_group || pin == pin_chgr;

         if (entry.m_start == -1 && entry.m_end == -1) {
            if (is_grouped)
               entry.m_register->set_chan(7);
            continue;
         }

         auto sel = entry.m_register->sel();

         /* fully pinned registers contain system values with the
          * definite register index, and array values are allocated
          * right after the system registers, so just reuse the IDs (for now) */
         if (pin == pin_fully || pin == pin_array) {
            /* Must set all array element entries */
            sfn_log << SfnLog::merge << "Pin color " << sel << " to " << *entry.m_register << "\n";
            entry.m_color = sel;
            continue;
         }

         if (!is_grouped)
            continue;

         /* Groups must all have the same sel() value, because they are used
          * as vec4 registers */
         auto igroup = groups.find(sel);
         if (igroup == groups.end()) {
            /* The first register seen for a sel() defines the group priority */
            int priority = entry.m_use.test(LiveRangeEntry::use_export) ? - entry.m_end : entry.m_start;
            igroup = groups.emplace(sel, Group{priority, {nullptr, nullptr, nullptr, nullptr}}).first;
         }
         igroup->second.channels[i] = entry.m_register;
         assert(comp[entry.m_register->index()].m_register->index() == entry.m_register->index());
      }
   }

   GroupRegisters groups_sorted;
   for (auto& keyed_group : groups)
      groups_sorted.push(keyed_group.second);

   if (!group_allocation (lrm, interference, groups_sorted))
      return false;

   if (!scalar_allocation(lrm, interference))
      return false;

   /* Write the colors back to the registers */
   for (int i = 0; i < 4; ++i) {
      for (auto& entry : lrm.component(i)) {
         sfn_log << SfnLog::merge << "Set " << *entry.m_register << " to ";
         entry.m_register->set_sel(entry.m_color);
         entry.m_register->set_pin(pin_none);
         sfn_log << SfnLog::merge << *entry.m_register << "\n";
      }
   }

   return true;
}
|
||||
|
||||
}
|
51
src/gallium/drivers/r600/sfn/sfn_ra.h
Normal file
51
src/gallium/drivers/r600/sfn/sfn_ra.h
Normal file
@ -0,0 +1,51 @@
|
||||
#ifndef INTERFERENCE_H
|
||||
#define INTERFERENCE_H
|
||||
|
||||
#include "sfn_valuefactory.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Interference information of one register channel: for each entry index
 * a row lists the indices of the entries it interferes with. */
class ComponentInterference
{
public:
   using Row = std::vector<int>;

   /* Make sure row index `row` exists */
   void prepare_row(int row);

   /* Record that the entries idx1 and idx2 interfere */
   void add(size_t idx1, size_t idx2);

   /* Read access to the row of entry `idx`; idx must be a valid row index */
   const Row& row(int idx) const
   {
      assert((size_t)idx < m_rows.size());
      return m_rows[idx];
   }

private:
   std::vector<Row> m_rows;
};
|
||||
|
||||
class Interference {
|
||||
public:
|
||||
Interference(LiveRangeMap& map);
|
||||
|
||||
const auto& row(int comp, int index) const {
|
||||
assert(comp < 4);
|
||||
return m_components_maps[comp].row(index);
|
||||
}
|
||||
|
||||
private:
|
||||
void initialize();
|
||||
void initialize(ComponentInterference& comp, LiveRangeMap::ChannelLiveRange& clr);
|
||||
|
||||
LiveRangeMap& m_map;
|
||||
std::array<ComponentInterference, 4> m_components_maps;
|
||||
|
||||
|
||||
};
|
||||
|
||||
bool register_allocation(LiveRangeMap& lrm);
|
||||
|
||||
}
|
||||
|
||||
#endif // INTERFERENCE_H
|
890
src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
Normal file
890
src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
Normal file
@ -0,0 +1,890 @@
|
||||
#include "sfn_scheduler.h"
|
||||
#include "sfn_instr_alugroup.h"
|
||||
#include "sfn_instr_controlflow.h"
|
||||
#include "sfn_instr_export.h"
|
||||
#include "sfn_instr_fetch.h"
|
||||
#include "sfn_instr_mem.h"
|
||||
#include "sfn_instr_lds.h"
|
||||
#include "sfn_instr_tex.h"
|
||||
#include "sfn_debug.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class CollectInstructions : public InstrVisitor {
|
||||
|
||||
public:
|
||||
CollectInstructions(ValueFactory& vf):
|
||||
m_value_factory(vf) {}
|
||||
|
||||
void visit(AluInstr *instr) override {
|
||||
if (instr->has_alu_flag(alu_is_trans))
|
||||
alu_trans.push_back(instr);
|
||||
else {
|
||||
if (instr->alu_slots() == 1)
|
||||
alu_vec.push_back(instr);
|
||||
else
|
||||
alu_groups.push_back(instr->split(m_value_factory));
|
||||
}
|
||||
}
|
||||
void visit(AluGroup *instr) override {
|
||||
alu_groups.push_back(instr);
|
||||
}
|
||||
void visit(TexInstr *instr) override {
|
||||
tex.push_back(instr);
|
||||
}
|
||||
void visit(ExportInstr *instr) override {
|
||||
exports.push_back(instr);
|
||||
}
|
||||
void visit(FetchInstr *instr) override {
|
||||
fetches.push_back(instr);
|
||||
}
|
||||
void visit(Block *instr) override {
|
||||
for (auto& i: *instr)
|
||||
i->accept(*this);
|
||||
}
|
||||
|
||||
void visit(ControlFlowInstr *instr) override {
|
||||
assert(!m_cf_instr);
|
||||
m_cf_instr = instr;
|
||||
}
|
||||
|
||||
void visit(IfInstr *instr) override {
|
||||
assert(!m_cf_instr);
|
||||
m_cf_instr = instr;
|
||||
}
|
||||
|
||||
void visit(EmitVertexInstr *instr) override {
|
||||
assert(!m_cf_instr);
|
||||
m_cf_instr = instr;
|
||||
}
|
||||
|
||||
void visit(WriteScratchInstr *instr) override {
|
||||
mem_write_instr.push_back(instr);
|
||||
}
|
||||
|
||||
void visit(StreamOutInstr *instr) override {
|
||||
mem_write_instr.push_back(instr);
|
||||
}
|
||||
|
||||
void visit(MemRingOutInstr *instr) override {
|
||||
mem_ring_writes.push_back(instr);
|
||||
}
|
||||
|
||||
void visit(GDSInstr *instr) override {
|
||||
gds_op.push_back(instr);
|
||||
}
|
||||
|
||||
void visit(WriteTFInstr *instr) override {
|
||||
write_tf.push_back(instr);
|
||||
}
|
||||
|
||||
void visit(LDSReadInstr *instr) override {
|
||||
std::vector<AluInstr*> buffer;
|
||||
m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
|
||||
for (auto& i: buffer) {
|
||||
i->accept(*this);
|
||||
}
|
||||
}
|
||||
|
||||
void visit(LDSAtomicInstr *instr) override {
|
||||
std::vector<AluInstr*> buffer;
|
||||
m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
|
||||
for (auto& i: buffer) {
|
||||
i->accept(*this);
|
||||
}
|
||||
}
|
||||
|
||||
void visit(RatInstr *instr) override {
|
||||
rat_instr.push_back(instr);
|
||||
}
|
||||
|
||||
|
||||
std::list<AluInstr *> alu_trans;
|
||||
std::list<AluInstr *> alu_vec;
|
||||
std::list<TexInstr *> tex;
|
||||
std::list<AluGroup *> alu_groups;
|
||||
std::list<ExportInstr *> exports;
|
||||
std::list<FetchInstr *> fetches;
|
||||
std::list<WriteOutInstr *> mem_write_instr;
|
||||
std::list<MemRingOutInstr *> mem_ring_writes;
|
||||
std::list<GDSInstr *> gds_op;
|
||||
std::list<WriteTFInstr *> write_tf;
|
||||
std::list<RatInstr *> rat_instr;
|
||||
|
||||
Instr *m_cf_instr{nullptr};
|
||||
ValueFactory& m_value_factory;
|
||||
|
||||
AluInstr *m_last_lds_instr{nullptr};
|
||||
};
|
||||
|
||||
class BlockSheduler {
|
||||
public:
|
||||
BlockSheduler();
|
||||
void run(Shader *shader);
|
||||
|
||||
void finalize();
|
||||
|
||||
private:
|
||||
|
||||
void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf);
|
||||
|
||||
bool collect_ready(CollectInstructions &available);
|
||||
|
||||
template <typename T>
|
||||
bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
|
||||
|
||||
bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available);
|
||||
|
||||
bool schedule_tex(Shader::ShaderBlocks& out_blocks);
|
||||
bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
|
||||
|
||||
template <typename I>
|
||||
bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
|
||||
|
||||
template <typename I>
|
||||
bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
|
||||
|
||||
bool schedule_alu(Shader::ShaderBlocks& out_blocks);
|
||||
void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type);
|
||||
|
||||
bool schedule_alu_to_group_vec(AluGroup *group);
|
||||
bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist);
|
||||
|
||||
bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list);
|
||||
|
||||
template <typename I>
|
||||
bool schedule(std::list<I *>& ready_list);
|
||||
|
||||
template <typename I>
|
||||
bool schedule_block(std::list<I *>& ready_list);
|
||||
|
||||
std::list<AluInstr *> alu_vec_ready;
|
||||
std::list<AluInstr *> alu_trans_ready;
|
||||
std::list<AluGroup *> alu_groups_ready;
|
||||
std::list<TexInstr *> tex_ready;
|
||||
std::list<ExportInstr *> exports_ready;
|
||||
std::list<FetchInstr *> fetches_ready;
|
||||
std::list<WriteOutInstr *> memops_ready;
|
||||
std::list<MemRingOutInstr *> mem_ring_writes_ready;
|
||||
std::list<GDSInstr *> gds_ready;
|
||||
std::list<WriteTFInstr *> write_tf_ready;
|
||||
std::list<RatInstr *> rat_instr_ready;
|
||||
|
||||
enum {
|
||||
sched_alu,
|
||||
sched_tex,
|
||||
sched_fetch,
|
||||
sched_free,
|
||||
sched_mem_ring,
|
||||
sched_gds,
|
||||
sched_write_tf,
|
||||
sched_rat,
|
||||
} current_shed;
|
||||
|
||||
ExportInstr *m_last_pos;
|
||||
ExportInstr *m_last_pixel;
|
||||
ExportInstr *m_last_param;
|
||||
|
||||
Block *m_current_block;
|
||||
|
||||
int m_lds_addr_count{0};
|
||||
int m_alu_groups_schduled{0};
|
||||
|
||||
};
|
||||
|
||||
/* Schedule all blocks of the shader.
 *
 * The shader is modified in place and the same pointer is returned. When
 * the schedule debug flag is set, the shader is printed to the log before
 * and after scheduling. */
Shader *schedule(Shader *original)
{
   AluGroup::set_chipclass(original->chip_class());

   auto log_shader = [](const char *title, Shader *shader) {
      sfn_log << SfnLog::schedule << title;
      if (sfn_log.has_debug_flag(SfnLog::schedule)) {
         std::stringstream ss;
         shader->print(ss);
         sfn_log << ss.str() << "\n\n";
      }
   };

   log_shader("Original shader\n", original);

   // TODO later it might be necessary to clone the shader
   // to be able to re-start scheduling

   auto scheduled_shader = original;

   BlockSheduler s;
   s.run(scheduled_shader);
   s.finalize();

   log_shader("Scheduled shader\n", scheduled_shader);

   return scheduled_shader;
}
|
||||
|
||||
/* Start in ALU scheduling mode with no recorded exports and no current
 * block. */
BlockSheduler::BlockSheduler():
   current_shed(sched_alu),
   m_last_pos(nullptr), m_last_pixel(nullptr), m_last_param(nullptr),
   m_current_block(nullptr)
{
}
|
||||
|
||||
/* Schedule every block of the shader's function and replace the function
 * by the list of newly created blocks. */
void BlockSheduler::run( Shader *shader)
{
   Shader::ShaderBlocks new_blocks;

   for (auto& in_block : shader->func()) {
      sfn_log << SfnLog::schedule << "Process block " << in_block->id() <<"\n";

      if (sfn_log.has_debug_flag(SfnLog::schedule)) {
         std::stringstream block_dump;
         in_block->print(block_dump);
         sfn_log << block_dump.str() << "\n";
      }

      schedule_block(*in_block, new_blocks, shader->value_factory());
   }

   shader->reset_function(new_blocks);
}
|
||||
|
||||
/* Schedule the instructions of one input block.
 *
 * The instructions of `in_block` are collected by type, and as long as
 * collect_ready() reports ready instructions the scheduler cycles through
 * the instruction kinds (ALU, TEX, fetch, GDS, mem ring, TF write, RAT,
 * other memory writes) and emits them into blocks of the matching type.
 * Exports are emitted after the main loop, a control flow instruction of
 * the block (if any) is emitted last. Finished blocks are appended to
 * `out_blocks`.
 *
 * Instructions that remain unscheduled are printed to std::cerr and
 * trigger an assertion. */
void BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)
{

   assert(in_block.id() >= 0);

   current_shed = sched_fetch;
   auto last_shed = sched_fetch;

   CollectInstructions cir(vf);
   in_block.accept(cir);

   bool have_instr = collect_ready(cir);

   m_current_block = new Block(in_block.nesting_depth(), in_block.id());

   assert(m_current_block->id() >= 0);

   /* Log the number of ready instructions of one kind, if there are any */
   auto log_ready = [](const char *label, const auto& ready) {
      if (ready.size())
         sfn_log << SfnLog::schedule << label << ready.size() << "\n";
   };

   while (have_instr) {

      sfn_log << SfnLog::schedule << "Have ready instructions\n";

      log_ready(" ALU V:", alu_vec_ready);
      log_ready(" ALU T:", alu_trans_ready);
      log_ready(" ALU G:", alu_groups_ready);
      log_ready(" EXP:", exports_ready);
      log_ready(" TEX:", tex_ready);
      log_ready(" FETCH:", fetches_ready);
      log_ready(" MEM_RING:", mem_ring_writes_ready);
      /* Report the size of the list that is tested, not the size of the
       * mem ring list */
      log_ready(" MEM_OPS:", memops_ready);

      /* Outside of an LDS group, a long ready list takes precedence over
       * the regular round-robin order */
      if (!m_current_block->lds_group_active()) {
         if (last_shed != sched_free && memops_ready.size() > 8)
            current_shed = sched_free;
         else if (mem_ring_writes_ready.size() > 5)
            current_shed = sched_mem_ring;
         else if (rat_instr_ready.size() > 3)
            current_shed = sched_rat;
         else if (gds_ready.size() > 3)
            current_shed = sched_gds;
         else if (tex_ready.size() > 3)
            current_shed = sched_tex;
      }

      /* `continue` moves on to the next kind without collecting newly
       * ready instructions, `break` re-collects first */
      switch (current_shed) {
      case sched_alu:
         if (!schedule_alu(out_blocks)) {
            assert(!m_current_block->lds_group_active());
            current_shed = sched_tex;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_tex:
         if (tex_ready.empty() || !schedule_tex(out_blocks)) {
            current_shed = sched_fetch;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_fetch:
         if (!fetches_ready.empty()) {
            schedule_vtx(out_blocks);
            last_shed = current_shed;
         }
         current_shed = sched_gds;
         continue;
      case sched_gds:
         if (!gds_ready.empty()) {
            schedule_gds(out_blocks, gds_ready);
            last_shed = current_shed;
         }
         current_shed = sched_mem_ring;
         continue;
      case sched_mem_ring:
         if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) {
            current_shed = sched_write_tf;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_write_tf:
         if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) {
            current_shed = sched_rat;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_rat:
         if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) {
            current_shed = sched_free;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_free:
         if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) {
            current_shed = sched_alu;
            break;
         }
         last_shed = current_shed;
      }

      have_instr = collect_ready(cir);
   }

   /* Emit exports always at end of a block */
   while (collect_ready_type(exports_ready, cir.exports))
      schedule_exports(out_blocks, exports_ready);

   /* Print the instructions that were left over in one of the collected
    * lists; returns true if there were any */
   auto report_unscheduled = [](const char *title, const auto& leftover) {
      if (leftover.empty())
         return false;
      std::cerr << title;
      for (auto& a : leftover) {
         std::cerr << " " << *a << "\n";
      }
      return true;
   };

   bool fail = false;
   fail |= report_unscheduled("Unscheduled ALU groups:\n", cir.alu_groups);
   fail |= report_unscheduled("Unscheduled ALU vec ops:\n", cir.alu_vec);
   fail |= report_unscheduled("Unscheduled ALU trans ops:\n", cir.alu_trans);
   fail |= report_unscheduled("Unscheduled MEM ops:\n", cir.mem_write_instr);
   fail |= report_unscheduled("Unscheduled Fetch ops:\n", cir.fetches);
   fail |= report_unscheduled("Unscheduled Tex ops:\n", cir.tex);

   assert(cir.tex.empty());
   assert(cir.exports.empty());
   assert(cir.fetches.empty());
   assert(cir.alu_vec.empty());
   assert(cir.mem_write_instr.empty());
   assert(cir.mem_ring_writes.empty());

   assert (!fail);
   (void)fail; /* only evaluated by the assert above */

   if (cir.m_cf_instr) {
      // Assert that if condition is ready
      m_current_block->push_back(cir.m_cf_instr);
      cir.m_cf_instr->set_scheduled();
   }

   out_blocks.push_back(m_current_block);
}
|
||||
|
||||
void BlockSheduler::finalize()
|
||||
{
|
||||
if (m_last_pos)
|
||||
m_last_pos->set_is_last_export(true);
|
||||
if (m_last_pixel)
|
||||
m_last_pixel->set_is_last_export(true);
|
||||
if (m_last_param)
|
||||
m_last_param->set_is_last_export(true);
|
||||
}
|
||||
|
||||
/* Emit one ALU group into the current ALU block.
 *
 * A ready ALU group is preferred unless the first ready vector
 * instruction has LDS access; otherwise a new group is started when
 * single ALU instructions are ready. Free slots of the group are filled
 * from the ready lists, then the group is finalized and appended to the
 * current block. A new ALU block is started when the current block has
 * another type or when the kcache reservation for the group fails.
 *
 * Returns true if a ready group was taken or at least one instruction was
 * added to the group. */
bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
{
   bool success = false;
   AluGroup *group = nullptr;

   bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty();

   bool has_lds_ready = !alu_vec_ready.empty() &&
                        (*alu_vec_ready.begin())->has_lds_access();

   /* Schedule groups first, unless we have a pending LDS instruction.
    * We don't want the LDS instructions to be too far apart because the
    * fetch + read from queue has to be in the same ALU CF block */
   if (!alu_groups_ready.empty() && !has_lds_ready) {
      group = *alu_groups_ready.begin();
      alu_groups_ready.erase(alu_groups_ready.begin());
      sfn_log << SfnLog::schedule << "Schedule ALU group\n";
      success = true;
   } else {
      if (has_alu_ready) {
         group = new AluGroup();
         sfn_log << SfnLog::schedule << "START new ALU group\n";
      }
   }

   if (group) {
      int free_slots = group->free_slots();

      if (free_slots && has_alu_ready) {
         if (!alu_vec_ready.empty())
            success |= schedule_alu_to_group_vec(group);

         /* Apparently one can't schedule a t-slot if there is already
          * an LDS instruction scheduled.
          * TODO: check whether this is only relevant for actual LDS instructions
          * or also for instructions that read from the LDS return value queue */

         if (free_slots & 0x10 && !has_lds_ready) {
            sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
            if (!alu_trans_ready.empty())
               success |= schedule_alu_to_group_trans(group, alu_trans_ready);
            if (!alu_vec_ready.empty())
               success |= schedule_alu_to_group_trans(group, alu_vec_ready);
         }
      }

      sfn_log << SfnLog::schedule << "Finalize ALU group\n";
      group->set_scheduled();
      group->fix_last_flag();
      group->set_nesting_depth(m_current_block->nesting_depth());

      if (m_current_block->type() != Block::alu) {
         start_new_block(out_blocks, Block::alu);
         m_alu_groups_schduled = 0;
      }

      /* Pessimistic hack: If we have started an LDS group,
       * make sure 8 instructions groups still fit into the CF
       * TODO: take care of Address slot emission
       * TODO: maybe do this CF split only in the assembler
       */
      /*if (group->slots() > m_current_block->remaining_slots() ||
          (group->has_lds_group_start() &&
           m_current_block->remaining_slots() < 7 * 8)) {
         //assert(!m_current_block->lds_group_active());
         start_new_block(out_blocks, Block::alu);
      }*/

      if (!m_current_block->try_reserve_kcache(*group)) {
         assert(!m_current_block->lds_group_active());
         start_new_block(out_blocks, Block::alu);
         m_current_block->set_instr_flag(Instr::force_cf);
      }

      /* The reservation must also be made when a new block was just
       * started. The call has to stay outside of assert(): with NDEBUG the
       * assert expression is not evaluated and the reservation would be
       * skipped in release builds. */
      const bool kcache_reserved = m_current_block->try_reserve_kcache(*group);
      assert(kcache_reserved);
      (void)kcache_reserved;

      if (group->has_lds_group_start())
         m_current_block->lds_group_start(*group->begin());

      m_current_block->push_back(group);

      if (group->has_lds_group_end())
         m_current_block->lds_group_end();
   }

   if (success)
      ++m_alu_groups_schduled;

   return success;
}
|
||||
|
||||
/* Emit the first ready texture instruction, preceded by its prepare
 * instructions, into a TEX block.
 *
 * A new TEX block is started when the current block has another type, has
 * no slot left, or can't hold the instruction together with its prepare
 * instructions. Returns true if an instruction was emitted. */
bool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
{
   const bool need_new_block = m_current_block->type() != Block::tex ||
                               m_current_block->remaining_slots() == 0;
   if (need_new_block) {
      start_new_block(out_blocks, Block::tex);
      m_current_block->set_instr_flag(Instr::force_cf);
   }

   if (tex_ready.empty() || !(m_current_block->remaining_slots() > 0))
      return false;

   auto first_ready = tex_ready.begin();
   auto tex_instr = *first_ready;
   sfn_log << SfnLog::schedule << "Schedule: " << *tex_instr << "\n";

   /* The instruction and its prepare instructions must share one block */
   if (m_current_block->remaining_slots() < 1 + tex_instr->prepare_instr().size())
      start_new_block(out_blocks, Block::tex);

   for (auto prep : tex_instr->prepare_instr()) {
      prep->set_scheduled();
      m_current_block->push_back(prep);
   }

   tex_instr->set_scheduled();
   m_current_block->push_back(tex_instr);
   tex_ready.erase(first_ready);
   return true;
}
|
||||
|
||||
/* Move the ready fetch instructions into a VTX block, opening a new one
 * (flagged with Instr::force_cf) if the current block has another type or
 * is full. Returns what schedule_block() reports for fetches_ready.
 */
bool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks)
{
   const bool need_fresh_block = m_current_block->type() != Block::vtx ||
                                 m_current_block->remaining_slots() == 0;
   if (need_fresh_block) {
      start_new_block(out_blocks, Block::vtx);
      m_current_block->set_instr_flag(Instr::force_cf);
   }

   return schedule_block(fetches_ready);
}
|
||||
|
||||
template <typename I>
|
||||
bool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
|
||||
{
|
||||
bool was_full = m_current_block->remaining_slots() == 0;
|
||||
if (m_current_block->type() != Block::gds || was_full) {
|
||||
start_new_block(out_blocks, Block::gds);
|
||||
if (was_full)
|
||||
m_current_block->set_instr_flag(Instr::force_cf);
|
||||
}
|
||||
return schedule_block(ready_list);
|
||||
}
|
||||
|
||||
|
||||
/* Make sure m_current_block is a block of the requested type.
 *
 * A non-empty current block is appended to out_blocks and replaced by a new
 * block that inherits its nesting depth and id; an empty current block is
 * simply re-typed.
 */
void BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)
{
   if (!m_current_block->empty()) {
      sfn_log << SfnLog::schedule << "Start new block\n";

      /* A block must not be closed while an LDS group is still open */
      assert(!m_current_block->lds_group_active());

      auto finished = m_current_block;
      out_blocks.push_back(finished);
      m_current_block = new Block(finished->nesting_depth(), finished->id());
   }

   m_current_block->set_type(type);
}
|
||||
|
||||
template <typename I>
|
||||
bool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
|
||||
{
|
||||
if (ready_list.empty())
|
||||
return false;
|
||||
if (m_current_block->type() != Block::cf)
|
||||
start_new_block(out_blocks, Block::cf);
|
||||
return schedule(ready_list);
|
||||
}
|
||||
|
||||
|
||||
/* Offer every instruction in alu_vec_ready to the vector slots of group.
 *
 * Instructions the group accepts are removed from the ready list; for those
 * carrying the alu_is_lds flag m_lds_addr_count is decremented. Returns
 * true if at least one instruction was accepted.
 */
bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
{
   assert(group);
   assert(!alu_vec_ready.empty());

   bool scheduled_any = false;

   for (auto candidate = alu_vec_ready.begin(); candidate != alu_vec_ready.end();) {
      sfn_log << SfnLog::schedule << "Try schedule to vec " << **candidate;

      if (!group->add_vec_instructions(*candidate)) {
         ++candidate;
         sfn_log << SfnLog::schedule << " failed\n";
         continue;
      }

      if ((*candidate)->has_alu_flag(alu_is_lds))
         --m_lds_addr_count;

      /* erase() hands back the successor, so the walk simply continues */
      candidate = alu_vec_ready.erase(candidate);
      scheduled_any = true;
      sfn_log << SfnLog::schedule << " success\n";
   }

   return scheduled_any;
}
|
||||
|
||||
/* Try to place one instruction of readylist into the trans slot of group.
 *
 * The list is walked in order and the first instruction the group accepts
 * is removed from readylist; the search stops there.
 *
 * Returns true if an instruction was placed, false if none was accepted.
 */
bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)
{
   assert(group);

   for (auto candidate = readylist.begin(); candidate != readylist.end(); ++candidate) {
      sfn_log << SfnLog::schedule << "Try schedule to trans " << **candidate;

      if (group->add_trans_instructions(*candidate)) {
         readylist.erase(candidate);
         /* Spelling matches the message in schedule_alu_to_group_vec */
         sfn_log << SfnLog::schedule << " success\n";
         return true;
      }

      sfn_log << SfnLog::schedule << " failed\n";
   }

   return false;
}
|
||||
|
||||
template <typename I>
|
||||
bool BlockSheduler::schedule(std::list<I *>& ready_list)
|
||||
{
|
||||
if (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
|
||||
auto ii = ready_list.begin();
|
||||
sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
|
||||
(*ii)->set_scheduled();
|
||||
m_current_block->push_back(*ii);
|
||||
ready_list.erase(ii);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename I>
|
||||
bool BlockSheduler::schedule_block(std::list<I *>& ready_list)
|
||||
{
|
||||
bool success = false;
|
||||
while (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
|
||||
auto ii = ready_list.begin();
|
||||
sfn_log << SfnLog::schedule << "Schedule: " << **ii << " "
|
||||
<< m_current_block->remaining_slots() << "\n";
|
||||
(*ii)->set_scheduled();
|
||||
m_current_block->push_back(*ii);
|
||||
ready_list.erase(ii);
|
||||
success = true;
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
/* Schedule the first export of ready_list into a CF block.
 *
 * The scheduled export is remembered as the most recent one of its kind
 * (m_last_pos / m_last_param / m_last_pixel) and its "last export" marker
 * is cleared. Note that the switch to a CF block happens even when
 * ready_list is empty.
 */
bool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)
{
   if (m_current_block->type() != Block::cf)
      start_new_block(out_blocks, Block::cf);

   if (ready_list.empty())
      return false;

   auto head = ready_list.begin();
   ExportInstr *exp = *head;
   sfn_log << SfnLog::schedule << "Schedule: " << *exp << "\n";

   exp->set_scheduled();
   m_current_block->push_back(exp);

   switch (exp->export_type()) {
   case ExportInstr::pos:
      m_last_pos = exp;
      break;
   case ExportInstr::param:
      m_last_param = exp;
      break;
   case ExportInstr::pixel:
      m_last_pixel = exp;
      break;
   }

   exp->set_is_last_export(false);
   ready_list.erase(head);
   return true;
}
|
||||
|
||||
/* Refill all ready lists from the instructions still available.
 * Returns true if any of the ready lists is non-empty afterwards.
 */
bool BlockSheduler::collect_ready(CollectInstructions &available)
{
   sfn_log << SfnLog::schedule << "Ready instructions\n";

   /* Every collector has to run, so the results are gathered first (a
    * braced list is evaluated left to right) and combined afterwards. */
   const bool list_filled[] = {
      collect_ready_alu_vec(alu_vec_ready, available.alu_vec),
      collect_ready_type(alu_trans_ready, available.alu_trans),
      collect_ready_type(alu_groups_ready, available.alu_groups),
      collect_ready_type(gds_ready, available.gds_op),
      collect_ready_type(tex_ready, available.tex),
      collect_ready_type(fetches_ready, available.fetches),
      collect_ready_type(memops_ready, available.mem_write_instr),
      collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes),
      collect_ready_type(write_tf_ready, available.write_tf),
      collect_ready_type(rat_instr_ready, available.rat_instr),
   };

   bool any_ready = false;
   for (bool filled : list_filled)
      any_ready |= filled;

   sfn_log << SfnLog::schedule << "\n";
   return any_ready;
}
|
||||
|
||||
/* Move ready vector ALU instructions from available to ready, assign them
 * a priority and sort the ready list by descending priority.
 *
 * At most 32 entries of available are inspected per call and ready is not
 * grown beyond 32 entries. Returns true if ready is non-empty afterwards.
 */
bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available)
{
   /* Instructions that are already waiting get their priority raised */
   for (auto waiting : ready)
      waiting->add_priority(100 * waiting->register_priority());

   int inspected = 0;
   auto candidate = available.begin();
   const auto candidates_end = available.end();

   while (candidate != candidates_end && inspected++ < 32) {
      AluInstr *instr = *candidate;

      if (ready.size() >= 32 || !instr->ready()) {
         ++candidate;
         continue;
      }

      /* LDS fetches that use static offsets are usually ready very fast,
       * so they would get scheduled early, and this leaves the problem
       * that we allocate too many registers with just constant values,
       * which causes problems with RA. So limit the number of LDS
       * address registers.
       */
      if (instr->has_alu_flag(alu_lds_address)) {
         if (m_lds_addr_count > 64) {
            ++candidate;
            continue;
         }
         ++m_lds_addr_count;
      }

      /* LDS instructions are scheduled with high priority.
       * Instructions that can go into the t slot and don't have
       * indirect access are put in last, so that they don't block
       * vec-only instructions when scheduling to the vector slots.
       * For everything else we look at the register use. */
      int priority = 0;
      if (instr->has_lds_access()) {
         priority = 100000;
      } else if (AluGroup::has_t()) {
         auto opinfo = alu_ops.find(instr->opcode());
         assert(opinfo != alu_ops.end());
         if (opinfo->second.can_channel(AluOp::t) && !instr->indirect_addr().first)
            priority = -1;
      }
      priority += 100 * instr->register_priority();

      instr->add_priority(priority);
      ready.push_back(instr);

      /* erase() returns the successor of the removed entry */
      candidate = available.erase(candidate);
   }

   for (auto& entry : ready)
      sfn_log << SfnLog::schedule << "V: " << *entry << "\n";

   ready.sort([](const AluInstr *lhs, const AluInstr *rhs) {
      return lhs->priority() > rhs->priority();
   });

   for (auto& entry : ready)
      sfn_log << SfnLog::schedule << "V (S): " << *entry << "\n";

   return !ready.empty();
}
|
||||
|
||||
template <typename T>
|
||||
struct type_char {
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
struct type_char<AluInstr> {
|
||||
static constexpr const char value = 'A';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<AluGroup> {
|
||||
static constexpr const char value = 'G';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<ExportInstr> {
|
||||
static constexpr const char value = 'E';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<TexInstr> {
|
||||
static constexpr const char value = 'T';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<FetchInstr> {
|
||||
static constexpr const char value = 'F';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<WriteOutInstr> {
|
||||
static constexpr const char value = 'M';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<MemRingOutInstr> {
|
||||
static constexpr const char value = 'R';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<WriteTFInstr> {
|
||||
static constexpr const char value = 'X';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<GDSInstr> {
|
||||
static constexpr const char value = 'S';
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_char<RatInstr> {
|
||||
static constexpr const char value = 'I';
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
bool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available)
|
||||
{
|
||||
auto i = available.begin();
|
||||
auto e = available.end();
|
||||
|
||||
while (i != e) {
|
||||
if ((*i)->ready()) {
|
||||
ready.push_back(*i);
|
||||
auto old_i = i;
|
||||
++i;
|
||||
available.erase(old_i);
|
||||
} else
|
||||
++i;
|
||||
}
|
||||
|
||||
for (auto& i: ready)
|
||||
sfn_log << SfnLog::schedule << type_char<T>::value << "; " << *i << "\n";
|
||||
|
||||
return !ready.empty();
|
||||
}
|
||||
|
||||
}
|
13
src/gallium/drivers/r600/sfn/sfn_scheduler.h
Normal file
13
src/gallium/drivers/r600/sfn/sfn_scheduler.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef SHEDULER_H
|
||||
#define SHEDULER_H
|
||||
|
||||
#include "sfn_shader.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
|
||||
Shader *schedule(Shader *original);
|
||||
|
||||
}
|
||||
|
||||
#endif // SHEDULER_H
|
1379
src/gallium/drivers/r600/sfn/sfn_shader.cpp
Normal file
1379
src/gallium/drivers/r600/sfn/sfn_shader.cpp
Normal file
File diff suppressed because it is too large
Load Diff
365
src/gallium/drivers/r600/sfn/sfn_shader.h
Normal file
365
src/gallium/drivers/r600/sfn/sfn_shader.h
Normal file
@ -0,0 +1,365 @@
|
||||
#ifndef SHADER_H
|
||||
#define SHADER_H
|
||||
|
||||
#include "sfn_instr.h"
|
||||
#include "sfn_instrfactory.h"
|
||||
#include "sfn_instr_controlflow.h"
|
||||
#include "gallium/drivers/r600/r600_shader.h"
|
||||
#include "sfn_liverangeevaluator.h"
|
||||
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <stack>
|
||||
#include <vector>
|
||||
|
||||
struct nir_shader;
|
||||
struct nir_cf_node;
|
||||
struct nir_if;
|
||||
struct nir_block;
|
||||
struct nir_instr;
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Common base of the shader input and output descriptors.
 *
 * The class is abstract: derived classes implement do_print(). The
 * constructor is protected, so only derived classes can create instances.
 */
class ShaderIO {
public:
   void set_sid(int sid);
   void override_spi_sid(int spi_sid);
   void print(std::ostream& os) const;

   int spi_sid() const { return m_spi_sid; }
   unsigned sid() const { return m_sid; }

   int location() const { return m_location; }
   unsigned name() const { return m_name; }

   int pos() const { return m_pos; }
   void set_pos(int pos) { m_pos = pos; }

   bool is_param() const { return m_is_param; }
   void set_is_param(bool val) { m_is_param = val; }

   void set_gpr(int gpr) { m_gpr = gpr; }
   int gpr() const { return m_gpr; }

protected:
   ShaderIO(const char *type, int loc, int name);

private:
   /* Type-specific printing hook, implemented by the derived classes */
   virtual void do_print(std::ostream& os) const = 0;

   const char *m_type;
   int m_location{-1};
   int m_name{-1};
   int m_sid{0};
   int m_spi_sid{0};
   int m_pos{0};
   /* Only accessed through the bool getter/setter above, so store a bool */
   bool m_is_param{false};
   int m_gpr{0};
};
|
||||
|
||||
class ShaderOutput : public ShaderIO {
|
||||
public:
|
||||
ShaderOutput();
|
||||
ShaderOutput(int location, int name, int writemask);
|
||||
|
||||
int writemask() const { return m_writemask;}
|
||||
|
||||
private:
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
int m_writemask{0};
|
||||
};
|
||||
|
||||
|
||||
class ShaderInput : public ShaderIO {
|
||||
public:
|
||||
ShaderInput();
|
||||
ShaderInput(int location, int name);
|
||||
void set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid);
|
||||
void set_uses_interpolate_at_centroid();
|
||||
void set_need_lds_pos() { m_need_lds_pos = true;}
|
||||
int ij_index() const { return m_ij_index;}
|
||||
|
||||
int interpolator() const{return m_interpolator;}
|
||||
int interpolate_loc() const {return m_interpolate_loc;}
|
||||
bool need_lds_pos() const {return m_need_lds_pos;}
|
||||
int lds_pos() const {return m_lds_pos;}
|
||||
void set_lds_pos(int pos) {m_lds_pos = pos;}
|
||||
|
||||
int ring_offset() const {return m_ring_offset;}
|
||||
void set_ring_offset(int offs) {m_ring_offset = offs;}
|
||||
bool uses_interpolate_at_centroid() const {return m_uses_interpolate_at_centroid;}
|
||||
|
||||
private:
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
||||
int m_interpolator{0};
|
||||
int m_interpolate_loc{0};
|
||||
int m_ij_index{0};
|
||||
bool m_uses_interpolate_at_centroid{false};
|
||||
bool m_need_lds_pos{false};
|
||||
int m_lds_pos{0};
|
||||
int m_ring_offset{0};
|
||||
};
|
||||
|
||||
class Shader : public Allocate {
|
||||
public:
|
||||
using InputIterator = std::map<int, ShaderInput>::iterator;
|
||||
using OutputIterator = std::map<int, ShaderOutput>::iterator;
|
||||
|
||||
using ShaderBlocks = std::list<Block::Pointer, Allocator<Block::Pointer>>;
|
||||
|
||||
Shader(const Shader& orig) = delete;
|
||||
|
||||
virtual ~Shader() {}
|
||||
|
||||
bool add_info_from_string(std::istream& is);
|
||||
|
||||
static Shader *translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info, r600_shader *gs_shader,
|
||||
r600_shader_key& key, r600_chip_class chip_class);
|
||||
|
||||
bool process(nir_shader *nir);
|
||||
|
||||
bool process_cf_node(nir_cf_node *node);
|
||||
bool process_if(nir_if *node);
|
||||
bool process_loop(nir_loop *node);
|
||||
bool process_block(nir_block *node);
|
||||
bool process_instr(nir_instr *instr);
|
||||
void emit_instruction(PInst instr);
|
||||
bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
|
||||
|
||||
void print(std::ostream& os ) const;
|
||||
void print_header(std::ostream& os ) const;
|
||||
|
||||
bool process_intrinsic(nir_intrinsic_instr *intr);
|
||||
|
||||
virtual bool load_input(nir_intrinsic_instr *intr) = 0;
|
||||
virtual bool store_output(nir_intrinsic_instr *intr) = 0;
|
||||
|
||||
bool load_uniform(nir_intrinsic_instr *intr);
|
||||
bool load_ubo(nir_intrinsic_instr *intr);
|
||||
|
||||
ValueFactory& value_factory();
|
||||
|
||||
void add_output(const ShaderOutput& output) {
|
||||
m_outputs[output.location()] = output;
|
||||
}
|
||||
|
||||
void add_input(const ShaderInput& input) {
|
||||
m_inputs[input.location()] = input;
|
||||
}
|
||||
|
||||
void set_input_gpr(int driver_lcation, int gpr);
|
||||
|
||||
InputIterator find_input(int location) { return m_inputs.find(location);}
|
||||
|
||||
InputIterator input_not_found() {return m_inputs.end();}
|
||||
|
||||
OutputIterator find_output(int location);
|
||||
OutputIterator output_not_found() {return m_outputs.end();}
|
||||
|
||||
ShaderBlocks& func() { return m_root; }
|
||||
void reset_function(ShaderBlocks& new_root);
|
||||
|
||||
void emit_instruction_from_string(const std::string &s);
|
||||
|
||||
void set_info(nir_shader *nir);
|
||||
void get_shader_info(r600_shader *sh_info);
|
||||
|
||||
r600_chip_class chip_class() const {return m_chip_class;};
|
||||
void set_chip_class(r600_chip_class cls) {m_chip_class = cls;};
|
||||
|
||||
void start_new_block(int nesting_depth);
|
||||
|
||||
const ShaderOutput& output(int base) const;
|
||||
|
||||
LiveRangeMap prepare_live_range_map();
|
||||
|
||||
void set_last_txd(Instr *txd){m_last_txd = txd;}
|
||||
Instr *last_txd(){return m_last_txd;}
|
||||
|
||||
// Needed for keeping the memory access in order
|
||||
void chain_scratch_read(Instr *instr);
|
||||
void chain_ssbo_read(Instr *instr);
|
||||
|
||||
virtual uint32_t enabled_stream_buffers_mask() const {return 0;};
|
||||
|
||||
size_t noutputs() const { return m_outputs.size();}
|
||||
size_t ninputs() const { return m_inputs.size();}
|
||||
|
||||
enum Flags {
|
||||
sh_indirect_const_file,
|
||||
sh_needs_scratch_space,
|
||||
sh_needs_sbo_ret_address,
|
||||
sh_uses_atomics,
|
||||
sh_uses_images,
|
||||
sh_uses_tex_buffer,
|
||||
sh_writes_memory,
|
||||
sh_txs_cube_array_comp,
|
||||
sh_indirect_atomic,
|
||||
sh_mem_barrier,
|
||||
sh_flags_count
|
||||
};
|
||||
|
||||
void set_flag(Flags f) {m_flags.set(f);}
|
||||
bool has_flag(Flags f) const {return m_flags.test(f);}
|
||||
|
||||
int atomic_file_count() const { return m_atomic_file_count; }
|
||||
|
||||
PRegister atomic_update();
|
||||
int remap_atomic_base(int base);
|
||||
auto evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair<int, PRegister>;
|
||||
int ssbo_image_offset() const {return m_ssbo_image_offset;}
|
||||
PRegister rat_return_address() {assert(m_rat_return_address); return m_rat_return_address;}
|
||||
|
||||
PRegister emit_load_to_register(PVirtualValue src);
|
||||
|
||||
protected:
|
||||
enum ESlots {
|
||||
es_face,
|
||||
es_instanceid,
|
||||
es_invocation_id,
|
||||
es_patch_id,
|
||||
es_pos,
|
||||
es_rel_patch_id,
|
||||
es_sample_mask_in,
|
||||
es_sample_id,
|
||||
es_sample_pos,
|
||||
es_tess_factor_base,
|
||||
es_vertexid,
|
||||
es_tess_coord,
|
||||
es_primitive_id,
|
||||
es_helper_invocation,
|
||||
es_last
|
||||
};
|
||||
|
||||
std::bitset<es_last> m_sv_values;
|
||||
|
||||
Shader(const char *type_id);
|
||||
|
||||
const ShaderInput& input(int base) const;
|
||||
|
||||
bool emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin = pin_free);
|
||||
|
||||
private:
|
||||
virtual bool process_stage_intrinsic(nir_intrinsic_instr *intr) = 0;
|
||||
|
||||
bool allocate_registers_from_string(std::istream& is, Pin pin);
|
||||
bool allocate_arrays_from_string(std::istream& is);
|
||||
|
||||
bool read_chipclass(std::istream& is);
|
||||
|
||||
bool load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr, int offset , int buffer_id);
|
||||
|
||||
bool scan_shader(const nir_function *impl);
|
||||
bool scan_uniforms(nir_variable *uniform);
|
||||
void allocate_reserved_registers();
|
||||
|
||||
void allocate_local_registers(const exec_list *registers);
|
||||
|
||||
virtual int do_allocate_reserved_registers() = 0;
|
||||
|
||||
bool scan_instruction(nir_instr *instr);
|
||||
virtual bool do_scan_instruction(nir_instr *instr) = 0;
|
||||
|
||||
void print_properties(std::ostream& os) const;
|
||||
virtual void do_print_properties(std::ostream& os) const = 0;
|
||||
|
||||
bool read_output(std::istream& is);
|
||||
bool read_input(std::istream& is);
|
||||
virtual bool read_prop(std::istream& is) = 0;
|
||||
|
||||
bool emit_if_start(nir_if *if_stmt);
|
||||
bool emit_control_flow(ControlFlowInstr::CFType type);
|
||||
bool emit_store_scratch(nir_intrinsic_instr *intr);
|
||||
bool emit_load_scratch(nir_intrinsic_instr *intr);
|
||||
bool emit_local_store(nir_intrinsic_instr *intr);
|
||||
bool emit_local_load(nir_intrinsic_instr* instr);
|
||||
bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
|
||||
bool emit_barrier(nir_intrinsic_instr* intr);
|
||||
bool emit_shader_clock(nir_intrinsic_instr* instr);
|
||||
bool emit_wait_ack();
|
||||
|
||||
bool equal_to(const Shader& other) const;
|
||||
void finalize();
|
||||
virtual void do_finalize();
|
||||
|
||||
virtual void do_get_shader_info(r600_shader *sh_info);
|
||||
|
||||
ShaderBlocks m_root;
|
||||
Block::Pointer m_current_block;
|
||||
|
||||
InstrFactory *m_instr_factory;
|
||||
const char *m_type_id;
|
||||
|
||||
template <typename T>
|
||||
using IOMap = std::map<int, T, std::less<int>, Allocator<std::pair<const int, T>>>;
|
||||
|
||||
IOMap<ShaderOutput> m_outputs;
|
||||
IOMap<ShaderInput> m_inputs;
|
||||
r600_chip_class m_chip_class;
|
||||
|
||||
int m_scratch_size;
|
||||
int m_next_block;
|
||||
bool m_indirect_const_file{false};
|
||||
|
||||
Instr *m_last_txd {nullptr};
|
||||
|
||||
uint32_t m_indirect_files{0};
|
||||
std::bitset<sh_flags_count> m_flags;
|
||||
uint32_t nhwatomic_ranges{0};
|
||||
std::vector<r600_shader_atomic> m_atomics;
|
||||
|
||||
uint32_t m_nhwatomic{0};
|
||||
uint32_t m_atomic_base{0};
|
||||
uint32_t m_next_hwatomic_loc{0};
|
||||
std::unordered_map<int, int> m_atomic_base_map;
|
||||
uint32_t m_atomic_file_count{0};
|
||||
PRegister m_atomic_update{nullptr};
|
||||
PRegister m_rat_return_address{nullptr};
|
||||
|
||||
int32_t m_ssbo_image_offset{0};
|
||||
uint32_t m_nloops{0};
|
||||
|
||||
class InstructionChain : public InstrVisitor {
|
||||
public:
|
||||
void visit(AluInstr *instr) override {(void) instr;}
|
||||
void visit(AluGroup *instr) override {(void) instr;}
|
||||
void visit(TexInstr *instr) override {(void) instr;}
|
||||
void visit(ExportInstr *instr) override {(void) instr;}
|
||||
void visit(FetchInstr *instr) override {(void) instr;}
|
||||
void visit(Block *instr) override {(void) instr;}
|
||||
void visit(ControlFlowInstr *instr) override {(void) instr;}
|
||||
void visit(IfInstr *instr) override {(void) instr;}
|
||||
void visit(StreamOutInstr *instr) override {(void) instr;}
|
||||
void visit(MemRingOutInstr *instr) override {(void) instr;}
|
||||
void visit(EmitVertexInstr *instr) override {(void) instr;}
|
||||
void visit(WriteTFInstr *instr) override {(void) instr;}
|
||||
void visit(LDSAtomicInstr *instr) override {(void) instr;}
|
||||
void visit(LDSReadInstr *instr) override {(void) instr;}
|
||||
|
||||
void visit(WriteScratchInstr *instr) override;
|
||||
void visit(GDSInstr *instr) override;
|
||||
void visit(RatInstr *instr) override;
|
||||
|
||||
void apply(Instr *current, Instr **last);
|
||||
|
||||
Shader *this_shader{nullptr};
|
||||
Instr *last_scratch_instr{nullptr};
|
||||
Instr *last_gds_instr{nullptr};
|
||||
Instr *last_ssbo_instr{nullptr};
|
||||
bool prepare_mem_barrier{false};
|
||||
};
|
||||
|
||||
InstructionChain m_chain_instr;
|
||||
std::vector<Instr *> m_loops;
|
||||
};
|
||||
|
||||
|
||||
std::pair<unsigned, unsigned>
|
||||
r600_get_varying_semantic(unsigned varying_location);
|
||||
|
||||
}
|
||||
|
||||
#endif // SHADER_H
|
File diff suppressed because it is too large
Load Diff
@ -1,231 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef sfn_shader_from_nir_h
|
||||
#define sfn_shader_from_nir_h
|
||||
|
||||
|
||||
#include "gallium/drivers/r600/r600_shader.h"
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir_types.h"
|
||||
|
||||
#include "sfn_instruction_block.h"
|
||||
#include "sfn_instruction_export.h"
|
||||
#include "sfn_alu_defines.h"
|
||||
#include "sfn_valuepool.h"
|
||||
#include "sfn_debug.h"
|
||||
#include "sfn_instruction_cf.h"
|
||||
#include "sfn_emittexinstruction.h"
|
||||
#include "sfn_emitaluinstruction.h"
|
||||
#include "sfn_emitssboinstruction.h"
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <stack>
|
||||
#include <unordered_map>
|
||||
|
||||
struct nir_instr;
|
||||
|
||||
namespace r600 {
|
||||
|
||||
extern SfnLog sfn_log;
|
||||
|
||||
class ShaderFromNirProcessor : public ValuePool {
|
||||
public:
|
||||
ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel,
|
||||
r600_shader& sh_info, int scratch_size, enum amd_gfx_level _chip_class,
|
||||
int atomic_base);
|
||||
virtual ~ShaderFromNirProcessor();
|
||||
|
||||
void emit_instruction(Instruction *ir);
|
||||
|
||||
PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
|
||||
GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
|
||||
const GPRVector::Swizzle& swizzle, bool match = false);
|
||||
|
||||
bool emit_instruction(EAluOp opcode, PValue dest,
|
||||
std::vector<PValue> src0,
|
||||
const std::set<AluModifiers>& m_flags);
|
||||
void emit_export_instruction(WriteoutInstruction *ir);
|
||||
void emit_instruction(AluInstruction *ir);
|
||||
|
||||
bool use_legacy_math_rules(void) {
|
||||
return m_sel.nir->info.use_legacy_math_rules;
|
||||
};
|
||||
|
||||
void split_constants(nir_alu_instr* instr);
|
||||
void remap_registers();
|
||||
|
||||
const nir_variable *get_deref_location(const nir_src& src) const;
|
||||
|
||||
r600_shader& sh_info() {return m_sh_info;}
|
||||
void add_param_output_reg(int loc, const GPRVector *gpr);
|
||||
void set_output(unsigned pos, int sel);
|
||||
const GPRVector *output_register(unsigned location) const;
|
||||
void evaluate_spi_sid(r600_shader_io &io);
|
||||
|
||||
enum amd_gfx_level get_chip_class() const;
|
||||
|
||||
int remap_atomic_base(int base) {
|
||||
return m_atomic_base_map[base];
|
||||
}
|
||||
|
||||
void get_array_info(r600_shader& shader) const;
|
||||
|
||||
virtual bool scan_inputs_read(const nir_shader *sh);
|
||||
void set_shader_info(const nir_shader *sh);
|
||||
|
||||
protected:
|
||||
|
||||
void set_var_address(nir_deref_instr *instr);
|
||||
void set_input(unsigned pos, PValue var);
|
||||
|
||||
bool scan_instruction(nir_instr *instr);
|
||||
|
||||
virtual bool scan_sysvalue_access(nir_instr *instr) = 0;
|
||||
|
||||
bool emit_if_start(int if_id, nir_if *if_stmt);
|
||||
bool emit_else_start(int if_id);
|
||||
bool emit_ifelse_end(int if_id);
|
||||
|
||||
bool emit_loop_start(int loop_id);
|
||||
bool emit_loop_end(int loop_id);
|
||||
bool emit_jump_instruction(nir_jump_instr *instr);
|
||||
|
||||
bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
|
||||
bool emit_load_local_shared(nir_intrinsic_instr* instr);
|
||||
bool emit_store_local_shared(nir_intrinsic_instr* instr);
|
||||
bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
|
||||
|
||||
bool emit_barrier(nir_intrinsic_instr* instr);
|
||||
|
||||
bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
|
||||
bool as_last = true);
|
||||
|
||||
void inc_atomic_file_count();
|
||||
|
||||
virtual void do_set_shader_info(const nir_shader *sh);
|
||||
|
||||
enum ESlots {
|
||||
es_face,
|
||||
es_instanceid,
|
||||
es_invocation_id,
|
||||
es_patch_id,
|
||||
es_pos,
|
||||
es_rel_patch_id,
|
||||
es_sample_mask_in,
|
||||
es_sample_id,
|
||||
es_sample_pos,
|
||||
es_tess_factor_base,
|
||||
es_vertexid,
|
||||
es_tess_coord,
|
||||
es_primitive_id,
|
||||
es_helper_invocation,
|
||||
es_last
|
||||
};
|
||||
|
||||
std::bitset<es_last> m_sv_values;
|
||||
|
||||
bool allocate_reserved_registers();
|
||||
|
||||
|
||||
private:
|
||||
virtual bool do_allocate_reserved_registers() = 0;
|
||||
|
||||
|
||||
void emit_instruction_internal(Instruction *ir);
|
||||
|
||||
bool emit_alu_instruction(nir_instr *instr);
|
||||
bool emit_deref_instruction(nir_deref_instr* instr);
|
||||
bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
|
||||
virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr);
|
||||
bool emit_tex_instruction(nir_instr* instr);
|
||||
bool emit_discard_if(nir_intrinsic_instr* instr);
|
||||
bool emit_load_ubo_vec4(nir_intrinsic_instr* instr);
|
||||
bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr);
|
||||
bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufid);
|
||||
|
||||
/* Code creating functions */
|
||||
bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr);
|
||||
AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask);
|
||||
|
||||
bool load_uniform(nir_intrinsic_instr* instr);
|
||||
bool process_uniforms(nir_variable *uniform);
|
||||
|
||||
void append_block(int nesting_change);
|
||||
|
||||
virtual void emit_shader_start();
|
||||
virtual bool emit_deref_instruction_override(nir_deref_instr* instr);
|
||||
|
||||
bool emit_store_scratch(nir_intrinsic_instr* instr);
|
||||
bool emit_load_scratch(nir_intrinsic_instr* instr);
|
||||
bool emit_shader_clock(nir_intrinsic_instr* instr);
|
||||
virtual void do_finalize() = 0;
|
||||
|
||||
void finalize();
|
||||
friend class ShaderFromNir;
|
||||
|
||||
std::set<nir_variable*> m_arrays;
|
||||
|
||||
std::map<unsigned, PValue> m_inputs;
|
||||
std::map<unsigned, int> m_outputs;
|
||||
|
||||
std::map<unsigned, nir_variable*> m_var_derefs;
|
||||
std::map<const nir_variable *, nir_variable_mode> m_var_mode;
|
||||
|
||||
std::map<unsigned, const glsl_type*> m_uniform_type_map;
|
||||
std::map<int, IfElseInstruction *> m_if_block_start_map;
|
||||
std::map<int, LoopBeginInstruction *> m_loop_begin_block_map;
|
||||
|
||||
pipe_shader_type m_processor_type;
|
||||
|
||||
std::vector<InstructionBlock> m_output;
|
||||
unsigned m_nesting_depth;
|
||||
unsigned m_block_number;
|
||||
InstructionBlock m_export_output;
|
||||
r600_shader& m_sh_info;
|
||||
enum amd_gfx_level m_chip_class;
|
||||
EmitTexInstruction m_tex_instr;
|
||||
EmitAluInstruction m_alu_instr;
|
||||
EmitSSBOInstruction m_ssbo_instr;
|
||||
OutputRegisterMap m_output_register_map;
|
||||
|
||||
IfElseInstruction *m_pending_else;
|
||||
int m_scratch_size;
|
||||
int m_next_hwatomic_loc;
|
||||
|
||||
r600_pipe_shader_selector& m_sel;
|
||||
int m_atomic_base ;
|
||||
int m_image_count;
|
||||
|
||||
std::unordered_map<int, int> m_atomic_base_map;
|
||||
AluInstruction *last_emitted_alu;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,112 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2018 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sfn_shader_compute.h"
|
||||
#include "sfn_instruction_fetch.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* Construct the compute-stage processor. All arguments except the shader
 * key (which is accepted but not used) are forwarded to the
 * ShaderFromNirProcessor base; the reserved-register counter starts at 0. */
ComputeShaderFromNir::ComputeShaderFromNir(r600_pipe_shader *sh,
                                           r600_pipe_shader_selector& sel,
                                           UNUSED const r600_shader_key& key,
                                           enum amd_gfx_level gfx_level)
   : ShaderFromNirProcessor(PIPE_SHADER_COMPUTE,
                            sel,
                            sh->shader,
                            sh->scratch_space_needed,
                            gfx_level,
                            0),
     m_reserved_registers(0)
{
}
|
||||
|
||||
/* System-value scan hook. The instruction is not inspected; the scan
 * always reports success. */
bool ComputeShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
{
   const bool scan_succeeded = true;
   return scan_succeeded;
}
|
||||
bool ComputeShaderFromNir::do_allocate_reserved_registers()
|
||||
{
|
||||
int thread_id_sel = m_reserved_registers++;
|
||||
int wg_id_sel = m_reserved_registers++;
|
||||
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
auto tmp = new GPRValue(thread_id_sel, i);
|
||||
tmp->set_as_input();
|
||||
tmp->set_keep_alive();
|
||||
m_local_invocation_id[i] = PValue(tmp);
|
||||
inject_register(tmp->sel(), i, m_local_invocation_id[i], false);
|
||||
|
||||
tmp = new GPRValue(wg_id_sel, i);
|
||||
tmp->set_as_input();
|
||||
tmp->set_keep_alive();
|
||||
m_workgroup_id[i] = PValue(tmp);
|
||||
inject_register(tmp->sel(), i, m_workgroup_id[i], false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Handle the intrinsics that are specific to the compute stage.
 * Returns the result of the matching emitter, or false when the intrinsic
 * is not one of the three handled here. */
bool ComputeShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
{
   const auto opcode = instr->intrinsic;

   if (opcode == nir_intrinsic_load_local_invocation_id)
      return emit_load_3vec(instr, m_local_invocation_id);

   if (opcode == nir_intrinsic_load_workgroup_id)
      return emit_load_3vec(instr, m_workgroup_id);

   if (opcode == nir_intrinsic_load_num_workgroups)
      return emit_load_num_workgroups(instr);

   return false;
}
|
||||
|
||||
/* Load the three preloaded values in `src` into the first three components
 * of the intrinsic's destination. The final component is passed with the
 * "last" argument set to true. Always returns true. */
bool ComputeShaderFromNir::emit_load_3vec(nir_intrinsic_instr* instr,
                                          const std::array<PValue,3>& src)
{
   const int last_chan = 2;

   for (int chan = 0; chan <= last_chan; ++chan) {
      const bool is_last = (chan == last_chan);
      load_preloaded_value(instr->dest, chan, src[chan], is_last);
   }

   return true;
}
|
||||
|
||||
/* Emit the code for nir_intrinsic_load_num_workgroups: a temporary is set
 * to zero with a MOV and then used as the source register of a vertex-cache
 * fetch from R600_BUFFER_INFO_CONST_BUFFER into the intrinsic's destination.
 * Always returns true. */
bool ComputeShaderFromNir::emit_load_num_workgroups(nir_intrinsic_instr* instr)
{
   PValue zero_src = get_temp_register(1);
   auto clear = new AluInstruction(op1_mov, zero_src, Value::zero,
                                   EmitInstruction::last_write);
   emit_instruction(clear);

   /* Slots 0..2 take destination components 0..2; the fourth slot is filled
    * from component 7, matching the {0,1,2,7} swizzle passed below. */
   GPRVector result;
   for (int slot = 0; slot < 4; ++slot) {
      const int comp = (slot < 3) ? slot : 7;
      result.set_reg_i(slot, from_nir(instr->dest, comp));
   }

   auto fetch = new FetchInstruction(vc_fetch, no_index_offset,
                                     fmt_32_32_32_32, vtx_nf_int, vtx_es_none,
                                     zero_src, result, 16,
                                     false, 16, R600_BUFFER_INFO_CONST_BUFFER, 0,
                                     bim_none, false, false, 0, 0, 0,
                                     PValue(), {0,1,2,7});
   fetch->set_flag(vtx_srf_mode);
   emit_instruction(fetch);

   return true;
}
|
||||
|
||||
/* Finalization hook; the compute stage has nothing to do here. */
void ComputeShaderFromNir::do_finalize()
{
}
|
||||
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SFN_COMPUTE_SHADER_FROM_NIR_H
|
||||
#define SFN_COMPUTE_SHADER_FROM_NIR_H
|
||||
|
||||
#include "sfn_shader_base.h"
|
||||
#include "sfn_shaderio.h"
|
||||
#include <bitset>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class ComputeShaderFromNir : public ShaderFromNirProcessor
|
||||
{
|
||||
public:
|
||||
ComputeShaderFromNir(r600_pipe_shader *sh,
|
||||
r600_pipe_shader_selector& sel,
|
||||
const r600_shader_key &key,
|
||||
enum amd_gfx_level gfx_level);
|
||||
|
||||
bool scan_sysvalue_access(nir_instr *instr) override;
|
||||
|
||||
private:
|
||||
bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
|
||||
|
||||
bool do_allocate_reserved_registers() override;
|
||||
void do_finalize() override;
|
||||
|
||||
bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PValue,3>& src);
|
||||
bool emit_load_num_workgroups(nir_intrinsic_instr* instr);
|
||||
|
||||
int m_reserved_registers;
|
||||
std::array<PValue,3> m_workgroup_id;
|
||||
std::array<PValue,3> m_local_invocation_id;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // SFN_COMPUTE_SHADER_FROM_NIR_H
|
95
src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp
Normal file
95
src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
#include "sfn_shader_cs.h"
|
||||
#include "sfn_instr_fetch.h"
|
||||
|
||||
|
||||
namespace r600 {
|
||||
|
||||
/* The shader key is accepted but not used; the base class is constructed
 * with the "CS" tag. */
ComputeShader::ComputeShader(UNUSED const r600_shader_key& key)
   : Shader("CS")
{
}
|
||||
|
||||
/* Stage-specific scan hook. The instruction is not inspected and the hook
 * always returns false. */
bool ComputeShader::do_scan_instruction(UNUSED nir_instr *instr)
{
   const bool handled = false;
   return handled;
}
|
||||
|
||||
int ComputeShader::do_allocate_reserved_registers()
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
|
||||
const int thread_id_sel = 0;
|
||||
const int wg_id_sel = 1;
|
||||
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
m_local_invocation_id[i] = vf.allocate_pinned_register(thread_id_sel, i);
|
||||
m_local_invocation_id[i]->pin_live_range(true);
|
||||
|
||||
m_workgroup_id[i] = vf.allocate_pinned_register(wg_id_sel, i);
|
||||
m_workgroup_id[i]->pin_live_range(true);
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* Dispatch the intrinsics that only the compute stage handles.
 * Returns the result of the matching emitter, or false when the intrinsic
 * is not one of the three handled here. */
bool ComputeShader::process_stage_intrinsic(nir_intrinsic_instr *instr)
{
   const auto opcode = instr->intrinsic;

   if (opcode == nir_intrinsic_load_local_invocation_id)
      return emit_load_3vec(instr, m_local_invocation_id);

   if (opcode == nir_intrinsic_load_workgroup_id)
      return emit_load_3vec(instr, m_workgroup_id);

   if (opcode == nir_intrinsic_load_num_workgroups)
      return emit_load_num_workgroups(instr);

   return false;
}
|
||||
|
||||
/* Fill in the stage-specific part of the shader info: only the processor
 * type is set, to PIPE_SHADER_COMPUTE. */
void ComputeShader::do_get_shader_info(r600_shader *sh_info)
{
   r600_shader& info = *sh_info;
   info.processor_type = PIPE_SHADER_COMPUTE;
}
|
||||
|
||||
/* Property reader hook. Nothing is read from the stream and the call
 * always reports success. */
bool ComputeShader::read_prop(UNUSED std::istream& is)
{
   const bool accepted = true;
   return accepted;
}
|
||||
|
||||
/* Property printer hook; this stage writes nothing to the stream. */
void ComputeShader::do_print_properties(UNUSED std::ostream& os) const
{
}
|
||||
|
||||
/* Emit the code for nir_intrinsic_load_num_workgroups: a temporary register
 * is set to zero with a MOV and handed to a LoadFromBuffer that reads from
 * R600_BUFFER_INFO_CONST_BUFFER into the intrinsic's vec4 destination using
 * the swizzle {0,1,2,7}. Always returns true. */
bool ComputeShader::emit_load_num_workgroups(nir_intrinsic_instr* instr)
{
   auto& factory = value_factory();

   auto zero_reg = factory.temp_register();
   auto clear = new AluInstr(op1_mov, zero_reg,
                             factory.inline_const(ALU_SRC_0, 0),
                             AluInstr::last_write);
   emit_instruction(clear);

   auto result = factory.dest_vec4(instr->dest, pin_group);

   auto fetch = new LoadFromBuffer(result, {0,1,2,7}, zero_reg, 16,
                                   R600_BUFFER_INFO_CONST_BUFFER,
                                   nullptr, fmt_32_32_32_32);

   /* Fetch configuration: srf_mode on, signed component format off,
    * integer number format. */
   fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
   fetch->reset_fetch_flag(LoadFromBuffer::format_comp_signed);
   fetch->set_num_format(vtx_nf_int);
   emit_instruction(fetch);

   return true;
}
|
||||
|
||||
/* Copy the three registers in `src` into components 0..2 of the intrinsic's
 * destination with one MOV per component. The first two MOVs use
 * AluInstr::write and the third uses AluInstr::last_write.
 * Always returns true. */
bool ComputeShader::emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PRegister,3>& src)
{
   auto& factory = value_factory();
   const int last_chan = 2;

   for (int chan = 0; chan <= last_chan; ++chan) {
      auto target = factory.dest(instr->dest, chan, pin_none);
      const auto& flags = (chan == last_chan) ? AluInstr::last_write : AluInstr::write;
      emit_instruction(new AluInstr(op1_mov, target, src[chan], flags));
   }

   return true;
}
|
||||
|
||||
}
|
39
src/gallium/drivers/r600/sfn/sfn_shader_cs.h
Normal file
39
src/gallium/drivers/r600/sfn/sfn_shader_cs.h
Normal file
@ -0,0 +1,39 @@
|
||||
#ifndef COMPUTE_H
|
||||
#define COMPUTE_H
|
||||
|
||||
#include "sfn_shader.h"
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class ComputeShader : public Shader
|
||||
{
|
||||
public:
|
||||
ComputeShader(const r600_shader_key& key);
|
||||
|
||||
private:
|
||||
bool do_scan_instruction(nir_instr *instr) override;
|
||||
int do_allocate_reserved_registers() override;
|
||||
|
||||
bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
|
||||
void do_get_shader_info(r600_shader *sh_info) override;
|
||||
|
||||
bool load_input(UNUSED nir_intrinsic_instr *intr) override {
|
||||
unreachable("compute shaders have bno inputs");
|
||||
};
|
||||
bool store_output(UNUSED nir_intrinsic_instr *intr) override {
|
||||
unreachable("compute shaders have no outputs");
|
||||
};
|
||||
|
||||
bool read_prop(std::istream& is) override;
|
||||
void do_print_properties(std::ostream& os) const override;
|
||||
|
||||
bool emit_load_num_workgroups(nir_intrinsic_instr* instr);
|
||||
bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PRegister,3>& src);
|
||||
|
||||
std::array<PRegister,3> m_workgroup_id{nullptr};
|
||||
std::array<PRegister,3> m_local_invocation_id{nullptr};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // COMPUTE_H
|
File diff suppressed because it is too large
Load Diff
@ -1,117 +0,0 @@
|
||||
/* -*- mesa-c++ -*-
|
||||
*
|
||||
* Copyright (c) 2019 Collabora LTD
|
||||
*
|
||||
* Author: Gert Wollny <gert.wollny@collabora.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef sfn_fragment_shader_from_nir_h
|
||||
#define sfn_fragment_shader_from_nir_h
|
||||
|
||||
#include "sfn_shader_base.h"
|
||||
#include "sfn_shaderio.h"
|
||||
#include <bitset>
|
||||
|
||||
namespace r600 {
|
||||
|
||||
class FragmentShaderFromNir : public ShaderFromNirProcessor {
|
||||
public:
|
||||
FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info,
|
||||
r600_pipe_shader_selector &sel, const r600_shader_key &key,
|
||||
enum amd_gfx_level gfx_level);
|
||||
bool scan_sysvalue_access(nir_instr *instr) override;
|
||||
private:
|
||||
|
||||
struct Interpolator {
|
||||
bool enabled;
|
||||
unsigned ij_index;
|
||||
PValue i;
|
||||
PValue j;
|
||||
};
|
||||
|
||||
void emit_shader_start() override;
|
||||
bool do_allocate_reserved_registers() override;
|
||||
bool process_store_output(nir_intrinsic_instr *instr);
|
||||
|
||||
bool emit_store_output(nir_intrinsic_instr* instr);
|
||||
|
||||
bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs);
|
||||
bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs);
|
||||
bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip,
|
||||
int num_components, int start_comp);
|
||||
bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op);
|
||||
bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip,EAluOp op, int writemask);
|
||||
bool load_interpolated_two_comp_for_one(GPRVector &dest,
|
||||
ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp);
|
||||
|
||||
bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
|
||||
void do_finalize() override;
|
||||
|
||||
void load_front_face();
|
||||
|
||||
bool emit_load_input(nir_intrinsic_instr* instr);
|
||||
bool emit_load_front_face(nir_intrinsic_instr* instr);
|
||||
bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
|
||||
bool emit_load_sample_pos(nir_intrinsic_instr* instr);
|
||||
bool emit_load_sample_id(nir_intrinsic_instr* instr);
|
||||
|
||||
bool process_load_input(nir_intrinsic_instr *instr, bool interpolated);
|
||||
bool emit_load_interpolated_input(nir_intrinsic_instr* instr);
|
||||
bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
|
||||
bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
|
||||
|
||||
|
||||
unsigned m_max_color_exports;
|
||||
unsigned m_max_counted_color_exports;
|
||||
bool m_two_sided_color;
|
||||
ExportInstruction *m_last_pixel_export;
|
||||
const nir_shader& m_nir;
|
||||
|
||||
|
||||
std::array<Interpolator, 6> m_interpolator;
|
||||
unsigned m_reserved_registers;
|
||||
unsigned m_frag_pos_index;
|
||||
PGPRValue m_front_face_reg;
|
||||
PGPRValue m_sample_mask_reg;
|
||||
PGPRValue m_sample_id_reg;
|
||||
PGPRValue m_helper_invocation;
|
||||
GPRVector m_frag_pos;
|
||||
bool m_need_back_color;
|
||||
bool m_front_face_loaded;
|
||||
ShaderIO m_shaderio;
|
||||
unsigned m_depth_exports;
|
||||
|
||||
std::map<unsigned, PValue> m_input_cache;
|
||||
|
||||
static const int s_max_interpolators = 6;
|
||||
|
||||
std::bitset<s_max_interpolators> m_interpolators_used;
|
||||
|
||||
unsigned m_apply_sample_mask;
|
||||
bool m_dual_source_blend;
|
||||
ShaderInput *m_pos_input;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user