r600/sfn: rewrite NIR backend

This is a rewrite of the NIR backend. It adds some optimizations
and a scheduler.

v2: - replace some magic numbers by constants
    - make sure constructor is always used with new
    - use default initialization in more places
      (changes suggested by Filip Gawin)

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17076>
Gert Wollny 2021-06-19 13:03:32 +02:00 committed by Marge Bot
parent ab06b00c63
commit 79ca456b48
140 changed files with 27360 additions and 16187 deletions

View File

@ -107,6 +107,10 @@ files_r600 = files(
'sb/sb_valtable.cpp',
'sfn/sfn_alu_defines.cpp',
'sfn/sfn_alu_defines.h',
'sfn/sfn_alu_readport_validation.cpp',
'sfn/sfn_alu_readport_validation.h',
'sfn/sfn_assembler.cpp',
'sfn/sfn_assembler.h',
'sfn/sfn_callstack.cpp',
'sfn/sfn_callstack.h',
'sfn/sfn_conditionaljumptracker.cpp',
@ -114,73 +118,66 @@ files_r600 = files(
'sfn/sfn_defines.h',
'sfn/sfn_debug.cpp',
'sfn/sfn_debug.h',
'sfn/sfn_emitaluinstruction.cpp',
'sfn/sfn_emitaluinstruction.h',
'sfn/sfn_emitinstruction.cpp',
'sfn/sfn_emitinstruction.h',
'sfn/sfn_emitssboinstruction.cpp',
'sfn/sfn_emitssboinstruction.h',
'sfn/sfn_emittexinstruction.cpp',
'sfn/sfn_emittexinstruction.h',
'sfn/sfn_emitinstruction.h',
'sfn/sfn_instruction_alu.cpp',
'sfn/sfn_instruction_alu.h',
'sfn/sfn_instruction_base.cpp',
'sfn/sfn_instruction_base.h',
'sfn/sfn_instruction_block.cpp',
'sfn/sfn_instruction_block.h',
'sfn/sfn_instruction_cf.cpp',
'sfn/sfn_instruction_cf.h',
'sfn/sfn_instruction_export.cpp',
'sfn/sfn_instruction_export.h',
'sfn/sfn_instruction_fetch.cpp',
'sfn/sfn_instruction_fetch.h',
'sfn/sfn_instruction_gds.cpp',
'sfn/sfn_instruction_gds.h',
'sfn/sfn_instruction_lds.cpp',
'sfn/sfn_instruction_lds.h',
'sfn/sfn_instruction_misc.cpp',
'sfn/sfn_instruction_misc.h',
'sfn/sfn_instruction_tex.cpp',
'sfn/sfn_instruction_tex.h',
'sfn/sfn_ir_to_assembly.cpp',
'sfn/sfn_ir_to_assembly.h',
'sfn/sfn_liverange.cpp',
'sfn/sfn_liverange.h',
'sfn/sfn_instr.cpp',
'sfn/sfn_instr.h',
'sfn/sfn_instr_alu.cpp',
'sfn/sfn_instr_alu.h',
'sfn/sfn_instr_alugroup.cpp',
'sfn/sfn_instr_alugroup.h',
'sfn/sfn_instr_controlflow.cpp',
'sfn/sfn_instr_controlflow.h',
'sfn/sfn_instr_export.cpp',
'sfn/sfn_instr_export.h',
'sfn/sfn_instr_fetch.cpp',
'sfn/sfn_instr_fetch.h',
'sfn/sfn_instr_mem.cpp',
'sfn/sfn_instr_mem.h',
'sfn/sfn_instr_lds.cpp',
'sfn/sfn_instr_lds.h',
'sfn/sfn_instr_tex.cpp',
'sfn/sfn_instr_tex.h',
'sfn/sfn_instrfactory.cpp',
'sfn/sfn_instrfactory.h',
'sfn/sfn_liverangeevaluator.cpp',
'sfn/sfn_liverangeevaluator.h',
'sfn/sfn_liverangeevaluator_helpers.cpp',
'sfn/sfn_liverangeevaluator_helpers.h',
'sfn/sfn_memorypool.cpp',
'sfn/sfn_memorypool.h',
'sfn/sfn_nir.cpp',
'sfn/sfn_nir.h',
'sfn/sfn_nir_legalize_image_load_store.cpp',
'sfn/sfn_nir_lower_64bit.cpp',
'sfn/sfn_nir_lower_alu.cpp',
'sfn/sfn_nir_lower_alu.h',
'sfn/sfn_nir_lower_tex.cpp',
'sfn/sfn_nir_lower_tex.h',
'sfn/sfn_nir_lower_fs_out_to_vector.cpp',
'sfn/sfn_nir_lower_fs_out_to_vector.h',
'sfn/sfn_nir_lower_tess_io.cpp',
'sfn/sfn_nir_vectorize_vs_inputs.c',
'sfn/sfn_shader_base.cpp',
'sfn/sfn_shader_base.h',
'sfn/sfn_shader_compute.cpp',
'sfn/sfn_shader_compute.h',
'sfn/sfn_shader_fragment.cpp',
'sfn/sfn_shader_fragment.h',
'sfn/sfn_shader_geometry.cpp',
'sfn/sfn_shader_geometry.h',
'sfn/sfn_shader_tcs.cpp',
'sfn/sfn_shader_tcs.h',
'sfn/sfn_shader_tess_eval.cpp',
'sfn/sfn_shader_tess_eval.h',
'sfn/sfn_shader_vertex.cpp',
'sfn/sfn_shader_vertex.h',
'sfn/sfn_shaderio.cpp',
'sfn/sfn_shaderio.h',
'sfn/sfn_value.cpp',
'sfn/sfn_value.h',
'sfn/sfn_value_gpr.cpp',
'sfn/sfn_value_gpr.h',
'sfn/sfn_valuepool.cpp',
'sfn/sfn_valuepool.h',
'sfn/sfn_vertexstageexport.cpp',
'sfn/sfn_vertexstageexport.h',
'sfn/sfn_optimizer.cpp',
'sfn/sfn_peephole.cpp',
'sfn/sfn_ra.cpp',
'sfn/sfn_ra.h',
'sfn/sfn_scheduler.cpp',
'sfn/sfn_scheduler.h',
'sfn/sfn_shader.cpp',
'sfn/sfn_shader.h',
'sfn/sfn_shader_cs.cpp',
'sfn/sfn_shader_cs.h',
'sfn/sfn_shader_fs.cpp',
'sfn/sfn_shader_fs.h',
'sfn/sfn_shader_gs.cpp',
'sfn/sfn_shader_gs.h',
'sfn/sfn_shader_tess.cpp',
'sfn/sfn_shader_tess.h',
'sfn/sfn_shader_vs.cpp',
'sfn/sfn_shader_vs.h',
'sfn/sfn_valuefactory.cpp',
'sfn/sfn_valuefactory.h',
'sfn/sfn_virtualvalues.cpp',
'sfn/sfn_virtualvalues.h',
)
egd_tables_h = custom_target(
@ -200,11 +197,13 @@ libr600 = static_library(
'r600',
[files_r600, egd_tables_h],
c_args : [r600_c_args, '-Wstrict-overflow=0'],
cpp_args: '-std=c++17',
gnu_symbol_visibility : 'hidden',
include_directories : [
inc_src, inc_mapi, inc_mesa, inc_include, inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common,
inc_gallium_drivers,
],
dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm, idep_nir, idep_nir_headers],
)
@ -212,3 +211,9 @@ driver_r600 = declare_dependency(
compile_args : '-DGALLIUM_R600',
link_with : [libr600, libmesa, libradeonwinsys],
)
if with_tests
subdir('sfn/tests')
endif

View File

@ -407,8 +407,8 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *
}
assignment[4] = alu;
} else {
if (assignment[chan]) {
assert(0); /* ALU.chan has already been allocated. */
if (assignment[chan]) {
assert(0); /* ALU.chan has already been allocated. */
return -1;
}
assignment[chan] = alu;

View File

@ -355,6 +355,8 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
unsigned *num_format, unsigned *format_comp, unsigned *endian);
int r600_load_ar(struct r600_bytecode *bc);
static inline int fp64_switch(int i)
{
switch (i) {

View File

@ -420,11 +420,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
if (is_nir_enabled(&rscreen->b))
return 1;
return 0;
case PIPE_CAP_INT64_DIVMOD:
/* it is actually not supported, but the nir lowering hdanles this corectly wheras
* the glsl lowering path seems to not initialize the buildins correctly.
*/
return is_nir_enabled(&rscreen->b);
case PIPE_CAP_TWO_SIDED_COLOR:
return !is_nir_enabled(&rscreen->b);
case PIPE_CAP_INT64_DIVMOD:
/* it is actually not supported, but the nir lowering handles this correctly whereas
* the glsl lowering path seems to not initialize the builtins correctly.
*/
return is_nir_enabled(&rscreen->b);
case PIPE_CAP_CULL_DISTANCE:
return 1;

View File

@ -46,7 +46,7 @@ const std::map<EAluOp, AluOp> alu_ops = {
{op1_cos ,AluOp(1, 1, AluOp::t,"COS")},
{op1_exp_ieee ,AluOp(1, 1, AluOp::t,"EXP_IEEE")},
{op1_floor ,AluOp(1, 1, AluOp::a,"FLOOR")},
{op1_flt_to_int ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")},
{op1_flt_to_int ,AluOp(1, 0, AluOp::v,"FLT_TO_INT")},
{op1_flt_to_uint ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")},
{op1_flt_to_int_rpi ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")},
{op1_flt_to_int_floor ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")},
@ -84,15 +84,15 @@ const std::map<EAluOp, AluOp> alu_ops = {
{op1_recipsqrt_ieee1 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")},
{op1_recip_int ,AluOp(1, 0, AluOp::t,"RECIP_INT")},
{op1_recip_uint ,AluOp(1, 0, AluOp::t,"RECIP_UINT")},
{op1_recip_64 ,AluOp(1, 1, AluOp::t,"RECIP_64")},
{op1_recip_clamped_64 ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")},
{op1_recipsqrt_64 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")},
{op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
{op1_recip_64 ,AluOp(2, 1, AluOp::t,"RECIP_64")},
{op1_recip_clamped_64 ,AluOp(2, 1, AluOp::t,"RECIP_CLAMPED_64")},
{op1_recipsqrt_64 ,AluOp(2, 1, AluOp::t,"RECIPSQRT_64")},
{op1_recipsqrt_clamped_64,AluOp(2, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
{op1_rndne ,AluOp(1, 1, AluOp::a,"RNDNE")},
{op1_sqrt_ieee ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")},
{op1_sin ,AluOp(1, 1, AluOp::t,"SIN")},
{op1_trunc ,AluOp(1, 1, AluOp::a,"TRUNC")},
{op1_sqrt_64 ,AluOp(1, 1, AluOp::t,"SQRT_64")},
{op1_sqrt_64 ,AluOp(2, 1, AluOp::t,"SQRT_64")},
{op1_ubyte0_flt ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")},
{op1_ubyte1_flt ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")},
{op1_ubyte2_flt ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")},
@ -273,53 +273,73 @@ const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = {
};
const std::map<ESDOp, LDSOp> lds_ops = {
{DS_OP_ADD , {2, "DS_ADD"}},
{DS_OP_SUB , {2, "DS_SUB"}},
{DS_OP_RSUB , {2, "DS_RSUB"}},
{DS_OP_INC , {2, "DS_INC"}},
{DS_OP_DEC , {2, "DS_DEC"}},
{DS_OP_MIN_INT , {2, "DS_MIN_INT"}},
{DS_OP_MAX_INT , {2, "DS_MAX_INT"}},
{DS_OP_MIN_UINT , {2, "DS_MIN_UINT"}},
{DS_OP_MAX_UINT , {2, "DS_MAX_UINT"}},
{DS_OP_AND , {2, "DS_AND"}},
{DS_OP_OR , {2, "DS_OR"}},
{DS_OP_XOR , {2, "DS_XOR"}},
{DS_OP_MSKOR , {3, "DS_MSKOR"}},
{DS_OP_WRITE , {2, "DS_WRITE"}},
{DS_OP_WRITE_REL , {3, "DS_WRITE_REL"}},
{DS_OP_WRITE2 , {3, "DS_WRITE2"}},
{DS_OP_CMP_STORE , {3, "DS_CMP_STORE"}},
{DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}},
{DS_OP_BYTE_WRITE , {2, "DS_BYTE_WRITE"}},
{DS_OP_SHORT_WRITE , {2, "DS_SHORT_WRITE"}},
{DS_OP_ADD_RET , {2, "DS_ADD_RET"}},
{DS_OP_SUB_RET , {2, "DS_SUB_RET"}},
{DS_OP_RSUB_RET , {2, "DS_RSUB_RET"}},
{DS_OP_INC_RET , {2, "DS_INC_RET"}},
{DS_OP_DEC_RET , {2, "DS_DEC_RET"}},
{DS_OP_MIN_INT_RET , {2, "DS_MIN_INT_RET"}},
{DS_OP_MAX_INT_RET , {2, "DS_MAX_INT_RET"}},
{DS_OP_MIN_UINT_RET , {2, "DS_MIN_UINT_RET"}},
{DS_OP_MAX_UINT_RET , {2, "DS_MAX_UINT_RET"}},
{DS_OP_AND_RET , {2, "DS_AND_RET"}},
{DS_OP_OR_RET , {2, "DS_OR_RET"}},
{DS_OP_XOR_RET , {2, "DS_XOR_RET"}},
{DS_OP_MSKOR_RET , {3, "DS_MSKOR_RET"}},
{DS_OP_XCHG_RET , {2, "DS_XCHG_RET"}},
{DS_OP_XCHG_REL_RET , {3, "DS_XCHG_REL_RET"}},
{DS_OP_XCHG2_RET , {3, "DS_XCHG2_RET"}},
{DS_OP_CMP_XCHG_RET , {3, "DS_CMP_XCHG_RET"}},
{DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}},
{DS_OP_READ_RET , {1, "DS_READ_RET"}},
{DS_OP_READ_REL_RET , {1, "DS_READ_REL_RET"}},
{DS_OP_READ2_RET , {2, "DS_READ2_RET"}},
{DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}},
{DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}},
{DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}},
{DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}},
{DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}},
{DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}}
{DS_OP_ADD , {2, "ADD"}},
{DS_OP_SUB , {2, "SUB"}},
{DS_OP_RSUB , {2, "RSUB"}},
{DS_OP_INC , {2, "INC"}},
{DS_OP_DEC , {2, "DEC"}},
{DS_OP_MIN_INT , {2, "MIN_INT"}},
{DS_OP_MAX_INT , {2, "MAX_INT"}},
{DS_OP_MIN_UINT , {2, "MIN_UINT"}},
{DS_OP_MAX_UINT , {2, "MAX_UINT"}},
{DS_OP_AND , {2, "AND"}},
{DS_OP_OR , {2, "OR"}},
{DS_OP_XOR , {2, "XOR"}},
{DS_OP_MSKOR , {3, "MSKOR"}},
{DS_OP_WRITE , {2, "WRITE"}},
{DS_OP_WRITE_REL , {3, "WRITE_REL"}},
{DS_OP_WRITE2 , {3, "WRITE2"}},
{DS_OP_CMP_STORE , {3, "CMP_STORE"}},
{DS_OP_CMP_STORE_SPF , {3, "CMP_STORE_SPF"}},
{DS_OP_BYTE_WRITE , {2, "BYTE_WRITE"}},
{DS_OP_SHORT_WRITE , {2, "SHORT_WRITE"}},
{DS_OP_ADD_RET , {2, "ADD_RET"}},
{DS_OP_SUB_RET , {2, "SUB_RET"}},
{DS_OP_RSUB_RET , {2, "RSUB_RET"}},
{DS_OP_INC_RET , {2, "INC_RET"}},
{DS_OP_DEC_RET , {2, "DEC_RET"}},
{DS_OP_MIN_INT_RET , {2, "MIN_INT_RET"}},
{DS_OP_MAX_INT_RET , {2, "MAX_INT_RET"}},
{DS_OP_MIN_UINT_RET , {2, "MIN_UINT_RET"}},
{DS_OP_MAX_UINT_RET , {2, "MAX_UINT_RET"}},
{DS_OP_AND_RET , {2, "AND_RET"}},
{DS_OP_OR_RET , {2, "OR_RET"}},
{DS_OP_XOR_RET , {2, "XOR_RET"}},
{DS_OP_MSKOR_RET , {3, "MSKOR_RET"}},
{DS_OP_XCHG_RET , {2, "XCHG_RET"}},
{DS_OP_XCHG_REL_RET , {3, "XCHG_REL_RET"}},
{DS_OP_XCHG2_RET , {3, "XCHG2_RET"}},
{DS_OP_CMP_XCHG_RET , {3, "CMP_XCHG_RET"}},
{DS_OP_CMP_XCHG_SPF_RET, {3, "CMP_XCHG_SPF_RET"}},
{DS_OP_READ_RET , {1, "READ_RET"}},
{DS_OP_READ_REL_RET , {1, "READ_REL_RET"}},
{DS_OP_READ2_RET , {2, "READ2_RET"}},
{DS_OP_READWRITE_RET , {3, "READWRITE_RET"}},
{DS_OP_BYTE_READ_RET , {1, "BYTE_READ_RET"}},
{DS_OP_UBYTE_READ_RET, {1, "UBYTE_READ_RET"}},
{DS_OP_SHORT_READ_RET, {1, "SHORT_READ_RET"}},
{DS_OP_USHORT_READ_RET, {1, "USHORT_READ_RET"}},
{DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "ATOMIC_ORDERED_ALLOC_RET"}},
{LDS_ADD_RET, {2, "LDS_ADD_RET"}},
{LDS_ADD, {2, "LDS_ADD"}},
{LDS_AND_RET, {2, "LDS_AND_RET"}},
{LDS_AND, {2, "LDS_AND"}},
{LDS_WRITE, {2, "LDS_WRITE"}},
{LDS_OR_RET, {2, "LDS_OR_RET"}},
{LDS_OR, {2, "LDS_OR"}},
{LDS_MAX_INT_RET, {2, "LDS_MAX_INT_RET"}},
{LDS_MAX_INT, {2, "LDS_MAX_INT"}},
{LDS_MAX_UINT_RET, {2, "LDS_MAX_UINT_RET"}},
{LDS_MAX_UINT, {2, "LDS_MAX_UINT"}},
{LDS_MIN_INT_RET, {2, "LDS_MIN_INT_RET"}},
{LDS_MIN_INT, {2, "LDS_MIN_INT"}},
{LDS_MIN_UINT_RET, {2, "LDS_MIN_UINT_RET"}},
{LDS_MIN_UINT, {2, "LDS_MIN_UINT"}},
{LDS_XOR_RET, {2, "LDS_XOR_RET"}},
{LDS_XOR, {2, "LDS_XOR"}},
{LDS_XCHG_RET, {2, "LDS_XCHG_RET"}},
{LDS_CMP_XCHG_RET, {3, "LDS_CMP_XCHG_RET"}},
{LDS_WRITE_REL, {3, "LDS_WRITE_REL"}},
};
}

View File

@ -27,6 +27,8 @@
#ifndef r600_sfn_alu_defines_h
#define r600_sfn_alu_defines_h
#include "../r600_isa.h"
#include <map>
#include <bitset>
@ -235,12 +237,71 @@ enum EAluOp {
op3_cnde_int = 28<< 6,
op3_cndgt_int = 29<< 6,
op3_cndge_int = 30<< 6,
op3_mul_lit = 31<< 6
op3_mul_lit = 31<< 6,
op_invalid = 0xffff
};
enum AluModifiers {
alu_src0_neg,
alu_src0_abs,
alu_src0_rel,
alu_src1_neg,
alu_src1_abs,
alu_src1_rel,
alu_src2_neg,
alu_src2_rel,
alu_dst_clamp,
alu_dst_rel,
alu_last_instr,
alu_update_exec,
alu_update_pred,
alu_write,
alu_op3,
alu_is_trans,
alu_is_cayman_trans,
alu_is_lds,
alu_lds_group_start,
alu_lds_group_end,
alu_lds_address,
alu_no_schedule_bias,
alu_64bit_op,
alu_flag_count
};
enum AluDstModifiers {
omod_off = 0,
omod_mul2 = 1,
omod_mul4 = 2,
omod_divl2 = 3
};
using AluOpFlags=std::bitset<32>;
enum AluPredSel {
pred_off = 0,
pred_zero = 2,
pred_one = 3
};
enum AluBankSwizzle {
alu_vec_012 = 0,
sq_alu_scl_201 = 0,
alu_vec_021 = 1,
sq_alu_scl_122 = 1,
alu_vec_120 = 2,
sq_alu_scl_212 = 2,
alu_vec_102 = 3,
sq_alu_scl_221 = 3,
alu_vec_201 = 4,
sq_alu_scl_unknown = 4,
alu_vec_210 = 5,
alu_vec_unknown = 6
};
inline AluBankSwizzle operator ++(AluBankSwizzle& x) {
x = static_cast<AluBankSwizzle>(x + 1);
return x;
}
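/* Editor's sketch (not part of the original header): the prefix increment above
 * lets a scheduler probe all candidate bank swizzles in order, e.g.
 *
 *    for (AluBankSwizzle s = alu_vec_012; s != alu_vec_unknown; ++s)
 *       if (try_with_swizzle(s))   // try_with_swizzle() is a hypothetical helper
 *          break;
 */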
using AluOpFlags=std::bitset<alu_flag_count>;
struct AluOp {
static constexpr int x = 1;
@ -314,6 +375,8 @@ struct AluInlineConstantDescr {
extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;
#define LDSOP2(X) LDS_ ## X = LDS_OP2_LDS_ ## X
enum ESDOp {
DS_OP_ADD = 0,
DS_OP_SUB = 1,
@ -362,9 +425,31 @@ enum ESDOp {
DS_OP_SHORT_READ_RET = 56,
DS_OP_USHORT_READ_RET = 57,
DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
DS_OP_INVALID = 64
DS_OP_INVALID = 64,
LDSOP2(ADD_RET),
LDSOP2(ADD),
LDSOP2(AND_RET),
LDSOP2(AND),
LDSOP2(WRITE),
LDSOP2(OR_RET),
LDSOP2(OR),
LDSOP2(MAX_INT_RET),
LDSOP2(MAX_INT),
LDSOP2(MAX_UINT_RET),
LDSOP2(MAX_UINT),
LDSOP2(MIN_INT_RET),
LDSOP2(MIN_INT),
LDSOP2(MIN_UINT_RET),
LDSOP2(MIN_UINT),
LDSOP2(XOR_RET),
LDSOP2(XOR),
LDSOP2(XCHG_RET),
LDS_CMP_XCHG_RET = LDS_OP3_LDS_CMP_XCHG_RET,
LDS_WRITE_REL = LDS_OP3_LDS_WRITE_REL
};
#undef LDSOP2
struct LDSOp {
int nsrc;
const char *name;
@ -372,6 +457,18 @@ struct LDSOp {
extern const std::map<ESDOp, LDSOp> lds_ops;
struct KCacheLine {
int bank{0};
int addr{0};
int len{0};
enum KCacheLockMode {
free,
lock_1,
lock_2
} mode{free};
};
}
#endif // r600_sfn_alu_defines_h

View File

@ -0,0 +1,329 @@
#include "sfn_alu_readport_validation.h"
#include <cstring>
namespace r600 {
class ReserveReadport : public ConstRegisterVisitor {
public:
ReserveReadport(AluReadportReservation& reserv);
void visit(const LocalArray& value) override;
void visit(const LiteralConstant& value) override;
void visit(const InlineConstant& value) override;
void reserve_gpr(int sel, int chan);
AluReadportReservation& reserver;
int cycle = -1;
int isrc = -1;
int src0_sel = -1;
int src0_chan = -1;
bool success = true;
static const int max_const_readports = 2;
};
class ReserveReadportVec : public ReserveReadport {
public:
using ReserveReadport::ReserveReadport;
void visit(const Register& value) override;
void visit(const LocalArrayValue& value) override;
void visit(const UniformValue& value) override;
};
class ReserveReadportTrans : public ReserveReadport
{
public:
ReserveReadportTrans(AluReadportReservation& reserv);
int n_consts;
};
class ReserveReadportTransPass1 : public ReserveReadportTrans {
public:
using ReserveReadportTrans::ReserveReadportTrans;
void visit(const Register& value) override;
void visit(const LocalArrayValue& value) override;
void visit(const UniformValue& value) override;
void visit(const InlineConstant& value) override;
void visit(const LiteralConstant& value) override;
};
class ReserveReadportTransPass2 : public ReserveReadportTrans {
public:
using ReserveReadportTrans::ReserveReadportTrans;
void visit(const Register& value) override;
void visit(const LocalArrayValue& value) override;
void visit(const UniformValue& value) override;
};
bool AluReadportReservation::schedule_vec_src(PVirtualValue src[3], int nsrc, AluBankSwizzle swz)
{
ReserveReadportVec visitor(*this);
if (src[0]->as_register()) {
visitor.src0_sel = src[0]->sel();
visitor.src0_chan = src[0]->chan();
} else {
visitor.src0_sel = 0xffff;
visitor.src0_chan = 8;
}
for (int i = 0; i < nsrc; ++i) {
visitor.cycle = cycle_vec(swz, i);
visitor.isrc = i;
src[i]->accept(visitor);
}
return visitor.success;
}
bool AluReadportReservation::schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz)
{
ReserveReadportVec visitor(*this);
for (unsigned i = 0; i < alu.n_sources() && visitor.success; ++i) {
visitor.cycle = cycle_vec(swz, i);
visitor.isrc = i;
if (i == 1 && alu.src(i).equal_to(alu.src(0)))
continue;
alu.src(i).accept(visitor);
}
return visitor.success;
}
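/* Scheduling for the trans unit is checked in two passes: pass 1 counts and
 * reserves the constant and literal read slots, pass 2 then rejects GPR reads
 * whose read cycle is already taken by a constant and reserves the rest. */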
bool AluReadportReservation::schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz)
{
ReserveReadportTransPass1 visitor1(*this);
for (unsigned i = 0; i < alu.n_sources(); ++i) {
visitor1.cycle = cycle_trans(swz, i);
alu.src(i).accept(visitor1);
}
if (!visitor1.success)
return false;
ReserveReadportTransPass2 visitor2(*this);
visitor2.n_consts = visitor1.n_consts;
for (unsigned i = 0; i < alu.n_sources(); ++i) {
visitor2.cycle = cycle_trans(swz, i);
alu.src(i).accept(visitor2);
}
return visitor2.success;
}
AluReadportReservation::AluReadportReservation()
{
for (int i = 0; i < max_chan_channels; ++i) {
for (int j = 0; j < max_gpr_readports; ++j)
m_hw_gpr[j][i] = -1;
m_hw_const_addr[i] = -1;
m_hw_const_chan[i] = -1;
m_hw_const_bank[i] = -1;
}
}
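/* A GPR read port is identified by (cycle, chan); it can be shared only by
 * sources that read the same register sel, otherwise the reservation fails. */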
bool AluReadportReservation::reserve_gpr(int sel, int chan, int cycle)
{
if (m_hw_gpr[cycle][chan] == -1) {
m_hw_gpr[cycle][chan] = sel;
}
else if (m_hw_gpr[cycle][chan] != sel) {
return false;
}
return true;
}
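/* Only two constant (kcache) read ports are available per group; a port is
 * reused when sel, kcache bank and the channel pair (chan >> 1) all match,
 * otherwise a free port is claimed, and the reservation fails if none is left. */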
bool AluReadportReservation::reserve_const(const UniformValue& value)
{
int match = -1;
int empty = -1;
for (int res = 0; res < ReserveReadport::max_const_readports; ++res) {
if (m_hw_const_addr[res] == -1)
empty = res;
else if ((m_hw_const_addr[res] == value.sel()) &&
(m_hw_const_bank[res] == value.kcache_bank()) &&
(m_hw_const_chan[res] == (value.chan() >> 1)))
match = res;
}
if (match < 0) {
if (empty >= 0) {
m_hw_const_addr[empty] = value.sel();
m_hw_const_bank[empty] = value.kcache_bank();
m_hw_const_chan[empty] = value.chan() >> 1;
} else {
return false;
}
}
return true;
}
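/* Literals are de-duplicated; adding a new value succeeds only while the
 * per-group literal buffer still has a free slot. */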
bool AluReadportReservation::add_literal(uint32_t value)
{
for (unsigned i = 0; i < m_nliterals; ++i) {
if (m_literals[i] == value)
return true;
}
if (m_nliterals < m_literals.size()) {
m_literals[m_nliterals++] = value;
return true;
}
return false;
}
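/* The tables below give, for each bank swizzle, the read cycle in which each
 * source operand is fetched: cycle_vec() for the vector units, cycle_trans()
 * for the trans unit. */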
int AluReadportReservation::cycle_vec(AluBankSwizzle swz, int src)
{
static const int mapping[AluBankSwizzle::alu_vec_unknown][max_gpr_readports] = {
{0, 1, 2},
{0, 2, 1},
{1, 0, 2},
{1, 2, 0},
{2, 0, 1},
{2, 1, 0}
};
return mapping[swz][src];
}
int AluReadportReservation::cycle_trans(AluBankSwizzle swz, int src)
{
static const int mapping[AluBankSwizzle::sq_alu_scl_unknown][max_gpr_readports] = {
{2, 1, 0},
{1, 2, 2},
{2, 1, 2},
{2, 2, 1},
};
return mapping[swz][src];
}
ReserveReadport::ReserveReadport(AluReadportReservation& reserv):
reserver(reserv)
{
}
void ReserveReadport::visit(const LocalArray& value)
{
(void)value;
unreachable("a full array is not available here");
}
void ReserveReadport::visit(const LiteralConstant& value)
{
success &= reserver.add_literal(value.value());
}
void ReserveReadport::visit(const InlineConstant& value)
{
(void)value;
}
void ReserveReadportVec::visit(const Register& value)
{
reserve_gpr(value.sel(), value.chan());
}
void ReserveReadportVec::visit(const LocalArrayValue& value)
{
// Set the highest non-sign bit to indicate that we use the
// AR register
reserve_gpr(0x4000000 | value.sel(), value.chan());
}
void ReserveReadport::reserve_gpr(int sel, int chan)
{
if (isrc == 1 && src0_sel == sel && src0_chan == chan)
return;
success &= reserver.reserve_gpr(sel, chan, cycle);
}
void ReserveReadportVec::visit(const UniformValue& value)
{
// kcache bank?
success &= reserver.reserve_const(value);
}
ReserveReadportTrans::ReserveReadportTrans(AluReadportReservation& reserv):
ReserveReadport(reserv),
n_consts(0)
{}
void ReserveReadportTransPass1::visit(const Register& value)
{
(void)value;
}
void ReserveReadportTransPass1::visit(const LocalArrayValue& value)
{
(void)value;
}
void ReserveReadportTransPass1::visit(const UniformValue& value)
{
if (n_consts >= max_const_readports) {
success = false;
return;
}
n_consts++;
success &= reserver.reserve_const(value);
}
void ReserveReadportTransPass1::visit(const InlineConstant& value)
{
(void)value;
if (n_consts >= max_const_readports) {
success = false;
return;
}
n_consts++;
}
void ReserveReadportTransPass1::visit(const LiteralConstant& value)
{
if (n_consts >= max_const_readports) {
success = false;
return;
}
n_consts++;
success &= reserver.add_literal(value.value());
}
void ReserveReadportTransPass2::visit(const Register& value)
{
if (cycle < n_consts) {
success = false;
return;
}
reserve_gpr(value.sel(), value.chan());
}
void ReserveReadportTransPass2::visit(const LocalArrayValue& value)
{
if (cycle < n_consts) {
success = false;
return;
}
reserve_gpr(0x4000000 | value.sel(), value.chan());
}
void ReserveReadportTransPass2::visit(const UniformValue& value)
{
(void)value;
}
}

View File

@ -0,0 +1,41 @@
#ifndef ALUREADPORTVALIDATION_H
#define ALUREADPORTVALIDATION_H
#include "sfn_instr_alu.h"
namespace r600 {
class AluReadportReservation {
public:
AluReadportReservation();
AluReadportReservation(const AluReadportReservation& orig) = default;
AluReadportReservation& operator = (const AluReadportReservation& orig) = default;
bool schedule_vec_src(PVirtualValue src[3], int nsrc, AluBankSwizzle swz);
bool schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz);
bool schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz);
bool reserve_gpr(int sel, int chan, int cycle);
bool reserve_const(const UniformValue& value);
bool add_literal(uint32_t value);
static int cycle_vec(AluBankSwizzle swz, int src);
static int cycle_trans(AluBankSwizzle swz, int src);
static const int max_chan_channels = 4;
static const int max_gpr_readports = 3;
std::array<std::array<int, max_chan_channels>, max_gpr_readports> m_hw_gpr;
std::array<int, max_chan_channels> m_hw_const_addr;
std::array<int, max_chan_channels> m_hw_const_chan;
std::array<int, max_chan_channels> m_hw_const_bank;
std::array<uint32_t, max_chan_channels> m_literals;
uint32_t m_nliterals{0};
};
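/* Editor's sketch of the intended use (an assumption, not from the original
 * header): a scheduler can copy the current reservation state, try a bank
 * swizzle, and roll back on failure:
 *
 *    AluReadportReservation probe = reservation;  // 'reservation' and 'alu' are
 *    AluBankSwizzle swz = alu_vec_012;            // hypothetical caller state
 *    while (swz != alu_vec_unknown &&
 *           !probe.schedule_vec_instruction(*alu, swz)) {
 *       probe = reservation;                      // discard partial bookkeeping
 *       ++swz;
 *    }
 */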
}
#endif // ALUREADPORTVALIDATION_H

View File

@ -0,0 +1,26 @@
#ifndef ASSEMBLER_H
#define ASSEMBLER_H
#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader.h"
namespace r600 {
class Assembler
{
public:
Assembler(r600_shader *sh, const r600_shader_key& key);
bool lower(Shader *shader);
private:
r600_shader *m_sh;
const r600_shader_key& m_key;
};
}
#endif // ASSEMBLER_H

View File

@ -38,10 +38,7 @@ enum JumpType {
/**
Class to link the jump locations
*/
class ConditionalJumpTracker
{
public:
@ -49,7 +46,6 @@ public:
~ConditionalJumpTracker();
/* Mark the start of a loop or a if/else */
void push(r600_bytecode_cf *start, JumpType type);
/* Mark the end of a loop or a if/else and fixup the jump sites */

View File

@ -61,6 +61,10 @@ static const struct debug_named_value sfn_debug_options[] = {
{"nomerge", SfnLog::nomerge, "Skip register merge step"},
{"tex", SfnLog::tex, "Log texture ops"},
{"trans", SfnLog::trans, "Log generic translation messages"},
{"schedule", SfnLog::schedule, "Log scheduling"},
{"opt", SfnLog::opt, "Log optimization"},
{"steps", SfnLog::steps, "Log shaders at transformation steps"},
{"noopt", SfnLog::noopt, "Don't run backend optimizations"},
DEBUG_NAMED_VALUE_END
};
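/* Note (editor's addition): these named flags are selected at run time through
 * the driver's NIR debug environment variable (assumed to be R600_NIR_DEBUG here),
 * e.g. R600_NIR_DEBUG=schedule,opt,steps to trace the new passes. */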

View File

@ -64,8 +64,12 @@ public:
merge = 1 << 10,
tex = 1 << 11,
trans = 1 << 12,
all = (1 << 13) - 1,
schedule = 1 << 13,
opt = 1 << 14,
all = (1 << 15) - 1,
nomerge = 1 << 16,
steps = 1 << 17,
noopt = 1 << 18
};
SfnLog();

View File

@ -303,6 +303,9 @@ enum EVFetchFlagShift {
vtx_alt_const,
vtx_use_tc,
vtx_vpm,
vtx_is_mega_fetch,
vtx_uncached,
vtx_indexed,
vtx_unknown
};

View File

@ -2,44 +2,33 @@
This code is an attempt to implement a NIR backend for r600.
Supported hardware: Cayman, Evergreen and NI (tested on CAYMAN, CEDAR and BARTS)
Thanks to soft fp64 the OpenGL version is now 4.5 also for EG.
sb can be enabled for nir; it still gives some improvements, e.g. with Xonotic.
The aim is still to get rid of it.
## State
Supported hardware: Evergreen and NI (tested on CEDAR and BARTS)
Thanks to soft fp64 the OpenGL version is now 4.5
sb has been enabled for nir to be able to run some more demanding workloads. The aim is
still to get rid of it.
TODO:
piglit gpu tests pass mostly like with TGSI; there are some fixes but also a few regressions.
CTS gles
- GLES 2: passes like with TGSI
- GLES 3: no regressions, a few fixes compared to TGSI
- GLES 3.1:
* a few fixes with interpolation specifiers
* synchronization has some unstable tests; this might be because global synchronization is missing (in both)
GL CTS:
* a few regressions and a hang with KHR-GL43.compute_shader.shared-max
piglit:
* spilling arrays is broken on Barts (but it works on Cedar)
* a few tests fail because the register limit is exhausted, and needlessly so, because
with better RA it would work
* spilling arrays is broken on Barts and CAYMAN (but it works on Cedar)
## Needed optimizations:
- Register allocator and scheduler (Could the sb allocator and scheduler
be ported?)
- peepholes:
- compare + set predicate
- compare + set predicate / kill
- use clause local registers
- reduce register usage
- don't rely on the backend to schedule addr load and Index load as well
- don't rely on the backend to merge some alu groups
## There are still some hangs
- copy propagation:
- Moves from inputs are usually not required; they could be forwarded
- texture operations often move additional parameters in extra registers
but they are actually needed in the same registers they come from and
could just be swizzled into the right place
(lower this in NIR as is done in e.g. ETNAVIV)

File diff suppressed because it is too large

View File

@ -1,116 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_EMITALUINSTRUCTION_H
#define SFN_EMITALUINSTRUCTION_H
#include "sfn_emitinstruction.h"
#include "sfn_alu_defines.h"
#include "sfn_instruction_alu.h"
#include "sfn_instruction_tex.h"
namespace r600 {
class EmitAluInstruction : public EmitInstruction
{
public:
EmitAluInstruction(ShaderFromNirProcessor& processor);
private:
enum AluOp2Opts {
op2_opt_none = 0,
op2_opt_reverse = 1,
op2_opt_neg_src1 = 1 << 1
};
bool do_emit(nir_instr* instr) override;
void split_constants(const nir_alu_instr& instr, unsigned nsrc_comp);
bool emit_mov(const nir_alu_instr& instr);
bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0);
bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode);
bool emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode);
bool emit_alu_inot(const nir_alu_instr& instr);
bool emit_alu_ineg(const nir_alu_instr& instr);
bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2});
bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false);
bool emit_alu_b2f(const nir_alu_instr& instr);
bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op);
bool emit_dot(const nir_alu_instr& instr, int n);
bool emit_create_vec(const nir_alu_instr& instr, unsigned nc);
bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
bool emit_any_iequal(const nir_alu_instr& instr, unsigned nc);
bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all);
bool emit_fdph(const nir_alu_instr &instr);
bool emit_discard_if(const nir_intrinsic_instr *instr);
bool emit_alu_f2b32(const nir_alu_instr& instr);
bool emit_b2i32(const nir_alu_instr& instr);
bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op);
bool emit_pack_64_2x32_split(const nir_alu_instr& instr);
bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp);
bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine);
bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr);
bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr);
bool emit_pack_32_2x16_split(const nir_alu_instr& instr);
bool emit_cube(const nir_alu_instr& instr);
private:
void make_last(AluInstruction *ir) const;
void split_alu_modifiers(const nir_alu_src &src, const GPRVector::Values& v,
GPRVector::Values& out, int ncomp);
void preload_src(const nir_alu_instr& instr);
unsigned num_src_comp(const nir_alu_instr& instr);
using vreg = std::array<PValue, 4>;
std::array<PValue, 4> m_src[4];
};
inline void EmitAluInstruction::make_last(AluInstruction *ir) const
{
if (ir)
ir->set_flag(alu_last_instr);
}
}
#endif // SFN_EMITALUINSTRUCTION_H

View File

@ -1,169 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_emitinstruction.h"
#include "sfn_shader_base.h"
namespace r600 {
EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor):
m_proc(processor)
{
}
EmitInstruction::~EmitInstruction()
{
}
bool EmitInstruction::emit(nir_instr* instr)
{
return do_emit(instr);
}
bool EmitInstruction::use_legacy_math_rules(void)
{
return m_proc.use_legacy_math_rules();
}
PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
{
return m_proc.from_nir(v, component, swizzled);
}
PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component)
{
return m_proc.from_nir(v, component);
}
PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component)
{
return m_proc.from_nir(v, component);
}
PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component)
{
return m_proc.from_nir(v, component);
}
PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component)
{
return m_proc.from_nir(v, component);
}
PValue EmitInstruction::from_nir(const nir_src& v, unsigned component)
{
return m_proc.from_nir(v, component);
}
void EmitInstruction::emit_instruction(Instruction *ir)
{
return m_proc.emit_instruction(ir);
}
void EmitInstruction::emit_instruction(AluInstruction *ir)
{
return m_proc.emit_instruction(ir);
}
bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest,
std::vector<PValue> src0,
const std::set<AluModifiers>& m_flags)
{
return m_proc.emit_instruction(opcode, dest,src0, m_flags);
}
const nir_variable *
EmitInstruction::get_deref_location(const nir_src& v) const
{
return m_proc.get_deref_location(v);
}
PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
{
return m_proc.from_nir_with_fetch_constant(src, component, channel);
}
GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle, bool match)
{
return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match);
}
PGPRValue EmitInstruction::get_temp_register(int channel)
{
return m_proc.get_temp_register(channel);
}
GPRVector EmitInstruction::get_temp_vec4(const GPRVector::Swizzle& swizzle)
{
return m_proc.get_temp_vec4(swizzle);
}
PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle)
{
return m_proc.create_register_from_nir_src(src, swizzle);
}
enum amd_gfx_level EmitInstruction::get_chip_class(void) const
{
return m_proc.get_chip_class();
}
PValue EmitInstruction::literal(uint32_t value)
{
return m_proc.literal(value);
}
GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components)
{
return m_proc.vec_from_nir(dst, num_components);
}
bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle,
const PValue& reg, bool map)
{
return m_proc.inject_register(sel, swizzle, reg, map);
}
int EmitInstruction::remap_atomic_base(int base)
{
return m_proc.remap_atomic_base(base);
}
void EmitInstruction::set_has_txs_cube_array_comp()
{
m_proc.sh_info().has_txq_cube_array_z_comp = 1;
}
const std::set<AluModifiers> EmitInstruction::empty = {};
const std::set<AluModifiers> EmitInstruction::write = {alu_write};
const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};
const std::set<AluModifiers> EmitInstruction::last = {alu_last_instr};
}

View File

@ -1,102 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef EMITINSTRUCTION_H
#define EMITINSTRUCTION_H
#include "compiler/nir/nir.h"
#include "sfn_defines.h"
#include "sfn_value.h"
#include "sfn_instruction_alu.h"
namespace r600 {
class ShaderFromNirProcessor;
class EmitInstruction
{
public:
EmitInstruction(ShaderFromNirProcessor& processor);
virtual ~EmitInstruction();
bool emit(nir_instr* instr);
static const std::set<AluModifiers> empty;
static const std::set<AluModifiers> write;
static const std::set<AluModifiers> last_write;
static const std::set<AluModifiers> last;
protected:
virtual bool do_emit(nir_instr* instr) = 0;
// forwards from ValuePool
PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
PValue from_nir(const nir_src& v, unsigned component);
PValue from_nir(const nir_alu_src& v, unsigned component);
PValue from_nir(const nir_tex_src& v, unsigned component);
PValue from_nir(const nir_alu_dest& v, unsigned component);
PValue from_nir(const nir_dest& v, unsigned component);
PValue create_register_from_nir_src(const nir_src& src, unsigned comp);
PGPRValue get_temp_register(int channel = -1);
GPRVector get_temp_vec4(const GPRVector::Swizzle& swizzle = {0,1,2,3});
// forwards from ShaderFromNirProcessor
void emit_instruction(Instruction *ir);
void emit_instruction(AluInstruction *ir);
bool emit_instruction(EAluOp opcode, PValue dest,
std::vector<PValue> src0,
const std::set<AluModifiers>& m_flags);
bool use_legacy_math_rules(void);
PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle, bool match = false);
const nir_variable *get_deref_location(const nir_src& v) const;
enum amd_gfx_level get_chip_class(void) const;
PValue literal(uint32_t value);
GPRVector vec_from_nir(const nir_dest& dst, int num_components);
bool inject_register(unsigned sel, unsigned swizzle,
const PValue& reg, bool map);
int remap_atomic_base(int base);
void set_has_txs_cube_array_comp();
private:
ShaderFromNirProcessor& m_proc;
};
}
#endif // EMITINSTRUCTION_H

View File

@ -1,741 +0,0 @@
#include "sfn_emitssboinstruction.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_gds.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_tex.h"
#include "../r600_pipe.h"
#include "../r600_asm.h"
namespace r600 {
#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
EmitInstruction(processor),
m_require_rat_return_address(false),
m_ssbo_image_offset(0)
{
}
void EmitSSBOInstruction::set_ssbo_offset(int offset)
{
m_ssbo_image_offset = offset;
}
void EmitSSBOInstruction::set_require_rat_return_address()
{
m_require_rat_return_address = true;
}
bool
EmitSSBOInstruction::load_rat_return_address()
{
if (m_require_rat_return_address) {
m_rat_return_address = get_temp_vec4();
emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
{alu_write, alu_last_instr}));
m_require_rat_return_address = false;
}
return true;
}
bool EmitSSBOInstruction::do_emit(nir_instr* instr)
{
const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_atomic_counter_add:
case nir_intrinsic_atomic_counter_and:
case nir_intrinsic_atomic_counter_exchange:
case nir_intrinsic_atomic_counter_max:
case nir_intrinsic_atomic_counter_min:
case nir_intrinsic_atomic_counter_or:
case nir_intrinsic_atomic_counter_xor:
case nir_intrinsic_atomic_counter_comp_swap:
return emit_atomic(intr);
case nir_intrinsic_atomic_counter_read:
case nir_intrinsic_atomic_counter_post_dec:
return emit_unary_atomic(intr);
case nir_intrinsic_atomic_counter_inc:
return emit_atomic_inc(intr);
case nir_intrinsic_atomic_counter_pre_dec:
return emit_atomic_pre_dec(intr);
case nir_intrinsic_load_ssbo:
return emit_load_ssbo(intr);
case nir_intrinsic_store_ssbo:
return emit_store_ssbo(intr);
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_ssbo_atomic_exchange:
return emit_ssbo_atomic_op(intr);
case nir_intrinsic_image_store:
return emit_image_store(intr);
case nir_intrinsic_image_load:
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_imax:
return emit_image_load(intr);
case nir_intrinsic_image_size:
return emit_image_size(intr);
case nir_intrinsic_get_ssbo_size:
return emit_buffer_size(intr);
case nir_intrinsic_memory_barrier:
case nir_intrinsic_memory_barrier_image:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_group_memory_barrier:
return make_stores_ack_and_waitack();
default:
return false;
}
}
bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
{
bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
ESDOp op = read_result ? get_opcode(instr->intrinsic) :
get_opcode_wo(instr->intrinsic);
if (DS_OP_INVALID == op)
return false;
GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
int base = remap_atomic_base(nir_intrinsic_base(instr));
PValue uav_id = from_nir(instr->src[0], 0);
PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
GDSInstr *ir = nullptr;
if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) {
PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
ir = new GDSInstr(op, dest, value, value2, uav_id, base);
} else {
ir = new GDSInstr(op, dest, value, uav_id, base);
}
emit_instruction(ir);
return true;
}
bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
{
bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);
if (DS_OP_INVALID == op)
return false;
GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
PValue uav_id = from_nir(instr->src[0], 0);
auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
emit_instruction(ir);
return true;
}
ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const
{
switch (opcode) {
case nir_intrinsic_atomic_counter_add:
return DS_OP_ADD_RET;
case nir_intrinsic_atomic_counter_and:
return DS_OP_AND_RET;
case nir_intrinsic_atomic_counter_exchange:
return DS_OP_XCHG_RET;
case nir_intrinsic_atomic_counter_inc:
return DS_OP_INC_RET;
case nir_intrinsic_atomic_counter_max:
return DS_OP_MAX_UINT_RET;
case nir_intrinsic_atomic_counter_min:
return DS_OP_MIN_UINT_RET;
case nir_intrinsic_atomic_counter_or:
return DS_OP_OR_RET;
case nir_intrinsic_atomic_counter_read:
return DS_OP_READ_RET;
case nir_intrinsic_atomic_counter_xor:
return DS_OP_XOR_RET;
case nir_intrinsic_atomic_counter_post_dec:
return DS_OP_DEC_RET;
case nir_intrinsic_atomic_counter_comp_swap:
return DS_OP_CMP_XCHG_RET;
case nir_intrinsic_atomic_counter_pre_dec:
default:
return DS_OP_INVALID;
}
}
ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const
{
switch (opcode) {
case nir_intrinsic_atomic_counter_add:
return DS_OP_ADD;
case nir_intrinsic_atomic_counter_and:
return DS_OP_AND;
case nir_intrinsic_atomic_counter_inc:
return DS_OP_INC;
case nir_intrinsic_atomic_counter_max:
return DS_OP_MAX_UINT;
case nir_intrinsic_atomic_counter_min:
return DS_OP_MIN_UINT;
case nir_intrinsic_atomic_counter_or:
return DS_OP_OR;
case nir_intrinsic_atomic_counter_xor:
return DS_OP_XOR;
case nir_intrinsic_atomic_counter_post_dec:
return DS_OP_DEC;
case nir_intrinsic_atomic_counter_comp_swap:
return DS_OP_CMP_XCHG_RET;
case nir_intrinsic_atomic_counter_exchange:
return DS_OP_XCHG_RET;
case nir_intrinsic_atomic_counter_pre_dec:
default:
return DS_OP_INVALID;
}
}
RatInstruction::ERatOp
EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
{
switch (opcode) {
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_image_atomic_add:
return RatInstruction::ADD_RTN;
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_image_atomic_and:
return RatInstruction::AND_RTN;
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_image_atomic_exchange:
return RatInstruction::XCHG_RTN;
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_image_atomic_or:
return RatInstruction::OR_RTN;
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_image_atomic_imin:
return RatInstruction::MIN_INT_RTN;
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_image_atomic_imax:
return RatInstruction::MAX_INT_RTN;
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_image_atomic_umin:
return RatInstruction::MIN_UINT_RTN;
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_image_atomic_umax:
return RatInstruction::MAX_UINT_RTN;
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_image_atomic_xor:
return RatInstruction::XOR_RTN;
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_image_atomic_comp_swap:
if (util_format_is_float(format))
return RatInstruction::CMPXCHG_FLT_RTN;
else
return RatInstruction::CMPXCHG_INT_RTN;
case nir_intrinsic_image_load:
return RatInstruction::NOP_RTN;
default:
unreachable("Unsupported RAT instruction");
}
}
RatInstruction::ERatOp
EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const
{
switch (opcode) {
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_image_atomic_add:
return RatInstruction::ADD;
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_image_atomic_and:
return RatInstruction::AND;
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_image_atomic_or:
return RatInstruction::OR;
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_image_atomic_imin:
return RatInstruction::MIN_INT;
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_image_atomic_imax:
return RatInstruction::MAX_INT;
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_image_atomic_umin:
return RatInstruction::MIN_UINT;
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_image_atomic_umax:
return RatInstruction::MAX_UINT;
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_image_atomic_xor:
return RatInstruction::XOR;
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_image_atomic_comp_swap:
if (util_format_is_float(format))
return RatInstruction::CMPXCHG_FLT;
else
return RatInstruction::CMPXCHG_INT;
default:
unreachable("Unsupported WO RAT instruction");
}
}
bool EmitSSBOInstruction::load_atomic_inc_limits()
{
m_atomic_update = get_temp_register();
m_atomic_update->set_keep_alive();
emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
{alu_write, alu_last_instr}));
return true;
}
bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
{
bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
PValue uav_id = from_nir(instr->src[0], 0);
GPRVector dest = read_result ? make_dest(instr): GPRVector(0, {7,7,7,7});
auto ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest,
m_atomic_update, uav_id,
remap_atomic_base(nir_intrinsic_base(instr)));
emit_instruction(ir);
return true;
}
bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
{
GPRVector dest = make_dest(instr);
PValue uav_id = from_nir(instr->src[0], 0);
auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
remap_atomic_base(nir_intrinsic_base(instr)));
emit_instruction(ir);
emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write));
return true;
}
bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
{
GPRVector dest = make_dest(instr);
/** src0 not used, should be some offset */
auto addr = from_nir(instr->src[1], 0);
PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
/** Should be lowered in nir */
emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
{alu_write, alu_last_instr}));
const EVTXDataFormat formats[4] = {
fmt_32,
fmt_32_32,
fmt_32_32_32,
fmt_32_32_32_32
};
const std::array<int,4> dest_swt[4] = {
{0,7,7,7},
{0,1,7,7},
{0,1,2,7},
{0,1,2,3}
};
/* TODO fix resource index */
auto ir = new FetchInstruction(dest, addr_temp,
R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset
, from_nir(instr->src[0], 0),
formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
ir->set_flag(vtx_use_tc);
emit_instruction(ir);
return true;
}
bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
{
GPRVector::Swizzle swz = {7,7,7,7};
for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i)
swz[i] = i;
auto orig_addr = from_nir(instr->src[2], 0);
GPRVector addr_vec = get_temp_vec4({0,1,2,7});
auto temp2 = get_temp_vec4();
auto rat_id = from_nir(instr->src[1], 0);
emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
PValue(new LiteralValue(2)), write));
emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
auto values = vec_from_nir_with_fetch_constant(instr->src[0],
(1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
auto cf_op = cf_mem_rat;
//auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
values, addr_vec, m_ssbo_image_offset, rat_id, 1,
1, 0, false);
emit_instruction(store);
m_store_ops.push_back(store);
for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN ? last_write : write));
emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
{addr_vec.reg_i(0), Value::one_i}, last_write));
store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
temp2, addr_vec, m_ssbo_image_offset, rat_id, 1,
1, 0, false);
emit_instruction(store);
if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
m_store_ops.push_back(store);
}
return true;
}
bool
EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
{
int imageid = 0;
PValue image_offset;
if (nir_src_is_const(intrin->src[0]))
imageid = nir_src_as_int(intrin->src[0]);
else
image_offset = from_nir(intrin->src[0], 0);
auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
auto undef = from_nir(intrin->src[2], 0);
auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3});
auto unknown = from_nir(intrin->src[4], 0);
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
nir_intrinsic_image_array(intrin)) {
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
}
auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
image_offset, 1, 0xf, 0, false);
//if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
m_store_ops.push_back(store);
emit_instruction(store);
return true;
}
bool
EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
{
int imageid = 0;
PValue image_offset;
if (nir_src_is_const(intrin->src[0]))
imageid = nir_src_as_int(intrin->src[0]);
else
image_offset = from_nir(intrin->src[0], 0);
bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
auto coord_orig = from_nir(intrin->src[1], 0, 0);
auto coord = get_temp_register(0);
emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
from_nir(intrin->src[3], 0), {alu_write}));
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
} else {
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
from_nir(intrin->src[2], 0), {alu_write}));
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
}
GPRVector out_vec({coord, coord, coord, coord});
auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
image_offset, 1, 0xf, 0, true);
emit_instruction(atomic);
if (read_result) {
emit_instruction(new WaitAck(0));
GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
auto fetch = new FetchInstruction(vc_fetch,
no_index_offset,
fmt_32,
vtx_nf_int,
vtx_es_none,
m_rat_return_address.reg_i(1),
dest,
0,
false,
0xf,
R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
0,
bim_none,
false,
false,
0,
0,
0,
image_offset,
{0,7,7,7});
fetch->set_flag(vtx_srf_mode);
fetch->set_flag(vtx_use_tc);
fetch->set_flag(vtx_vpm);
emit_instruction(fetch);
}
return true;
}
bool
EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
{
int imageid = 0;
PValue image_offset;
if (nir_src_is_const(intrin->src[0]))
imageid = nir_src_as_int(intrin->src[0]);
else
image_offset = from_nir(intrin->src[0], 0);
bool read_retvalue = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
auto rat_op = read_retvalue ? get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)):
get_rat_opcode_wo(intrin->intrinsic, nir_intrinsic_format(intrin));
GPRVector::Swizzle swz = {0,1,2,3};
auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
nir_intrinsic_image_array(intrin)) {
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
}
if (intrin->intrinsic != nir_intrinsic_image_load) {
if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
from_nir(intrin->src[4], 0), {alu_write}));
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
} else {
emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
}
}
auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
image_offset, 1, 0xf, 0, true);
emit_instruction(store);
return read_retvalue ? fetch_return_value(intrin) : true;
}
bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
{
emit_instruction(new WaitAck(0));
pipe_format format = nir_intrinsic_format(intrin);
unsigned fmt = fmt_32;
unsigned num_format = 0;
unsigned format_comp = 0;
unsigned endian = 0;
int imageid = 0;
PValue image_offset;
if (nir_src_is_const(intrin->src[0]))
imageid = nir_src_as_int(intrin->src[0]);
else
image_offset = from_nir(intrin->src[0], 0);
r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
auto fetch = new FetchInstruction(vc_fetch,
no_index_offset,
(EVTXDataFormat)fmt,
(EVFetchNumFormat)num_format,
(EVFetchEndianSwap)endian,
m_rat_return_address.reg_i(1),
dest,
0,
false,
0x3,
R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
0,
bim_none,
false,
false,
0,
0,
0,
image_offset, {0,1,2,3});
fetch->set_flag(vtx_srf_mode);
fetch->set_flag(vtx_use_tc);
fetch->set_flag(vtx_vpm);
if (format_comp)
fetch->set_flag(vtx_format_comp_signed);
emit_instruction(fetch);
return true;
}
bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
{
GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
GPRVector src{0,{4,4,4,4}};
assert(nir_src_as_uint(intrin->src[1]) == 0);
auto const_offset = nir_src_as_const_value(intrin->src[0]);
auto dyn_offset = PValue();
int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
if (const_offset)
res_id += const_offset[0].u32;
else
dyn_offset = from_nir(intrin->src[0], 0);
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
res_id,
bim_none));
return true;
} else {
emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
0/* ?? */,
res_id, dyn_offset));
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
/* Need to load the layers from a const buffer */
set_has_txs_cube_array_comp();
if (const_offset) {
unsigned lookup_resid = const_offset[0].u32;
emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
R600_BUFFER_INFO_CONST_BUFFER)),
EmitInstruction::last_write));
} else {
/* If the addressing is indirect we have to get the z-value by using a binary search */
GPRVector trgt;
GPRVector help;
auto addr = help.reg_i(0);
auto comp = help.reg_i(1);
auto low_bit = help.reg_i(2);
auto high_bit = help.reg_i(3);
emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0),
literal(2), EmitInstruction::write));
emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0),
literal(3), EmitInstruction::last_write));
emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL,
R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none));
emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2),
EmitInstruction::write));
emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3),
EmitInstruction::last_write));
emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write));
}
}
}
return true;
}
bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
{
std::array<PValue,4> dst_elms;
for (uint16_t i = 0; i < 4; ++i) {
dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
}
GPRVector dst(dst_elms);
GPRVector src(0,{4,4,4,4});
auto const_offset = nir_src_as_const_value(intr->src[0]);
auto dyn_offset = PValue();
int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
if (const_offset)
res_id += const_offset[0].u32;
else
assert(0 && "dynamic buffer offset not supported in buffer_size");
emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
res_id, bim_none));
return true;
}
bool EmitSSBOInstruction::make_stores_ack_and_waitack()
{
for (auto&& store: m_store_ops)
store->set_ack();
if (!m_store_ops.empty())
emit_instruction(new WaitAck(0));
m_store_ops.clear();
return true;
}
GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
{
GPRVector::Values v;
int i;
for (i = 0; i < 4; ++i)
v[i] = from_nir(ir->dest, i);
return GPRVector(v);
}
}

View File

@ -1,60 +0,0 @@
#ifndef SFN_EMITSSBOINSTRUCTION_H
#define SFN_EMITSSBOINSTRUCTION_H
#include "sfn_emitinstruction.h"
#include "sfn_instruction_gds.h"
#include "sfn_value_gpr.h"
namespace r600 {
class EmitSSBOInstruction: public EmitInstruction {
public:
EmitSSBOInstruction(ShaderFromNirProcessor& processor);
void set_ssbo_offset(int offset);
void set_require_rat_return_address();
bool load_rat_return_address();
bool load_atomic_inc_limits();
private:
bool do_emit(nir_instr *instr);
bool emit_atomic(const nir_intrinsic_instr* instr);
bool emit_unary_atomic(const nir_intrinsic_instr* instr);
bool emit_atomic_inc(const nir_intrinsic_instr* instr);
bool emit_atomic_pre_dec(const nir_intrinsic_instr* instr);
bool emit_load_ssbo(const nir_intrinsic_instr* instr);
bool emit_store_ssbo(const nir_intrinsic_instr* instr);
bool emit_image_size(const nir_intrinsic_instr *intrin);
bool emit_image_load(const nir_intrinsic_instr *intrin);
bool emit_image_store(const nir_intrinsic_instr *intrin);
bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin);
bool emit_buffer_size(const nir_intrinsic_instr *intrin);
bool fetch_return_value(const nir_intrinsic_instr *intrin);
bool make_stores_ack_and_waitack();
ESDOp get_opcode(nir_intrinsic_op opcode) const;
ESDOp get_opcode_wo(const nir_intrinsic_op opcode) const;
RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const;
RatInstruction::ERatOp get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const;
GPRVector make_dest(const nir_intrinsic_instr* instr);
PGPRValue m_atomic_update;
bool m_require_rat_return_address;
GPRVector m_rat_return_address;
int m_ssbo_image_offset;
std::vector<RatInstruction *> m_store_ops;
};
}
#endif // SFN_EMITSSBOINSTRUCTION_H

View File

@ -1,671 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_emittexinstruction.h"
#include "sfn_shader_base.h"
#include "sfn_instruction_fetch.h"
namespace r600 {
EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor):
EmitInstruction (processor)
{
}
bool EmitTexInstruction::do_emit(nir_instr* instr)
{
nir_tex_instr* ir = nir_instr_as_tex(instr);
TexInputs src;
if (!get_inputs(*ir, src))
return false;
if (ir->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
switch (ir->op) {
case nir_texop_txf:
return emit_buf_txf(ir, src);
case nir_texop_txs:
return emit_tex_txs(ir, src, {0,1,2,3});
default:
return false;
}
} else {
switch (ir->op) {
case nir_texop_tex:
return emit_tex_tex(ir, src);
case nir_texop_txf:
return emit_tex_txf(ir, src);
case nir_texop_txb:
return emit_tex_txb(ir, src);
case nir_texop_txl:
return emit_tex_txl(ir, src);
case nir_texop_txd:
return emit_tex_txd(ir, src);
case nir_texop_txs:
return emit_tex_txs(ir, src, {0,1,2,3});
case nir_texop_lod:
return emit_tex_lod(ir, src);
case nir_texop_tg4:
return emit_tex_tg4(ir, src);
case nir_texop_txf_ms:
return emit_tex_txf_ms(ir, src);
case nir_texop_query_levels:
return emit_tex_txs(ir, src, {3,7,7,7});
case nir_texop_texture_samples:
return emit_tex_texture_samples(ir, src, {3,7,7,7});
default:
return false;
}
}
}
bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src)
{
auto dst = make_dest(*instr);
auto ir = new FetchInstruction(vc_fetch, no_index_offset, dst, src.coord.reg_i(0), 0,
instr->texture_index + R600_MAX_CONST_BUFFERS,
src.texture_offset, bim_none);
ir->set_flag(vtx_use_const_field);
emit_instruction(ir);
return true;
}
bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src)
{
r600::sfn_log << SfnLog::instr << "emit '"
<< *reinterpret_cast<nir_instr*>(instr)
<< "' (" << __func__ << ")\n";
auto tex_op = TexInstruction::sample;
auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
assert(!sampler.indirect);
if (instr->is_shadow) {
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
{alu_last_instr, alu_write}));
tex_op = TexInstruction::sample_c;
}
auto dst = make_dest(*instr);
auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
if (instr->is_array)
handle_array_index(*instr, src.coord, irt);
set_rect_coordinate_flags(instr, irt);
set_offsets(irt, src.offset);
emit_instruction(irt);
return true;
}
bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src)
{
r600::sfn_log << SfnLog::instr << "emit '"
<< *reinterpret_cast<nir_instr*>(instr)
<< "' (" << __func__ << ")\n";
auto tex_op = TexInstruction::sample_g;
auto dst = make_dest(*instr);
GPRVector empty_dst(0,{7,7,7,7});
if (instr->is_shadow) {
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
{alu_last_instr, alu_write}));
tex_op = TexInstruction::sample_c_g;
}
auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
assert(!sampler.indirect && "Indirect sampler selection not yet supported");
TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, src.ddx,
sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
irgh->set_dest_swizzle({7,7,7,7});
TexInstruction *irgv = new TexInstruction(TexInstruction::set_gradient_v, empty_dst, src.ddy,
sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
irgv->set_dest_swizzle({7,7,7,7});
TexInstruction *ir = new TexInstruction(tex_op, dst, src.coord, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
if (instr->is_array)
handle_array_index(*instr, src.coord, ir);
set_rect_coordinate_flags(instr, ir);
set_offsets(ir, src.offset);
emit_instruction(irgh);
emit_instruction(irgv);
emit_instruction(ir);
return true;
}
bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src)
{
r600::sfn_log << SfnLog::instr << "emit '"
<< *reinterpret_cast<nir_instr*>(instr)
<< "' (" << __func__ << ")\n";
auto dst = make_dest(*instr);
if (*src.coord.reg_i(3) != *src.lod) {
if (src.coord.sel() != src.lod->sel())
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr}));
else
src.coord.set_reg_i(3, src.lod);
}
auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
assert(!sampler.indirect);
/* txf doesn't need rounding for the array index, but 1D has the array index
* in the z component */
if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
src.coord.set_reg_i(2, src.coord.reg_i(1));
auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord,
sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
if (src.offset) {
assert(src.offset->is_ssa);
AluInstruction *ir = nullptr;
for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) {
ir = new AluInstruction(op2_add_int, src.coord.reg_i(i),
{src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write});
emit_instruction(ir);
}
if (ir)
ir->set_flag(alu_last_instr);
}
if (instr->is_array)
tex_ir->set_flag(TexInstruction::z_unnormalized);
emit_instruction(tex_ir);
return true;
}
bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src)
{
auto tex_op = TexInstruction::get_tex_lod;
auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
assert(!sampler.indirect && "Indirect sampler selection not yet supported");
auto dst = make_dest(*instr);
auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
irt->set_dest_swizzle({1,0,7,7});
emit_instruction(irt);
return true;
}
bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src)
{
r600::sfn_log << SfnLog::instr << "emit '"
<< *reinterpret_cast<nir_instr*>(instr)
<< "' (" << __func__ << ")\n";
auto tex_op = TexInstruction::sample_l;
if (instr->is_shadow) {
if (src.coord.sel() != src.comperator->sel())
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
else
src.coord.set_reg_i(2, src.comperator);
tex_op = TexInstruction::sample_c_l;
}
if (src.coord.sel() != src.lod->sel())
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {last_write}));
else
src.coord.set_reg_i(3, src.lod);
auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
assert(!sampler.indirect && "Indirect sampler selection not yet supported");
auto dst = make_dest(*instr);
auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
if (instr->is_array)
handle_array_index(*instr, src.coord, irt);
set_rect_coordinate_flags(instr, irt);
set_offsets(irt, src.offset);
emit_instruction(irt);
return true;
}
bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src)
{
auto tex_op = TexInstruction::sample_lb;
std::array<uint8_t, 4> in_swizzle = {0,1,2,3};
if (instr->is_shadow) {
if (src.coord.sel() != src.comperator->sel())
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
else
src.coord.set_reg_i(2, src.comperator);
tex_op = TexInstruction::sample_c_lb;
}
if (src.coord.sel() != src.bias->sel())
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, {last_write}));
else
src.coord.set_reg_i(3, src.bias);
GPRVector tex_src(src.coord, in_swizzle);
auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
assert(!sampler.indirect && "Indirect sampler selection not yet supported");
auto dst = make_dest(*instr);
auto irt = new TexInstruction(tex_op, dst, tex_src, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
if (instr->is_array)
handle_array_index(*instr, tex_src, irt);
set_rect_coordinate_flags(instr, irt);
set_offsets(irt, src.offset);
emit_instruction(irt);
return true;
}
bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src,
const std::array<int,4>& dest_swz)
{
std::array<PValue,4> dst_elms;
std::array<PValue,4> src_elms;
for (uint16_t i = 0; i < 4; ++i) {
dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? i : 7);
}
GPRVector dst(dst_elms);
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
instr->sampler_index + R600_MAX_CONST_BUFFERS,
bim_none));
} else {
for (uint16_t i = 0; i < 4; ++i)
src_elms[i] = tex_src.lod;
GPRVector src(src_elms);
auto sampler = get_sampler_id(instr->sampler_index, tex_src.sampler_deref);
assert(!sampler.indirect && "Indirect sampler selection not yet supported");
auto ir = new TexInstruction(TexInstruction::get_resinfo, dst, src,
sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
ir->set_dest_swizzle(dest_swz);
emit_instruction(ir);
if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
PValue src(new UniformValue(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2),
sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER));
auto alu = new AluInstruction(op1_mov, dst[2], src, {last_write});
emit_instruction(alu);
set_has_txs_cube_array_comp();
}
}
return true;
}
bool EmitTexInstruction::emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
const std::array<int, 4> &dest_swz)
{
GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
GPRVector help{0,{4,4,4,4}};
auto dyn_offset = PValue();
int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index;
auto ir = new TexInstruction(TexInstruction::get_nsampled, dest, help,
0, res_id, src.sampler_offset);
ir->set_dest_swizzle(dest_swz);
emit_instruction(ir);
return true;
}
bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src)
{
r600::sfn_log << SfnLog::instr << "emit '"
<< *reinterpret_cast<nir_instr*>(instr)
<< "' (" << __func__ << ")\n";
TexInstruction *set_ofs = nullptr;
auto tex_op = TexInstruction::gather4;
if (instr->is_shadow) {
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
{alu_last_instr, alu_write}));
tex_op = TexInstruction::gather4_c;
}
auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
assert(!sampler.indirect && "Indirect sampler selection not yet supported");
bool literal_offset = false;
if (src.offset) {
literal_offset = nir_src_as_const_value(*src.offset) != 0;
r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
(literal_offset ? "literal" : "varying") <<
"\n";
if (!literal_offset) {
GPRVector::Swizzle swizzle = {4,4,4,4};
for (unsigned i = 0; i < instr->coord_components; ++i)
swizzle[i] = i;
int noffsets = instr->coord_components;
if (instr->is_array)
--noffsets;
auto ofs = vec_from_nir_with_fetch_constant(*src.offset,
( 1 << noffsets) - 1,
swizzle);
GPRVector dummy(0, {7,7,7,7});
tex_op = (tex_op == TexInstruction::gather4_c) ?
TexInstruction::gather4_c_o : TexInstruction::gather4_o;
set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy,
ofs, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
set_ofs->set_dest_swizzle({7,7,7,7});
}
}
/* pre CAYMAN needs swizzle */
auto dst = make_dest(*instr);
auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
if (get_chip_class() != CAYMAN)
irt->set_dest_swizzle({1,2,0,3});
irt->set_gather_comp(instr->component);
if (instr->is_array)
handle_array_index(*instr, src.coord, irt);
if (literal_offset) {
r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
set_offsets(irt, src.offset);
}
set_rect_coordinate_flags(instr, irt);
if (set_ofs)
emit_instruction(set_ofs);
emit_instruction(irt);
return true;
}
bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src)
{
assert(instr->src[0].src.is_ssa);
r600::sfn_log << SfnLog::instr << "emit '"
<< *reinterpret_cast<nir_instr*>(instr)
<< "' (" << __func__ << ")\n";
auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
assert(!sampler.indirect && "Indirect sampler selection not yet supported");
PGPRValue sample_id_dest_reg = get_temp_register();
GPRVector sample_id_dest(sample_id_dest_reg->sel(), {7,7,7,7});
sample_id_dest.set_reg_i(sample_id_dest_reg->chan(), sample_id_dest_reg);
std::array<int,4> dest_swz = {7,7,7,7};
dest_swz[sample_id_dest_reg->chan()] = 0;
emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3),
src.ms_index,
{alu_write, alu_last_instr}));
auto tex_sample_id_ir = new TexInstruction(TexInstruction::ld, sample_id_dest, src.coord,
sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
tex_sample_id_ir->set_flag(TexInstruction::x_unnormalized);
tex_sample_id_ir->set_flag(TexInstruction::y_unnormalized);
tex_sample_id_ir->set_flag(TexInstruction::z_unnormalized);
tex_sample_id_ir->set_flag(TexInstruction::w_unnormalized);
tex_sample_id_ir->set_inst_mode(1);
tex_sample_id_ir->set_dest_swizzle(dest_swz);
emit_instruction(tex_sample_id_ir);
if (src.ms_index->type() != Value::literal ||
static_cast<const LiteralValue&>(*src.ms_index).value() != 0) {
PValue help = get_temp_register();
emit_instruction(new AluInstruction(op2_lshl_int, help,
src.ms_index, literal(2),
{alu_write, alu_last_instr}));
emit_instruction(new AluInstruction(op2_lshr_int, sample_id_dest_reg,
{sample_id_dest_reg, help},
{alu_write, alu_last_instr}));
}
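/* Each sample occupies a four bit slot in the fetched value; mask out the slot selected above */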
emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3),
{sample_id_dest_reg, PValue(new LiteralValue(15))},
{alu_write, alu_last_instr}));
auto dst = make_dest(*instr);
/* txf doesn't need rounding for the array index, but 1D has the array index
* in the z component */
if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
src.coord.set_reg_i(2, src.coord.reg_i(1));
auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord,
sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
if (src.offset) {
assert(src.offset->is_ssa);
AluInstruction *ir = nullptr;
for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) {
ir = new AluInstruction(op2_add_int, src.coord.reg_i(i),
{src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write});
emit_instruction(ir);
}
if (ir)
ir->set_flag(alu_last_instr);
}
emit_instruction(tex_ir);
return true;
}
bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src)
{
sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
unsigned grad_components = instr.coord_components;
if (instr.is_array && !instr.array_is_lowered_cube)
--grad_components;
src.offset = nullptr;
bool retval = true;
for (unsigned i = 0; i < instr.num_srcs; ++i) {
switch (instr.src[i].src_type) {
case nir_tex_src_bias:
src.bias = from_nir(instr.src[i], 0);
break;
case nir_tex_src_coord: {
src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << instr.coord_components) - 1,
{0,1,2,3});
} break;
case nir_tex_src_comparator:
src.comperator = from_nir(instr.src[i], 0);
break;
case nir_tex_src_ddx: {
sfn_log << SfnLog::tex << "Get DDX ";
src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << grad_components) - 1,
swizzle_from_comps(grad_components));
sfn_log << SfnLog::tex << src.ddx << "\n";
} break;
case nir_tex_src_ddy:{
sfn_log << SfnLog::tex << "Get DDY ";
src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src,
(1 << grad_components) - 1,
swizzle_from_comps(grad_components));
sfn_log << SfnLog::tex << src.ddy << "\n";
} break;
case nir_tex_src_lod:
src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0);
break;
case nir_tex_src_offset:
sfn_log << SfnLog::tex << " -- Find offset\n";
src.offset = &instr.src[i].src;
break;
case nir_tex_src_sampler_deref:
src.sampler_deref = get_deref_location(instr.src[i].src);
break;
case nir_tex_src_texture_deref:
src.texture_deref = get_deref_location(instr.src[i].src);
break;
case nir_tex_src_ms_index:
src.ms_index = from_nir(instr.src[i], 0);
break;
case nir_tex_src_texture_offset:
src.texture_offset = from_nir(instr.src[i], 0);
break;
case nir_tex_src_sampler_offset:
src.sampler_offset = from_nir(instr.src[i], 0);
break;
case nir_tex_src_plane:
case nir_tex_src_projector:
case nir_tex_src_min_lod:
default:
sfn_log << SfnLog::tex << "Texture source type " << instr.src[i].src_type << " not supported\n";
retval = false;
}
}
return retval;
}
GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr)
{
int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
instr.dest.reg.reg->num_components;
std::array<PValue,4> dst_elms;
for (uint16_t i = 0; i < 4; ++i)
dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7);
return GPRVector(dst_elms);
}
GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr,
const std::array<int, 4>& swizzle)
{
int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
instr.dest.reg.reg->num_components;
std::array<PValue,4> dst_elms;
for (uint16_t i = 0; i < 4; ++i) {
int k = swizzle[i];
dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? k : 7);
}
return GPRVector(dst_elms);
}
void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr,
TexInstruction* ir) const
{
if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
ir->set_flag(TexInstruction::x_unnormalized);
ir->set_flag(TexInstruction::y_unnormalized);
}
}
void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset)
{
if (!offset)
return;
assert(offset->is_ssa);
auto literal = nir_src_as_const_value(*offset);
assert(literal);
for (int i = 0; i < offset->ssa->num_components; ++i) {
ir->set_offset(i, literal[i].i32);
}
}
void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir)
{
int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2;
emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx),
{alu_last_instr, alu_write}));
ir->set_flag(TexInstruction::z_unnormalized);
}
EmitTexInstruction::SamplerId
EmitTexInstruction::get_sampler_id(int sampler_id, const nir_variable *deref)
{
EmitTexInstruction::SamplerId result = {sampler_id, false};
if (deref) {
assert(glsl_type_is_sampler(deref->type));
result.id = deref->data.binding;
}
return result;
}
EmitTexInstruction::TexInputs::TexInputs():
sampler_deref(nullptr),
texture_deref(nullptr),
offset(nullptr)
{
}
}

View File

@ -1,96 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_EMITTEXINSTRUCTION_H
#define SFN_EMITTEXINSTRUCTION_H
#include "sfn_emitinstruction.h"
#include "sfn_instruction_tex.h"
namespace r600 {
class EmitTexInstruction : public EmitInstruction
{
public:
EmitTexInstruction(ShaderFromNirProcessor& processor);
private:
struct TexInputs {
TexInputs();
const nir_variable *sampler_deref;
const nir_variable *texture_deref;
GPRVector coord;
PValue bias;
PValue comperator;
PValue lod;
GPRVector ddx;
GPRVector ddy;
nir_src *offset;
PValue gather_comp;
PValue ms_index;
PValue sampler_offset;
PValue texture_offset;
};
bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src);
bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src);
bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src);
bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src);
bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src);
bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src,
const std::array<int, 4> &dest_swz);
bool emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
const std::array<int, 4> &dest_swz);
bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src);
bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src);
bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src);
bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src);
bool get_inputs(const nir_tex_instr& instr, TexInputs &src);
void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const;
bool do_emit(nir_instr* instr) override;
GPRVector make_dest(nir_tex_instr& instr);
GPRVector make_dest(nir_tex_instr &instr, const std::array<int, 4> &swizzle);
void set_offsets(TexInstruction* ir, nir_src *offset);
void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir);
struct SamplerId {
int id;
bool indirect;
};
SamplerId get_sampler_id(int sampler_id, const nir_variable *deref);
};
}
#endif // SFN_EMITTEXINSTRUCTION_H

View File

@ -0,0 +1,522 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2021 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_instr_alugroup.h"
#include "sfn_instr_export.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_mem.h"
#include "sfn_instr_lds.h"
#include "sfn_instr_tex.h"
#include "sfn_instr_controlflow.h"
#include <iostream>
#include <sstream>
#include <numeric>
namespace r600 {
using std::string;
using std::vector;
Instr::Instr():
m_use_count(0),
m_block_id(std::numeric_limits<int>::max()),
m_index(std::numeric_limits<int>::max())
{
}
Instr::~Instr()
{
}
void Instr::print(std::ostream& os) const
{
do_print(os);
}
bool Instr::ready() const
{
for (auto& i : m_required_instr)
if (!i->ready())
return false;
return do_ready();
}
int int_from_string_with_prefix(const std::string& str, const std::string& prefix)
{
if (str.substr(0, prefix.length()) != prefix) {
std::cerr << "Expect '" << prefix << "' as start of '" << str << "'\n";
assert(0);
}
std::stringstream help(str.substr(prefix.length()));
int retval;
help >> retval;
return retval;
}
int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle &swz, bool& is_ssa)
{
assert(str[0] == 'R' || str[0] == '_' || str[0] == 'S');
int sel = 0;
auto istr = str.begin() + 1;
if (str[0] == '_') {
while (istr != str.end() && *istr == '_')
++istr;
sel = std::numeric_limits<int>::max();
} else {
while (istr != str.end() && isdigit(*istr)) {
sel *= 10;
sel += *istr - '0';
++istr;
}
}
assert(*istr == '.');
istr++;
int i = 0;
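/* Swizzle encoding: 0-3 select x,y,z,w, 4 and 5 select the constants 0 and 1, 7 marks the channel as unused */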
while (istr != str.end()) {
switch (*istr) {
case 'x': swz[i] = 0; break;
case 'y': swz[i] = 1; break;
case 'z': swz[i] = 2; break;
case 'w': swz[i] = 3; break;
case '0': swz[i] = 4; break;
case '1': swz[i] = 5; break;
case '_': swz[i] = 7; break;
default:
unreachable("Unknown swizzle character");
}
++istr;
++i;
}
is_ssa = str[0] == 'S';
return sel;
}
bool Instr::is_last() const
{
return true;
}
bool Instr::set_dead()
{
if (m_instr_flags.test(always_keep))
return false;
bool is_dead = propagate_death();
m_instr_flags.set(dead);
return is_dead;
}
bool Instr::propagate_death()
{
return true;
}
bool Instr::replace_source(PRegister old_src, PVirtualValue new_src)
{
(void)old_src;
(void)new_src;
return false;
}
void Instr::add_required_instr(Instr *instr)
{
assert(instr);
m_required_instr.push_back(instr);
instr->m_dependend_instr.push_back(this);
}
void Instr::replace_required_instr(Instr *old_instr, Instr *new_instr)
{
for (auto i = m_required_instr.begin(); i != m_required_instr.end(); ++i) {
if (*i == old_instr)
*i = new_instr;
}
}
bool Instr::replace_dest(PRegister new_dest, r600::AluInstr *move_instr)
{
(void)new_dest;
(void)move_instr;
return false;
}
void Instr::set_blockid(int id, int index)
{
m_block_id = id;
m_index = index;
forward_set_blockid(id, index);
}
void Instr::forward_set_blockid(int id, int index)
{
(void)id;
(void)index;
}
InstrWithVectorResult::InstrWithVectorResult(const RegisterVec4& dest,
const RegisterVec4::Swizzle& dest_swizzle):
m_dest(dest),
m_dest_swizzle(dest_swizzle)
{
for (int i = 0; i < 4; ++i) {
if (m_dest_swizzle[i] < 6)
m_dest[i]->add_parent(this);
}
}
void InstrWithVectorResult::print_dest(std::ostream& os) const
{
os << (m_dest[0]->is_ssa() ? 'S' : 'R' ) << m_dest.sel();
os << ".";
for (int i = 0; i < 4; ++i)
os << VirtualValue::chanchar[m_dest_swizzle[i]];
}
bool InstrWithVectorResult::comp_dest(const RegisterVec4& dest,
const RegisterVec4::Swizzle& dest_swizzle) const
{
for(int i = 0; i < 4; ++i) {
if (!m_dest[i]->equal_to(*dest[i])) {
return false;
}
if (m_dest_swizzle[i] != dest_swizzle[i])
return false;
}
return true;
}
void Block::do_print(std::ostream& os) const
{
for (int j = 0; j < 2 * m_nesting_depth; ++j)
os << ' ';
os << "BLOCK START\n";
for (auto& i : m_instructions) {
for (int j = 0; j < 2 * (m_nesting_depth + i->nesting_corr()) + 2; ++j)
os << ' ';
os << *i << "\n";
}
for (int j = 0; j < 2 * m_nesting_depth; ++j)
os << ' ';
os << "BLOCK END\n";
}
bool Block::is_equal_to(const Block& lhs) const
{
if (m_id != lhs.m_id || m_nesting_depth != lhs.m_nesting_depth)
return false;
if (m_instructions.size() != lhs.m_instructions.size())
return false;
return std::inner_product(m_instructions.begin(), m_instructions.end(), lhs.m_instructions.begin(),
true,
[] (bool l, bool r) { return l && r;},
[](PInst l, PInst r) { return l->equal_to(*r);});
}
inline bool operator != (const Block& lhs, const Block& rhs)
{
return !lhs.is_equal_to(rhs);
}
void Block::erase(iterator node)
{
m_instructions.erase(node);
}
void Block::set_type(Type t)
{
m_blocK_type = t;
switch (t) {
case vtx:
case gds:
case tex: m_remaining_slots = 8; break; /* TODO: 16 for >= EVERGREEN */
default:
m_remaining_slots = 0xffff;
}
}
Block::Block(int nesting_depth, int id):
m_nesting_depth(nesting_depth),
m_id(id),
m_next_index(0)
{
assert(!has_instr_flag(force_cf));
}
void Block::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void Block::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
void Block::push_back(PInst instr)
{
instr->set_blockid(m_id, m_next_index++);
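/* tex/vtx/gds blocks correspond to fetch clauses with a limited number of slots, keep track of what remains */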
if (m_remaining_slots != 0xffff) {
uint32_t new_slots = instr->slots();
m_remaining_slots -= new_slots;
}
if (m_lds_group_start)
m_lds_group_requirement += instr->slots();
m_instructions.push_back(instr);
}
bool Block::try_reserve_kcache(const AluGroup& group)
{
auto kcache_constants = group.get_kconsts();
for (auto& kc : kcache_constants) {
auto u = kc->as_uniform();
assert(u);
if (!try_reserve_kcache(*u))
return false;
}
return true;
}
bool Block::try_reserve_kcache(const UniformValue& u)
{
const int kcache_banks = 4; // TODO: handle pre-evergreen
int bank = u.kcache_bank();
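/* kcache constants use sels starting at 512, and one cache line covers 16 constants */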
int sel = (u.sel() - 512);
int line = sel >> 4;
bool found = false;
for (int i = 0; i < kcache_banks && !found; ++i) {
if (m_kcache[i].mode) {
if (m_kcache[i].bank < bank)
continue;
if ((m_kcache[i].bank == bank &&
m_kcache[i].addr > line + 1) ||
m_kcache[i].bank > bank) {
if (m_kcache[kcache_banks - 1].mode)
return false;
memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
m_kcache[i].mode = KCacheLine::lock_1;
m_kcache[i].bank = bank;
m_kcache[i].addr = line;
return true;
}
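/* Distance between the requested line and the line already reserved in this set */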
int d = line - m_kcache[i].addr;
if (d == -1) {
m_kcache[i].addr--;
if (m_kcache[i].mode == KCacheLine::lock_2) {
/* we are prepending the line to the current set,
* discarding the existing second line,
* so we'll have to insert line+2 after it */
line += 2;
continue;
} else if (m_kcache[i].mode == KCacheLine::lock_1) {
m_kcache[i].mode = KCacheLine::lock_2;
return true;
} else {
/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
return false;
}
} else if (d == 1) {
m_kcache[i].mode = KCacheLine::lock_2;
return true;
} else if (d == 0)
return true;
} else { /* free kcache set - use it */
m_kcache[i].mode = KCacheLine::lock_1;
m_kcache[i].bank = bank;
m_kcache[i].addr = line;
return true;
}
}
return false;
}
void Block::lds_group_start(AluInstr *alu)
{
assert(!m_lds_group_start);
m_lds_group_start = alu;
m_lds_group_requirement = 0;
}
void Block::lds_group_end()
{
assert(m_lds_group_start);
m_lds_group_start->set_required_slots(m_lds_group_requirement);
m_lds_group_start = 0;
}
InstrWithVectorResult::InstrWithVectorResult(const InstrWithVectorResult& orig):
m_dest(orig.m_dest),
m_dest_swizzle(orig.m_dest_swizzle)
{
}
class InstrComparer : public ConstInstrVisitor {
public:
InstrComparer() = default;
bool result {false};
#define DECLARE_MEMBER(TYPE) \
InstrComparer(const TYPE *instr) \
{ \
this_ ## TYPE = instr; \
} \
\
void visit(const TYPE& instr) \
{ \
result = false; \
if (!this_ ## TYPE) \
return; \
result = this_ ## TYPE->is_equal_to(instr); \
} \
\
const TYPE *this_ ## TYPE{nullptr};
DECLARE_MEMBER(AluInstr);
DECLARE_MEMBER(AluGroup);
DECLARE_MEMBER(TexInstr);
DECLARE_MEMBER(ExportInstr);
DECLARE_MEMBER(FetchInstr);
DECLARE_MEMBER(Block);
DECLARE_MEMBER(ControlFlowInstr);
DECLARE_MEMBER(IfInstr);
DECLARE_MEMBER(WriteScratchInstr);
DECLARE_MEMBER(StreamOutInstr);
DECLARE_MEMBER(MemRingOutInstr);
DECLARE_MEMBER(EmitVertexInstr);
DECLARE_MEMBER(GDSInstr);
DECLARE_MEMBER(WriteTFInstr);
DECLARE_MEMBER(LDSAtomicInstr);
DECLARE_MEMBER(LDSReadInstr);
DECLARE_MEMBER(RatInstr);
};
class InstrCompareForward: public ConstInstrVisitor {
public:
void visit(const AluInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const AluGroup& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const TexInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const ExportInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const FetchInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const Block& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const ControlFlowInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const IfInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const WriteScratchInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const StreamOutInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const MemRingOutInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const EmitVertexInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const GDSInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const WriteTFInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const LDSAtomicInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const LDSReadInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
void visit(const RatInstr& instr) override {
m_comparer = InstrComparer(&instr);
}
InstrComparer m_comparer;
};
bool Instr::equal_to(const Instr& lhs) const
{
InstrCompareForward cmp;
accept(cmp);
lhs.accept(cmp.m_comparer);
return cmp.m_comparer.result;
}
} // ns r600

View File

@ -0,0 +1,314 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2021 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "sfn_virtualvalues.h"
#include "sfn_alu_defines.h"
#include "sfn_defines.h"
#include <set>
#include <list>
#include <iostream>
namespace r600 {
class ConstInstrVisitor;
class InstrVisitor;
class AluInstr;
class AluGroup;
class TexInstr;
class ExportInstr;
class FetchInstr;
class ControlFlowInstr;
class IfInstr;
class WriteScratchInstr;
class StreamOutInstr;
class MemRingOutInstr;
class EmitVertexInstr;
class GDSInstr;
class WriteTFInstr;
class LDSAtomicInstr;
class LDSReadInstr;
class RatInstr;
int int_from_string_with_prefix(const std::string& str, const std::string& prefix);
int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle& swz, bool& is_ssa);
class Instr : public Allocate {
public:
enum Flags {
always_keep,
dead,
scheduled,
vpm,
force_cf,
ack_rat_return_write,
nflags
};
Instr();
Instr(const Instr& orig) = default;
virtual ~Instr();
using Pointer = R600_POINTER_TYPE(Instr);
void print(std::ostream& os) const;
bool equal_to(const Instr& lhs) const;
virtual void accept(ConstInstrVisitor& visitor) const = 0;
virtual void accept(InstrVisitor& visitor) = 0;
virtual bool end_group() const { return true;}
virtual bool is_last() const;
void set_always_keep() {m_instr_flags.set(always_keep);}
bool set_dead();
virtual void set_scheduled() { m_instr_flags.set(scheduled); forward_set_scheduled();}
void add_use() {++m_use_count;}
void dec_use() {assert(m_use_count > 0); --m_use_count;}
bool is_dead() const {return m_instr_flags.test(dead);}
bool is_scheduled() const {return m_instr_flags.test(scheduled);}
bool keep() const {return m_instr_flags.test(always_keep);}
bool has_uses() const {return m_use_count > 0;}
bool has_instr_flag(Flags f) const {return m_instr_flags.test(f);}
void set_instr_flag(Flags f) { m_instr_flags.set(f);}
virtual bool replace_source(PRegister old_src, PVirtualValue new_src);
virtual bool replace_dest(PRegister new_dest, AluInstr *move_instr);
virtual int nesting_corr() const { return 0;}
virtual bool end_block() const { return false;}
virtual int nesting_offset() const { return 0;}
void set_blockid(int id, int index);
int block_id() const {return m_block_id;}
int index() const { return m_index;}
void add_required_instr(Instr *instr);
void replace_required_instr(Instr *old_instr, Instr *new_instr);
bool ready() const;
virtual uint32_t slots() const {return 0;};
using InstrList = std::list<Instr *, Allocator<Instr *>>;
const InstrList& dependend_instr() { return m_dependend_instr;}
protected:
const InstrList& required_instr() const {return m_required_instr; }
private:
virtual void forward_set_blockid(int id, int index);
virtual bool do_ready() const = 0;
virtual void do_print(std::ostream& os) const = 0;
virtual bool propagate_death();
virtual void forward_set_scheduled() {}
InstrList m_required_instr;
InstrList m_dependend_instr;
int m_use_count;
int m_block_id;
int m_index;
std::bitset<nflags> m_instr_flags{0};
};
using PInst = Instr::Pointer;
class Block : public Instr {
public:
enum Type {
cf,
alu,
tex,
vtx,
gds,
unknown
};
using Instructions = std::list<Instr *, Allocator<Instr *>>;
using Pointer = R600_POINTER_TYPE(Block);
using iterator = Instructions::iterator;
using reverse_iterator = Instructions::reverse_iterator;
using const_iterator = Instructions::const_iterator;
Block(int nesting_depth, int id);
Block(const Block& orig) = delete;
void push_back(PInst instr);
iterator begin() { return m_instructions.begin(); }
iterator end() { return m_instructions.end(); }
reverse_iterator rbegin() { return m_instructions.rbegin(); }
reverse_iterator rend() { return m_instructions.rend(); }
const_iterator begin() const { return m_instructions.begin();}
const_iterator end() const { return m_instructions.end();}
bool empty() const { return m_instructions.empty();}
void erase(iterator node);
bool is_equal_to(const Block& lhs) const;
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
int nesting_depth() const { return m_nesting_depth;}
int id() const {return m_id;}
auto type() const {return m_blocK_type; }
void set_type(Type t);
uint32_t remaining_slots() const { return m_remaining_slots;}
bool try_reserve_kcache(const AluGroup& group);
auto last_lds_instr() {return m_last_lds_instr;}
void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;}
void lds_group_start(AluInstr *alu);
void lds_group_end();
bool lds_group_active() { return m_lds_group_start != nullptr;}
size_t size() const { return m_instructions.size();}
private:
bool try_reserve_kcache(const UniformValue& u);
bool do_ready() const override {return true;};
void do_print(std::ostream& os) const override;
Instructions m_instructions;
int m_nesting_depth;
int m_id;
int m_next_index;
Type m_blocK_type{unknown};
uint32_t m_remaining_slots{0xffff};
std::array<KCacheLine, 4> m_kcache;
Instr *m_last_lds_instr{nullptr};
int m_lds_group_requirement{0};
AluInstr *m_lds_group_start{nullptr};
};
class InstrWithVectorResult : public Instr {
public:
InstrWithVectorResult(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle);
void set_dest_swizzle(const RegisterVec4::Swizzle& swz) {m_dest_swizzle = swz;}
int dest_swizzle(int i) const { return m_dest_swizzle[i];}
const RegisterVec4::Swizzle& all_dest_swizzle() const { return m_dest_swizzle;}
const RegisterVec4& dst() const {return m_dest;}
protected:
InstrWithVectorResult(const InstrWithVectorResult& orig);
void print_dest(std::ostream& os) const;
bool comp_dest(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle) const;
private:
RegisterVec4 m_dest;
RegisterVec4::Swizzle m_dest_swizzle;
};
inline bool operator == (const Instr& lhs, const Instr& rhs) {
return lhs.equal_to(rhs);
}
inline bool operator != (const Instr& lhs, const Instr& rhs) {
return !(lhs == rhs);
}
inline std::ostream& operator << (std::ostream& os, const Instr& instr)
{
instr.print(os);
return os;
}
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Instr, T>>>
std::ostream& operator<<(std::ostream& os, const T& instr) {
instr.print(os);
return os;
}
class ConstInstrVisitor {
public:
virtual void visit(const AluInstr& instr) = 0;
virtual void visit(const AluGroup& instr) = 0;
virtual void visit(const TexInstr& instr) = 0;
virtual void visit(const ExportInstr& instr) = 0;
virtual void visit(const FetchInstr& instr) = 0;
virtual void visit(const Block& instr) = 0;
virtual void visit(const ControlFlowInstr& instr) = 0;
virtual void visit(const IfInstr& instr) = 0;
virtual void visit(const WriteScratchInstr& instr) = 0;
virtual void visit(const StreamOutInstr& instr) = 0;
virtual void visit(const MemRingOutInstr& instr) = 0;
virtual void visit(const EmitVertexInstr& instr) = 0;
virtual void visit(const GDSInstr& instr) = 0;
virtual void visit(const WriteTFInstr& instr) = 0;
virtual void visit(const LDSAtomicInstr& instr) = 0;
virtual void visit(const LDSReadInstr& instr) = 0;
virtual void visit(const RatInstr& instr) = 0;
};
class InstrVisitor {
public:
virtual void visit(AluInstr *instr) = 0;
virtual void visit(AluGroup *instr) = 0;
virtual void visit(TexInstr *instr) = 0;
virtual void visit(ExportInstr *instr) = 0;
virtual void visit(FetchInstr *instr) = 0;
virtual void visit(Block *instr) = 0;
virtual void visit(ControlFlowInstr *instr) = 0;
virtual void visit(IfInstr *instr) = 0;
virtual void visit(WriteScratchInstr *instr) = 0;
virtual void visit(StreamOutInstr *instr) = 0;
virtual void visit(MemRingOutInstr *instr) = 0;
virtual void visit(EmitVertexInstr *instr) = 0;
virtual void visit(GDSInstr *instr) = 0;
virtual void visit(WriteTFInstr *instr) = 0;
virtual void visit(LDSAtomicInstr *instr) = 0;
virtual void visit(LDSReadInstr *instr) = 0;
virtual void visit(RatInstr *instr) = 0;
};
} // ns r600

File diff suppressed because it is too large

View File

@ -0,0 +1,193 @@
#ifndef INSTRALU_H
#define INSTRALU_H
#include "sfn_instr.h"
#include <unordered_set>
struct nir_alu_instr;
namespace r600 {
class Shader;
class ValueFactory;
class AluInstr : public Instr {
public:
using SrcValues = std::vector<PVirtualValue, Allocator<PVirtualValue>>;
enum Op2Options {
op2_opt_none = 0,
op2_opt_reverse = 1,
op2_opt_neg_src1 = 1 << 1,
op2_opt_abs_src0 = 1 << 2
};
static constexpr const AluBankSwizzle bs[6] = {
alu_vec_012,
alu_vec_021,
alu_vec_120,
alu_vec_102,
alu_vec_201,
alu_vec_210
};
static const AluModifiers src_abs_flags[2];
static const AluModifiers src_neg_flags[3];
static const AluModifiers src_rel_flags[3];
AluInstr(EAluOp opcode);
AluInstr(EAluOp opcode, int chan);
AluInstr(EAluOp opcode, PRegister dest,
SrcValues src0,
const std::set<AluModifiers>& flags, int alu_slot);
AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0,
const std::set<AluModifiers>& flags);
AluInstr(EAluOp opcode, PRegister dest,
PVirtualValue src0, PVirtualValue src1,
const std::set<AluModifiers>& flags);
AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1,
PVirtualValue src2,
const std::set<AluModifiers>& flags);
AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address);
AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags);
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
auto opcode() const {assert(!has_alu_flag(alu_is_lds)); return m_opcode;}
auto lds_opcode() const {assert(has_alu_flag(alu_is_lds)); return m_lds_opcode;}
bool can_propagate_src() const;
bool can_propagate_dest() const;
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
bool replace_dest(PRegister new_dest, AluInstr *move_instr) override;
void set_op(EAluOp op) {m_opcode = op;}
PRegister dest() const {return m_dest;}
unsigned n_sources() const {return m_src.size();}
int dest_chan() const {return m_dest ? m_dest->chan() : m_fallback_chan;}
PVirtualValue psrc(unsigned i) {return i < m_src.size() ? m_src[i] : nullptr;}
VirtualValue& src(unsigned i) {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
const VirtualValue& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
void set_sources(SrcValues src);
const SrcValues& sources() const {return m_src;}
void pin_sources_to_chan();
int register_priority() const;
void reset_alu_flag(AluModifiers flag) {m_alu_flags.reset(flag);}
void set_alu_flag(AluModifiers flag) {m_alu_flags.set(flag);}
bool has_alu_flag(AluModifiers f) const {return m_alu_flags.test(f);}
ECFAluOpCode cf_type() const {return m_cf_type;}
void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
void set_bank_swizzle(AluBankSwizzle swz) {m_bank_swizzle = swz;}
AluBankSwizzle bank_swizzle() const {return m_bank_swizzle;}
void set_index_offset(unsigned offs) {m_idx_offset = offs;}
auto index_offset() const {return m_idx_offset;}
bool is_equal_to(const AluInstr& lhs) const;
bool has_lds_access() const;
static const std::map<ECFAluOpCode, std::string> cf_map;
static const std::map<AluBankSwizzle, std::string> bank_swizzle_map;
static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory, AluGroup *);
static bool from_nir(nir_alu_instr *alu, Shader& shader);
int alu_slots() const {return m_alu_slots;}
AluGroup *split(ValueFactory &vf);
bool end_group() const override { return m_alu_flags.test(alu_last_instr);}
static const std::set<AluModifiers> empty;
static const std::set<AluModifiers> write;
static const std::set<AluModifiers> last;
static const std::set<AluModifiers> last_write;
std::pair<PRegister, bool> indirect_addr() const;
void add_extra_dependency(PVirtualValue reg);
void set_required_slots(int nslots) { m_required_slots = nslots;}
unsigned required_slots() const { return m_required_slots;}
void add_priority(int priority) { m_priority += priority;}
int priority() const { return m_priority;}
void inc_priority() { ++m_priority;}
void set_parent_group(AluGroup *group) { m_parent_group = group;}
private:
friend class AluGroup;
void update_uses();
bool do_ready() const override;
bool can_copy_propagate() const;
bool check_readport_validation(PRegister old_src, PVirtualValue new_src) const;
void set_alu_flags(const AluOpFlags& flags) { m_alu_flags = flags; }
bool propagate_death() override;
void do_print(std::ostream& os) const override;
union {
EAluOp m_opcode;
ESDOp m_lds_opcode;
};
PRegister m_dest{nullptr};
SrcValues m_src;
AluOpFlags m_alu_flags;
AluBankSwizzle m_bank_swizzle{alu_vec_unknown};
ECFAluOpCode m_cf_type{cf_alu};
int m_alu_slots{1};
int m_fallback_chan{0};
unsigned m_idx_offset{0};
unsigned m_required_slots{0};
int m_priority{0};
std::set<PRegister, std::less<PRegister>, Allocator<PRegister>> m_extra_dependencies;
AluGroup *m_parent_group{nullptr};
};
class AluInstrVisitor : public InstrVisitor {
public:
void visit(AluGroup *instr) override;
void visit(Block *instr) override;
void visit(IfInstr *instr) override;
void visit(TexInstr *instr) override {(void)instr;}
void visit(ExportInstr *instr) override {(void)instr;}
void visit(FetchInstr *instr) override {(void)instr;}
void visit(ControlFlowInstr *instr) override {(void)instr;}
void visit(WriteScratchInstr *instr) override {(void)instr;}
void visit(StreamOutInstr *instr) override {(void)instr;}
void visit(MemRingOutInstr *instr) override {(void)instr;}
void visit(EmitVertexInstr *instr) override {(void)instr;}
void visit(GDSInstr *instr) override {(void)instr;};
void visit(WriteTFInstr *instr) override {(void)instr;};
void visit(LDSAtomicInstr *instr) override {(void)instr;};
void visit(LDSReadInstr *instr) override {(void)instr;};
void visit(RatInstr *instr) override {(void)instr;};
};
}
#endif // INSTRALU_H

View File

@ -0,0 +1,361 @@
#include "sfn_instr_alugroup.h"
#include "sfn_debug.h"
#include <algorithm>
namespace r600 {
AluGroup::AluGroup()
{
std::fill(m_slots.begin(), m_slots.end(), nullptr);
}
bool AluGroup::add_instruction(AluInstr *instr)
{
/* we can only schedule one op per group that accesses the LDS or
the LDS read queue */
if (m_has_lds_op && instr->has_lds_access())
return false;
if (instr->has_alu_flag(alu_is_trans) && add_trans_instructions(instr))
return true;
if (add_vec_instructions(instr)) {
instr->set_parent_group(this);
return true;
}
auto opinfo = alu_ops.find(instr->opcode());
assert(opinfo != alu_ops.end());
if (s_max_slots > 4 &&
opinfo->second.can_channel(AluOp::t) &&
add_trans_instructions(instr)) {
instr->set_parent_group(this);
return true;
}
return false;
}
bool AluGroup::add_trans_instructions(AluInstr *instr)
{
if (m_slots[4] || s_max_slots < 5)
return false;
if (!update_indirect_access(instr))
return false;
/* LDS instructions have to be scheduled in X */
if (instr->has_alu_flag(alu_is_lds))
return false;
auto opinfo = alu_ops.find(instr->opcode());
assert(opinfo != alu_ops.end());
if (!opinfo->second.can_channel(AluOp::t))
return false;
/* if we schedule a non-trans instr into the trans slot, we have to make
* sure that the corresponding vector slot is already occupied, otherwise
* the hardware will schedule it as a vector op and the bank-swizzle
* check done here (and in r600_asm.c) will not catch conflicts.
*/
if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
if (instr->dest() && instr->dest()->pin() == pin_free) {
int used_slot = 3;
while (used_slot >= 0 && !m_slots[used_slot])
--used_slot;
// if we schedule a non-trans instr into the trans slot,
// there should always be some slot that is already used
assert(used_slot >= 0);
instr->dest()->set_chan(used_slot);
}
}
for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown ; ++i) {
AluReadportReservation readports_evaluator = m_readports_evaluator;
if (readports_evaluator.schedule_trans_instruction(*instr, i)) {
m_readports_evaluator = readports_evaluator;
m_slots[4] = instr;
instr->pin_sources_to_chan();
sfn_log << SfnLog::schedule << "T: " << *instr << "\n";
/* We added a non-trans (vector) op to the trans slot, so we have to
* make sure the corresponding vector slot is also occupied */
if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
m_slots[instr->dest_chan()] =
new AluInstr(op0_nop, instr->dest_chan());
return true;
}
}
return false;
}
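/* Return a bitmask of the unoccupied slots: bits 0-3 correspond to the
 * vector channels x-w, bit 4 to the trans slot where available. */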
int AluGroup::free_slots() const
{
int free_mask = 0;
for(int i = 0; i < s_max_slots; ++i) {
if (!m_slots[i])
free_mask |= 1 << i;
}
return free_mask;
}
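/* Visitor that checks whether a use of a value permits switching its
 * destination channel: only single-slot ALU instructions and Cayman trans
 * ops qualify; for any other user 'yes' stays false. */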
class AluAllowSlotSwitch : public AluInstrVisitor {
public:
using AluInstrVisitor::visit;
void visit(AluInstr *alu) {
yes = (alu->alu_slots() == 1 || alu->has_alu_flag(alu_is_cayman_trans));
}
bool yes{false};
};
bool AluGroup::add_vec_instructions(AluInstr *instr)
{
if (!update_indirect_access(instr))
return false;
int param_src = -1;
for (auto& s : instr->sources()) {
auto is = s->as_inline_const();
if (is)
param_src = is->sel() - ALU_SRC_PARAM_BASE;
}
if (param_src >= 0) {
if (m_param_used < 0)
m_param_used = param_src;
else if (m_param_used != param_src)
return false;
}
if (m_has_lds_op && instr->has_lds_access())
return false;
int preferred_chan = instr->dest_chan();
if (!m_slots[preferred_chan]) {
if (instr->bank_swizzle() != alu_vec_unknown) {
if (try_readport(instr, instr->bank_swizzle()))
return true;
} else {
for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
if (try_readport(instr, i))
return true;
}
}
} else {
auto dest = instr->dest();
if (dest && dest->pin() == pin_free) {
for (auto u : dest->uses()) {
            AluAllowSlotSwitch switch_allowed;
            u->accept(switch_allowed);
            if (!switch_allowed.yes)
return false;
}
int free_chan = 0;
while (m_slots[free_chan] && free_chan < 4)
free_chan++;
if (!m_slots[free_chan] && free_chan < 4) {
sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
dest->set_chan(free_chan);
if (instr->bank_swizzle() != alu_vec_unknown) {
if (try_readport(instr, instr->bank_swizzle()))
return true;
} else {
for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
if (try_readport(instr, i))
return true;
}
}
}
}
}
return false;
}
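/* Check whether the instruction's sources fit the read ports for the given
 * bank swizzle. On success the reservation is committed, the instruction is
 * placed in the slot of its destination channel, and the destination and
 * sources are pinned to their channels. */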
bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
{
int preferred_chan = instr->dest_chan();
AluReadportReservation readports_evaluator = m_readports_evaluator;
if (readports_evaluator.schedule_vec_instruction(*instr, cycle)) {
m_readports_evaluator = readports_evaluator;
m_slots[preferred_chan] = instr;
m_has_lds_op |= instr->has_lds_access();
sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
auto dest = instr->dest();
if (dest && dest->pin() == pin_free)
dest->set_pin(pin_chan);
instr->pin_sources_to_chan();
return true;
}
return false;
}
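/* A group can use at most one indirect address register: remember the first
 * one seen and reject instructions that refer to a different one. */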
bool AluGroup::update_indirect_access(AluInstr *instr)
{
auto indirect_addr = instr->indirect_addr();
if (indirect_addr.first) {
if (!m_addr_used) {
m_addr_used = indirect_addr.first;
m_addr_is_index = indirect_addr.second;
} else if (!indirect_addr.first->equal_to(*m_addr_used)) {
return false;
}
}
return true;
}
void AluGroup::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void AluGroup::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
void AluGroup::set_scheduled()
{
for (int i = 0; i < s_max_slots; ++i) {
if (m_slots[i])
m_slots[i]->set_scheduled();
}
}
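/* Make sure that only the last occupied slot carries the alu_last_instr
 * flag. */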
void AluGroup::fix_last_flag()
{
bool last_seen = false;
for (int i = s_max_slots - 1; i >= 0; --i) {
if (m_slots[i]) {
if (!last_seen) {
m_slots[i]->set_alu_flag(alu_last_instr);
last_seen = true;
} else {
m_slots[i]->reset_alu_flag(alu_last_instr);
}
}
}
}
bool AluGroup::is_equal_to(const AluGroup& other) const
{
for (int i = 0; i < s_max_slots; ++i) {
if (!other.m_slots[i]) {
if (!m_slots[i])
continue;
else
return false;
}
if (m_slots[i]) {
if (!other.m_slots[i])
return false;
else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
return false;
}
}
return true;
}
bool AluGroup::has_lds_group_end() const
{
for (int i = 0; i < s_max_slots; ++i) {
if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
return true;
}
return false;
}
bool AluGroup::do_ready() const
{
for (int i = 0; i < s_max_slots; ++i) {
if (m_slots[i] && !m_slots[i]->ready())
return false;
}
return true;
}
void AluGroup::forward_set_blockid(int id, int index)
{
for (int i = 0; i < s_max_slots; ++i) {
if (m_slots[i]) {
m_slots[i]->set_blockid(id, index);
}
}
}
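/* Number of instruction slots this group occupies: one per used ALU slot,
 * one per pair of literal constants, plus extra slots when an indirect
 * address (and index) register is in use. */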
uint32_t AluGroup::slots() const
{
uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
for (int i = 0; i < s_max_slots; ++i) {
if (m_slots[i])
++result;
}
if (m_addr_used) {
++result;
if (m_addr_is_index)
++result;
}
return result;
}
void AluGroup::do_print(std::ostream& os) const
{
const char slotname[] = "xyzwt";
os << "ALU_GROUP_BEGIN\n";
for (int i = 0; i < s_max_slots; ++i) {
if (m_slots[i]) {
for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
os << ' ';
os << slotname[i] << ": ";
m_slots[i]->print(os);
os << "\n";
}
}
for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
os << ' ';
os << "ALU_GROUP_END";
}
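/* Collect all uniform (kcache) sources read by the instructions of this
 * group. */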
AluInstr::SrcValues AluGroup::get_kconsts() const
{
AluInstr::SrcValues result;
for (int i = 0; i < s_max_slots; ++i) {
if (m_slots[i]) {
for (auto s : m_slots[i]->sources())
if (s->as_uniform())
result.push_back(s);
}
}
return result;
}
void AluGroup::set_chipclass(r600_chip_class chip_class)
{
switch (chip_class) {
case ISA_CC_CAYMAN:
s_max_slots = 4;
break;
default:
s_max_slots = 5;
}
}
int AluGroup::s_max_slots = 5;
}

View File

@ -0,0 +1,89 @@
#ifndef ALUGROUP_H
#define ALUGROUP_H
#include "sfn_instr_alu.h"
#include "sfn_alu_readport_validation.h"
namespace r600 {
class AluGroup : public Instr
{
public:
using Slots = std::array<AluInstr *, 5>;
AluGroup();
using iterator = Slots::iterator;
using const_iterator = Slots::const_iterator;
bool add_instruction(AluInstr *instr);
bool add_trans_instructions(AluInstr *instr);
bool add_vec_instructions(AluInstr *instr);
bool is_equal_to(const AluGroup& other) const;
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
auto begin() {return m_slots.begin(); }
auto end() {return m_slots.begin() + s_max_slots; }
auto begin() const {return m_slots.begin(); }
auto end() const {return m_slots.begin() + s_max_slots; }
bool end_group() const override { return true; }
void set_scheduled() override;
void set_nesting_depth(int depth) {m_nesting_depth = depth;}
void fix_last_flag();
static void set_chipclass(r600_chip_class chip_class);
int free_slots() const;
auto addr() const {return std::make_pair(m_addr_used, m_addr_is_index);}
uint32_t slots() const override;
AluInstr::SrcValues get_kconsts() const;
bool has_lds_group_start() const { return m_slots[0] ?
m_slots[0]->has_alu_flag(alu_lds_group_start) : false;}
bool has_lds_group_end() const;
const auto& readport_reserer() const { return m_readports_evaluator; }
void set_readport_reserer(const AluReadportReservation& rr) {
m_readports_evaluator = rr;
};
static bool has_t() { return s_max_slots == 5;}
private:
void forward_set_blockid(int id, int index) override;
bool do_ready() const override;
void do_print(std::ostream& os) const override;
bool update_indirect_access(AluInstr *instr);
bool try_readport(AluInstr *instr, AluBankSwizzle cycle);
Slots m_slots;
AluReadportReservation m_readports_evaluator;
static int s_max_slots;
PRegister m_addr_used{nullptr};
int m_param_used{-1};
int m_nesting_depth{0};
bool m_has_lds_op{false};
bool m_addr_is_index{false};
};
}
#endif // ALUGROUP_H

View File

@ -0,0 +1,176 @@
#include "sfn_instr_controlflow.h"
#include <sstream>
namespace r600 {
ControlFlowInstr::ControlFlowInstr(CFType type):
m_type(type)
{
}
bool ControlFlowInstr::do_ready() const
{
   /* TODO: rework this; a control flow instruction should always be ready */
return true;
}
bool ControlFlowInstr::is_equal_to(const ControlFlowInstr& rhs) const
{
return m_type == rhs.m_type;
}
void ControlFlowInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void ControlFlowInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
void ControlFlowInstr::do_print(std::ostream& os) const
{
switch (m_type) {
case cf_else: os << "ELSE"; break;
case cf_endif: os << "ENDIF";break;
case cf_loop_begin: os << "LOOP_BEGIN"; break;
case cf_loop_end: os << "LOOP_END"; break;
case cf_loop_break: os << "BREAK"; break;
case cf_loop_continue: os << "CONTINUE"; break;
case cf_wait_ack: os << "WAIT_ACK"; break;
default:
unreachable("Unknown CF type");
}
}
Instr::Pointer ControlFlowInstr::from_string(std::string type_str)
{
if (type_str == "ELSE")
return new ControlFlowInstr(cf_else);
else if (type_str == "ENDIF")
return new ControlFlowInstr(cf_endif);
else if (type_str == "LOOP_BEGIN")
return new ControlFlowInstr(cf_loop_begin);
else if (type_str == "LOOP_END")
return new ControlFlowInstr(cf_loop_end);
else if (type_str == "BREAK")
return new ControlFlowInstr(cf_loop_break);
else if (type_str == "CONTINUE")
return new ControlFlowInstr(cf_loop_continue);
else if (type_str == "WAIT_ACK")
return new ControlFlowInstr(cf_wait_ack);
else
return nullptr;
}
int ControlFlowInstr::nesting_corr() const
{
switch (m_type) {
case cf_else:
case cf_endif:
case cf_loop_end: return -1;
default:
return 0;
}
}
int ControlFlowInstr::nesting_offset() const
{
switch (m_type) {
case cf_endif:
case cf_loop_end: return -1;
case cf_loop_begin: return 1;
default:
return 0;
}
}
IfInstr::IfInstr(AluInstr *pred):
m_predicate(pred)
{
assert(pred);
}
IfInstr::IfInstr(const IfInstr& orig)
{
m_predicate = new AluInstr(*orig.m_predicate);
}
bool IfInstr::is_equal_to(const IfInstr& rhs) const
{
return m_predicate->equal_to(*rhs.m_predicate);
}
void IfInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void IfInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
bool IfInstr::replace_source(PRegister old_src, PVirtualValue new_src)
{
return m_predicate->replace_source(old_src, new_src);
}
bool IfInstr::do_ready() const
{
return m_predicate->ready();
}
void IfInstr::forward_set_scheduled()
{
m_predicate->set_scheduled();
}
void IfInstr::forward_set_blockid(int id, int index)
{
m_predicate->set_blockid(id, index);
}
void IfInstr::do_print(std::ostream& os) const
{
os << "IF (( " << *m_predicate << " ))";
}
void IfInstr::set_predicate(AluInstr *new_predicate)
{
m_predicate = new_predicate;
m_predicate->set_blockid(block_id(), index());
}
Instr::Pointer IfInstr::from_string(std::istream &is, ValueFactory& value_factory)
{
std::string pred_start;
is >> pred_start;
if (pred_start != "((")
return nullptr;
char buf[2048];
is.get(buf, 2048, ')');
std::string pred_end;
is >> pred_end;
if (pred_end != "))") {
return nullptr;
}
std::istringstream bufstr(buf);
std::string instr_type;
bufstr >> instr_type;
if (instr_type != "ALU")
return nullptr;
auto pred = AluInstr::from_string(bufstr, value_factory, nullptr);
return new IfInstr(static_cast<AluInstr*>(pred));
}
}

View File

@ -0,0 +1,81 @@
#ifndef CONTROLFLOWINSTR_H
#define CONTROLFLOWINSTR_H
#include "sfn_instr_alu.h"
namespace r600 {
class ControlFlowInstr : public Instr
{
public:
enum CFType {
cf_else,
cf_endif,
cf_loop_begin,
cf_loop_end,
cf_loop_break,
cf_loop_continue,
cf_stream_write,
cf_wait_ack
};
ControlFlowInstr(CFType type);
ControlFlowInstr(const ControlFlowInstr& orig) = default;
bool is_equal_to(const ControlFlowInstr& lhs) const;
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
CFType cf_type() const { return m_type;}
int nesting_corr() const override;
static Instr::Pointer from_string(std::string type_str);
bool end_block() const override { return true;}
int nesting_offset() const override;
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
CFType m_type;
};
class IfInstr : public Instr {
public:
IfInstr(AluInstr *pred);
IfInstr(const IfInstr& orig);
bool is_equal_to(const IfInstr& lhs) const;
void set_predicate(AluInstr *new_predicate);
AluInstr *predicate() const { return m_predicate; }
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory);
bool end_block() const override { return true;}
int nesting_offset() const override { return 1;}
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
void forward_set_blockid(int id, int index) override;
void forward_set_scheduled() override;
AluInstr *m_predicate;
};
}
#endif // CONTROLFLOWINSTR_H

View File

@ -0,0 +1,524 @@
#include "sfn_instr_export.h"
#include "sfn_valuefactory.h"
#include <sstream>
namespace r600 {
using std::string;
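/* Write a swizzle string like "xy_w" into buf, using '_' for channels that
 * are not set in the writemask; buf must provide room for at least five
 * characters. */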
static char *writemask_to_swizzle(int writemask, char *buf)
{
const char *swz = "xyzw";
for (int i = 0; i < 4; ++i) {
buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
   }
   /* null-terminate, callers stream buf as a C string */
   buf[4] = 0;
   return buf;
}
WriteOutInstr::WriteOutInstr(const RegisterVec4& value):
m_value(value)
{
m_value.add_use(this);
set_always_keep();
}
void WriteOutInstr::override_chan(int i, int chan)
{
m_value.set_value(i,
new Register(m_value[i]->sel(), chan,
m_value[i]->pin()));
}
ExportInstr::ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value):
WriteOutInstr(value),
m_type(type),
m_loc(loc),
m_is_last(false)
{
}
void ExportInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void ExportInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
bool ExportInstr::is_equal_to(const ExportInstr& lhs) const
{
return
(m_type == lhs.m_type &&
m_loc == lhs.m_loc &&
value() == lhs.value() &&
m_is_last == lhs.m_is_last);
}
ExportInstr::ExportType ExportInstr::type_from_string(const std::string& s)
{
(void)s;
return param;
}
void ExportInstr::do_print(std::ostream& os) const
{
os << "EXPORT";
if (m_is_last)
os << "_DONE";
switch (m_type) {
case param: os << " PARAM "; break;
case pos: os << " POS "; break;
case pixel: os << " PIXEL "; break;
}
os << m_loc << " ";
value().print(os);
}
bool ExportInstr::do_ready() const
{
return value().ready(block_id(), index());
}
Instr::Pointer ExportInstr::from_string(std::istream& is, ValueFactory& vf)
{
return from_string_impl(is, vf);
}
Instr::Pointer ExportInstr::last_from_string(std::istream& is, ValueFactory &vf)
{
auto result = from_string_impl(is, vf);
result->set_is_last_export(true);
return result;
}
ExportInstr::Pointer ExportInstr::from_string_impl(std::istream& is, ValueFactory &vf)
{
string typestr;
int pos;
string value_str;
is >> typestr >> pos >> value_str;
ExportInstr::ExportType type;
if (typestr == "PARAM")
type = ExportInstr::param;
else if (typestr == "POS")
type = ExportInstr::pos;
else if (typestr == "PIXEL")
type = ExportInstr::pixel;
else
unreachable("Unknown export type");
RegisterVec4 value = vf.src_vec4_from_string(value_str);
return new ExportInstr( type, pos, value);
}
WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value, PRegister addr,
int align, int align_offset, int writemask, int array_size):
WriteOutInstr(value),
m_address(addr),
m_align(align),
m_align_offset(align_offset),
m_writemask(writemask),
m_array_size(array_size - 1)
{
addr->add_use(this);
}
WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value, int loc,
int align, int align_offset,int writemask):
WriteOutInstr(value),
m_loc(loc),
m_align(align),
m_align_offset(align_offset),
m_writemask(writemask)
{
}
void WriteScratchInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void WriteScratchInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
bool WriteScratchInstr::is_equal_to(const WriteScratchInstr& lhs) const
{
if (m_address) {
if (!lhs.m_address)
return false;
if (! m_address->equal_to(*lhs.m_address))
return false;
} else if (lhs.m_address)
return false;
return m_loc == lhs.m_loc &&
m_align == lhs.m_align &&
m_align_offset == lhs.m_align_offset &&
m_writemask == lhs.m_writemask &&
m_array_size == lhs.m_array_size &&
value().sel() == lhs.value().sel();
}
bool WriteScratchInstr::do_ready() const
{
return value().ready(block_id(), index()) &&
(!m_address || m_address->ready(block_id(), index()));
}
void WriteScratchInstr::do_print(std::ostream& os) const
{
char buf[6];
os << "WRITE_SCRATCH ";
if (m_address)
      os << "@" << *m_address << "[" << m_array_size + 1 << "]";
else
os << m_loc;
os << (value()[0]->is_ssa() ? " S" : " R")
<< value().sel() << "." << writemask_to_swizzle(m_writemask, buf)
<< " " << "AL:" << m_align << " ALO:" << m_align_offset;
}
auto WriteScratchInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
{
string loc_str;
string value_str;
string align_str;
string align_offset_str;
int offset;
int array_size = 0;
PVirtualValue addr_reg = nullptr;
is >> loc_str >> value_str >> align_str >> align_offset_str;
std::istringstream loc_ss(loc_str);
auto align = int_from_string_with_prefix(align_str, "AL:");
auto align_offset = int_from_string_with_prefix(align_offset_str, "ALO:");
auto value = vf.src_vec4_from_string(value_str);
int writemask = 0;
for (int i = 0; i < 4; ++i) {
if (value[i]->chan() == i)
writemask |= 1 << i;
}
if (loc_str[0] == '@') {
string addr_str;
char c;
loc_ss >> c;
loc_ss >> c;
while (!loc_ss.eof() && c != '[') {
addr_str.append(1, c);
loc_ss >> c;
}
addr_reg = vf.src_from_string(addr_str);
assert(addr_reg && addr_reg->as_register());
loc_ss >> array_size;
loc_ss >> c;
assert(c == ']');
return new WriteScratchInstr(value, addr_reg->as_register(), align, align_offset, writemask, array_size);
} else {
loc_ss >> offset;
return new WriteScratchInstr(value, offset, align, align_offset, writemask);
}
}
StreamOutInstr::StreamOutInstr(const RegisterVec4& value, int num_components,
int array_base, int comp_mask, int out_buffer,
int stream):
WriteOutInstr(value),
m_element_size(num_components == 3 ? 3 : num_components - 1),
m_array_base(array_base),
m_writemask(comp_mask),
m_output_buffer(out_buffer),
m_stream(stream)
{
}
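/* Compute the CF opcode for this stream-out write; this relies on the
 * MEM_STREAM*_BUF* opcodes being laid out with a stride of four per
 * stream. */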
unsigned StreamOutInstr::op() const
{
int op = 0;
switch (m_output_buffer) {
case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
}
return 4 * m_stream + op;
}
bool StreamOutInstr::is_equal_to(const StreamOutInstr& oth) const
{
return value() == oth.value() &&
m_element_size == oth.m_element_size &&
m_burst_count == oth.m_burst_count &&
m_array_base == oth.m_array_base &&
m_array_size == oth.m_array_size &&
m_writemask == oth.m_writemask &&
m_output_buffer == oth.m_output_buffer &&
m_stream == oth.m_stream;
}
void StreamOutInstr::do_print(std::ostream& os) const
{
os << "WRITE STREAM(" << m_stream << ") " << value()
<< " ES:" << m_element_size
<< " BC:" << m_burst_count
<< " BUF:" << m_output_buffer
<< " ARRAY:" << m_array_base;
if (m_array_size != 0xfff)
os << "+" << m_array_size;
}
bool StreamOutInstr::do_ready() const
{
return value().ready(block_id(), index());
}
void StreamOutInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void StreamOutInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
MemRingOutInstr::MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
const RegisterVec4& value,
unsigned base_addr, unsigned ncomp,
PRegister index):
WriteOutInstr(value),
m_ring_op(ring),
m_type(type),
m_base_address(base_addr),
m_num_comp(ncomp),
m_export_index(index)
{
assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1||
m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3);
assert(m_num_comp <= 4);
if (m_export_index)
m_export_index->add_use(this);
}
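/* Encode the component count as expected by the ring write instruction:
 * 1 -> 0, 2 -> 1, 3 and 4 -> 3. */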
unsigned MemRingOutInstr::ncomp() const
{
switch (m_num_comp) {
case 1: return 0;
case 2: return 1;
case 3:
case 4: return 3;
default:
assert(0);
}
return 3;
}
bool MemRingOutInstr::is_equal_to(const MemRingOutInstr& oth) const
{
bool equal = value() == oth.value() &&
m_ring_op == oth.m_ring_op &&
m_type == oth.m_type &&
m_num_comp == oth.m_num_comp &&
m_base_address == oth.m_base_address;
if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
equal &= (*m_export_index == *oth.m_export_index);
return equal;
}
static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
void MemRingOutInstr::do_print(std::ostream& os) const
{
os << "MEM_RING " << (m_ring_op == cf_mem_ring ? 0 : m_ring_op - cf_mem_ring1 + 1);
os << " " << write_type_str[m_type] << " " << m_base_address;
os << " " << value();
if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
os << " @" << *m_export_index;
os << " ES:" << m_num_comp;
}
void MemRingOutInstr::patch_ring(int stream, PRegister index)
{
const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
assert(stream < 4);
m_ring_op = ring_op[stream];
m_export_index = index;
}
bool MemRingOutInstr::do_ready() const
{
if (m_export_index && !m_export_index->ready(block_id(), index()))
return false;
return value().ready(block_id(), index());
}
void MemRingOutInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void MemRingOutInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
static const std::map<string, MemRingOutInstr::EMemWriteType> type_lookop =
{
{"WRITE", MemRingOutInstr::mem_write},
{"WRITE_IDX", MemRingOutInstr::mem_write_ind},
{"WRITE_ACK", MemRingOutInstr::mem_write_ack},
{"WRITE_IDX_ACK", MemRingOutInstr::mem_write_ind_ack}
};
auto MemRingOutInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
{
string type_str;
int ring;
int base_address;
string value_str;
is >> ring >> type_str >> base_address >> value_str;
assert(ring < 4);
auto itype = type_lookop.find(type_str);
assert(itype != type_lookop.end());
auto type = itype->second;
PVirtualValue index{nullptr};
if (type == mem_write_ind || type == mem_write_ind_ack) {
char c;
string index_str;
is >> c >> index_str;
assert('@' == c );
index = vf.src_from_string(index_str);
}
string elm_size_str;
is >> elm_size_str;
int num_comp = int_from_string_with_prefix(elm_size_str, "ES:");
auto value = vf.src_vec4_from_string(value_str);
ECFOpCode opcodes[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
assert(ring < 4);
   return new MemRingOutInstr(opcodes[ring], type, value, base_address, num_comp,
                              index ? index->as_register() : nullptr);
}
EmitVertexInstr::EmitVertexInstr(int stream, bool cut):
m_stream(stream),
m_cut(cut)
{
}
bool EmitVertexInstr::is_equal_to(const EmitVertexInstr& oth) const
{
return oth.m_stream == m_stream &&
oth.m_cut == m_cut;
}
void EmitVertexInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void EmitVertexInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
bool EmitVertexInstr::do_ready() const
{
return true;
}
void EmitVertexInstr::do_print(std::ostream& os) const
{
os << (m_cut ? "EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream;
}
auto EmitVertexInstr::from_string(std::istream& is, bool cut) -> Pointer
{
char c;
is >> c;
assert(c == '@');
int stream;
is >> stream;
return new EmitVertexInstr(stream, cut);
}
void WriteTFInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void WriteTFInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
bool WriteTFInstr::is_equal_to(const WriteTFInstr& rhs) const
{
return value() == rhs.value();
}
auto WriteTFInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
{
string value_str;
is >> value_str;
auto value = vf.src_vec4_from_string(value_str);
return new WriteTFInstr(value);
}
bool WriteTFInstr::do_ready() const
{
return value().ready(block_id(), index());
}
void WriteTFInstr::do_print(std::ostream& os) const
{
os << "WRITE_TF " << value();
}
}

View File

@ -0,0 +1,213 @@
#ifndef INSTR_EXPORT_H
#define INSTR_EXPORT_H
#include "sfn_instr.h"
namespace r600 {
class ValueFactory;
class WriteOutInstr: public Instr {
public:
WriteOutInstr(const RegisterVec4& value);
WriteOutInstr(const WriteOutInstr& orig) = delete;
void override_chan(int i, int chan);
const RegisterVec4& value() const {return m_value;};
RegisterVec4& value() {return m_value;};
private:
RegisterVec4 m_value;
};
class ExportInstr: public WriteOutInstr {
public:
enum ExportType {
pixel,
pos,
param
};
using Pointer = R600_POINTER_TYPE(ExportInstr);
ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value);
ExportInstr(const ExportInstr& orig) = delete;
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
bool is_equal_to(const ExportInstr& lhs) const;
static ExportType type_from_string(const std::string& s);
ExportType export_type() const {return m_type;}
unsigned location() const {return m_loc;}
void set_is_last_export(bool value) {m_is_last = value;}
bool is_last_export() const {return m_is_last;}
static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
static Instr::Pointer last_from_string(std::istream& is, ValueFactory &vf);
private:
static ExportInstr::Pointer from_string_impl(std::istream& is, ValueFactory &vf);
bool do_ready() const override;
void do_print(std::ostream& os) const override;
ExportType m_type;
unsigned m_loc;
bool m_is_last;
};
class WriteScratchInstr : public WriteOutInstr {
public:
WriteScratchInstr(const RegisterVec4& value, PRegister addr,
int align, int align_offset, int writemask, int array_size);
WriteScratchInstr(const RegisterVec4& value, int addr, int align, int align_offset,
int writemask);
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
bool is_equal_to(const WriteScratchInstr& lhs) const;
unsigned location() const { return m_loc;};
int write_mask() const { return m_writemask;}
auto address() const { return m_address;}
bool indirect() const { return !!m_address;}
int array_size() const { return m_array_size;}
static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
unsigned m_loc{0};
PRegister m_address {nullptr};
unsigned m_align;
unsigned m_align_offset;
unsigned m_writemask;
int m_array_size{0};
};
class StreamOutInstr: public WriteOutInstr {
public:
StreamOutInstr(const RegisterVec4& value, int num_components,
int array_base, int comp_mask, int out_buffer,
int stream);
int element_size() const { return m_element_size;}
int burst_count() const { return m_burst_count;}
int array_base() const { return m_array_base;}
int array_size() const { return m_array_size;}
int comp_mask() const { return m_writemask;}
unsigned op() const;
bool is_equal_to(const StreamOutInstr& lhs) const;
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
int m_element_size{0};
int m_burst_count{1};
int m_array_base{0};
int m_array_size{0xfff};
int m_writemask{0};
int m_output_buffer{0};
int m_stream{0};
};
class MemRingOutInstr: public WriteOutInstr {
public:
enum EMemWriteType {
mem_write = 0,
mem_write_ind = 1,
mem_write_ack = 2,
mem_write_ind_ack = 3,
};
MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
const RegisterVec4& value, unsigned base_addr,
unsigned ncomp, PRegister m_index);
unsigned op() const{return m_ring_op;}
unsigned ncomp() const;
unsigned addr() const {return m_base_address;}
EMemWriteType type() const {return m_type;}
unsigned index_reg() const {assert(m_export_index->sel() >= 0); return m_export_index->sel();}
unsigned array_base() const {return m_base_address; }
PVirtualValue export_index() const {return m_export_index;}
void patch_ring(int stream, PRegister index);
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
bool is_equal_to(const MemRingOutInstr& lhs) const;
static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
ECFOpCode m_ring_op;
EMemWriteType m_type;
unsigned m_base_address;
unsigned m_num_comp;
PRegister m_export_index;
};
class EmitVertexInstr : public Instr {
public:
EmitVertexInstr(int stream, bool cut);
ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
int stream() const { return m_stream;}
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
bool is_equal_to(const EmitVertexInstr& lhs) const;
static auto from_string(std::istream& is, bool cut) -> Pointer;
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
int m_stream;
bool m_cut;
};
class WriteTFInstr : public WriteOutInstr {
public:
using WriteOutInstr::WriteOutInstr;
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
bool is_equal_to(const WriteTFInstr& rhs) const;
static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
};
}
#endif // INSTR_EXPORT_H

View File

@ -0,0 +1,659 @@
#include "sfn_instr_fetch.h"
#include "sfn_valuefactory.h"
#include "sfn_defines.h"
#include <sstream>
namespace r600 {
using std::string;
using std::istringstream;
FetchInstr::FetchInstr(EVFetchInstr opcode,
const RegisterVec4& dst,
const RegisterVec4::Swizzle& dest_swizzle,
PRegister src,
uint32_t src_offset,
EVFetchType fetch_type,
EVTXDataFormat data_format,
EVFetchNumFormat num_format,
EVFetchEndianSwap endian_swap,
uint32_t resource_id,
PRegister resource_offset):
InstrWithVectorResult(dst, dest_swizzle),
m_opcode(opcode),
m_src(src),
m_src_offset(src_offset),
m_fetch_type(fetch_type),
m_data_format(data_format),
m_num_format(num_format),
m_endian_swap(endian_swap),
m_resource_id(resource_id),
m_resource_offset(resource_offset),
m_mega_fetch_count(0),
m_array_base(0),
m_array_size(0),
m_elm_size(0)
{
switch (m_opcode) {
case vc_fetch :
m_opname ="VFETCH";
break;
case vc_semantic :
m_opname = "FETCH_SEMANTIC";
break;
case vc_get_buf_resinfo :
set_print_skip(mfc);
set_print_skip(fmt);
set_print_skip(ftype);
m_opname = "GET_BUF_RESINFO";
break;
case vc_read_scratch :
m_opname = "READ_SCRATCH";
break;
default:
      unreachable("Unknown fetch instruction");
}
if (m_src)
m_src->add_use(this);
if (m_resource_offset && m_resource_offset->as_register())
m_resource_offset->as_register()->add_use(this);
}
void FetchInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void FetchInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
bool FetchInstr::is_equal_to(const FetchInstr& rhs) const
{
if (m_src) {
if (rhs.m_src) {
if (!m_src->equal_to(*rhs.m_src))
return false;
} else
return false;
} else if (rhs.m_src)
return false;
if (!comp_dest(rhs.dst(), rhs.all_dest_swizzle()))
return false;
if (m_tex_flags != rhs.m_tex_flags)
return false;
if (m_resource_offset && rhs.m_resource_offset) {
if (!m_resource_offset->equal_to(*rhs.m_resource_offset))
return false;
} else if (!(!!m_resource_offset == !!rhs.m_resource_offset))
return false;
return m_opcode == rhs.m_opcode &&
m_src_offset == rhs.m_src_offset &&
m_fetch_type == rhs.m_fetch_type &&
m_data_format == rhs.m_data_format &&
m_num_format == rhs.m_num_format &&
m_endian_swap == rhs.m_endian_swap &&
m_resource_id == rhs.m_resource_id &&
m_mega_fetch_count == rhs.m_mega_fetch_count &&
m_array_base == rhs.m_array_base &&
m_array_size == rhs.m_array_size &&
m_elm_size == rhs.m_elm_size;
}
bool FetchInstr::propagate_death()
{
auto reg = m_src->as_register();
if (reg)
reg->del_use(this);
return true;
}
bool FetchInstr::replace_source(PRegister old_src, PVirtualValue new_src)
{
bool success = false;
auto new_reg = new_src->as_register();
if (new_reg) {
if (old_src->equal_to(*m_src)) {
m_src->del_use(this);
m_src = new_reg;
new_reg->add_use(this);
success = true;
}
if (m_resource_offset && old_src->equal_to(*m_resource_offset)) {
m_resource_offset->del_use(this);
m_resource_offset = new_reg;
new_reg->add_use(this);
success = true;
}
}
return success;
}
bool FetchInstr::do_ready() const
{
for (auto i: required_instr()) {
if (!i->is_scheduled())
return false;
}
bool result = m_src && m_src->ready(block_id(), index());
if (m_resource_offset) {
auto r = m_resource_offset->as_register();
if (r)
result &= r->ready(block_id(), index());
}
return result;
}
void FetchInstr::do_print(std::ostream& os) const
{
os << m_opname << ' ';
print_dest(os);
os << " :";
if (m_opcode != vc_get_buf_resinfo) {
if (m_src && m_src->chan() < 7) {
os << " " << *m_src;
if (m_src_offset)
os << " + " << m_src_offset << "b";
}
}
if (m_opcode != vc_read_scratch)
os << " RID:" << m_resource_id;
if (m_resource_offset) {
os << " + ";
m_resource_offset->print(os);
}
if (!m_skip_print.test(ftype)) {
switch (m_fetch_type) {
case vertex_data : os << " VERTEX"; break;
case instance_data : os << " INSTANCE_DATA"; break;
case no_index_offset : os << " NO_IDX_OFFSET"; break;
default:
         unreachable("Unknown fetch instruction type");
}
}
if (!m_skip_print.test(fmt)) {
os << " FMT(";
auto fmt = s_data_format_map.find(m_data_format);
if (fmt != s_data_format_map.end())
os << fmt->second << ",";
else
         unreachable("unknown data format");
if (m_tex_flags.test(format_comp_signed))
os << "S";
else
os << "U";
switch (m_num_format) {
case vtx_nf_norm : os << "NORM"; break;
case vtx_nf_int : os << "INT"; break;
case vtx_nf_scaled: os << "SCALED"; break;
default:
         unreachable("Unknown number format");
}
os << ")";
}
if (m_array_base) {
if (m_opcode != vc_read_scratch)
os << " BASE:" << m_array_base;
else
os << " L[0x" << std::uppercase << std::hex << m_array_base << std::dec << "]";
}
if (m_array_size)
os << " SIZE:" << m_array_size + 1;
if (m_tex_flags.test(is_mega_fetch) && !m_skip_print.test(mfc))
os << " MFC:" << m_mega_fetch_count;
if (m_elm_size)
os << " ES:" << m_elm_size;
if (m_tex_flags.test(fetch_whole_quad)) os << " WQ";
if (m_tex_flags.test(use_const_field)) os << " UCF";
if (m_tex_flags.test(srf_mode)) os << " SRF";
if (m_tex_flags.test(buf_no_stride)) os << " BNS";
if (m_tex_flags.test(alt_const)) os << " AC";
if (m_tex_flags.test(use_tc)) os << " TC";
if (m_tex_flags.test(vpm)) os << " VPM";
if (m_tex_flags.test(uncached) && m_opcode != vc_read_scratch) os << " UNCACHED";
if (m_tex_flags.test(indexed) && m_opcode != vc_read_scratch) os << " INDEXED";
}
Instr::Pointer FetchInstr::from_string(std::istream& is, ValueFactory& vf)
{
return from_string_impl(is, vc_fetch, vf);
}
Instr::Pointer FetchInstr::from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory& vf)
{
std::string deststr;
is >> deststr;
RegisterVec4::Swizzle dst_swz;
auto dest_reg = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
char help;
is >> help;
assert(help == ':');
string srcstr;
is >> srcstr;
std::cerr << "Get source " << srcstr << "\n";
auto src_reg = vf.src_from_string(srcstr)->as_register();
assert(src_reg);
string res_id_str;
string next;
is >> next;
int src_offset_val = 0;
if (next == "+") {
is >> src_offset_val;
is >> help;
assert(help == 'b');
is >> res_id_str;
} else {
res_id_str = next;
}
int res_id = int_from_string_with_prefix(res_id_str, "RID:");
string fetch_type_str;
is >> fetch_type_str;
EVFetchType fetch_type = vertex_data;
if (fetch_type_str == "VERTEX") {
fetch_type = vertex_data;
} else {
      assert(0 && "Fetch type not yet implemented");
}
string format_str;
is >> format_str;
assert(!strncmp(format_str.c_str(), "FMT(", 4));
string data_format;
string num_format_str;
istringstream fmt_stream(format_str.substr(4));
   bool is_num_fmt = false;
   assert(!fmt_stream.eof());
   do {
      char c;
      fmt_stream >> c;
      if (c == ',') {
         is_num_fmt = true;
         continue;
      }
      if (!is_num_fmt)
data_format.append(1, c);
else
num_format_str.append(1, c);
} while (!fmt_stream.eof());
EVTXDataFormat fmt = fmt_invalid;
for (auto& [f, name] : s_data_format_map) {
if (data_format == name) {
fmt = f;
break;
}
}
assert(fmt != fmt_invalid);
bool fmt_signed = num_format_str[0] == 'S';
assert(fmt_signed || num_format_str[0] == 'U');
size_t num_format_end = num_format_str.find(')');
num_format_str = num_format_str.substr(1, num_format_end - 1) ;
EVFetchNumFormat num_fmt;
if (num_format_str == "NORM")
num_fmt = vtx_nf_norm;
else if (num_format_str == "INT")
num_fmt = vtx_nf_int;
else if (num_format_str == "SCALED")
num_fmt = vtx_nf_scaled;
else {
std::cerr << "Number format: '" << num_format_str << "' : ";
unreachable("Unknown number format");
}
auto fetch = new FetchInstr(opcode, dest_reg, dst_swz,
src_reg, src_offset_val, fetch_type, fmt, num_fmt,
vtx_es_none, res_id, nullptr);
if (fmt_signed)
fetch->set_fetch_flag(format_comp_signed);
while (!is.eof() && is.good()) {
std::string next_token;
is >> next_token;
if (next_token.empty())
break;
if (next_token.find(':') != string::npos) {
fetch->set_param_from_string(next_token);
} else {
fetch->set_flag_from_string(next_token);
}
}
return fetch;
}
void FetchInstr::set_param_from_string(const std::string& token)
{
if (token.substr(0,4) == "MFC:")
set_mfc(int_from_string_with_prefix(token, "MFC:"));
else if (token.substr(0,5) == "ARRB:")
set_array_base(int_from_string_with_prefix(token, "ARRB:"));
else if (token.substr(0,5) == "ARRS:")
set_array_size(int_from_string_with_prefix(token, "ARRS:"));
else if (token.substr(0,3) == "ES:")
set_element_size(int_from_string_with_prefix(token, "ES:"));
else {
std::cerr << "Token '" << token << "': ";
unreachable("Unknown token in fetch param list");
}
}
void FetchInstr::set_flag_from_string(const std::string& token)
{
   auto flag = s_flag_map.find(token);
if (flag != s_flag_map.end())
set_fetch_flag(flag->second);
else {
std::cerr << "Token: " << token << " : ";
unreachable("Unknown token in fetch flag list");
}
}
const std::map<std::string, FetchInstr::EFlags> FetchInstr::s_flag_map = {
{"WQ", fetch_whole_quad},
{"UCF", use_const_field},
{"SRF", srf_mode},
{"BNS", buf_no_stride},
{"AC", alt_const},
{"TC", use_tc},
{"VPM", vpm},
{"UNCACHED", uncached},
{"INDEXED", indexed}
};
const std::map<EVTXDataFormat, const char *> FetchInstr::s_data_format_map = {
{fmt_invalid, "INVALID"},
{fmt_8, "8"},
{fmt_4_4, "4_4"},
{fmt_3_3_2, "3_3_2"},
{fmt_reserved_4, "RESERVED_4"},
{fmt_16, "16"},
{fmt_16_float, "16F"},
{fmt_8_8, "8_8"},
{fmt_5_6_5, "5_6_5"},
{fmt_6_5_5, "6_5_5"},
{fmt_1_5_5_5, "1_5_5_5"},
{fmt_4_4_4_4, "4_4_4_4"},
{fmt_5_5_5_1, "5_5_5_1"},
{fmt_32, "32"},
{fmt_32_float, "32F"},
{fmt_16_16, "16_16"},
{fmt_16_16_float, "16_16F"},
{fmt_8_24, "8_24"},
{fmt_8_24_float, "8_24F"},
{fmt_24_8, "24_8"},
{fmt_24_8_float, "24_8F"},
{fmt_10_11_11, "10_11_11"},
{fmt_10_11_11_float, "10_11_11F"},
{fmt_11_11_10, "11_11_10"},
   {fmt_11_11_10_float, "11_11_10F"},
{fmt_2_10_10_10, "2_10_10_10"},
{fmt_8_8_8_8, "8_8_8_8"},
{fmt_10_10_10_2, "10_10_10_2"},
{fmt_x24_8_32_float, "X24_8_32F"},
{fmt_32_32, "32_32"},
{fmt_32_32_float, "32_32F"},
{fmt_16_16_16_16, "16_16_16_16"},
{fmt_16_16_16_16_float, "16_16_16_16F"},
{fmt_reserved_33, "RESERVED_33"},
{fmt_32_32_32_32, "32_32_32_32"},
{fmt_32_32_32_32_float, "32_32_32_32F"},
{fmt_reserved_36, "RESERVED_36"},
{fmt_1, "1"},
{fmt_1_reversed, "1_REVERSED"},
{fmt_gb_gr, "GB_GR"},
{fmt_bg_rg, "BG_RG"},
{fmt_32_as_8, "32_AS_8"},
{fmt_32_as_8_8, "32_AS_8_8"},
{fmt_5_9_9_9_sharedexp, "5_9_9_9_SHAREDEXP"},
{fmt_8_8_8, "8_8_8"},
{fmt_16_16_16, "16_16_16"},
{fmt_16_16_16_float, "16_16_16F"},
{fmt_32_32_32, "32_32_32"},
{fmt_32_32_32_float, "32_32_32F"},
{fmt_bc1, "BC1"},
{fmt_bc2, "BC2"},
{fmt_bc3, "BC3"},
{fmt_bc4, "BC4"},
{fmt_bc5, "BC5"},
{fmt_apc0, "APC0"},
{fmt_apc1, "APC1"},
{fmt_apc2, "APC2"},
{fmt_apc3, "APC3"},
{fmt_apc4, "APC4"},
{fmt_apc5, "APC5"},
{fmt_apc6, "APC6"},
{fmt_apc7, "APC7"},
{fmt_ctx1, "CTX1"},
{fmt_reserved_63, "RESERVED_63"}
};
QueryBufferSizeInstr::QueryBufferSizeInstr(const RegisterVec4& dst,
const RegisterVec4::Swizzle& dst_swz,
uint32_t resid):
FetchInstr(vc_get_buf_resinfo,
dst, dst_swz,
new Register( 0, 7, pin_fully),
0,
no_index_offset,
fmt_32_32_32_32,
vtx_nf_norm,
vtx_es_none,
resid,
nullptr)
{
set_fetch_flag(format_comp_signed);
set_print_skip(mfc);
set_print_skip(fmt);
set_print_skip(ftype);
}
Instr::Pointer QueryBufferSizeInstr::from_string(std::istream& is, ValueFactory& vf)
{
std::string deststr, res_id_str;
is >> deststr;
char help;
is >> help;
assert(help == ':');
is >> res_id_str;
RegisterVec4::Swizzle dst_swz;
auto dst = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
int res_id = int_from_string_with_prefix(res_id_str, "RID:");
return new QueryBufferSizeInstr( dst, dst_swz, res_id);
}
LoadFromBuffer::LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& dst_swizzle,
PRegister addr, uint32_t addr_offset,
uint32_t resid, PRegister res_offset, EVTXDataFormat data_format):
FetchInstr(vc_fetch, dst, dst_swizzle, addr, addr_offset, no_index_offset,
data_format, vtx_nf_scaled, vtx_es_none, resid, res_offset)
{
set_fetch_flag(format_comp_signed);
set_mfc(16);
override_opname("LOAD_BUF");
set_print_skip(mfc);
set_print_skip(fmt);
set_print_skip(ftype);
}
Instr::Pointer LoadFromBuffer::from_string(std::istream& is, ValueFactory& vf)
{
std::string deststr;
is >> deststr;
RegisterVec4::Swizzle dst_swz;
auto dst = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
char help;
is >> help;
assert(help == ':');
string addrstr;
is >> addrstr;
auto addr_reg = vf.src_from_string(addrstr)->as_register();
string res_id_str;
string next;
is >> next;
int addr_offset_val = 0;
if (next == "+") {
is >> addr_offset_val;
is >> help;
assert(help == 'b');
is >> res_id_str;
} else {
res_id_str = next;
}
int res_id = int_from_string_with_prefix(res_id_str, "RID:");
next.clear();
is >> next;
PRegister res_offset = nullptr;
if (next == "+") {
string res_offset_str;
is >> res_offset_str;
res_offset = vf.src_from_string(res_offset_str)->as_register();
}
auto fetch = new LoadFromBuffer( dst, dst_swz,
addr_reg, addr_offset_val,
res_id, res_offset, fmt_32_32_32_32_float);
is >> next;
if (next == "SRF")
fetch->set_fetch_flag(srf_mode);
return fetch;
}
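/* Resolve the address of a scratch read: a register address turns the fetch
 * into an indexed access, a literal constant becomes the array base with a
 * dummy source register. */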
class AddrResolver: public RegisterVisitor {
public:
AddrResolver(LoadFromScratch *lfs) : m_lfs(lfs) {}
void visit(Register& value) {
m_lfs->set_fetch_flag(FetchInstr::indexed);
m_lfs->set_src(&value);
value.add_use(m_lfs);
}
void visit(LocalArray& value) {assert(0);(void)value;}
void visit(LocalArrayValue& value) {assert(0);(void)value;}
void visit(UniformValue& value) {assert(0);(void)value;}
void visit(LiteralConstant& value) {
m_lfs->set_array_base(value.value());
m_lfs->set_src(new Register( 0, 7, pin_none));
}
void visit(InlineConstant& value) {assert(0);(void)value;}
LoadFromScratch *m_lfs;
};
LoadFromScratch::LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& dst_swz, PVirtualValue addr, uint32_t scratch_size):
FetchInstr(vc_read_scratch,
dst, dst_swz,
nullptr,
0,
no_index_offset,
fmt_32_32_32_32,
vtx_nf_int,
vtx_es_none,
0,
nullptr)
{
set_fetch_flag(uncached);
set_fetch_flag(wait_ack);
assert(scratch_size >= 1);
set_array_size(scratch_size - 1);
set_array_base(0);
AddrResolver ar(this);
addr->accept(ar);
set_print_skip(mfc);
set_print_skip(fmt);
set_print_skip(ftype);
set_element_size(3);
}
Instr::Pointer LoadFromScratch::from_string(std::istream& is, ValueFactory &vf)
{
std::string deststr;
is >> deststr;
RegisterVec4::Swizzle dst_swz;
auto dest = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
char help;
is >> help;
assert(help == ':');
string addrstr;
is >> addrstr;
auto addr_reg = vf.src_from_string(addrstr);
string offsetstr;
is >> offsetstr;
int size = int_from_string_with_prefix(offsetstr, "SIZE:");
assert(size >= 1);
return new LoadFromScratch( dest, dst_swz, addr_reg, size);
}
}

View File

@ -0,0 +1,152 @@
#ifndef INSTR_FETCH_H
#define INSTR_FETCH_H
#include "sfn_instr.h"
namespace r600 {
class ValueFactory;
class FetchInstr : public InstrWithVectorResult {
public:
enum EFlags {
fetch_whole_quad,
use_const_field,
format_comp_signed,
srf_mode,
buf_no_stride,
alt_const,
use_tc,
vpm,
is_mega_fetch,
uncached,
indexed,
wait_ack,
unknown
};
enum EPrintSkip {
fmt,
ftype,
mfc,
count
};
FetchInstr(EVFetchInstr opcode,
const RegisterVec4& dst,
const RegisterVec4::Swizzle& dest_swizzle,
PRegister src,
uint32_t src_offset,
EVFetchType fetch_type,
EVTXDataFormat data_format,
EVFetchNumFormat num_format,
EVFetchEndianSwap endian_swap,
uint32_t resource_id,
PRegister resource_offset);
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
void set_src(PRegister src) { m_src = src; }
const auto& src() const {assert(m_src); return *m_src;}
uint32_t src_offset() const {return m_src_offset;}
uint32_t resource_id() const {return m_resource_id;}
auto resource_offset() const {return m_resource_offset;}
EVFetchType fetch_type() const {return m_fetch_type;}
EVTXDataFormat data_format() const {return m_data_format;}
void set_num_format(EVFetchNumFormat nf) {m_num_format = nf;}
EVFetchNumFormat num_format() const {return m_num_format;}
EVFetchEndianSwap endian_swap() const {return m_endian_swap;}
uint32_t mega_fetch_count() const {return m_mega_fetch_count;}
uint32_t array_base() const {return m_array_base;}
uint32_t array_size() const {return m_array_size;}
uint32_t elm_size() const {return m_elm_size;}
void reset_fetch_flag(EFlags flag) {m_tex_flags.reset(flag);}
void set_fetch_flag(EFlags flag) {m_tex_flags.set(flag);}
bool has_fetch_flag(EFlags flag) const { return m_tex_flags.test(flag);}
EVFetchInstr opcode() const {return m_opcode;}
bool is_equal_to(const FetchInstr& rhs) const;
static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
void set_mfc(int mfc) {m_tex_flags.set(is_mega_fetch); m_mega_fetch_count = mfc;}
void set_array_base(int arrb) {m_array_base = arrb;}
void set_array_size(int arrs) {m_array_size = arrs;}
void set_element_size(int size) { m_elm_size = size;}
void set_print_skip(EPrintSkip skip) {m_skip_print.set(skip);}
uint32_t slots() const override {return 1;};
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
protected:
static Instr::Pointer from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory &vf);
void override_opname(const char *opname) { m_opname = opname;}
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
void set_param_from_string(const std::string& next_token);
void set_flag_from_string(const std::string& next_token);
static const std::map<EVTXDataFormat, const char *> s_data_format_map;
   static const std::map<std::string, EFlags> s_flag_map;
bool propagate_death() override;
EVFetchInstr m_opcode;
PRegister m_src;
uint32_t m_src_offset;
EVFetchType m_fetch_type;
EVTXDataFormat m_data_format;
EVFetchNumFormat m_num_format;
EVFetchEndianSwap m_endian_swap;
uint32_t m_resource_id;
PRegister m_resource_offset;
std::bitset<EFlags::unknown> m_tex_flags;
std::bitset<EPrintSkip::count> m_skip_print;
uint32_t m_mega_fetch_count;
uint32_t m_array_base;
uint32_t m_array_size;
uint32_t m_elm_size;
std::string m_opname;
};
class QueryBufferSizeInstr : public FetchInstr {
public:
QueryBufferSizeInstr(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, uint32_t resid);
static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
};
class LoadFromBuffer : public FetchInstr {
public:
LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle,
PRegister addr, uint32_t addr_offset,
uint32_t resid, PRegister res_offset, EVTXDataFormat data_format);
static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
};
class LoadFromScratch : public FetchInstr {
public:
LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, PVirtualValue addr, uint32_t offset);
static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
};
}
#endif // INSTR_FETCH_H

View File

@ -0,0 +1,411 @@
#include "sfn_instr_lds.h"
#include "sfn_instr_alu.h"
#include "sfn_debug.h"
namespace r600 {
using std::istream;
LDSReadInstr::LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
AluInstr::SrcValues& address):
m_address(address),
m_dest_value(value)
{
assert(m_address.size() == m_dest_value.size());
for (auto& v: value)
v->add_parent(this);
for (auto& s: m_address)
if (s->as_register())
s->as_register()->add_use(this);
}
void LDSReadInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void LDSReadInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
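/* Drop all address/destination pairs whose destination register has no
 * uses; returns true if at least one component was removed. */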
bool LDSReadInstr::remove_unused_components()
{
uint8_t inactive_mask = 0;
for (size_t i = 0; i < m_dest_value.size(); ++i) {
if (m_dest_value[i]->uses().empty())
inactive_mask |= 1 << i;
}
if (!inactive_mask)
return false;
auto new_addr = AluInstr::SrcValues();
auto new_dest = std::vector<PRegister, Allocator<PRegister>>();
for (size_t i = 0; i < m_dest_value.size(); ++i) {
if ((1 << i) & inactive_mask) {
if (m_address[i]->as_register())
m_address[i]->as_register()->del_use(this);
m_dest_value[i]->del_parent(this);
} else {
new_dest.push_back(m_dest_value[i]);
new_addr.push_back(m_address[i]);
}
}
m_dest_value.swap(new_dest);
m_address.swap(new_addr);
return m_address.size() != new_addr.size();
}
class SetLDSAddrProperty : public AluInstrVisitor {
using AluInstrVisitor::visit;
void visit(AluInstr *instr) override {
instr->set_alu_flag(alu_lds_address);
}
};
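/* Lower the grouped LDS read into plain ALU instructions: one LDS read
 * request per address followed by one MOV per destination that pops the
 * result from the LDS return queue. The instructions are chained by
 * required_instr dependencies so the group is kept together. */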
AluInstr *LDSReadInstr::split(std::vector<AluInstr*>& out_block, AluInstr *last_lds_instr)
{
AluInstr* first_instr = nullptr;
SetLDSAddrProperty prop;
for (auto& addr: m_address) {
auto reg = addr->as_register();
if (reg) {
reg->del_use(this);
if (reg->parents().size() == 1) {
for (auto& p: reg->parents()) {
p->accept(prop);
}
}
}
auto instr = new AluInstr(DS_OP_READ_RET, nullptr, nullptr, addr);
instr->set_blockid(block_id(), index());
if (last_lds_instr)
instr->add_required_instr(last_lds_instr);
out_block.push_back(instr);
last_lds_instr = instr;
if (!first_instr) {
first_instr = instr;
first_instr->set_alu_flag(alu_lds_group_start);
} else {
         /* To make it possible for the scheduler to keep the loads of a
          * group close together, we require that all addresses are already
          * available when the first read instruction is emitted. Otherwise
          * the loads and the reads from the LDS return queue might be split
          * across ALU CF clauses, and this is not allowed. */
first_instr->add_extra_dependency(addr);
}
}
for (auto& dest: m_dest_value) {
dest->del_parent(this);
auto instr = new AluInstr(op1_mov, dest,
new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
AluInstr::last_write);
instr->add_required_instr(last_lds_instr);
instr->set_blockid(block_id(), index());
out_block.push_back(instr);
last_lds_instr = instr;
}
if (last_lds_instr)
last_lds_instr->set_alu_flag(alu_lds_group_end);
return last_lds_instr;
}
bool LDSReadInstr::do_ready() const
{
   unreachable("This instruction is not handled by the scheduler");
return false;
}
void LDSReadInstr::do_print(std::ostream& os) const
{
os << "LDS_READ ";
os << "[ ";
for (auto d: m_dest_value) {
os << *d << " ";
}
os << "] : [ ";
for (auto a: m_address) {
os << *a << " ";
}
os << "]";
}
bool LDSReadInstr::is_equal_to(const LDSReadInstr& rhs) const
{
if (m_address.size() != rhs.m_address.size())
return false;
for (unsigned i = 0; i < num_values(); ++i) {
if (!m_address[i]->equal_to(*rhs.m_address[i]))
return false;
if (!m_dest_value[i]->equal_to(*rhs.m_dest_value[i]))
return false;
}
return true;
}
auto LDSReadInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
{
/* LDS_READ [ d1, d2, d3 ... ] : a1 a2 a3 ... */
std::string temp_str;
is >> temp_str;
assert(temp_str == "[");
std::vector<PRegister, Allocator<PRegister> > dests;
AluInstr::SrcValues srcs;
is >> temp_str;
while (temp_str != "]") {
auto dst = value_factory.dest_from_string(temp_str);
assert(dst);
dests.push_back(dst);
is >> temp_str;
}
is >> temp_str;
assert(temp_str == ":");
is >> temp_str;
assert(temp_str == "[");
is >> temp_str;
while (temp_str != "]") {
auto src = value_factory.src_from_string(temp_str);
assert(src);
srcs.push_back(src);
is >> temp_str;
};
assert(srcs.size() == dests.size() && !dests.empty());
return new LDSReadInstr(dests, srcs);
}
LDSAtomicInstr::LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address,
const SrcValues& srcs):
m_opcode(op),
m_address(address),
m_dest(dest),
m_srcs(srcs)
{
if (m_dest)
m_dest->add_parent(this);
if (m_address->as_register())
m_address->as_register()->add_use(this);
for (auto& s: m_srcs) {
if (s->as_register())
s->as_register()->add_use(this);
}
}
void LDSAtomicInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void LDSAtomicInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
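/* Lower the LDS atomic into plain ALU instructions: the LDS operation
 * itself and, if a destination is given, a MOV that pops the result from
 * the LDS return queue. */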
AluInstr *LDSAtomicInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr)
{
AluInstr::SrcValues srcs = {m_address};
for(auto& s : m_srcs)
srcs.push_back(s);
for(auto& s :srcs) {
if (s->as_register())
s->as_register()->del_use(this);
}
SetLDSAddrProperty prop;
auto reg = srcs[0]->as_register();
if (reg) {
reg->del_use(this);
if (reg->parents().size() == 1) {
for (auto& p: reg->parents()) {
p->accept(prop);
}
}
}
auto op_instr = new AluInstr(m_opcode, srcs, {});
op_instr->set_blockid(block_id(), index());
if (last_lds_instr) {
op_instr->add_required_instr(last_lds_instr);
}
out_block.push_back(op_instr);
if (m_dest) {
op_instr->set_alu_flag(alu_lds_group_start);
m_dest->del_parent(this);
auto read_instr = new AluInstr(op1_mov, m_dest,
new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
AluInstr::last_write);
read_instr->add_required_instr(op_instr);
read_instr->set_blockid(block_id(), index());
read_instr->set_alu_flag(alu_lds_group_end);
out_block.push_back(read_instr);
last_lds_instr = read_instr;
}
return last_lds_instr;
}
bool LDSAtomicInstr::replace_source(PRegister old_src, PVirtualValue new_src)
{
bool process = false;
if (new_src->as_uniform() && m_srcs.size() > 2) {
int nconst = 0;
for (auto& s : m_srcs) {
if (s->as_uniform() && !s->equal_to(*old_src))
++nconst;
}
      /* Conservative check: two kcache values can always coexist, three
       * might be a problem; we don't check this in detail for now and
       * just reject the replacement.
       */
if (nconst > 2)
return false;
}
/* If the old source is an array element, we assume that there
* might have been an (untracked) indirect access, so don't replace
* this source */
if (old_src->pin() == pin_array)
return false;
if (new_src->get_addr()) {
for (auto& s : m_srcs) {
auto addr = s->get_addr();
         /* can't have two different indirect addresses in the same instruction */
if (addr && !addr->equal_to(*new_src->get_addr()))
return false;
}
}
for (unsigned i = 0; i < m_srcs.size(); ++i) {
if (old_src->equal_to(*m_srcs[i])) {
m_srcs[i] = new_src;
process = true;
}
}
if (process) {
auto r = new_src->as_register();
if (r)
r->add_use(this);
old_src->del_use(this);
}
return process;
}
bool LDSAtomicInstr::do_ready() const
{
   unreachable("This instruction is not handled by the scheduler");
return false;
}
void LDSAtomicInstr::do_print(std::ostream& os) const
{
auto ii = lds_ops.find(m_opcode);
assert(ii != lds_ops.end());
os << "LDS " << ii->second.name << " ";
if (m_dest)
os << *m_dest;
else
os << "__.x";
os << " [ " << *m_address << " ] : " << *m_srcs[0];
if (m_srcs.size() > 1)
os << " " << *m_srcs[1];
}
bool LDSAtomicInstr::is_equal_to(const LDSAtomicInstr& rhs) const
{
if (m_srcs.size() != rhs.m_srcs.size())
return false;
for (unsigned i = 0; i < m_srcs.size(); ++i) {
if (!m_srcs[i]->equal_to(*rhs.m_srcs[i]))
return false;
}
return m_opcode == rhs.m_opcode &&
sfn_value_equal(m_address, rhs.m_address) &&
sfn_value_equal(m_dest, rhs.m_dest);
}
auto LDSAtomicInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
{
/* LDS WRITE2 __.x [ R1.x ] : R2.y R3.z */
/* LDS WRITE __.x [ R1.x ] : R2.y */
/* LDS ATOMIC_ADD_RET [ R5.y ] : R2.y */
std::string temp_str;
is >> temp_str;
ESDOp opcode = DS_OP_INVALID;
int nsrc = 0;
for (auto& [op, opinfo] : lds_ops) {
if (temp_str == opinfo.name) {
opcode = op;
nsrc = opinfo.nsrc;
break;
}
}
assert(opcode != DS_OP_INVALID);
is >> temp_str;
PRegister dest = nullptr;
if (temp_str[0] != '_')
dest = value_factory.dest_from_string(temp_str);
is >> temp_str;
assert(temp_str == "[");
is >> temp_str;
auto addr = value_factory.src_from_string(temp_str);
is >> temp_str;
assert(temp_str == "]");
is >> temp_str;
assert(temp_str == ":");
AluInstr::SrcValues srcs;
for (int i = 0; i < nsrc - 1; ++i) {
is >> temp_str;
auto src = value_factory.src_from_string(temp_str);
assert(src);
srcs.push_back(src);
}
return new LDSAtomicInstr(opcode, dest, addr, srcs);
}
}

View File

@ -0,0 +1,80 @@
#ifndef LDSINSTR_H
#define LDSINSTR_H
#include "sfn_instr_alu.h"
#include "sfn_valuefactory.h"
namespace r600 {
class LDSReadInstr : public Instr {
public:
LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
AluInstr::SrcValues& address);
unsigned num_values() const { return m_dest_value.size();}
auto address(unsigned i) const { return m_address[i];}
auto dest(unsigned i) const { return m_dest_value[i];}
auto address(unsigned i){ return m_address[i];}
auto dest(unsigned i) { return m_dest_value[i];}
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
AluInstr *split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr);
bool is_equal_to(const LDSReadInstr& lhs) const;
static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
bool remove_unused_components();
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
AluInstr::SrcValues m_address;
std::vector<PRegister, Allocator<PRegister>> m_dest_value;
};
class LDSAtomicInstr : public Instr {
public:
using SrcValues = AluInstr::SrcValues;
LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address, const SrcValues& src);
auto address() const { return m_address;}
auto dest() const { return m_dest;}
auto src0() const { return m_srcs[0];}
auto src1() const { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;}
PVirtualValue address() { return m_address;}
PRegister dest() { return m_dest;}
PVirtualValue src0() { return m_srcs[0];}
PVirtualValue src1() { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;}
unsigned op() const {return m_opcode;}
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
AluInstr *split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr);
bool is_equal_to(const LDSAtomicInstr& lhs) const;
static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
ESDOp m_opcode;
PVirtualValue m_address{nullptr};
PRegister m_dest{nullptr};
SrcValues m_srcs;
};
}
#endif // LDSINSTR_H

View File

@ -0,0 +1,844 @@
#include "sfn_instr_mem.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_tex.h"
#include "sfn_shader.h"
namespace r600 {
GDSInstr::GDSInstr(ESDOp op, Register *dest,
const RegisterVec4& src, int uav_base,
PRegister uav_id):
m_op(op),
m_dest(dest),
m_src(src),
m_uav_base(uav_base),
m_uav_id(uav_id)
{
set_always_keep();
m_src.add_use(this);
m_dest->add_parent(this);
if (m_uav_id)
m_uav_id->add_use(this);
}
bool GDSInstr::is_equal_to(const GDSInstr& rhs) const
{
#define NE(X) (X != rhs. X)
if (NE(m_op) ||
NE(m_src) ||
NE(m_uav_base))
return false;
return sfn_value_equal(m_dest, rhs.m_dest) &&
       sfn_value_equal(m_uav_id, rhs.m_uav_id);
}
void GDSInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void GDSInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
bool GDSInstr::do_ready() const
{
return m_src.ready(block_id(), index()) &&
(!m_uav_id || m_uav_id->ready(block_id(), index()));
}
void GDSInstr::do_print(std::ostream& os) const
{
os << "GDS " << lds_ops.at(m_op).name
<< *m_dest;
os << " " << m_src;
os << " BASE:" << m_uav_base;
if (m_uav_id)
os << " UAV:" << *m_uav_id;
}
bool GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
{
switch (intr->intrinsic) {
case nir_intrinsic_atomic_counter_add:
case nir_intrinsic_atomic_counter_and:
case nir_intrinsic_atomic_counter_exchange:
case nir_intrinsic_atomic_counter_max:
case nir_intrinsic_atomic_counter_min:
case nir_intrinsic_atomic_counter_or:
case nir_intrinsic_atomic_counter_xor:
case nir_intrinsic_atomic_counter_comp_swap:
return emit_atomic_op2(intr, shader);
case nir_intrinsic_atomic_counter_read:
case nir_intrinsic_atomic_counter_post_dec:
return emit_atomic_read(intr, shader);
case nir_intrinsic_atomic_counter_inc:
return emit_atomic_inc(intr, shader);
case nir_intrinsic_atomic_counter_pre_dec:
return emit_atomic_pre_dec(intr, shader);
default:
return false;
}
}
static ESDOp get_opcode(const nir_intrinsic_op opcode)
{
switch (opcode) {
case nir_intrinsic_atomic_counter_add:
return DS_OP_ADD_RET;
case nir_intrinsic_atomic_counter_and:
return DS_OP_AND_RET;
case nir_intrinsic_atomic_counter_exchange:
return DS_OP_XCHG_RET;
case nir_intrinsic_atomic_counter_inc:
return DS_OP_INC_RET;
case nir_intrinsic_atomic_counter_max:
return DS_OP_MAX_UINT_RET;
case nir_intrinsic_atomic_counter_min:
return DS_OP_MIN_UINT_RET;
case nir_intrinsic_atomic_counter_or:
return DS_OP_OR_RET;
case nir_intrinsic_atomic_counter_read:
return DS_OP_READ_RET;
case nir_intrinsic_atomic_counter_xor:
return DS_OP_XOR_RET;
case nir_intrinsic_atomic_counter_post_dec:
return DS_OP_DEC_RET;
case nir_intrinsic_atomic_counter_comp_swap:
return DS_OP_CMP_XCHG_RET;
case nir_intrinsic_atomic_counter_pre_dec:
default:
return DS_OP_INVALID;
}
}
static ESDOp get_opcode_wo(const nir_intrinsic_op opcode)
{
switch (opcode) {
case nir_intrinsic_atomic_counter_add:
return DS_OP_ADD;
case nir_intrinsic_atomic_counter_and:
return DS_OP_AND;
case nir_intrinsic_atomic_counter_inc:
return DS_OP_INC;
case nir_intrinsic_atomic_counter_max:
return DS_OP_MAX_UINT;
case nir_intrinsic_atomic_counter_min:
return DS_OP_MIN_UINT;
case nir_intrinsic_atomic_counter_or:
return DS_OP_OR;
case nir_intrinsic_atomic_counter_xor:
return DS_OP_XOR;
case nir_intrinsic_atomic_counter_post_dec:
return DS_OP_DEC;
case nir_intrinsic_atomic_counter_comp_swap:
return DS_OP_CMP_XCHG_RET;
case nir_intrinsic_atomic_counter_exchange:
return DS_OP_XCHG_RET;
case nir_intrinsic_atomic_counter_pre_dec:
default:
return DS_OP_INVALID;
}
}
bool GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
{
auto& vf = shader.value_factory();
bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
ESDOp op = read_result ? get_opcode(instr->intrinsic) :
get_opcode_wo(instr->intrinsic);
if (DS_OP_INVALID == op)
return false;
auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
offset += nir_intrinsic_base(instr);
auto dest = vf.dest(instr->dest, 0, pin_free);
PRegister src_as_register = nullptr;
auto src_val = vf.src(instr->src[1], 0);
if (!src_val->as_register()) {
auto temp_src_val = vf.temp_register();
shader.emit_instruction(new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
src_as_register = temp_src_val;
} else
src_as_register = src_val->as_register();
if (uav_id != nullptr)
shader.set_flag(Shader::sh_indirect_atomic);
GDSInstr *ir = nullptr;
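/* Pre-Cayman the counter slot is passed as an immediate base (plus an
 * optional uav_id register); on Cayman the address is instead computed
 * into the first data element as uav_id * 4 + 4 * offset below. */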
if (shader.chip_class() < ISA_CC_CAYMAN) {
RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
ir = new GDSInstr(op, dest, src, offset, uav_id);
} else {
auto dest = vf.dest(instr->dest, 0, pin_free);
auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
if (uav_id)
shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
AluInstr::write));
else
shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
ir = new GDSInstr(op, dest, tmp, 0, nullptr);
}
shader.emit_instruction(ir);
return true;
}
bool GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
{
auto& vf = shader.value_factory();
auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
auto dest = vf.dest(instr->dest, 0, pin_free);
GDSInstr *ir = nullptr;
if (shader.chip_class() < ISA_CC_CAYMAN) {
RegisterVec4 src = RegisterVec4(0, true, {7,7,7,7});
ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
} else {
auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
if (uav_id)
shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
AluInstr::write));
else
shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
}
shader.emit_instruction(ir);
return true;
}
bool GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
{
auto& vf = shader.value_factory();
bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
GDSInstr *ir = nullptr;
if (shader.chip_class() < ISA_CC_CAYMAN) {
auto dest = vf.dest(instr->dest, 0, pin_free);
RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
dest, src, offset, uav_id);
} else {
auto dest = vf.dest(instr->dest, 0, pin_free);
auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
if (uav_id)
shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
AluInstr::write));
else
shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
dest, tmp, 0, nullptr);
}
shader.emit_instruction(ir);
return true;
}
bool GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
{
auto& vf = shader.value_factory();
auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
auto *tmp_dest = vf.temp_register();
GDSInstr *ir = nullptr;
if (shader.chip_class() < ISA_CC_CAYMAN) {
RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, src, offset, uav_id);
} else {
auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
if (uav_id)
shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
AluInstr::write));
else
shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, tmp, 0, nullptr);
}
shader.emit_instruction(ir);
shader.emit_instruction(new AluInstr(op2_sub_int, vf.dest(instr->dest, 0, pin_free),
tmp_dest, vf.one_i(), AluInstr::last_write));
return true;
}
RatInstr::RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
const RegisterVec4& data, const RegisterVec4& index,
int rat_id, PRegister rat_id_offset,
int burst_count, int comp_mask, int element_size):
m_cf_opcode(cf_opcode),
m_rat_op(rat_op),
m_data(data),
m_index(index),
m_rat_id_offset(rat_id_offset),
m_rat_id(rat_id),
m_burst_count(burst_count),
m_comp_mask(comp_mask),
m_element_size(element_size)
{
set_always_keep();
m_data.add_use(this);
m_index.add_use(this);
if (m_rat_id_offset)
m_rat_id_offset->add_use(this);
}
void RatInstr::accept(ConstInstrVisitor& visitor) const
{
visitor.visit(*this);
}
void RatInstr::accept(InstrVisitor& visitor)
{
visitor.visit(this);
}
bool RatInstr::is_equal_to(const RatInstr& lhs) const
{
(void)lhs;
assert(0);
return false;
}
bool RatInstr::do_ready() const
{
if (m_rat_op != STORE_TYPED) {
for (auto i: required_instr()) {
if (!i->is_scheduled()) {
return false;
}
}
}
return m_data.ready(block_id(), index()) &&
m_index.ready(block_id(), index());
}
void RatInstr::do_print(std::ostream& os) const
{
os << "MEM_RAT RAT " << m_rat_id;
if (m_rat_id_offset)
os << "+" << *m_rat_id_offset;
os << " @" << m_index;
os << " OP:" << m_rat_op << " " << m_data;
os << " BC:" << m_burst_count
<< " MASK:" << m_comp_mask
<< " ES:" << m_element_size;
if (m_need_ack)
os << " ACK";
}
static RatInstr::ERatOp
get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format)
{
switch (opcode) {
case nir_intrinsic_image_load:
return RatInstr::NOP_RTN;
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_image_atomic_add:
return RatInstr::ADD_RTN;
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_image_atomic_and:
return RatInstr::AND_RTN;
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_image_atomic_or:
return RatInstr::OR_RTN;
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_image_atomic_imin:
return RatInstr::MIN_INT_RTN;
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_image_atomic_imax:
return RatInstr::MAX_INT_RTN;
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_image_atomic_umin:
return RatInstr::MIN_UINT_RTN;
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_image_atomic_umax:
return RatInstr::MAX_UINT_RTN;
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_image_atomic_xor:
return RatInstr::XOR_RTN;
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_image_atomic_comp_swap:
if (util_format_is_float(format))
return RatInstr::CMPXCHG_FLT_RTN;
else
return RatInstr::CMPXCHG_INT_RTN;
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_image_atomic_exchange:
return RatInstr::XCHG_RTN;
default:
unreachable("Unsupported WO RAT instruction");
}
}
static RatInstr::ERatOp
get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format)
{
switch (opcode) {
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_image_atomic_add:
return RatInstr::ADD;
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_image_atomic_and:
return RatInstr::AND;
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_image_atomic_or:
return RatInstr::OR;
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_image_atomic_imin:
return RatInstr::MIN_INT;
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_image_atomic_imax:
return RatInstr::MAX_INT;
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_image_atomic_umin:
return RatInstr::MIN_UINT;
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_image_atomic_umax:
return RatInstr::MAX_UINT;
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_image_atomic_xor:
return RatInstr::XOR;
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_image_atomic_comp_swap:
if (util_format_is_float(format))
return RatInstr::CMPXCHG_FLT;
else
return RatInstr::CMPXCHG_INT;
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_image_atomic_exchange:
return RatInstr::XCHG_RTN;
default:
unreachable("Unsupported WO RAT instruction");
}
}
bool RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
{
switch (intr->intrinsic) {
case nir_intrinsic_load_ssbo:
return emit_ssbo_load(intr, shader);
case nir_intrinsic_store_ssbo:
return emit_ssbo_store(intr, shader);
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_ssbo_atomic_exchange:
return emit_ssbo_atomic_op(intr, shader);
case nir_intrinsic_image_store:
return emit_image_store(intr, shader);
case nir_intrinsic_image_load:
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_imax:
return emit_image_load_or_atomic(intr, shader);
case nir_intrinsic_image_size:
return emit_image_size(intr, shader);
case nir_intrinsic_get_ssbo_size:
return emit_ssbo_size(intr, shader);
default:
return false;
}
}
bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
{
auto &vf = shader.value_factory();
auto dest = vf.dest_vec4(intr->dest, pin_group);
/** src0 not used, should be some offset */
auto addr = vf.src(intr->src[1], 0);
auto addr_temp = vf.temp_register();
/** Should be lowered in nir */
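/* The shift by two converts the byte offset into the dword index used by
 * the buffer fetch. */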
shader.emit_instruction(new AluInstr(op2_lshr_int, addr_temp, addr, vf.literal(2),
{alu_write, alu_last_instr}));
const EVTXDataFormat formats[4] = {
fmt_32,
fmt_32_32,
fmt_32_32_32,
fmt_32_32_32_32
};
RegisterVec4::Swizzle dest_swz[4] = {
{0,7,7,7},
{0,1,7,7},
{0,1,2,7},
{0,1,2,3}
};
int comp_idx = nir_dest_num_components(intr->dest) - 1;
auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0); {}
auto res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + offset +
shader.ssbo_image_offset();
auto ir = new LoadFromBuffer(dest, dest_swz[comp_idx], addr_temp, 0,
res_id, res_offset, formats[comp_idx]);
ir->set_fetch_flag(FetchInstr::use_tc);
ir->set_num_format(vtx_nf_int);
shader.emit_instruction(ir);
return true;
}
bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
{
/* Force the scheduler not to move the preparation too far away by starting
 * a new block (TODO: better priority handling in the scheduler)*/
if (nir_src_num_components(instr->src[0]) > 2)
shader.start_new_block(0);
auto &vf = shader.value_factory();
auto orig_addr = vf.src(instr->src[2], 0);
auto addr_base = vf.temp_register();
auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);
shader.emit_instruction(new AluInstr(op2_lshr_int, addr_base, orig_addr,
vf.literal(2), AluInstr::write));
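/* Store one dword per component: element i goes to addr_base + i, each as
 * its own MEM_RAT STORE_TYPED below. */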
for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
auto addr_vec = vf.temp_vec4(pin_group, {0,1,2,7});
if (i == 0) {
shader.emit_instruction(new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
} else {
shader.emit_instruction(new AluInstr(op2_add_int, addr_vec[0], addr_base,
vf.literal(i),
AluInstr::last_write));
}
auto value = vf.src(instr->src[0], i);
PRegister v = vf.temp_register(0);
shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
auto store = new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED,
value_vec, addr_vec, offset + shader.ssbo_image_offset(),
rat_id, 1, 1, 0);
shader.emit_instruction(store);
}
return true;
}
bool RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
{
auto& vf = shader.value_factory();
auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0); {}
bool read_result = !intr->dest.is_ssa || !list_is_empty(&intr->dest.ssa.uses);
auto opcode = read_result ? get_rat_opcode(intr->intrinsic, PIPE_FORMAT_R32_UINT) :
get_rat_opcode_wo(intr->intrinsic, PIPE_FORMAT_R32_UINT);
auto coord_orig = vf.src(intr->src[1], 0);
auto coord = vf.temp_register(0);
auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});
shader.emit_instruction(new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
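/* For compare-and-swap .x takes the new value and the compare value goes
 * to .z on Cayman or .w otherwise; the other atomics only use .x. */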
if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
vf.src(intr->src[2], 0), {alu_last_instr, alu_write}));
} else {
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
}
RegisterVec4 out_vec(coord, coord, coord, coord, pin_group);
auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, out_vec, imageid + shader.ssbo_image_offset(),
image_offset, 1, 0xf, 0);
shader.emit_instruction(atomic);
atomic->set_ack();
if (read_result) {
atomic->set_instr_flag(ack_rat_return_write);
auto dest = vf.dest_vec4(intr->dest, pin_group);
auto fetch = new FetchInstr(vc_fetch,
dest, {0, 1, 2, 3},
shader.rat_return_address(),
0,
no_index_offset,
fmt_32,
vtx_nf_int,
vtx_es_none,
R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
image_offset);
fetch->set_mfc(15);
fetch->set_fetch_flag(FetchInstr::srf_mode);
fetch->set_fetch_flag(FetchInstr::use_tc);
fetch->set_fetch_flag(FetchInstr::vpm);
fetch->set_fetch_flag(FetchInstr::wait_ack);
fetch->add_required_instr(atomic);
shader.chain_ssbo_read(fetch);
shader.emit_instruction(fetch);
}
return true;
}
bool RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
{
auto& vf = shader.value_factory();
auto dest = vf.dest_vec4(intr->dest, pin_group);
auto const_offset = nir_src_as_const_value(intr->src[0]);
int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
if (const_offset)
res_id += const_offset[0].u32;
else
assert(0 && "dynamic buffer offset not supported in buffer_size");
shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3},res_id));
return true;
}
bool RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
{
auto& vf = shader.value_factory();
auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {}
auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
auto coord = vf.temp_vec4(pin_group);
auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
auto value = vf.temp_vec4(pin_group);
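/* 1D array images carry the layer index in the second NIR coordinate; the
 * swizzle below moves it into the third component for the store. */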
RegisterVec4::Swizzle swizzle = {0,1,2,3};
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
nir_intrinsic_image_array(intrin))
swizzle = {0,2,1,3};
for (int i = 0; i < 4; ++i) {
auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
}
for (int i = 0; i < 4; ++i) {
auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
}
auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
auto store = new RatInstr(op, RatInstr::STORE_TYPED, value, coord, imageid,
image_offset, 1, 0xf, 0);
if (nir_intrinsic_access(intrin) & ACCESS_COHERENT)
store->set_ack();
shader.emit_instruction(store);
return true;
}
bool RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
{
auto& vf = shader.value_factory();
auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {}
bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
auto coord_orig = vf.src_vec4(intrin->src[1], pin_chan);
auto coord = vf.temp_vec4(pin_group);
auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});
RegisterVec4::Swizzle swizzle = {0,1,2,3};
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
nir_intrinsic_image_array(intrin))
swizzle = {0,2,1,3};
for (int i = 0; i < 4; ++i) {
auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
}
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
vf.src(intrin->src[3], 0), AluInstr::last_write));
} else {
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
vf.src(intrin->src[3], 0), AluInstr::write));
shader.emit_instruction(new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
}
auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid,
image_offset, 1, 0xf, 0);
shader.emit_instruction(atomic);
atomic->set_ack();
if (read_result) {
atomic->set_instr_flag(ack_rat_return_write);
auto dest = vf.dest_vec4(intrin->dest, pin_group);
pipe_format format = nir_intrinsic_format(intrin);
unsigned fmt = fmt_32;
unsigned num_format = 0;
unsigned format_comp = 0;
unsigned endian = 0;
r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
auto fetch = new FetchInstr(vc_fetch,
dest, {0, 1, 2, 3},
shader.rat_return_address(),
0,
no_index_offset,
(EVTXDataFormat)fmt,
(EVFetchNumFormat)num_format,
(EVFetchEndianSwap)endian,
R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
image_offset);
fetch->set_mfc(3);
fetch->set_fetch_flag(FetchInstr::srf_mode);
fetch->set_fetch_flag(FetchInstr::use_tc);
fetch->set_fetch_flag(FetchInstr::vpm);
fetch->set_fetch_flag(FetchInstr::wait_ack);
if (format_comp)
fetch->set_fetch_flag(FetchInstr::format_comp_signed);
shader.chain_ssbo_read(fetch);
shader.emit_instruction(fetch);
}
return true;
}
#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
bool RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
{
auto& vf = shader.value_factory();
auto src = RegisterVec4(0, true, {4,4,4,4});
assert(nir_src_as_uint(intrin->src[1]) == 0);
auto const_offset = nir_src_as_const_value(intrin->src[0]);
PRegister dyn_offset = nullptr;
int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
if (const_offset)
res_id += const_offset[0].u32;
else
dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
auto dest = vf.dest_vec4(intrin->dest, pin_group);
shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3}, res_id));
return true;
} else {
if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
/* Need to load the layers from a const buffer */
auto dest = vf.dest_vec4(intrin->dest, pin_group);
shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,7,3},
src, 0/* ?? */, res_id, dyn_offset));
shader.set_flag(Shader::sh_txs_cube_array_comp);
if (const_offset) {
unsigned lookup_resid = const_offset[0].u32;
shader.emit_instruction(new AluInstr(op1_mov, dest[2],
vf.uniform(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
R600_BUFFER_INFO_CONST_BUFFER),
AluInstr::last_write));
} else {
/* If the addressing is indirect we have to get the z-value by using a binary search */
auto addr = vf.temp_register();
auto comp1 = vf.temp_register();
auto comp2 = vf.temp_register();
auto low_bit = vf.temp_register();
auto high_bit = vf.temp_register();
auto trgt = vf.temp_vec4(pin_group);
shader.emit_instruction(new AluInstr(op2_lshr_int, addr, vf.src(intrin->src[0], 0),
vf.literal(2), AluInstr::write));
shader.emit_instruction(new AluInstr(op2_and_int, low_bit, vf.src(intrin->src[0], 0),
vf.one_i(), AluInstr::write));
shader.emit_instruction(new AluInstr(op2_and_int, high_bit, vf.src(intrin->src[0], 0),
vf.literal(2), AluInstr::last_write));
shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, addr, R600_SHADER_BUFFER_INFO_SEL,
R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float));
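/* The layer counts are loaded four per constant vec4: bit 1 of the index
 * selects the (x,y) or (z,w) pair and bit 0 selects within the pair. */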
// this may be wrong
shader.emit_instruction(new AluInstr(op3_cnde_int, comp1, high_bit, trgt[0], trgt[2],
AluInstr::write));
shader.emit_instruction(new AluInstr(op3_cnde_int, comp2, high_bit, trgt[1], trgt[3],
AluInstr::last_write));
shader.emit_instruction(new AluInstr(op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
}
} else {
auto dest = vf.dest_vec4(intrin->dest, pin_group);
shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,2,3},
src, 0/* ?? */, res_id, dyn_offset));
}
}
return true;
}
}

View File

@ -0,0 +1,177 @@
#ifndef GDSINSTR_H
#define GDSINSTR_H
#include "sfn_instr.h"
#include "sfn_valuefactory.h"
namespace r600 {
class Shader;
class GDSInstr : public Instr {
public:
GDSInstr(ESDOp op, Register *dest,
const RegisterVec4& src, int uav_base,
PRegister uav_id);
bool is_equal_to(const GDSInstr& lhs) const;
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
bool do_ready() const override;
auto opcode() const {return m_op;}
auto src() const { return m_src;}
const auto& dest() const { return m_dest;}
auto& dest() { return m_dest;}
auto uav_id() const {return m_uav_id;}
auto uav_base() const {return m_uav_base;}
static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
static bool emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader);
uint32_t slots() const override {return 1;};
private:
static bool emit_atomic_read(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_atomic_op2(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_atomic_inc(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_atomic_pre_dec(nir_intrinsic_instr *intr, Shader& shader);
void do_print(std::ostream& os) const override;
ESDOp m_op{DS_OP_INVALID};
Register *m_dest;
RegisterVec4 m_src;
int m_uav_base{0};
PRegister m_uav_id{nullptr};
std::bitset<8> m_tex_flags;
};
class RatInstr : public Instr {
public:
enum ERatOp {
NOP,
STORE_TYPED,
STORE_RAW,
STORE_RAW_FDENORM,
CMPXCHG_INT,
CMPXCHG_FLT,
CMPXCHG_FDENORM,
ADD,
SUB,
RSUB,
MIN_INT,
MIN_UINT,
MAX_INT,
MAX_UINT,
AND,
OR,
XOR,
MSKOR,
INC_UINT,
DEC_UINT,
NOP_RTN = 32,
XCHG_RTN = 34,
XCHG_FDENORM_RTN,
CMPXCHG_INT_RTN,
CMPXCHG_FLT_RTN,
CMPXCHG_FDENORM_RTN,
ADD_RTN,
SUB_RTN,
RSUB_RTN,
MIN_INT_RTN,
MIN_UINT_RTN,
MAX_INT_RTN,
MAX_UINT_RTN,
AND_RTN,
OR_RTN,
XOR_RTN,
MSKOR_RTN,
UINT_RTN,
UNSUPPORTED
};
RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
const RegisterVec4& data, const RegisterVec4& index,
int rat_id, PRegister rat_id_offset,
int burst_count, int comp_mask, int element_size);
auto rat_id_offset() const { return m_rat_id_offset;}
int rat_id() const { return m_rat_id;}
ERatOp rat_op() const {return m_rat_op;}
const auto& value() const { return m_data;}
auto& value() { return m_data;}
const auto& addr() const { return m_index;}
auto& addr() { return m_index;}
int data_gpr() const {return m_data.sel();}
int index_gpr() const {return m_index.sel();}
int elm_size() const {return m_element_size;}
int comp_mask() const {return m_comp_mask;}
bool need_ack() const {return m_need_ack;}
int burst_count() const {return m_burst_count;}
int data_swz(int chan) const {return m_data[chan]->chan();}
ECFOpCode cf_opcode() const { return m_cf_opcode;}
void set_ack() {m_need_ack = true; set_mark(); }
void set_mark() {m_need_mark = true; }
bool mark() {return m_need_mark;}
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
bool is_equal_to(const RatInstr& lhs) const;
static bool emit(nir_intrinsic_instr *intr, Shader& shader);
private:
static bool emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_ssbo_store(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_image_store(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_image_load_or_atomic(nir_intrinsic_instr *intr, Shader& shader);
static bool emit_image_size(nir_intrinsic_instr *intr, Shader& shader);
bool do_ready() const override;
void do_print(std::ostream& os) const override;
ECFOpCode m_cf_opcode;
ERatOp m_rat_op;
RegisterVec4 m_data;
RegisterVec4 m_index;
PRegister m_rat_id_offset{nullptr};
int m_rat_id{0};
int m_burst_count{0};
int m_comp_mask{15};
int m_element_size{3};
bool m_need_ack{false};
bool m_need_mark{false};
};
}
#endif // GDSINSTR_H

File diff suppressed because it is too large

View File

@ -0,0 +1,166 @@
#ifndef INSTR_TEX_H
#define INSTR_TEX_H
#include "sfn_instr.h"
#include "sfn_valuefactory.h"
#include "sfn_shader.h"
namespace r600 {
class TexInstr : public InstrWithVectorResult {
public:
enum Opcode {
ld = FETCH_OP_LD,
get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
get_nsamples = FETCH_OP_GET_NUMBER_OF_SAMPLES,
get_tex_lod = FETCH_OP_GET_LOD,
get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
keep_gradients = FETCH_OP_KEEP_GRADIENTS,
set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
sample = FETCH_OP_SAMPLE,
sample_l = FETCH_OP_SAMPLE_L,
sample_lb = FETCH_OP_SAMPLE_LB,
sample_lz = FETCH_OP_SAMPLE_LZ,
sample_g = FETCH_OP_SAMPLE_G,
sample_g_lb = FETCH_OP_SAMPLE_G_L,
gather4 = FETCH_OP_GATHER4,
gather4_o = FETCH_OP_GATHER4_O,
sample_c = FETCH_OP_SAMPLE_C,
sample_c_l = FETCH_OP_SAMPLE_C_L,
sample_c_lb = FETCH_OP_SAMPLE_C_LB,
sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
sample_c_g = FETCH_OP_SAMPLE_C_G,
sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
gather4_c = FETCH_OP_GATHER4_C,
gather4_c_o = FETCH_OP_GATHER4_C_O,
unknown = 255
};
enum Flags {
x_unnormalized,
y_unnormalized,
z_unnormalized,
w_unnormalized,
grad_fine,
num_tex_flag
};
struct Inputs {
Inputs(const nir_tex_instr& instr, ValueFactory &vf);
const nir_variable *sampler_deref;
const nir_variable *texture_deref;
RegisterVec4 coord;
PVirtualValue bias;
PVirtualValue comperator;
PVirtualValue lod;
RegisterVec4 ddx;
RegisterVec4 ddy;
nir_src *offset;
PVirtualValue gather_comp;
PVirtualValue ms_index;
PVirtualValue sampler_offset;
PVirtualValue texture_offset;
RegisterVec4::Swizzle swizzle_from_ncomps(int comps) const;
Opcode opcode;
private:
auto get_opcode(const nir_tex_instr& instr) -> Opcode;
};
TexInstr(Opcode op, const RegisterVec4& dest,
const RegisterVec4::Swizzle& dest_swizzle,
const RegisterVec4& src, unsigned sid, unsigned rid,
PVirtualValue sampler_offs = nullptr);
TexInstr(const TexInstr& orig) = delete;
TexInstr(const TexInstr&& orig) = delete;
TexInstr& operator =(const TexInstr& orig) = delete;
TexInstr& operator =(const TexInstr&& orig) = delete;
void accept(ConstInstrVisitor& visitor) const override;
void accept(InstrVisitor& visitor) override;
const auto& src() const {return m_src;}
auto& src() {return m_src;}
unsigned opcode() const {return m_opcode;}
unsigned sampler_id() const {return m_sampler_id;}
unsigned resource_id() const {return m_resource_id;}
void set_offset(unsigned index, int32_t val);
int get_offset(unsigned index) const;
void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
int inst_mode() const { return m_inst_mode;}
void set_tex_flag(Flags flag) {m_tex_flags.set(flag);}
bool has_tex_flag(Flags flag) const {return m_tex_flags.test(flag);}
void set_sampler_offset(PVirtualValue ofs) {m_sampler_offset = ofs;}
auto* sampler_offset() const {return m_sampler_offset;}
void set_gather_comp(int cmp);
bool is_equal_to(const TexInstr& lhs) const;
static Opcode op_from_string(const std::string& s);
static Instr::Pointer from_string(std::istream& is, ValueFactory& value_fctory);
static bool from_nir(nir_tex_instr *tex, Shader& shader);
uint32_t slots() const override {return 1;};
auto prepare_instr() const { return m_prepare_instr;}
private:
bool do_ready() const override;
void do_print(std::ostream& os) const override;
bool propagate_death() override;
static const char *opname(Opcode code);
static bool is_gather(Opcode op);
void read_tex_coord_normalitazion(const std::string& next_token);
void set_tex_param(const std::string& next_token);
static auto prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader &shader) -> RegisterVec4;
static bool emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_txs(nir_tex_instr *tex, Inputs& src,
RegisterVec4::Swizzle dest_swz, Shader& shader);
static bool emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader);
static bool emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_tg4(nir_tex_instr* instr, Inputs& src , Shader& shader);
static bool emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader);
void set_coord_offsets(nir_src *offset);
void set_rect_coordinate_flags(nir_tex_instr* instr);
void add_prepare_instr(TexInstr *ir) {m_prepare_instr.push_back(ir);};
Opcode m_opcode;
RegisterVec4 m_src;
PVirtualValue m_sampler_offset;
std::bitset<num_tex_flag> m_tex_flags;
int m_offset[3];
int m_inst_mode;
unsigned m_sampler_id;
unsigned m_resource_id;
static const std::map<Opcode, std::string> s_opcode_map;
std::list<TexInstr *> m_prepare_instr;
};
}
#endif // INSTR_TEX_H

View File

@ -0,0 +1,188 @@
#include "sfn_instrfactory.h"
#include "sfn_instr_alugroup.h"
#include "sfn_debug.h"
#include "sfn_instr_controlflow.h"
#include "sfn_instr_export.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_lds.h"
#include "sfn_instr_mem.h"
#include "sfn_instr_tex.h"
#include "sfn_alu_defines.h"
#include "sfn_shader.h"
#include <string>
#include <sstream>
#include <vector>
namespace r600 {
using std::string;
using std::vector;
InstrFactory::InstrFactory():
group(nullptr)
{
}
PInst InstrFactory::from_string(const std::string& s, int nesting_depth)
{
string type;
std::istringstream is(s);
PInst result = nullptr;
do {
is >> type;
} while (type.empty() && is.good());
if (type == "ALU_GROUP_BEGIN") {
group = new AluGroup();
group->set_nesting_depth(nesting_depth);
return nullptr;
} else if (type == "ALU_GROUP_END") {
AluGroup *retval = group;
group = nullptr;
return retval;
} else if (type == "ALU") {
result = AluInstr::from_string(is, m_value_factory, group);
} else if (type == "TEX") {
result = TexInstr::from_string(is, m_value_factory);
} else if (type == "EXPORT") {
result = ExportInstr::from_string(is, m_value_factory);
} else if (type == "EXPORT_DONE") {
result = ExportInstr::last_from_string(is, m_value_factory);
} else if (type == "VFETCH") {
result = FetchInstr::from_string(is, m_value_factory);
} else if (type == "GET_BUF_RESINFO") {
result = QueryBufferSizeInstr::from_string(is, m_value_factory);
} else if (type == "LOAD_BUF") {
result = LoadFromBuffer::from_string(is, m_value_factory);
} else if (type == "READ_SCRATCH") {
result = LoadFromScratch::from_string(is, m_value_factory);
} else if (type == "IF") {
result = IfInstr::from_string(is, m_value_factory);
} else if (type == "WRITE_SCRATCH") {
result = WriteScratchInstr::from_string(is, m_value_factory);
} else if (type == "MEM_RING") {
result = MemRingOutInstr::from_string(is, m_value_factory);
} else if (type == "EMIT_VERTEX") {
result = EmitVertexInstr::from_string(is, false);
} else if (type == "EMIT_CUT_VERTEX") {
result = EmitVertexInstr::from_string(is, true);
} else if (type == "LDS_READ") {
result = LDSReadInstr::from_string(is, m_value_factory);
} else if (type == "LDS") {
result = LDSAtomicInstr::from_string(is, m_value_factory);
} else if (type == "WRITE_TF") {
result = WriteTFInstr::from_string(is, m_value_factory);
} else
result = ControlFlowInstr::from_string(type);
if (!result && !group) {
std::cerr << "Error translating '" << s << "'\n";
}
return result;
}
bool InstrFactory::from_nir(nir_instr *instr, Shader& shader)
{
switch (instr->type) {
case nir_instr_type_alu:
return AluInstr::from_nir(nir_instr_as_alu(instr), shader);
case nir_instr_type_intrinsic:
return shader.process_intrinsic(nir_instr_as_intrinsic(instr));
case nir_instr_type_load_const:
return load_const(nir_instr_as_load_const(instr), shader);
case nir_instr_type_tex:
return TexInstr::from_nir(nir_instr_as_tex(instr), shader);
case nir_instr_type_jump:
return process_jump(nir_instr_as_jump(instr), shader);
case nir_instr_type_ssa_undef:
return process_undef(nir_instr_as_ssa_undef(instr), shader);
default:
fprintf(stderr, "Instruction type %d not supported\n", instr->type);
return false;
}
}
bool InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader)
{
AluInstr *ir = nullptr;
if (literal->def.bit_size == 64) {
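/* 64-bit constants are split into two 32-bit moves: low dword first,
 * then the high dword. */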
for (int i = 0; i < literal->def.num_components; ++i) {
auto dest0 = m_value_factory.dest(literal->def, 2 * i, pin_none);
auto src0 = m_value_factory.literal(literal->value[i].u64 & 0xffffffff);
shader.emit_instruction(new AluInstr(op1_mov, dest0, src0, {alu_write}));
auto dest1 = m_value_factory.dest(literal->def, 2 * i + 1, pin_none);
auto src1 = m_value_factory.literal((literal->value[i].u64 >> 32) & 0xffffffff);
shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::last_write));
}
} else {
Pin pin = literal->def.num_components == 1 ? pin_free : pin_none;
for (int i = 0; i < literal->def.num_components; ++i) {
auto dest = m_value_factory.dest(literal->def, i, pin);
uint32_t v = literal->value[i].i32;
PVirtualValue src = nullptr;
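/* Map frequently used bit patterns to ALU inline constants so they need
 * not be emitted as literals, e.g. 0x3f800000 (1.0f) becomes ALU_SRC_1. */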
switch (v) {
case 0: src = m_value_factory.zero(); break;
case 1: src = m_value_factory.one_i(); break;
case 0xffffffff: src = m_value_factory.inline_const(ALU_SRC_M_1_INT, 0); break;
case 0x3f800000: src = m_value_factory.inline_const(ALU_SRC_1, 0); break;
case 0x3f000000: src = m_value_factory.inline_const(ALU_SRC_0_5, 0); break;
default: src = m_value_factory.literal(v);
}
ir = new AluInstr(op1_mov, dest, src, {alu_write});
shader.emit_instruction(ir);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
}
return true;
}
bool InstrFactory::process_jump(nir_jump_instr *instr, Shader& shader)
{
ControlFlowInstr::CFType type;
switch (instr->type) {
case nir_jump_break:
type = ControlFlowInstr::cf_loop_break;
break;
case nir_jump_continue:
type = ControlFlowInstr::cf_loop_continue;
break;
default: {
nir_instr *i = reinterpret_cast<nir_instr*>(instr);
sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
return false;
}
}
shader.emit_instruction(new ControlFlowInstr(type));
shader.start_new_block(0);
return true;
}
bool InstrFactory::process_undef(nir_ssa_undef_instr *undef, Shader& shader)
{
for (int i = 0; i < undef->def.num_components; ++i) {
auto dest = shader.value_factory().undef(undef->def.index, i);
shader.emit_instruction(new AluInstr(op1_mov, dest,
value_factory().zero(),
AluInstr::last_write));
}
return true;
}
}

View File

@ -0,0 +1,34 @@
#ifndef INSTRFACTORY_H
#define INSTRFACTORY_H
#include "sfn_instr.h"
#include "sfn_valuefactory.h"
#include <iosfwd>
namespace r600 {
class Shader;
class InstrFactory : public Allocate {
public:
InstrFactory();
PInst from_string(const std::string &s, int nesting_depth);
bool from_nir(nir_instr *instr, Shader& shader);
auto& value_factory() { return m_value_factory;}
private:
bool load_const(nir_load_const_instr *lc, Shader& shader);
bool process_jump(nir_jump_instr *instr, Shader& shader);
bool process_undef(nir_ssa_undef_instr *undef, Shader& shader);
Instr::Pointer export_from_string(std::istream& is, bool is_last);
ValueFactory m_value_factory;
AluGroup *group;
};
}
#endif // INSTRFACTORY_H

View File

@ -1,183 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_instruction_alu.h"
#include "sfn_valuepool.h"
namespace r600 {
const AluModifiers AluInstruction::src_abs_flags[2] =
{alu_src0_abs, alu_src1_abs};
const AluModifiers AluInstruction::src_neg_flags[3] =
{alu_src0_neg, alu_src1_neg, alu_src2_neg};
const AluModifiers AluInstruction::src_rel_flags[3] =
{alu_src0_rel, alu_src1_rel, alu_src2_rel};
AluInstruction::AluInstruction(EAluOp opcode):
Instruction (Instruction::alu),
m_opcode(opcode),
m_src(alu_ops.at(opcode).nsrc),
m_bank_swizzle(alu_vec_unknown),
m_cf_type(cf_alu)
{
if (alu_ops.at(opcode).nsrc == 3)
m_flags.set(alu_op3);
}
AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
std::vector<PValue> src,
const std::set<AluModifiers>& flags):
Instruction (Instruction::alu),
m_opcode(opcode),
m_dest(dest),
m_bank_swizzle(alu_vec_unknown),
m_cf_type(cf_alu)
{
assert(dest);
m_src.swap(src);
for (auto f : flags)
m_flags.set(f);
if (alu_ops.at(opcode).nsrc == 3)
m_flags.set(alu_op3);
for (auto &s: m_src)
add_remappable_src_value(&s);
add_remappable_dst_value(&m_dest);
}
AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
const std::set<AluModifiers>& flags):
AluInstruction(opcode, dest, std::vector<PValue>{src0}, flags)
{
}
AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
PValue src0, PValue src1,
const std::set<AluModifiers> &m_flags):
AluInstruction(opcode, dest, {src0, src1}, m_flags)
{
}
AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
PValue src1, PValue src2,
const std::set<AluModifiers> &flags):
AluInstruction(opcode, dest, {src0, src1, src2}, flags)
{
}
bool AluInstruction::is_equal_to(const Instruction& lhs) const
{
assert(lhs.type() == alu);
const auto& oth = static_cast<const AluInstruction&>(lhs);
if (m_opcode != oth.m_opcode) {
return false;
}
if (*m_dest != *oth.m_dest)
return false;
if (m_src.size() != oth.m_src.size())
return false;
for (unsigned i = 0; i < m_src.size(); ++i)
if (*m_src[i] != *oth.m_src[i]) {
return false;
}
return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type);
}
void AluInstruction::replace_values(const ValueSet& candidates, PValue new_value)
{
for (auto c: candidates) {
if (*c == *m_dest)
m_dest = new_value;
for (auto& s: m_src) {
if (*c == *s)
s = new_value;
}
}
}
PValue AluInstruction::remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
ValueMap &values)
{
auto new_index = map[reg->sel()];
if (new_index.valid)
reg = values.get_or_inject(new_index.new_reg, reg->chan());
map[reg->sel()].used = true;
return reg;
}
void AluInstruction::set_flag(AluModifiers flag)
{
m_flags.set(flag);
}
void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz)
{
m_bank_swizzle = bswz;
}
unsigned AluInstruction::n_sources() const
{
return m_src.size();
}
void AluInstruction::do_print(std::ostream& os) const
{
os << "ALU " << alu_ops.at(m_opcode).name;
if (m_flags.test(alu_dst_clamp))
os << "_CLAMP";
if (m_dest)
os << ' ' << *m_dest << " : " ;
for (unsigned i = 0; i < m_src.size(); ++i) {
int pflags = 0;
if (i)
os << ' ';
if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg;
if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel;
if (i < 2)
if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs;
m_src[i]->print(os, Value::PrintFlags(0, pflags));
}
os << " {";
os << (m_flags.test(alu_write) ? 'W' : ' ');
os << (m_flags.test(alu_last_instr) ? 'L' : ' ');
os << (m_flags.test(alu_update_exec) ? 'E' : ' ');
os << (m_flags.test(alu_update_pred) ? 'P' : ' ');
os << "}";
os << " BS:" << m_bank_swizzle;
os << " CF:" << m_cf_type;
}
}

View File

@ -1,142 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef sfn_r600_instruction_alu_h
#define sfn_r600_instruction_alu_h
#include "sfn_instruction_base.h"
#include "sfn_alu_defines.h"
namespace r600 {
enum AluModifiers {
alu_src0_neg,
alu_src0_abs,
alu_src0_rel,
alu_src1_neg,
alu_src1_abs,
alu_src1_rel,
alu_src2_neg,
alu_src2_rel,
alu_dst_clamp,
alu_dst_rel,
alu_last_instr,
alu_update_exec,
alu_update_pred,
alu_write,
alu_op3
};
enum AluDstModifiers {
omod_off = 0,
omod_mul2 = 1,
omod_mul4 = 2,
omod_divl2 = 3
};
enum AluPredSel {
pred_off = 0,
pred_zero = 2,
pred_one = 3
};
enum AluBankSwizzle {
alu_vec_012 = 0,
sq_alu_scl_201 = 0,
alu_vec_021 = 1,
sq_alu_scl_122 = 1,
alu_vec_120 = 2,
sq_alu_scl_212 = 2,
alu_vec_102 = 3,
sq_alu_scl_221 = 3,
alu_vec_201 = 4,
alu_vec_210 = 5,
alu_vec_unknown = 6
};
class AluInstruction : public Instruction {
public:
static const AluModifiers src_abs_flags[2];
static const AluModifiers src_neg_flags[3];
static const AluModifiers src_rel_flags[3];
AluInstruction(EAluOp opcode);
AluInstruction(EAluOp opcode, PValue dest,
std::vector<PValue> src0,
const std::set<AluModifiers>& m_flags);
AluInstruction(EAluOp opcode, PValue dest, PValue src0,
const std::set<AluModifiers>& m_flags);
AluInstruction(EAluOp opcode, PValue dest,
PValue src0, PValue src1,
const std::set<AluModifiers>& m_flags);
AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1,
PValue src2,
const std::set<AluModifiers>& m_flags);
void set_flag(AluModifiers flag);
unsigned n_sources() const;
PValue dest() {return m_dest;}
EAluOp opcode() const {return m_opcode;}
const Value *dest() const {return m_dest.get();}
Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];}
bool is_last() const {return m_flags.test(alu_last_instr);}
bool write() const {return m_flags.test(alu_write);}
bool flag(AluModifiers f) const {return m_flags.test(f);}
void set_bank_swizzle(AluBankSwizzle swz);
int bank_swizzle() const {return m_bank_swizzle;}
ECFAluOpCode cf_type() const {return m_cf_type;}
void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
void replace_values(const ValueSet& candidates, PValue new_value) override;
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
PValue remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
ValueMap &values);
EAluOp m_opcode;
PValue m_dest;
std::vector<PValue> m_src;
AluOpFlags m_flags;
AluBankSwizzle m_bank_swizzle;
ECFAluOpCode m_cf_type;
};
}
#endif

View File

@ -1,187 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <algorithm>
#include <cassert>
#include "sfn_instruction_base.h"
#include "sfn_liverange.h"
#include "sfn_valuepool.h"
namespace r600 {
ValueRemapper::ValueRemapper(std::vector<rename_reg_pair>& m,
ValueMap& values):
m_map(m),
m_values(values)
{
}
void ValueRemapper::remap(PValue& v)
{
if (!v)
return;
if (v->type() == Value::gpr) {
v = remap_one_registers(v);
} else if (v->type() == Value::gpr_array_value) {
GPRArrayValue& val = static_cast<GPRArrayValue&>(*v);
auto value = val.value();
auto addr = val.indirect();
val.reset_value(remap_one_registers(value));
if (addr) {
if (addr->type() == Value::gpr)
val.reset_addr(remap_one_registers(addr));
}
size_t range_start = val.sel();
size_t range_end = range_start + val.array_size();
while (range_start < range_end)
m_map[range_start++].used = true;
} else if (v->type() == Value::kconst) {
auto& val = static_cast<UniformValue&>(*v);
auto addr = val.addr();
if (addr && addr->type() == Value::gpr)
val.reset_addr(remap_one_registers(addr));
}
}
void ValueRemapper::remap(GPRVector& v)
{
for (int i = 0; i < 4; ++i) {
if (v.reg_i(i)) {
auto& ns_idx = m_map[v.reg_i(i)->sel()];
if (ns_idx.valid)
v.set_reg_i(i,m_values.get_or_inject(ns_idx.new_reg, v.reg_i(i)->chan()));
m_map[v.reg_i(i)->sel()].used = true;
}
}
}
PValue ValueRemapper::remap_one_registers(PValue& reg)
{
auto new_index = m_map[reg->sel()];
if (new_index.valid)
reg = m_values.get_or_inject(new_index.new_reg, reg->chan());
m_map[reg->sel()].used = true;
return reg;
}
Instruction::Instruction(instr_type t):
m_type(t)
{
}
Instruction::~Instruction()
{
}
void Instruction::print(std::ostream& os) const
{
os << "OP:";
do_print(os);
}
void Instruction::remap_registers(ValueRemapper& map)
{
sfn_log << SfnLog::merge << "REMAP " << *this << "\n";
for (auto& v: m_mappable_src_registers)
map.remap(*v);
for (auto& v: m_mappable_src_vectors)
map.remap(*v);
for (auto& v: m_mappable_dst_registers)
map.remap(*v);
for (auto& v: m_mappable_dst_vectors)
map.remap(*v);
sfn_log << SfnLog::merge << "TO " << *this << "\n\n";
}
void Instruction::add_remappable_src_value(PValue *v)
{
if (*v)
m_mappable_src_registers.push_back(v);
}
void Instruction::add_remappable_src_value(GPRVector *v)
{
m_mappable_src_vectors.push_back(v);
}
void Instruction::add_remappable_dst_value(PValue *v)
{
if (v)
m_mappable_dst_registers.push_back(v);
}
void Instruction::add_remappable_dst_value(GPRVector *v)
{
m_mappable_dst_vectors.push_back(v);
}
void Instruction::replace_values(UNUSED const ValueSet& candidates, UNUSED PValue new_value)
{
}
void Instruction::evalue_liveness(LiverangeEvaluator& eval) const
{
sfn_log << SfnLog::merge << "Scan " << *this << "\n";
for (const auto& s: m_mappable_src_registers)
if (*s)
eval.record_read(**s);
for (const auto& s: m_mappable_src_vectors)
eval.record_read(*s);
for (const auto& s: m_mappable_dst_registers)
if (*s)
eval.record_write(**s);
for (const auto& s: m_mappable_dst_vectors)
eval.record_write(*s);
do_evalue_liveness(eval);
}
void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const
{
}
bool operator == (const Instruction& lhs, const Instruction& rhs)
{
if (rhs.m_type != lhs.m_type)
return false;
return lhs.is_equal_to(rhs);
}
}

View File

@ -1,155 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef sfn_r600_instr_h
#define sfn_r600_instr_h
#include "sfn_instructionvisitor.h"
#include "sfn_value_gpr.h"
#include "sfn_defines.h"
#include "gallium/drivers/r600/r600_isa.h"
#include <iostream>
#include <memory>
#include <vector>
#include <set>
namespace r600 {
struct rename_reg_pair {
bool valid;
bool used;
int new_reg;
};
class LiverangeEvaluator;
class ValueMap;
class ValueRemapper {
public:
ValueRemapper(std::vector<rename_reg_pair>& m,
ValueMap& values);
void remap(PValue& v);
void remap(GPRVector& v);
private:
PValue remap_one_registers(PValue& reg);
std::vector<rename_reg_pair>& m_map;
ValueMap& m_values;
};
using OutputRegisterMap = std::map<unsigned, const GPRVector *>;
class Instruction {
public:
enum instr_type {
alu,
exprt,
tex,
vtx,
wait_ack,
cond_if,
cond_else,
cond_endif,
lds_atomic,
lds_read,
lds_write,
loop_begin,
loop_end,
loop_break,
loop_continue,
phi,
streamout,
ring,
emit_vtx,
mem_wr_scratch,
gds,
rat,
tf_write,
block,
unknown
};
typedef std::shared_ptr<Instruction> Pointer;
friend bool operator == (const Instruction& lhs, const Instruction& rhs);
Instruction(instr_type t);
virtual ~Instruction();
instr_type type() const { return m_type;}
void print(std::ostream& os) const;
virtual void replace_values(const ValueSet& candidates, PValue new_value);
void evalue_liveness(LiverangeEvaluator& eval) const;
void remap_registers(ValueRemapper& map);
virtual bool accept(InstructionVisitor& visitor) = 0;
virtual bool accept(ConstInstructionVisitor& visitor) const = 0;
protected:
void add_remappable_src_value(PValue *v);
void add_remappable_src_value(GPRVector *v);
void add_remappable_dst_value(PValue *v);
void add_remappable_dst_value(GPRVector *v);
private:
virtual void do_evalue_liveness(LiverangeEvaluator& eval) const;
virtual bool is_equal_to(const Instruction& lhs) const = 0;
instr_type m_type;
virtual void do_print(std::ostream& os) const = 0;
std::vector<PValue*> m_mappable_src_registers;
std::vector<GPRVector*> m_mappable_src_vectors;
std::vector<PValue*> m_mappable_dst_registers;
std::vector<GPRVector*> m_mappable_dst_vectors;
};
using PInstruction=Instruction::Pointer;
inline std::ostream& operator << (std::ostream& os, const Instruction& instr)
{
instr.print(os);
return os;
}
bool operator == (const Instruction& lhs, const Instruction& rhs);
}
#endif


@@ -1,57 +0,0 @@
#include "sfn_instruction_block.h"
namespace r600 {
InstructionBlock::InstructionBlock(unsigned nesting_depth, unsigned block_number):
Instruction(block),
m_block_number(block_number),
m_nesting_depth(nesting_depth)
{
}
void InstructionBlock::emit(PInstruction instr)
{
m_block.push_back(instr);
}
void InstructionBlock::remap_registers(ValueRemapper& map)
{
for(auto& i: m_block)
i->remap_registers(map);
}
void InstructionBlock::do_evalue_liveness(LiverangeEvaluator& eval) const
{
for(auto& i: m_block)
i->evalue_liveness(eval);
}
bool InstructionBlock::is_equal_to(const Instruction& lhs) const
{
assert(lhs.type() == block);
auto& l = static_cast<const InstructionBlock&>(lhs);
if (m_block.size() != l.m_block.size())
return false;
if (m_block_number != l.m_block_number)
return false;
return std::equal(m_block.begin(), m_block.end(), l.m_block.begin(),
[](PInstruction ri, PInstruction li) {return *ri == *li;});
}
PInstruction InstructionBlock::last_instruction()
{
return m_block.size() ? *m_block.rbegin() : nullptr;
}
void InstructionBlock::do_print(std::ostream& os) const
{
std::string space(2 * m_nesting_depth, ' ');
for(auto& i: m_block)
os << space << *i << "\n";
}
}


@@ -1,82 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef sfn_instruction_block_h
#define sfn_instruction_block_h
#include "sfn_instruction_base.h"
namespace r600 {
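/* Flat list of instructions forming one block; the nesting depth is only
 * used to indent the debug output. */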
class InstructionBlock : public Instruction
{
public:
InstructionBlock(unsigned nesting_depth, unsigned block_number);
void emit(PInstruction instr);
std::vector<PInstruction>::const_iterator begin() const {
return m_block.begin();
}
std::vector<PInstruction>::const_iterator end() const {
return m_block.end();
}
void remap_registers(ValueRemapper& map);
size_t size() const {
return m_block.size();
}
const PInstruction& operator [] (int i) const {
return m_block[i];
}
unsigned number() const {
return m_block_number;
}
PInstruction last_instruction();
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
std::vector<PInstruction> m_block;
unsigned m_block_number;
unsigned m_nesting_depth;
};
}
#endif // INSTRUCTIONBLOCK_H


@@ -1,195 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_instruction_cf.h"
#include "sfn_liverange.h"
namespace r600 {
CFInstruction::CFInstruction(instr_type type):Instruction(type)
{
}
IfElseInstruction::IfElseInstruction(instr_type type):
CFInstruction (type)
{
}
IfInstruction::IfInstruction(AluInstruction *pred):
IfElseInstruction(cond_if),
m_pred(pred)
{
PValue *v = m_pred->psrc(0);
add_remappable_src_value(v);
pred->set_cf_type(cf_alu_push_before);
}
void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
{
eval.scope_if();
}
bool IfInstruction::is_equal_to(const Instruction& lhs) const
{
assert(lhs.type() == cond_if);
const IfInstruction& l = static_cast<const IfInstruction&>(lhs);
return *l.m_pred == *m_pred;
}
void IfInstruction::do_print(std::ostream& os) const
{
os << "PRED = " << *m_pred << "\n";
os << "IF (PRED)";
}
ElseInstruction::ElseInstruction(IfInstruction *jump_src):
IfElseInstruction(cond_else),
m_jump_src(jump_src)
{
}
void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
{
eval.scope_else();
}
bool ElseInstruction::is_equal_to(const Instruction& lhs) const
{
if (lhs.type() != cond_else)
return false;
auto& l = static_cast<const ElseInstruction&>(lhs);
return (*m_jump_src == *l.m_jump_src);
}
void ElseInstruction::do_print(std::ostream& os) const
{
os << "ELSE";
}
IfElseEndInstruction::IfElseEndInstruction():
IfElseInstruction(cond_endif)
{
}
void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
{
eval.scope_endif();
}
bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const
{
if (lhs.type() != cond_endif)
return false;
return true;
}
void IfElseEndInstruction::do_print(std::ostream& os) const
{
os << "ENDIF";
}
LoopBeginInstruction::LoopBeginInstruction():
CFInstruction(loop_begin)
{
}
void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
{
eval.scope_loop_begin();
}
bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const
{
assert(lhs.type() == loop_begin);
return true;
}
void LoopBeginInstruction::do_print(std::ostream& os) const
{
os << "BGNLOOP";
}
LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start):
CFInstruction (loop_end),
m_start(start)
{
}
void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
{
eval.scope_loop_end();
}
bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const
{
assert(lhs.type() == loop_end);
const auto& other = static_cast<const LoopEndInstruction&>(lhs);
return *m_start == *other.m_start;
}
void LoopEndInstruction::do_print(std::ostream& os) const
{
os << "ENDLOOP";
}
LoopBreakInstruction::LoopBreakInstruction():
CFInstruction (loop_break)
{
}
void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
{
eval.scope_loop_break();
}
bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const
{
return true;
}
void LoopBreakInstruction::do_print(std::ostream& os) const
{
os << "BREAK";
}
LoopContInstruction::LoopContInstruction():
CFInstruction (loop_continue)
{
}
bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const
{
return true;
}
void LoopContInstruction::do_print(std::ostream& os) const
{
os << "CONTINUE";
}
}


@@ -1,142 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_IFELSEINSTRUCTION_H
#define SFN_IFELSEINSTRUCTION_H
#include "sfn_instruction_alu.h"
namespace r600 {
class CFInstruction : public Instruction {
protected:
CFInstruction(instr_type type);
};
class IfElseInstruction : public CFInstruction {
public:
IfElseInstruction(instr_type type);
};
class IfInstruction : public IfElseInstruction {
public:
IfInstruction(AluInstruction *pred);
const AluInstruction& pred() const {return *m_pred;}
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
std::shared_ptr<AluInstruction> m_pred;
};
class ElseInstruction : public IfElseInstruction {
public:
ElseInstruction(IfInstruction *jump_src);
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
IfElseInstruction *m_jump_src;
};
class IfElseEndInstruction : public IfElseInstruction {
public:
IfElseEndInstruction();
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
};
class LoopBeginInstruction: public CFInstruction {
public:
LoopBeginInstruction();
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
};
class LoopEndInstruction: public CFInstruction {
public:
LoopEndInstruction(LoopBeginInstruction *start);
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
LoopBeginInstruction *m_start;
};
class LoopBreakInstruction: public CFInstruction {
public:
LoopBreakInstruction();
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
};
class LoopContInstruction: public CFInstruction {
public:
LoopContInstruction();
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
};
}
#endif // SFN_IFELSEINSTRUCTION_H


@@ -1,341 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_instruction_export.h"
#include "sfn_liverange.h"
#include "sfn_valuepool.h"
namespace r600 {
WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value):
Instruction(t),
m_value(value)
{
add_remappable_src_value(&m_value);
}
void WriteoutInstruction::replace_values(const ValueSet& candidates, PValue new_value)
{
// I wonder whether we can actually end up here ...
for (auto c: candidates) {
if (*c == *m_value.reg_i(c->chan()))
m_value.set_reg_i(c->chan(), new_value);
}
replace_values_child(candidates, new_value);
}
void WriteoutInstruction::replace_values_child(UNUSED const ValueSet& candidates,
UNUSED PValue new_value)
{
}
void WriteoutInstruction::remap_registers_child(UNUSED std::vector<rename_reg_pair>& map,
UNUSED ValueMap& values)
{
}
ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type):
WriteoutInstruction(Instruction::exprt, value),
m_type(type),
m_loc(loc),
m_is_last(false)
{
}
bool ExportInstruction::is_equal_to(const Instruction& lhs) const
{
assert(lhs.type() == exprt);
const auto& oth = static_cast<const ExportInstruction&>(lhs);
return (gpr() == oth.gpr()) &&
(m_type == oth.m_type) &&
(m_loc == oth.m_loc) &&
(m_is_last == oth.m_is_last);
}
void ExportInstruction::do_print(std::ostream& os) const
{
os << (m_is_last ? "EXPORT_DONE ":"EXPORT ");
switch (m_type) {
case et_pixel: os << "PIXEL "; break;
case et_pos: os << "POS "; break;
case et_param: os << "PARAM "; break;
}
os << m_loc << " " << gpr();
}
void ExportInstruction::update_output_map(OutputRegisterMap& map) const
{
map[m_loc] = gpr_ptr();
}
void ExportInstruction::set_last()
{
m_is_last = true;
}
WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value,
int align, int align_offset, int writemask):
WriteoutInstruction (Instruction::mem_wr_scratch, value),
m_loc(loc),
m_align(align),
m_align_offset(align_offset),
m_writemask(writemask),
m_array_size(0)
{
}
WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value,
int align, int align_offset, int writemask, int array_size):
WriteoutInstruction (Instruction::mem_wr_scratch, value),
m_loc(0),
m_address(address),
m_align(align),
m_align_offset(align_offset),
m_writemask(writemask),
m_array_size(array_size - 1)
{
add_remappable_src_value(&m_address);
}
bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const
{
if (lhs.type() != Instruction::mem_wr_scratch)
return false;
const auto& other = static_cast<const WriteScratchInstruction&>(lhs);
if (m_address) {
if (!other.m_address)
return false;
if (*m_address != *other.m_address)
return false;
} else {
if (other.m_address)
return false;
}
return gpr() == other.gpr() &&
m_loc == other.m_loc &&
m_align == other.m_align &&
m_align_offset == other.m_align_offset &&
m_writemask == other.m_writemask;
}
static char *writemask_to_swizzle(int writemask, char *buf)
{
const char *swz = "xyzw";
for (int i = 0; i < 4; ++i) {
buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
}
buf[4] = 0;
return buf;
}
void WriteScratchInstruction::do_print(std::ostream& os) const
{
char buf[5];
os << "MEM_SCRATCH_WRITE ";
if (m_address)
os << "@" << *m_address << "+";
os << m_loc << "." << writemask_to_swizzle(m_writemask, buf)
<< " " << gpr() << " AL:" << m_align << " ALO:" << m_align_offset;
}
void WriteScratchInstruction::replace_values_child(const ValueSet& candidates, PValue new_value)
{
if (!m_address)
return;
for (auto c: candidates) {
if (*c == *m_address)
m_address = new_value;
}
}
void WriteScratchInstruction::remap_registers_child(std::vector<rename_reg_pair>& map,
ValueMap& values)
{
if (!m_address)
return;
sfn_log << SfnLog::merge << "Remap " << *m_address << " of type " << m_address->type() << "\n";
assert(m_address->type() == Value::gpr);
auto new_index = map[m_address->sel()];
if (new_index.valid)
m_address = values.get_or_inject(new_index.new_reg, m_address->chan());
map[m_address->sel()].used = true;
}
StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
int array_base, int comp_mask, int out_buffer,
int stream):
WriteoutInstruction(Instruction::streamout, value),
m_element_size(num_components == 3 ? 3 : num_components - 1),
m_burst_count(1),
m_array_base(array_base),
m_array_size(0xfff),
m_writemask(comp_mask),
m_output_buffer(out_buffer),
m_stream(stream)
{
}
unsigned StreamOutIntruction::op() const
{
int op = 0;
switch (m_output_buffer) {
case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
}
return 4 * m_stream + op;
}
bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const
{
assert(lhs.type() == streamout);
const auto& oth = static_cast<const StreamOutIntruction&>(lhs);
return gpr() == oth.gpr() &&
m_element_size == oth.m_element_size &&
m_burst_count == oth.m_burst_count &&
m_array_base == oth.m_array_base &&
m_array_size == oth.m_array_size &&
m_writemask == oth.m_writemask &&
m_output_buffer == oth.m_output_buffer &&
m_stream == oth.m_stream;
}
void StreamOutIntruction::do_print(std::ostream& os) const
{
os << "WRITE STREAM(" << m_stream << ") " << gpr()
<< " ES:" << m_element_size
<< " BC:" << m_burst_count
<< " BUF:" << m_output_buffer
<< " ARRAY:" << m_array_base;
if (m_array_size != 0xfff)
os << "+" << m_array_size;
}
MemRingOutIntruction::MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
const GPRVector& value,
unsigned base_addr, unsigned ncomp,
PValue index):
WriteoutInstruction(Instruction::ring, value),
m_ring_op(ring),
m_type(type),
m_base_address(base_addr),
m_num_comp(ncomp),
m_index(index)
{
add_remappable_src_value(&m_index);
assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1||
m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3);
assert(m_num_comp <= 4);
}
unsigned MemRingOutIntruction::ncomp() const
{
switch (m_num_comp) {
case 1: return 0;
case 2: return 1;
case 3:
case 4: return 3;
default:
assert(0);
}
return 3;
}
bool MemRingOutIntruction::is_equal_to(const Instruction& lhs) const
{
assert(lhs.type() == ring);
const auto& oth = static_cast<const MemRingOutIntruction&>(lhs);
bool equal = gpr() == oth.gpr() &&
m_ring_op == oth.m_ring_op &&
m_type == oth.m_type &&
m_num_comp == oth.m_num_comp &&
m_base_address == oth.m_base_address;
if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
equal &= (*m_index == *oth.m_index);
return equal;
}
static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
void MemRingOutIntruction::do_print(std::ostream& os) const
{
os << "MEM_RING " << m_ring_op;
os << " " << write_type_str[m_type] << " " << m_base_address;
os << " " << gpr();
if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
os << " @" << *m_index;
os << " ES:" << m_num_comp;
}
void MemRingOutIntruction::replace_values_child(const ValueSet& candidates,
PValue new_value)
{
if (!m_index)
return;
for (auto c: candidates) {
if (*c == *m_index)
m_index = new_value;
}
}
void MemRingOutIntruction::remap_registers_child(std::vector<rename_reg_pair>& map,
ValueMap& values)
{
if (!m_index)
return;
assert(m_index->type() == Value::gpr);
auto new_index = map[m_index->sel()];
if (new_index.valid)
m_index = values.get_or_inject(new_index.new_reg, m_index->chan());
map[m_index->sel()].used = true;
}
void MemRingOutIntruction::patch_ring(int stream, PValue index)
{
const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
assert(stream < 4);
m_ring_op = ring_op[stream];
m_index = index;
}
}


@@ -1,185 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_EXPORTINSTRUCTION_H
#define SFN_EXPORTINSTRUCTION_H
#include "sfn_instruction_base.h"
namespace r600 {
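/* Common base for instructions that write a GPR vector to an export
 * stream, scratch memory, a stream-out buffer, or a GS ring. */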
class WriteoutInstruction: public Instruction {
public:
void replace_values(const ValueSet& candidates, PValue new_value) override;
const GPRVector& gpr() const {return m_value;}
const GPRVector *gpr_ptr() const {return &m_value;}
protected:
WriteoutInstruction(instr_type t, const GPRVector& value);
private:
virtual void replace_values_child(const ValueSet& candidates, PValue new_value);
virtual void remap_registers_child(std::vector<rename_reg_pair>& map,
ValueMap& values);
GPRVector m_value;
};
class ExportInstruction : public WriteoutInstruction {
public:
enum ExportType {
et_pixel,
et_pos,
et_param
};
ExportInstruction(unsigned loc, const GPRVector& value, ExportType type);
void set_last();
ExportType export_type() const {return m_type;}
unsigned location() const {return m_loc;}
bool is_last_export() const {return m_is_last;}
void update_output_map(OutputRegisterMap& map) const;
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
ExportType m_type;
unsigned m_loc;
bool m_is_last;
};
class WriteScratchInstruction : public WriteoutInstruction {
public:
WriteScratchInstruction(unsigned loc, const GPRVector& value, int align,
int align_offset, int writemask);
WriteScratchInstruction(const PValue& address, const GPRVector& value,
int align, int align_offset, int writemask, int array_size);
unsigned location() const {return m_loc;}
int write_mask() const { return m_writemask;}
int address() const { assert(m_address); return m_address->sel();}
bool indirect() const { return !!m_address;}
int array_size() const { return m_array_size;}
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
void replace_values_child(const ValueSet& candidates, PValue new_value) override;
void remap_registers_child(std::vector<rename_reg_pair>& map,
ValueMap& values)override;
unsigned m_loc;
PValue m_address;
unsigned m_align;
unsigned m_align_offset;
unsigned m_writemask;
int m_array_size;
};
class StreamOutIntruction: public WriteoutInstruction {
public:
StreamOutIntruction(const GPRVector& value, int num_components,
int array_base, int comp_mask, int out_buffer,
int stream);
int element_size() const { return m_element_size;}
int burst_count() const { return m_burst_count;}
int array_base() const { return m_array_base;}
int array_size() const { return m_array_size;}
int comp_mask() const { return m_writemask;}
unsigned op() const;
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
int m_element_size;
int m_burst_count;
int m_array_base;
int m_array_size;
int m_writemask;
int m_output_buffer;
int m_stream;
};
enum EMemWriteType {
mem_write = 0,
mem_write_ind = 1,
mem_write_ack = 2,
mem_write_ind_ack = 3,
};
class MemRingOutIntruction: public WriteoutInstruction {
public:
MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
const GPRVector& value, unsigned base_addr,
unsigned ncomp, PValue m_index);
unsigned op() const{return m_ring_op;}
unsigned ncomp() const;
unsigned addr() const {return m_base_address;}
EMemWriteType type() const {return m_type;}
unsigned index_reg() const {return m_index->sel();}
unsigned array_base() const {return m_base_address; }
void replace_values_child(const ValueSet& candidates, PValue new_value) override;
void remap_registers_child(std::vector<rename_reg_pair>& map,
ValueMap& values) override;
void patch_ring(int stream, PValue index);
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
ECFOpCode m_ring_op;
EMemWriteType m_type;
unsigned m_base_address;
unsigned m_num_comp;
PValue m_index;
};
}
#endif // SFN_EXPORTINSTRUCTION_H


@@ -1,480 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_instruction_fetch.h"
#include "gallium/drivers/r600/r600_pipe.h"
namespace r600 {
/* refactor this to add static create methods for specific tasks */
FetchInstruction::FetchInstruction(EVFetchInstr op,
EVFetchType type,
GPRVector dst,
PValue src, int offset,
int buffer_id, PValue buffer_offset,
EBufferIndexMode cp_rel,
bool use_const_field):
Instruction(vtx),
m_vc_opcode(op),
m_fetch_type(type),
m_endian_swap(vtx_es_none),
m_src(src),
m_dst(dst),
m_offset(offset),
m_is_mega_fetch(1),
m_mega_fetch_count(16),
m_buffer_id(buffer_id),
m_semantic_id(0),
m_buffer_index_mode(cp_rel),
m_flags(0),
m_uncached(false),
m_indexed(false),
m_array_base(0),
m_array_size(0),
m_elm_size(0),
m_buffer_offset(buffer_offset),
m_dest_swizzle({0,1,2,3})
{
if (use_const_field) {
m_flags.set(vtx_use_const_field);
m_data_format = fmt_invalid;
m_num_format = vtx_nf_norm;
} else {
m_flags.set(vtx_format_comp_signed);
m_data_format = fmt_32_32_32_32_float;
m_num_format = vtx_nf_scaled;
}
add_remappable_src_value(&m_src);
add_remappable_src_value(&m_buffer_offset);
add_remappable_dst_value(&m_dst);
}
/* Resource query */
FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode,
EVFetchType fetch_type,
EVTXDataFormat data_format,
EVFetchNumFormat num_format,
EVFetchEndianSwap endian_swap,
const PValue src,
const GPRVector dst,
uint32_t offset,
bool is_mega_fetch,
uint32_t mega_fetch_count,
uint32_t buffer_id,
uint32_t semantic_id,
EBufferIndexMode buffer_index_mode,
bool uncached,
bool indexed,
int array_base,
int array_size,
int elm_size,
PValue buffer_offset,
const std::array<int, 4>& dest_swizzle):
Instruction(vtx),
m_vc_opcode(vc_opcode),
m_fetch_type(fetch_type),
m_data_format(data_format),
m_num_format(num_format),
m_endian_swap(endian_swap),
m_src(src),
m_dst(dst),
m_offset(offset),
m_is_mega_fetch(is_mega_fetch),
m_mega_fetch_count(mega_fetch_count),
m_buffer_id(buffer_id),
m_semantic_id(semantic_id),
m_buffer_index_mode(buffer_index_mode),
m_uncached(uncached),
m_indexed(indexed),
m_array_base(array_base),
m_array_size(array_size),
m_elm_size(elm_size),
m_buffer_offset(buffer_offset),
m_dest_swizzle(dest_swizzle)
{
add_remappable_src_value(&m_src);
add_remappable_dst_value(&m_dst);
add_remappable_src_value(&m_buffer_offset);
}
FetchInstruction::FetchInstruction(GPRVector dst,
PValue src,
int buffer_id, PValue buffer_offset,
EVTXDataFormat format,
EVFetchNumFormat num_format):
Instruction(vtx),
m_vc_opcode(vc_fetch),
m_fetch_type(no_index_offset),
m_data_format(format),
m_num_format(num_format),
m_endian_swap(vtx_es_none),
m_src(src),
m_dst(dst),
m_offset(0),
m_is_mega_fetch(0),
m_mega_fetch_count(0),
m_buffer_id(buffer_id),
m_semantic_id(0),
m_buffer_index_mode(bim_none),
m_flags(0),
m_uncached(false),
m_indexed(false),
m_array_base(0),
m_array_size(0),
m_elm_size(1),
m_buffer_offset(buffer_offset),
m_dest_swizzle({0,1,2,3})
{
m_flags.set(vtx_format_comp_signed);
add_remappable_src_value(&m_src);
add_remappable_dst_value(&m_dst);
add_remappable_src_value(&m_buffer_offset);
}
/* Resource query */
FetchInstruction::FetchInstruction(GPRVector dst,
PValue src,
int buffer_id,
EBufferIndexMode cp_rel):
Instruction(vtx),
m_vc_opcode(vc_get_buf_resinfo),
m_fetch_type(no_index_offset),
m_data_format(fmt_32_32_32_32),
m_num_format(vtx_nf_norm),
m_endian_swap(vtx_es_none),
m_src(src),
m_dst(dst),
m_offset(0),
m_is_mega_fetch(0),
m_mega_fetch_count(16),
m_buffer_id(buffer_id),
m_semantic_id(0),
m_buffer_index_mode(cp_rel),
m_flags(0),
m_uncached(false),
m_indexed(false),
m_array_base(0),
m_array_size(0),
m_elm_size(0),
m_dest_swizzle({0,1,2,3})
{
m_flags.set(vtx_format_comp_signed);
add_remappable_src_value(&m_src);
add_remappable_dst_value(&m_dst);
add_remappable_src_value(&m_buffer_offset);
}
FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size):
Instruction(vtx),
m_vc_opcode(vc_read_scratch),
m_fetch_type(vertex_data),
m_data_format(fmt_32_32_32_32),
m_num_format(vtx_nf_int),
m_endian_swap(vtx_es_none),
m_dst(dst),
m_offset(0),
m_is_mega_fetch(0),
m_mega_fetch_count(16),
m_buffer_id(0),
m_semantic_id(0),
m_buffer_index_mode(bim_none),
m_flags(0),
m_uncached(true),
m_array_base(0),
m_array_size(0),
m_elm_size(3),
m_dest_swizzle({0,1,2,3})
{
if (src->type() == Value::literal) {
const auto& lv = static_cast<const LiteralValue&>(*src);
m_array_base = lv.value();
m_indexed = false;
m_src.reset(new GPRValue(0,0));
m_array_size = 0;
} else {
m_array_base = 0;
m_src = src;
m_indexed = true;
m_array_size = scratch_size - 1;
}
add_remappable_src_value(&m_src);
add_remappable_dst_value(&m_dst);
add_remappable_src_value(&m_buffer_offset);
}
void FetchInstruction::replace_values(const ValueSet& candidates, PValue new_value)
{
if (!m_src)
return;
for (auto c: candidates) {
for (int i = 0; i < 4; ++i) {
if (*c == *m_dst.reg_i(i))
m_dst.set_reg_i(i, new_value);
}
if (*m_src == *c)
m_src = new_value;
}
}
bool FetchInstruction::is_equal_to(const Instruction& lhs) const
{
auto& l = static_cast<const FetchInstruction&>(lhs);
if (m_src) {
if (!l.m_src)
return false;
if (*m_src != *l.m_src)
return false;
} else {
if (l.m_src)
return false;
}
return m_vc_opcode == l.m_vc_opcode &&
m_fetch_type == l.m_fetch_type &&
m_data_format == l.m_data_format &&
m_num_format == l.m_num_format &&
m_endian_swap == l.m_endian_swap &&
m_dst == l.m_dst &&
m_offset == l.m_offset &&
m_buffer_id == l.m_buffer_id &&
m_semantic_id == l.m_semantic_id &&
m_buffer_index_mode == l.m_buffer_index_mode &&
m_flags == l.m_flags &&
m_indexed == l.m_indexed &&
m_uncached == l.m_uncached;
}
void FetchInstruction::set_format(EVTXDataFormat fmt)
{
m_data_format = fmt;
}
void FetchInstruction::set_dest_swizzle(const std::array<int,4>& swz)
{
m_dest_swizzle = swz;
}
void FetchInstruction::prelude_append(Instruction *instr)
{
assert(instr);
m_prelude.push_back(PInstruction(instr));
}
const std::vector<PInstruction>& FetchInstruction::prelude() const
{
return m_prelude;
}
LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size):
FetchInstruction(dst, src, scratch_size)
{
}
FetchGDSOpResult::FetchGDSOpResult(const GPRVector dst, const PValue src):
FetchInstruction(vc_fetch,
no_index_offset,
fmt_32,
vtx_nf_int,
vtx_es_none,
src,
dst,
0,
false,
0xf,
R600_IMAGE_IMMED_RESOURCE_OFFSET,
0,
bim_none,
false,
false,
0,
0,
0,
PValue(),
{0,7,7,7})
{
set_flag(vtx_srf_mode);
set_flag(vtx_vpm);
}
FetchTCSIOParam::FetchTCSIOParam(GPRVector dst, PValue src, int offset):
FetchInstruction(vc_fetch,
no_index_offset,
fmt_32_32_32_32,
vtx_nf_scaled,
vtx_es_none,
src,
dst,
offset,
false,
16,
R600_LDS_INFO_CONST_BUFFER,
0,
bim_none,
false,
false,
0,
0,
0,
PValue(),
{0,1,2,3})
{
set_flag(vtx_srf_mode);
set_flag(vtx_format_comp_signed);
}
static const char *fmt_descr[64] = {
"INVALID",
"8",
"4_4",
"3_3_2",
"RESERVED_4",
"16",
"16F",
"8_8",
"5_6_5",
"6_5_5",
"1_5_5_5",
"4_4_4_4",
"5_5_5_1",
"32",
"32F",
"16_16",
"16_16F",
"8_24",
"8_24F",
"24_8",
"24_8F",
"10_11_11",
"10_11_11F",
"11_11_10",
"11_11_10F",
"2_10_10_10",
"8_8_8_8",
"10_10_10_2",
"X24_8_32F",
"32_32",
"32_32F",
"16_16_16_16",
"16_16_16_16F",
"RESERVED_33",
"32_32_32_32",
"32_32_32_32F",
"RESERVED_36",
"1",
"1_REVERSED",
"GB_GR",
"BG_RG",
"32_AS_8",
"32_AS_8_8",
"5_9_9_9_SHAREDEXP",
"8_8_8",
"16_16_16",
"16_16_16F",
"32_32_32",
"32_32_32F",
"BC1",
"BC2",
"BC3",
"BC4",
"BC5",
"APC0",
"APC1",
"APC2",
"APC3",
"APC4",
"APC5",
"APC6",
"APC7",
"CTX1",
"RESERVED_63"
};
void FetchInstruction::do_print(std::ostream& os) const
{
static const std::string num_format_char[] = {"norm", "int", "scaled"};
static const std::string endian_swap_code[] = {
"noswap", "8in16", "8in32"
};
static const char buffer_index_mode_char[] = "_01E";
static const char *flag_string[] = {"WQM", "CF", "signed", "no_zero",
"nostride", "AC", "TC", "VPM"};
switch (m_vc_opcode) {
case vc_fetch:
os << "Fetch " << m_dst;
break;
case vc_semantic:
os << "Fetch Semantic ID:" << m_semantic_id;
break;
case vc_get_buf_resinfo:
os << "Fetch BufResinfo:" << m_dst;
break;
case vc_read_scratch:
os << "MEM_READ_SCRATCH:" << m_dst;
break;
default:
os << "Fetch ERROR";
return;
}
os << ", " << *m_src;
if (m_offset)
os << "+" << m_offset;
os << " BUFID:" << m_buffer_id
<< " FMT:(" << fmt_descr[m_data_format]
<< " " << num_format_char[m_num_format]
<< " " << endian_swap_code[m_endian_swap]
<< ")";
if (m_buffer_index_mode > 0)
os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode];
if (m_is_mega_fetch)
os << " MFC:" << m_mega_fetch_count;
else
os << " mfc*:" << m_mega_fetch_count;
if (m_flags.any()) {
os << " Flags:";
for( int i = 0; i < vtx_unknown; ++i) {
if (m_flags.test(i))
os << ' ' << flag_string[i];
}
}
}
}


@@ -1,187 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_INSTRUCTION_FETCH_H
#define SFN_INSTRUCTION_FETCH_H
#include "sfn_instruction_base.h"
namespace r600 {
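/* Vertex-fetch (VTX) clause instruction; also covers scratch reads and
 * buffer resource-info queries. */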
class FetchInstruction : public Instruction {
public:
FetchInstruction(EVFetchInstr vc_opcode,
EVFetchType fetch_type,
EVTXDataFormat data_format,
EVFetchNumFormat num_format,
EVFetchEndianSwap endian_swap,
const PValue src,
const GPRVector dst,
uint32_t offset,
bool is_mega_fetch,
uint32_t mega_fetch_count,
uint32_t buffer_id,
uint32_t semantic_id,
EBufferIndexMode buffer_index_mode,
bool uncached,
bool indexed,
int array_base,
int array_size,
int elm_size,
PValue buffer_offset,
const std::array<int, 4>& dest_swizzle);
FetchInstruction(EVFetchInstr op,
EVFetchType type,
GPRVector dst,
PValue src, int offset,
int buffer_id, PValue buffer_offset,
EBufferIndexMode cp_rel,
bool use_const_field = false);
FetchInstruction(GPRVector dst,
PValue src,
int buffer_id,
PValue buffer_offset,
EVTXDataFormat format,
EVFetchNumFormat num_format);
FetchInstruction(GPRVector dst,
PValue src,
int buffer_id,
EBufferIndexMode cp_rel);
FetchInstruction(GPRVector dst, PValue src, int scratch_size);
void replace_values(const ValueSet& candidates, PValue new_value) override;
EVFetchInstr vc_opcode() const { return m_vc_opcode;}
EVFetchType fetch_type() const { return m_fetch_type;}
EVTXDataFormat data_format() const { return m_data_format;}
EVFetchNumFormat num_format() const { return m_num_format;}
EVFetchEndianSwap endian_swap() const { return m_endian_swap;}
const Value& src() const { return *m_src;}
const GPRVector& dst() const { return m_dst;}
uint32_t offset() const { return m_offset;}
bool is_mega_fetch() const { return m_is_mega_fetch;}
uint32_t mega_fetch_count() const { return m_mega_fetch_count;}
uint32_t buffer_id() const { return m_buffer_id;}
uint32_t semantic_id() const { return m_semantic_id;}
EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;}
bool is_signed() const { return m_flags.test(vtx_format_comp_signed);}
bool use_const_fields() const { return m_flags.test(vtx_use_const_field);}
bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);}
void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);}
bool uncached() const {return m_uncached; }
bool indexed() const {return m_indexed; }
int array_base()const {return m_array_base; }
int array_size() const {return m_array_size; }
int elm_size() const {return m_elm_size; }
void set_buffer_offset(PValue buffer_offset) {
m_buffer_offset = buffer_offset;
add_remappable_src_value(&m_buffer_offset);
}
PValue buffer_offset() const { return m_buffer_offset; }
void set_dest_swizzle(const std::array<int,4>& swz);
void set_format(EVTXDataFormat fmt);
int swz(int idx) const { return m_dest_swizzle[idx];}
bool use_tc() const {return m_flags.test(vtx_use_tc);}
bool use_vpm() const {return m_flags.test(vtx_vpm);}
void prelude_append(Instruction *instr);
const std::vector<PInstruction>& prelude() const;
bool has_prelude() const {return !m_prelude.empty();}
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
EVFetchInstr m_vc_opcode;
EVFetchType m_fetch_type;
EVTXDataFormat m_data_format;
EVFetchNumFormat m_num_format;
EVFetchEndianSwap m_endian_swap;
PValue m_src;
GPRVector m_dst;
uint32_t m_offset;
bool m_is_mega_fetch;
uint32_t m_mega_fetch_count;
uint32_t m_buffer_id;
uint32_t m_semantic_id;
EBufferIndexMode m_buffer_index_mode;
std::bitset<16> m_flags;
bool m_uncached;
bool m_indexed;
int m_array_base;
int m_array_size;
int m_elm_size;
PValue m_buffer_offset;
std::array<int, 4> m_dest_swizzle;
std::vector<PInstruction> m_prelude;
};
class LoadFromScratch: public FetchInstruction {
public:
LoadFromScratch(GPRVector dst, PValue src, int scratch_size);
};
class FetchGDSOpResult : public FetchInstruction {
public:
FetchGDSOpResult(const GPRVector dst, const PValue src);
};
class FetchTCSIOParam : public FetchInstruction {
public:
FetchTCSIOParam(GPRVector dst, PValue src, int offset);
};
}
#endif // SFN_INSTRUCTION_FETCH_H


@@ -1,180 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_instruction_gds.h"
#include "sfn_liverange.h"
namespace r600 {
GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
const PValue& value2, const PValue& uav_id, int uav_base):
Instruction(gds),
m_op(op),
m_src(value),
m_src2(value2),
m_dest(dest),
m_dest_swizzle({PIPE_SWIZZLE_X,7,7,7}),
m_src_swizzle({PIPE_SWIZZLE_0, PIPE_SWIZZLE_X, PIPE_SWIZZLE_0}),
m_buffer_index_mode(bim_none),
m_uav_id(uav_id),
m_uav_base(uav_base),
m_flags(0)
{
add_remappable_src_value(&m_src);
add_remappable_src_value(&m_src2);
add_remappable_src_value(&m_uav_id);
add_remappable_dst_value(&m_dest);
m_dest_swizzle[0] = m_dest.chan_i(0);
}
GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
const PValue& uav_id, int uav_base):
GDSInstr(op, dest, value, PValue(), uav_id, uav_base)
{
assert(value);
m_src_swizzle[1] = value->chan();
m_src_swizzle[2] = PIPE_SWIZZLE_0;
}
GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,
const PValue& uav_id, int uav_base):
GDSInstr(op, dest, PValue(), PValue(), uav_id, uav_base)
{
m_src_swizzle[1] = PIPE_SWIZZLE_0;
}
bool GDSInstr::is_equal_to(UNUSED const Instruction& lhs) const
{
return false;
}
void GDSInstr::do_print(std::ostream& os) const
{
const char *swz = "xyzw01?_";
os << lds_ops.at(m_op).name << " R" << m_dest.sel() << ".";
for (int i = 0; i < 4; ++i) {
os << swz[m_dest_swizzle[i]];
}
if (m_src)
os << " " << *m_src;
os << " UAV:" << *m_uav_id;
}
RatInstruction::RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
const GPRVector& data, const GPRVector& index,
int rat_id, const PValue& rat_id_offset,
int burst_count, int comp_mask, int element_size, bool ack):
Instruction(rat),
m_cf_opcode(cf_opcode),
m_rat_op(rat_op),
m_data(data),
m_index(index),
m_rat_id(rat_id),
m_rat_id_offset(rat_id_offset),
m_burst_count(burst_count),
m_comp_mask(comp_mask),
m_element_size(element_size),
m_need_ack(ack)
{
add_remappable_src_value(&m_data);
add_remappable_src_value(&m_rat_id_offset);
add_remappable_src_value(&m_index);
}
bool RatInstruction::is_equal_to(UNUSED const Instruction& lhs) const
{
return false;
}
void RatInstruction::do_print(std::ostream& os) const
{
os << "MEM_RAT RAT(" << m_rat_id;
if (m_rat_id_offset)
os << "+" << *m_rat_id_offset;
os << ") @" << m_index;
os << " OP:" << m_rat_op << " " << m_data;
os << " BC:" << m_burst_count
<< " MASK:" << m_comp_mask
<< " ES:" << m_element_size;
if (m_need_ack)
os << " ACK";
}
RatInstruction::ERatOp RatInstruction::opcode(nir_intrinsic_op opcode)
{
switch (opcode) {
case nir_intrinsic_ssbo_atomic_add:
return ADD_RTN;
case nir_intrinsic_ssbo_atomic_and:
return AND_RTN;
case nir_intrinsic_ssbo_atomic_exchange:
return XCHG_RTN;
case nir_intrinsic_ssbo_atomic_umax:
return MAX_UINT_RTN;
case nir_intrinsic_ssbo_atomic_umin:
return MIN_UINT_RTN;
case nir_intrinsic_ssbo_atomic_imax:
return MAX_INT_RTN;
case nir_intrinsic_ssbo_atomic_imin:
return MIN_INT_RTN;
case nir_intrinsic_ssbo_atomic_xor:
return XOR_RTN;
default:
return UNSUPPORTED;
}
}
GDSStoreTessFactor::GDSStoreTessFactor(GPRVector& value):
Instruction(tf_write),
m_value(value)
{
add_remappable_src_value(&m_value);
}
void GDSStoreTessFactor::replace_values(const ValueSet& candidates, PValue new_value)
{
for (auto& c: candidates) {
for (int i = 0; i < 4; ++i) {
if (*c == *m_value[i])
m_value[i] = new_value;
}
}
}
bool GDSStoreTessFactor::is_equal_to(const Instruction& lhs) const
{
auto& other = static_cast<const GDSStoreTessFactor&>(lhs);
return m_value == other.m_value;
}
void GDSStoreTessFactor::do_print(std::ostream& os) const
{
os << "TF_WRITE " << m_value;
}
}


@@ -1,225 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_GDSINSTR_H
#define SFN_GDSINSTR_H
#include "sfn_instruction_base.h"
#include <bitset>
namespace r600 {
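/* Operation on the global data share (GDS), e.g. for atomic counters;
 * the target is selected via a UAV id plus base offset. */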
class GDSInstr : public Instruction
{
public:
GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
const PValue &uav_id, int uav_base);
GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
const PValue& value2, const PValue &uav_id, int uav_base);
GDSInstr(ESDOp op, const GPRVector& dest, const PValue &uav_id, int uav_base);
ESDOp op() const {return m_op;}
int src_sel() const {
if (!m_src)
return 0;
assert(m_src->type() == Value::gpr);
return m_src->sel();
}
int src2_chan() const {
if (!m_src2)
return 0;
assert(m_src2->type() == Value::gpr);
return m_src2->chan();
}
int src_swizzle(int idx) const {assert(idx < 3); return m_src_swizzle[idx];}
int dest_sel() const {
return m_dest.sel();
}
int dest_swizzle(int i) const {
if (i < 4)
return m_dest_swizzle[i];
return 7;
}
void set_dest_swizzle(const std::array<int,4>& swz) {
m_dest_swizzle = swz;
}
PValue uav_id() const {return m_uav_id;}
int uav_base() const {return m_uav_base;}
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
ESDOp m_op;
PValue m_src;
PValue m_src2;
GPRVector m_dest;
std::array <int, 4> m_dest_swizzle;
std::array <int, 3> m_src_swizzle;
EBufferIndexMode m_buffer_index_mode;
PValue m_uav_id;
int m_uav_base;
std::bitset<8> m_flags;
};
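/* Memory access through a RAT (random access target), used e.g. for SSBO
 * and image stores and atomics. */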
class RatInstruction : public Instruction {
public:
enum ERatOp {
NOP,
STORE_TYPED,
STORE_RAW,
STORE_RAW_FDENORM,
CMPXCHG_INT,
CMPXCHG_FLT,
CMPXCHG_FDENORM,
ADD,
SUB,
RSUB,
MIN_INT,
MIN_UINT,
MAX_INT,
MAX_UINT,
AND,
OR,
XOR,
MSKOR,
INC_UINT,
DEC_UINT,
NOP_RTN = 32,
XCHG_RTN = 34,
XCHG_FDENORM_RTN,
CMPXCHG_INT_RTN,
CMPXCHG_FLT_RTN,
CMPXCHG_FDENORM_RTN,
ADD_RTN,
SUB_RTN,
RSUB_RTN,
MIN_INT_RTN,
MIN_UINT_RTN,
MAX_INT_RTN,
MAX_UINT_RTN,
AND_RTN,
OR_RTN,
XOR_RTN,
MSKOR_RTN,
UINT_RTN,
UNSUPPORTED
};
RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
const GPRVector& data, const GPRVector& index,
int rat_id, const PValue& rat_id_offset,
int burst_count, int comp_mask, int element_size,
bool ack);
PValue rat_id_offset() const { return m_rat_id_offset;}
int rat_id() const { return m_rat_id;}
ERatOp rat_op() const {return m_rat_op;}
int data_gpr() const {return m_data.sel();}
int index_gpr() const {return m_index.sel();}
int elm_size() const {return m_element_size;}
int comp_mask() const {return m_comp_mask;}
bool need_ack() const {return m_need_ack;}
int burst_count() const {return m_burst_count;}
static ERatOp opcode(nir_intrinsic_op opcode);
int data_swz(int chan) const {return m_data.chan_i(chan);}
ECFOpCode cf_opcode() const { return m_cf_opcode;}
void set_ack() {m_need_ack = true; }
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
ECFOpCode m_cf_opcode;
ERatOp m_rat_op;
GPRVector m_data;
GPRVector m_index;
int m_rat_id;
PValue m_rat_id_offset;
int m_burst_count;
int m_comp_mask;
int m_element_size;
std::bitset<8> m_flags;
bool m_need_ack;
};
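/* Writes a vector of tessellation factors to the GDS (TF_WRITE). */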
class GDSStoreTessFactor : public Instruction {
public:
GDSStoreTessFactor(GPRVector& value);
int sel() const {return m_value.sel();}
int chan(int i ) const {return m_value.chan_i(i);}
void replace_values(const ValueSet& candiates, PValue new_value) override;
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
GPRVector m_value;
};
}
#endif // SFN_GDSINSTR_H


@@ -1,151 +0,0 @@
#include "sfn_instruction_lds.h"
namespace r600 {
void LDSReadInstruction::do_print(std::ostream& os) const
{
os << "LDS Read [";
for (auto& v : m_dest_value)
os << *v << " ";
os << "], ";
for (auto& a : m_address)
os << *a << " ";
}
LDSReadInstruction::LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value):
Instruction(lds_read),
m_address(address),
m_dest_value(value)
{
assert(address.size() == value.size());
for (unsigned i = 0; i < address.size(); ++i) {
add_remappable_src_value(&m_address[i]);
add_remappable_dst_value(&m_dest_value[i]);
}
}
void LDSReadInstruction::replace_values(const ValueSet& candidates, PValue new_value)
{
for (auto& c : candidates) {
for (auto& d: m_dest_value) {
if (*c == *d)
d = new_value;
}
for (auto& a: m_address) {
if (*c == *a)
a = new_value;
}
}
}
bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
{
auto& other = static_cast<const LDSReadInstruction&>(lhs);
return m_address == other.m_address &&
m_dest_value == other.m_dest_value;
}
LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op):
Instruction(lds_atomic),
m_address(address),
m_dest_value(dest),
m_src0_value(src0),
m_src1_value(src1),
m_opcode(op)
{
add_remappable_src_value(&m_src0_value);
add_remappable_src_value(&m_src1_value);
add_remappable_src_value(&m_address);
add_remappable_dst_value(&m_dest_value);
}
LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op):
LDSAtomicInstruction(dest, src0, PValue(), address, op)
{
}
void LDSAtomicInstruction::do_print(std::ostream& os) const
{
os << "LDS " << m_opcode << " " << *m_dest_value << " ";
os << "[" << *m_address << "] " << *m_src0_value;
if (m_src1_value)
os << ", " << *m_src1_value;
}
bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
{
auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
return m_opcode == other.m_opcode &&
*m_dest_value == *other.m_dest_value &&
*m_src0_value == *other.m_src0_value &&
*m_address == *other.m_address &&
((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) ||
(!m_src1_value && !other.m_src1_value));
}
LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
LDSWriteInstruction(address, idx_offset, value0, PValue())
{
}
LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1):
Instruction(lds_write),
m_address(address),
m_value0(value0),
m_value1(value1),
m_idx_offset(idx_offset)
{
add_remappable_src_value(&m_address);
add_remappable_src_value(&m_value0);
if (m_value1)
add_remappable_src_value(&m_value1);
}
void LDSWriteInstruction::do_print(std::ostream& os) const
{
os << "LDS Write" << num_components()
<< " " << address() << ", " << value0();
if (num_components() > 1)
os << ", " << value1();
}
void LDSWriteInstruction::replace_values(const ValueSet& candidates, PValue new_value)
{
for (auto c: candidates) {
if (*c == *m_address)
m_address = new_value;
if (*c == *m_value0)
m_value0 = new_value;
if (*c == *m_value1)
m_value1 = new_value;
}
}
bool LDSWriteInstruction::is_equal_to(const Instruction& lhs) const
{
auto& other = static_cast<const LDSWriteInstruction&>(lhs);
if (m_value1) {
if (!other.m_value1)
return false;
if (*m_value1 != *other.m_value1)
return false;
} else {
if (other.m_value1)
return false;
}
return (*m_value0 == *other.m_value0 &&
*m_address == *other.m_address);
}
} // namespace r600

View File

@ -1,82 +0,0 @@
#ifndef LDSINSTRUCTION_H
#define LDSINSTRUCTION_H
#include "sfn_instruction_base.h"
namespace r600 {
class LDSReadInstruction : public Instruction {
public:
LDSReadInstruction(std::vector<PValue>& value, std::vector<PValue>& address);
void replace_values(const ValueSet& candidates, PValue new_value) override;
unsigned num_values() const { return m_dest_value.size();}
const Value& address(unsigned i) const { return *m_address[i];}
const Value& dest(unsigned i) const { return *m_dest_value[i];}
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_print(std::ostream& os) const override;
bool is_equal_to(const Instruction& lhs) const override;
std::vector<PValue> m_address;
std::vector<PValue> m_dest_value;
};
class LDSAtomicInstruction : public Instruction {
public:
LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
const Value& address() const { return *m_address;}
const Value& dest() const { return *m_dest_value;}
const Value& src0() const { return *m_src0_value;}
const PValue& src1() const { return m_src1_value;}
unsigned op() const {return m_opcode;}
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_print(std::ostream& os) const override;
bool is_equal_to(const Instruction& lhs) const override;
PValue m_address;
PValue m_dest_value;
PValue m_src0_value;
PValue m_src1_value;
unsigned m_opcode;
};
class LDSWriteInstruction : public Instruction {
public:
LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0);
LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1);
const Value& address() const {return *m_address;};
const Value& value0() const { return *m_value0;}
const Value& value1() const { return *m_value1;}
unsigned num_components() const { return m_value1 ? 2 : 1;}
unsigned idx_offset() const {return m_idx_offset;};
void replace_values(const ValueSet& candidates, PValue new_value) override;
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
void do_print(std::ostream& os) const override;
bool is_equal_to(const Instruction& lhs) const override;
PValue m_address;
PValue m_value0;
PValue m_value1;
unsigned m_idx_offset;
};
}
#endif // LDSINSTRUCTION_H

View File

@ -1,68 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_instruction_misc.h"
namespace r600 {
EmitVertex::EmitVertex(int stream, bool cut):
Instruction (emit_vtx),
m_stream(stream),
m_cut(cut)
{
}
bool EmitVertex::is_equal_to(const Instruction& lhs) const
{
auto& oth = static_cast<const EmitVertex&>(lhs);
return oth.m_stream == m_stream &&
oth.m_cut == m_cut;
}
void EmitVertex::do_print(std::ostream& os) const
{
os << (m_cut ? "EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream;
}
WaitAck::WaitAck(int nack):
Instruction (wait_ack),
m_nack(nack)
{
}
bool WaitAck::is_equal_to(const Instruction& lhs) const
{
const auto& l = static_cast<const WaitAck&>(lhs);
return m_nack == l.m_nack;
}
void WaitAck::do_print(std::ostream& os) const
{
os << "WAIT_ACK @" << m_nack;
}
}

View File

@ -1,69 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_INSTRUCTION_MISC_H
#define SFN_INSTRUCTION_MISC_H
#include "sfn_instruction_base.h"
namespace r600 {
class EmitVertex : public Instruction {
public:
EmitVertex(int stream, bool cut);
ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
int stream() const { return m_stream;}
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
int m_stream;
bool m_cut;
};
class WaitAck : public Instruction {
public:
WaitAck(int nack);
ECFOpCode op() const {return cf_wait_ack;}
int n_ack() const {return m_nack;}
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
int m_nack;
};
}
#endif // SFN_INSTRUCTION_MISC_H

View File

@ -1,143 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INSTRUCTION_TEX_H
#define INSTRUCTION_TEX_H
#include "sfn_instruction_base.h"
namespace r600 {
class TexInstruction : public Instruction {
public:
enum Opcode {
ld = FETCH_OP_LD,
get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES,
get_tex_lod = FETCH_OP_GET_LOD,
get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
keep_gradients = FETCH_OP_KEEP_GRADIENTS,
set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
sample = FETCH_OP_SAMPLE,
sample_l = FETCH_OP_SAMPLE_L,
sample_lb = FETCH_OP_SAMPLE_LB,
sample_lz = FETCH_OP_SAMPLE_LZ,
sample_g = FETCH_OP_SAMPLE_G,
sample_g_lb = FETCH_OP_SAMPLE_G_L,
gather4 = FETCH_OP_GATHER4,
gather4_o = FETCH_OP_GATHER4_O,
sample_c = FETCH_OP_SAMPLE_C,
sample_c_l = FETCH_OP_SAMPLE_C_L,
sample_c_lb = FETCH_OP_SAMPLE_C_LB,
sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
sample_c_g = FETCH_OP_SAMPLE_C_G,
sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
gather4_c = FETCH_OP_GATHER4_C,
gather4_c_o = FETCH_OP_GATHER4_C_O,
};
enum Flags {
x_unnormalized,
y_unnormalized,
z_unnormalized,
w_unnormalized,
grad_fine
};
TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid,
unsigned rid, PValue sampler_offset);
const GPRVector& src() const {return m_src;}
const GPRVector& dst() const {return m_dst;}
unsigned opcode() const {return m_opcode;}
unsigned sampler_id() const {return m_sampler_id;}
unsigned resource_id() const {return m_resource_id;}
void replace_values(const ValueSet& candidates, PValue new_value) override;
void set_offset(unsigned index, int32_t val);
int get_offset(unsigned index) const;
void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
int inst_mode() const { return m_inst_mode;}
void set_flag(Flags flag) {
m_flags.set(flag);
}
PValue sampler_offset() const {
return m_sampler_offset;
}
bool has_flag(Flags flag) const {
return m_flags.test(flag);
}
int dest_swizzle(int i) const {
assert(i < 4);
return m_dest_swizzle[i];
}
void set_dest_swizzle(const std::array<int,4>& swz) {
m_dest_swizzle = swz;
}
void set_gather_comp(int cmp);
bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
private:
bool is_equal_to(const Instruction& lhs) const override;
void do_print(std::ostream& os) const override;
static const char *opname(Opcode code);
Opcode m_opcode;
GPRVector m_dst;
GPRVector m_src;
unsigned m_sampler_id;
unsigned m_resource_id;
std::bitset<8> m_flags;
int m_offset[3];
int m_inst_mode;
std::array<int,4> m_dest_swizzle;
PValue m_sampler_offset;
};
bool r600_nir_lower_int_tg4(nir_shader *nir);
bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
}
#endif // INSTRUCTION_TEX_H

View File

@ -1,91 +0,0 @@
#ifndef INSTRUCTIONVISITOR_H
#define INSTRUCTIONVISITOR_H
namespace r600 {
class AluInstruction;
class ExportInstruction;
class TexInstruction;
class FetchInstruction;
class IfInstruction;
class ElseInstruction;
class IfElseEndInstruction;
class LoopBeginInstruction;
class LoopEndInstruction;
class LoopBreakInstruction;
class LoopContInstruction;
class StreamOutIntruction;
class MemRingOutIntruction;
class EmitVertex;
class WaitAck;
class WriteScratchInstruction;
class GDSInstr;
class RatInstruction;
class LDSWriteInstruction;
class LDSReadInstruction;
class LDSAtomicInstruction;
class GDSStoreTessFactor;
class InstructionBlock;
class InstructionVisitor
{
public:
virtual ~InstructionVisitor() {};
virtual bool visit(AluInstruction& i) = 0;
virtual bool visit(ExportInstruction& i) = 0;
virtual bool visit(TexInstruction& i) = 0;
virtual bool visit(FetchInstruction& i) = 0;
virtual bool visit(IfInstruction& i) = 0;
virtual bool visit(ElseInstruction& i) = 0;
virtual bool visit(IfElseEndInstruction& i) = 0;
virtual bool visit(LoopBeginInstruction& i) = 0;
virtual bool visit(LoopEndInstruction& i) = 0;
virtual bool visit(LoopBreakInstruction& i) = 0;
virtual bool visit(LoopContInstruction& i) = 0;
virtual bool visit(StreamOutIntruction& i) = 0;
virtual bool visit(MemRingOutIntruction& i) = 0;
virtual bool visit(EmitVertex& i) = 0;
virtual bool visit(WaitAck& i) = 0;
virtual bool visit(WriteScratchInstruction& i) = 0;
virtual bool visit(GDSInstr& i) = 0;
virtual bool visit(RatInstruction& i) = 0;
virtual bool visit(LDSWriteInstruction& i) = 0;
virtual bool visit(LDSReadInstruction& i) = 0;
virtual bool visit(LDSAtomicInstruction& i) = 0;
virtual bool visit(GDSStoreTessFactor& i) = 0;
virtual bool visit(InstructionBlock& i) = 0;
};
class ConstInstructionVisitor
{
public:
virtual ~ConstInstructionVisitor() {};
virtual bool visit(const AluInstruction& i) = 0;
virtual bool visit(const ExportInstruction& i) = 0;
virtual bool visit(const TexInstruction& i) = 0;
virtual bool visit(const FetchInstruction& i) = 0;
virtual bool visit(const IfInstruction& i) = 0;
virtual bool visit(const ElseInstruction& i) = 0;
virtual bool visit(const IfElseEndInstruction& i) = 0;
virtual bool visit(const LoopBeginInstruction& i) = 0;
virtual bool visit(const LoopEndInstruction& i) = 0;
virtual bool visit(const LoopBreakInstruction& i) = 0;
virtual bool visit(const LoopContInstruction& i) = 0;
virtual bool visit(const StreamOutIntruction& i) = 0;
virtual bool visit(const MemRingOutIntruction& i) = 0;
virtual bool visit(const EmitVertex& i) = 0;
virtual bool visit(const WaitAck& i) = 0;
virtual bool visit(const WriteScratchInstruction& i) = 0;
virtual bool visit(const GDSInstr& i) = 0;
virtual bool visit(const RatInstruction& i) = 0;
virtual bool visit(const LDSWriteInstruction& i) = 0;
virtual bool visit(const LDSReadInstruction& i) = 0;
virtual bool visit(const LDSAtomicInstruction& i) = 0;
virtual bool visit(const GDSStoreTessFactor& i) = 0;
virtual bool visit(const InstructionBlock& i) = 0;
};
}
#endif // INSTRUCTIONVISITOR_H

View File

@ -1,45 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_nir.h"
struct r600_shader;
union r600_shader_key;
namespace r600 {
class AssemblyFromShaderLegacy : public AssemblyFromShader {
public:
AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key);
~AssemblyFromShaderLegacy() override;
private:
bool do_lower(const std::vector<InstructionBlock> &ir) override ;
struct AssemblyFromShaderLegacyImpl *impl;
};
}

File diff suppressed because it is too large

View File

@ -1,314 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018-2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_LIVERANGE_H
#define SFN_LIVERANGE_H
#include <cstdint>
#include <ostream>
#include <vector>
#include <limits>
#include "sfn_instruction_base.h"
#include "sfn_nir.h"
namespace r600 {
/** Storage to record the required live range of a temporary register
* begin == end == -1 indicates that the register can be reused without
* limitations. Otherwise, "begin" indicates the first instruction in which
* a write operation may target this temporary, and "end" indicates the
* last instruction in which a value can be read from this temporary.
* Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin.
*/
struct register_live_range {
int begin;
int end;
bool is_array_elm;
};
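/* Illustrative sketch (not part of the original patch): the merge rule
 * described above, written out as a helper. The name "can_merge_into" is
 * hypothetical and only serves to make the rule explicit. */
inline bool can_merge_into(const register_live_range& r1,
                           const register_live_range& r2)
{
   /* A register with no recorded live range can be reused freely. */
   if (r2.begin == -1 && r2.end == -1)
      return true;
   /* Otherwise R2 may take over R1's slot only if R1 dies before R2 is born. */
   return r1.end <= r2.begin;
}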
enum prog_scope_type {
outer_scope, /* Outer program scope */
loop_body, /* Inside a loop */
if_branch, /* Inside if branch */
else_branch, /* Inside else branch */
switch_body, /* Inside switch statement */
switch_case_branch, /* Inside switch case statement */
switch_default_branch, /* Inside switch default statement */
undefined_scope
};
class prog_scope {
public:
prog_scope();
prog_scope(prog_scope *parent, prog_scope_type type, int id,
int depth, int begin);
prog_scope_type type() const;
prog_scope *parent() const;
int nesting_depth() const;
int id() const;
int end() const;
int begin() const;
int loop_break_line() const;
const prog_scope *in_else_scope() const;
const prog_scope *in_ifelse_scope() const;
const prog_scope *in_parent_ifelse_scope() const;
const prog_scope *innermost_loop() const;
const prog_scope *outermost_loop() const;
const prog_scope *enclosing_conditional() const;
bool is_loop() const;
bool is_in_loop() const;
bool is_switchcase_scope_in_loop() const;
bool is_conditional() const;
bool is_child_of(const prog_scope *scope) const;
bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const;
bool break_is_for_switchcase() const;
bool contains_range_of(const prog_scope& other) const;
void set_end(int end);
void set_loop_break_line(int line);
private:
prog_scope_type scope_type;
int scope_id;
int scope_nesting_depth;
int scope_begin;
int scope_end;
int break_loop_line;
prog_scope *parent_scope;
};
/* Some storage class to encapsulate the prog_scope (de-)allocations */
class prog_scope_storage {
public:
prog_scope_storage(int n);
~prog_scope_storage();
prog_scope * create(prog_scope *p, prog_scope_type type, int id,
int lvl, int s_begin);
private:
int current_slot;
std::vector<prog_scope> storage;
};
/* Class to track the access to a component of a temporary register. */
class temp_comp_access {
public:
temp_comp_access();
void record_read(int line, prog_scope *scope);
void record_write(int line, prog_scope *scope);
register_live_range get_required_live_range();
private:
void propagate_live_range_to_dominant_write_scope();
bool conditional_ifelse_write_in_loop() const;
void record_ifelse_write(const prog_scope& scope);
void record_if_write(const prog_scope& scope);
void record_else_write(const prog_scope& scope);
prog_scope *last_read_scope;
prog_scope *first_read_scope;
prog_scope *first_write_scope;
int first_write;
int last_read;
int last_write;
int first_read;
/* This member variable tracks the current resolution of conditional writing
* to this temporary in IF/ELSE clauses.
*
* The initial value "conditionality_untouched" indicates that this
* temporary has not yet been written to within an if clause.
*
* A positive (other than "conditionality_untouched") number refers to the
* last loop id for which the write was resolved as unconditional. With each
* new loop this value will be overwritten by "conditionality_unresolved"
* on entering the first IF clause writing this temporary.
*
* The value "conditionality_unresolved" indicates that no resolution has
* been achieved so far. If the variable is set to this value at the end of
* the processing of the whole shader it also indicates a conditional write.
*
* The value "write_is_conditional" marks that the variable is written
* conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
* least one loop.
*/
int conditionality_in_loop_id;
/* Helper constants to make the tracking code more readable. */
static const int write_is_conditional = -1;
static const int conditionality_unresolved = 0;
static const int conditionality_untouched;
static const int write_is_unconditional;
/* A bit field tracking the nesting levels of if-else clauses where the
* temporary has (so far) been written to in the if branch, but not in the
* else branch.
*/
unsigned int if_scope_write_flags;
int next_ifelse_nesting_depth;
static const int supported_ifelse_nesting_depth = 32;
/* Tracks the last if scope in which the temporary was written to
* without a write in the corresponding else branch. It is also used
* to track read-before-write in that scope.
*/
const prog_scope *current_unpaired_if_write_scope;
/* Flag to resolve read-before-write in the else scope. */
bool was_written_in_current_else_scope;
};
/* Class to track the access to all components of a temporary register. */
class temp_access {
public:
temp_access();
void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm);
void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm);
register_live_range get_required_live_range();
private:
void update_access_mask(int mask);
temp_comp_access comp[4];
int access_mask;
bool needs_component_tracking;
bool is_array_element;
};
/* Helper class to merge the live ranges of arrays.
*
* For arrays the array length, live range, and component access need to
* be kept, because when live ranges are merged or arrays are interleaved
* one can only merge or interleave an array into another one with an equal
* or larger number of elements. For interleaving it is also required that
* the sum of used swizzles is at most four.
*/
class array_live_range {
public:
array_live_range();
array_live_range(unsigned aid, unsigned alength);
array_live_range(unsigned aid, unsigned alength, int first_access,
int last_access, int mask);
void set_live_range(int first_access, int last_access);
void set_begin(int _begin){first_access = _begin;}
void set_end(int _end){last_access = _end;}
void set_access_mask(int s);
static void merge(array_live_range *a, array_live_range *b);
static void interleave(array_live_range *a, array_live_range *b);
int array_id() const {return id;}
int target_array_id() const {return target_array ? target_array->id : 0;}
const array_live_range *final_target() const {return target_array ?
target_array->final_target() : this;}
unsigned array_length() const { return length;}
int begin() const { return first_access;}
int end() const { return last_access;}
int access_mask() const { return component_access_mask;}
int used_components() const {return used_component_count;}
bool time_doesnt_overlap(const array_live_range& other) const;
void print(std::ostream& os) const;
bool is_mapped() const { return target_array != nullptr;}
int8_t remap_one_swizzle(int8_t idx) const;
private:
void init_swizzles();
void set_target(array_live_range *target);
void merge_live_range_from(array_live_range *other);
void interleave_into(array_live_range *other);
unsigned id;
unsigned length;
int first_access;
int last_access;
uint8_t component_access_mask;
uint8_t used_component_count;
array_live_range *target_array;
int8_t swizzle_map[4];
};
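/* Illustrative sketch (not part of the original patch): the merge and
 * interleave preconditions described above, spelled out with the accessors
 * of array_live_range. The helper names are hypothetical. */
inline bool merge_is_legal(const array_live_range& target,
                           const array_live_range& src)
{
   /* Only an array with an equal or larger number of elements can absorb
    * another one, and their live ranges must not overlap in time. */
   return target.array_length() >= src.array_length() &&
          target.time_doesnt_overlap(src);
}

inline bool interleave_is_legal(const array_live_range& target,
                                const array_live_range& src)
{
   /* Interleaving additionally requires that the combined number of used
    * components still fits into the four channels of a register. */
   return target.array_length() >= src.array_length() &&
          target.used_components() + src.used_components() <= 4;
}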
class LiverangeEvaluator {
public:
LiverangeEvaluator();
void run(const Shader& shader,
std::vector<register_live_range> &register_live_ranges);
void scope_if();
void scope_else();
void scope_endif();
void scope_loop_begin();
void scope_loop_end();
void scope_loop_break();
void record_read(const Value& src, bool is_array_elm = false);
void record_write(const Value& dst, bool is_array_elm = false);
void record_read(const GPRVector& src);
void record_write(const GPRVector& dst);
private:
prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id,
int lvl, int s_begin);
void get_required_live_ranges(std::vector<register_live_range>& register_live_ranges);
int line;
int loop_id;
int if_id;
int switch_id;
bool is_at_end;
int n_scopes;
std::unique_ptr<prog_scope_storage> scopes;
prog_scope *cur_scope;
std::vector<temp_access> temp_acc;
};
std::vector<rename_reg_pair>
get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges);
} // end namespace r600
#endif

View File

@ -0,0 +1,438 @@
#include "sfn_liverangeevaluator.h"
#include "sfn_liverangeevaluator_helpers.h"
#include "sfn_instr_alugroup.h"
#include "sfn_instr_controlflow.h"
#include "sfn_instr_export.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_mem.h"
#include "sfn_instr_tex.h"
#include "sfn_shader.h"
#include "sfn_debug.h"
#include <algorithm>
#include <map>
namespace r600 {
class LiveRangeInstrVisitor : public InstrVisitor {
public:
LiveRangeInstrVisitor(LiveRangeMap& live_range_map);
void visit(AluInstr *instr) override;
void visit(AluGroup *instr) override;
void visit(TexInstr *instr) override;
void visit(ExportInstr *instr) override;
void visit(FetchInstr *instr) override;
void visit(Block *instr) override;
void visit(ControlFlowInstr *instr) override;
void visit(IfInstr *instr) override;
void visit(WriteScratchInstr *instr) override;
void visit(StreamOutInstr *instr) override;
void visit(MemRingOutInstr *instr) override;
void visit(EmitVertexInstr *instr) override {(void)instr;}
void visit(GDSInstr *instr) override;
void visit(WriteTFInstr *instr) override;
void visit(LDSAtomicInstr *instr) override;
void visit(LDSReadInstr *instr) override;
void visit(RatInstr *instr) override;
void finalize();
private:
void record_write(const Register *reg);
void record_read(const Register *reg, LiveRangeEntry::EUse use);
void record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle& swizzle);
void record_read(const RegisterVec4 &reg, LiveRangeEntry::EUse use);
void scope_if();
void scope_else();
void scope_endif();
void scope_loop_begin();
void scope_loop_end();
void scope_loop_break();
ProgramScope *create_scope(ProgramScope *parent, ProgramScopeType type,
int id, int nesting_depth, int line);
std::vector<std::unique_ptr<ProgramScope>> m_scopes;
ProgramScope *m_current_scope;
LiveRangeMap& m_live_range_map;
RegisterAccess m_register_access;
int m_line{0};
int m_if_id{1};
int m_loop_id{1};
};
LiveRangeEvaluator::LiveRangeEvaluator()
{
}
LiveRangeMap LiveRangeEvaluator::run(Shader& sh)
{
LiveRangeMap range_map = sh.prepare_live_range_map();
LiveRangeInstrVisitor evaluator(range_map);
for (auto& b : sh.func())
b->accept(evaluator);
evaluator.finalize();
return range_map;
}
void LiveRangeInstrVisitor::finalize()
{
m_current_scope->set_end(m_line);
for (int i = 0; i < 4; ++i) {
auto& live_ranges = m_live_range_map.component(i);
for(const auto& r : live_ranges) {
if (r.m_register->live_end_pinned())
record_read(r.m_register, LiveRangeEntry::use_unspecified);
}
auto& comp_access = m_register_access.component(i);
for (size_t j = 0; j < comp_access.size(); ++j) {
sfn_log << SfnLog::merge << "Evaluate access for " << *live_ranges[j].m_register << "\n";
auto& rca = comp_access[j];
rca.update_required_live_range();
live_ranges[j].m_start = rca.range().start;
live_ranges[j].m_end = rca.range().end;
live_ranges[j].m_use = rca.use_type();
}
}
}
LiveRangeInstrVisitor::LiveRangeInstrVisitor(LiveRangeMap& live_range_map):
m_live_range_map(live_range_map),
m_register_access(live_range_map.sizes())
{
if (sfn_log.has_debug_flag(SfnLog::merge)) {
sfn_log << SfnLog::merge << "Have component register numbers: ";
for (auto n : live_range_map.sizes())
sfn_log << n << " ";
sfn_log << "\n";
}
m_scopes.push_back(std::make_unique<ProgramScope>(nullptr, outer_scope, 0, 0, 0));
m_current_scope = m_scopes[0].get();
for (int i = 0; i < 4; ++i) {
const auto& comp = live_range_map.component(i);
for(const auto& r : comp) {
if (r.m_register->live_start_pinned())
record_write(r.m_register);
}
}
m_line = 1;
}
void LiveRangeInstrVisitor::record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle &swizzle)
{
for (int i = 0; i < 4; ++i) {
if (swizzle[i] < 6 && reg[i]->chan() < 4)
record_write(reg[i]);
}
}
void LiveRangeInstrVisitor::record_read(const RegisterVec4& reg, LiveRangeEntry::EUse use)
{
for (int i = 0; i < 4; ++i) {
if (reg[i]->chan() < 4)
record_read(reg[i], use);
}
}
void LiveRangeInstrVisitor::scope_if()
{
m_current_scope = create_scope(m_current_scope, if_branch, m_if_id++,
m_current_scope->nesting_depth() + 1, m_line + 1);
}
void LiveRangeInstrVisitor::scope_else()
{
assert(m_current_scope->type() == if_branch);
m_current_scope->set_end(m_line - 1);
m_current_scope = create_scope(m_current_scope->parent(), else_branch, m_current_scope->id(),
m_current_scope->nesting_depth() + 1, m_line + 1);
}
void LiveRangeInstrVisitor::scope_endif()
{
m_current_scope->set_end(m_line - 1);
m_current_scope = m_current_scope->parent();
assert(m_current_scope);
}
void LiveRangeInstrVisitor::scope_loop_begin()
{
m_current_scope = create_scope(m_current_scope, loop_body, m_loop_id++,
m_current_scope->nesting_depth() + 1, m_line);
}
void LiveRangeInstrVisitor::scope_loop_end()
{
m_current_scope->set_end(m_line);
m_current_scope = m_current_scope->parent();
assert(m_current_scope);
}
void LiveRangeInstrVisitor::scope_loop_break()
{
m_current_scope->set_loop_break_line(m_line);
}
ProgramScope *LiveRangeInstrVisitor::create_scope(ProgramScope *parent, ProgramScopeType type,
int id, int nesting_depth, int line)
{
m_scopes.emplace_back(std::make_unique<ProgramScope>(parent, type, id, nesting_depth, line));
return m_scopes[m_scopes.size() - 1].get();
}
void LiveRangeInstrVisitor::visit(AluInstr *instr)
{
sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
if (instr->has_alu_flag(alu_write))
record_write(instr->dest());
for (unsigned i = 0; i < instr->n_sources(); ++i) {
record_read(instr->src(i).as_register(), LiveRangeEntry::use_unspecified);
auto uniform = instr->src(i).as_uniform();
if (uniform && uniform->buf_addr()) {
record_read(uniform->buf_addr()->as_register(), LiveRangeEntry::use_unspecified);
}
}
}
void LiveRangeInstrVisitor::visit(AluGroup *group)
{
for (auto i : *group)
if (i)
i->accept(*this);
}
void LiveRangeInstrVisitor::visit(TexInstr *instr)
{
sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
record_write(instr->dst(), instr->all_dest_swizzle());
auto src = instr->src();
record_read(src, LiveRangeEntry::use_unspecified);
if (instr->sampler_offset() && instr->sampler_offset()->as_register())
record_read(instr->sampler_offset()->as_register(), LiveRangeEntry::use_unspecified);
}
void LiveRangeInstrVisitor::visit(ExportInstr *instr)
{
sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
auto src = instr->value();
record_read(src, LiveRangeEntry::use_export);
}
void LiveRangeInstrVisitor::visit(FetchInstr *instr)
{
sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
record_write(instr->dst(), instr->all_dest_swizzle());
auto& src = instr->src();
if (src.chan() < 4) /* Channel can be 7 to disable source */
record_read(&src, LiveRangeEntry::use_unspecified);
}
void LiveRangeInstrVisitor::visit(Block *instr)
{
sfn_log << SfnLog::merge << "Visit block\n";
for (auto i : *instr) {
i->accept(*this);
if (i->end_group())
++m_line;
}
sfn_log << SfnLog::merge << "End block\n";
}
void LiveRangeInstrVisitor::visit(WriteScratchInstr *instr)
{
auto& src = instr->value();
for (int i = 0; i < 4; ++i) {
if ((1 << i) & instr->write_mask()) {
record_read(src[i], LiveRangeEntry::use_unspecified);
}
}
auto addr = instr->address();
if (addr)
record_read(addr, LiveRangeEntry::use_unspecified);
}
void LiveRangeInstrVisitor::visit(StreamOutInstr *instr)
{
sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
auto src = instr->value();
record_read(src, LiveRangeEntry::use_export);
}
void LiveRangeInstrVisitor::visit(MemRingOutInstr *instr)
{
sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
auto src = instr->value();
record_read(src, LiveRangeEntry::use_export);
auto idx = instr->export_index();
if (idx && idx->as_register())
record_read(idx->as_register(), LiveRangeEntry::use_unspecified);
}
void LiveRangeInstrVisitor::visit(ControlFlowInstr *instr)
{
switch (instr->cf_type()) {
case ControlFlowInstr::cf_else: scope_else(); break;
case ControlFlowInstr::cf_endif: scope_endif(); break;
case ControlFlowInstr::cf_loop_begin: scope_loop_begin(); break;
case ControlFlowInstr::cf_loop_end: scope_loop_end(); break;
case ControlFlowInstr::cf_loop_break: scope_loop_break(); break;
case ControlFlowInstr::cf_loop_continue: break;
case ControlFlowInstr::cf_wait_ack: break;
default:
unreachable("Flow control unreachable");
}
}
void LiveRangeInstrVisitor::visit(IfInstr *instr)
{
instr->predicate()->accept(*this);
scope_if();
}
void LiveRangeInstrVisitor::visit(GDSInstr *instr)
{
sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
record_read(instr->src(), LiveRangeEntry::use_unspecified);
if (instr->uav_id())
record_read(instr->uav_id(), LiveRangeEntry::use_unspecified);
record_write(instr->dest());
}
void LiveRangeInstrVisitor::visit(RatInstr *instr)
{
sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
record_read(instr->value(), LiveRangeEntry::use_unspecified);
record_read(instr->addr(), LiveRangeEntry::use_unspecified);
auto idx = instr->rat_id_offset();
if (idx)
record_read(idx, LiveRangeEntry::use_unspecified);
}
void LiveRangeInstrVisitor::visit(WriteTFInstr *instr)
{
record_read(instr->value(), LiveRangeEntry::use_export);
}
void LiveRangeInstrVisitor::visit(UNUSED LDSAtomicInstr *instr)
{
unreachable("LDSAtomicInstr must be lowered before scheduling and live range evaluation");
}
void LiveRangeInstrVisitor::visit(UNUSED LDSReadInstr *instr)
{
unreachable("LDSReadInstr must be lowered before scheduling and live range evaluation");
}
void LiveRangeInstrVisitor::record_write(const Register *reg)
{
auto addr = reg->get_addr();
if (addr && addr->as_register()) {
record_read(addr->as_register(), LiveRangeEntry::use_unspecified);
const auto av = static_cast<const LocalArrayValue *>(reg);
auto& array = av->array();
sfn_log << SfnLog::merge << array << " write:" << m_line << "\n";
for (auto i = 0u; i < array.size(); ++i) {
auto& rav = m_register_access(array(i, reg->chan()));
rav.record_write(m_line, m_current_scope);
}
} else {
auto& ra = m_register_access(*reg);
sfn_log << SfnLog::merge << *reg << " write:" << m_line << "\n";
ra.record_write(m_line, m_current_scope);
}
}
void LiveRangeInstrVisitor::record_read(const Register *reg, LiveRangeEntry::EUse use)
{
if (!reg)
return;
auto addr = reg->get_addr();
if (addr && addr->as_register()) {
sfn_log << SfnLog::merge << "Record reading address register " << *addr << "\n";
auto& ra = m_register_access(*addr->as_register());
ra.record_read(m_line, m_current_scope, use);
const auto av = static_cast<const LocalArrayValue *>(reg);
auto& array = av->array();
sfn_log << SfnLog::merge << array << " read:" << m_line << "\n";
for (auto i = 0u; i < array.size(); ++i) {
auto& rav = m_register_access(array(i, reg->chan()));
rav.record_read(m_line, m_current_scope, use);
}
} else {
sfn_log << SfnLog::merge << *reg << " read:" << m_line << "\n";
auto& ra = m_register_access(*reg);
ra.record_read(m_line, m_current_scope, use);
}
}
std::ostream& operator << (std::ostream& os, const LiveRangeMap& lrm)
{
os << "Live ranges\n";
for (int i = 0; i < 4; ++i) {
const auto& comp = lrm.component(i);
for (auto& range : comp)
os << " " << range << "\n";
}
return os;
}
bool operator == (const LiveRangeMap& lhs, const LiveRangeMap& rhs)
{
for (int i = 0; i < 4; ++i) {
const auto& lc = lhs.component(i);
const auto& rc = rhs.component(i);
if (lc.size() != rc.size())
return false;
for (auto j = 0u; j < lc.size(); ++j) {
const auto& lv = lc[j];
const auto& rv = rc[j];
if (lv.m_start != rv.m_start ||
lv.m_end != rv.m_end ||
lv.m_color != rv.m_color ||
!lv.m_register->equal_to(*rv.m_register))
return false;
}
}
return true;
}
}
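/* Illustrative usage sketch (not part of the original patch): how the
 * evaluator above might be driven. "shader" is assumed to be a fully
 * built r600::Shader; the function name is hypothetical. */
#include "sfn_liverangeevaluator.h"
#include "sfn_shader.h"

#include <iostream>

static void dump_live_ranges(r600::Shader& shader)
{
   r600::LiveRangeEvaluator evaluator;
   auto ranges = evaluator.run(shader);

   /* operator<< for LiveRangeMap is defined above and prints the live
    * ranges of all four register components. */
   std::cout << ranges;
}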

View File

@ -0,0 +1,23 @@
#ifndef LIFERANGEEVALUATOR_H
#define LIFERANGEEVALUATOR_H
#include "sfn_valuefactory.h"
#include <map>
#include <cassert>
namespace r600 {
class Shader;
class LiveRangeEvaluator {
public:
LiveRangeEvaluator();
LiveRangeMap run(Shader &sh);
};
}
#endif // LIFERANGEEVALUATOR_H

View File

@ -0,0 +1,623 @@
#include "sfn_liverangeevaluator_helpers.h"
#include "sfn_virtualvalues.h"
#include "util/u_math.h"
#include <limits>
#include <cassert>
#include <iostream>
namespace r600 {
ProgramScope::ProgramScope(ProgramScope *parent, ProgramScopeType type, int id,
int depth, int scope_begin):
scope_type(type),
scope_id(id),
scope_nesting_depth(depth),
scope_begin(scope_begin),
scope_end(-1),
break_loop_line(std::numeric_limits<int>::max()),
parent_scope(parent)
{
}
ProgramScope::ProgramScope():
ProgramScope(nullptr, undefined_scope, -1, -1, -1)
{
}
ProgramScopeType ProgramScope::type() const
{
return scope_type;
}
ProgramScope *ProgramScope::parent() const
{
return parent_scope;
}
int ProgramScope::nesting_depth() const
{
return scope_nesting_depth;
}
bool ProgramScope::is_loop() const
{
return (scope_type == loop_body);
}
bool ProgramScope::is_in_loop() const
{
if (scope_type == loop_body)
return true;
if (parent_scope)
return parent_scope->is_in_loop();
return false;
}
const ProgramScope *ProgramScope::innermost_loop() const
{
if (scope_type == loop_body)
return this;
if (parent_scope)
return parent_scope->innermost_loop();
return nullptr;
}
const ProgramScope *ProgramScope::outermost_loop() const
{
const ProgramScope *loop = nullptr;
const ProgramScope *p = this;
do {
if (p->type() == loop_body)
loop = p;
p = p->parent();
} while (p);
return loop;
}
bool ProgramScope::is_child_of_ifelse_id_sibling(const ProgramScope *scope) const
{
const ProgramScope *my_parent = in_parent_ifelse_scope();
while (my_parent) {
/* is a direct child? */
if (my_parent == scope)
return false;
/* is a child of the conditions sibling? */
if (my_parent->id() == scope->id())
return true;
my_parent = my_parent->in_parent_ifelse_scope();
}
return false;
}
bool ProgramScope::is_child_of(const ProgramScope *scope) const
{
const ProgramScope *my_parent = parent();
while (my_parent) {
if (my_parent == scope)
return true;
my_parent = my_parent->parent();
}
return false;
}
const ProgramScope *ProgramScope::enclosing_conditional() const
{
if (is_conditional())
return this;
if (parent_scope)
return parent_scope->enclosing_conditional();
return nullptr;
}
bool ProgramScope::contains_range_of(const ProgramScope& other) const
{
return (begin() <= other.begin()) && (end() >= other.end());
}
bool ProgramScope::is_conditional() const
{
return scope_type == if_branch ||
scope_type == else_branch ||
scope_type == switch_case_branch ||
scope_type == switch_default_branch;
}
const ProgramScope *ProgramScope::in_else_scope() const
{
if (scope_type == else_branch)
return this;
if (parent_scope)
return parent_scope->in_else_scope();
return nullptr;
}
const ProgramScope *ProgramScope::in_parent_ifelse_scope() const
{
if (parent_scope)
return parent_scope->in_ifelse_scope();
else
return nullptr;
}
const ProgramScope *ProgramScope::in_ifelse_scope() const
{
if (scope_type == if_branch ||
scope_type == else_branch)
return this;
if (parent_scope)
return parent_scope->in_ifelse_scope();
return nullptr;
}
bool ProgramScope::is_switchcase_scope_in_loop() const
{
return (scope_type == switch_case_branch ||
scope_type == switch_default_branch) &&
is_in_loop();
}
bool ProgramScope::break_is_for_switchcase() const
{
if (scope_type == loop_body)
return false;
if (scope_type == switch_case_branch ||
scope_type == switch_default_branch ||
scope_type == switch_body)
return true;
if (parent_scope)
return parent_scope->break_is_for_switchcase();
return false;
}
int ProgramScope::id() const
{
return scope_id;
}
int ProgramScope::begin() const
{
return scope_begin;
}
int ProgramScope::end() const
{
return scope_end;
}
void ProgramScope::set_end(int end)
{
if (scope_end == -1)
scope_end = end;
}
void ProgramScope::set_loop_break_line(int line)
{
if (scope_type == loop_body) {
break_loop_line = MIN2(break_loop_line, line);
} else {
if (parent_scope)
parent()->set_loop_break_line(line);
}
}
int ProgramScope::loop_break_line() const
{
return break_loop_line;
}
RegisterCompAccess::RegisterCompAccess(LiveRange range):
last_read_scope(nullptr),
first_read_scope(nullptr),
first_write_scope(nullptr),
first_write(range.start),
last_read(range.end),
last_write(range.start),
first_read(std::numeric_limits<int>::max()),
conditionality_in_loop_id(conditionality_untouched),
if_scope_write_flags(0),
next_ifelse_nesting_depth(0),
current_unpaired_if_write_scope(nullptr),
was_written_in_current_else_scope(false),
m_range(range)
{
}
RegisterCompAccess::RegisterCompAccess():
RegisterCompAccess(LiveRange(-1,-1))
{
}
void RegisterCompAccess::record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use)
{
last_read_scope = scope;
if (use != LiveRangeEntry::use_unspecified)
m_use_type.set(use);
if (last_read < line)
last_read = line;
if (first_read > line) {
first_read = line;
first_read_scope = scope;
}
/* If the conditionality of the first write is already resolved then
* no further checks are required.
*/
if (conditionality_in_loop_id == write_is_unconditional ||
conditionality_in_loop_id == write_is_conditional)
return;
/* Check whether we are in a condition within a loop */
const ProgramScope *ifelse_scope = scope->in_ifelse_scope();
const ProgramScope *enclosing_loop;
if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) {
/* If we have either not yet written to this register nor writes are
* resolved as unconditional in the enclosing loop then check whether
* we read before write in an IF/ELSE branch.
*/
if ((conditionality_in_loop_id != write_is_conditional) &&
(conditionality_in_loop_id != enclosing_loop->id())) {
if (current_unpaired_if_write_scope) {
/* Has been written in this or a parent scope? - this makes the temporary
* unconditionally set at this point.
*/
if (scope->is_child_of(current_unpaired_if_write_scope))
return;
/* Has been written in the same scope before it was read? */
if (ifelse_scope->type() == if_branch) {
if (current_unpaired_if_write_scope->id() == scope->id())
return;
} else {
if (was_written_in_current_else_scope)
return;
}
}
/* The temporary was read (conditionally) before it is written, hence
* it should survive a loop. This can be signaled as if it were
* conditionally written.
*/
conditionality_in_loop_id = write_is_conditional;
}
}
}
void RegisterCompAccess::record_write(int line, ProgramScope *scope)
{
last_write = line;
if (first_write < 0) {
first_write = line;
first_write_scope = scope;
/* If the first write we encounter is not in a conditional branch, or
* the conditional write is not within a loop, then this is to be
* considered an unconditional dominant write.
*/
const ProgramScope *conditional = scope->enclosing_conditional();
if (!conditional || !conditional->innermost_loop()) {
conditionality_in_loop_id = write_is_unconditional;
}
}
/* The conditionality of the first write is already resolved. */
if (conditionality_in_loop_id == write_is_unconditional ||
conditionality_in_loop_id == write_is_conditional)
return;
/* If the nesting depth is larger than the supported level,
* then we assume conditional writes.
*/
if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) {
conditionality_in_loop_id = write_is_conditional;
return;
}
/* If we are in an IF/ELSE scope within a loop and the loop has not
* been resolved already, then record this write.
*/
const ProgramScope *ifelse_scope = scope->in_ifelse_scope();
if (ifelse_scope && ifelse_scope->innermost_loop() &&
ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id)
record_ifelse_write(*ifelse_scope);
}
void RegisterCompAccess::record_ifelse_write(const ProgramScope& scope)
{
if (scope.type() == if_branch) {
/* The first write in an IF branch within a loop implies unresolved
* conditionality (if it was untouched or unconditional before).
*/
conditionality_in_loop_id = conditionality_unresolved;
was_written_in_current_else_scope = false;
record_if_write(scope);
} else {
was_written_in_current_else_scope = true;
record_else_write(scope);
}
}
void RegisterCompAccess::record_if_write(const ProgramScope& scope)
{
/* Don't record a write in this IF scope if it ...
* - is not the first write in this IF scope,
* - has already been written to in a parent IF scope.
* In both cases this write is a secondary write that doesn't contribute
* to resolving the conditionality.
*
* Record the write if it
* - is the first one (obviously),
* - happens in an IF branch that is a child of the ELSE branch of the
* last active IF/ELSE pair. In this case recording this write is used to
* establish whether the write is (un-)conditional in the scope enclosing
* this outer IF/ELSE pair.
*/
if (!current_unpaired_if_write_scope ||
(current_unpaired_if_write_scope->id() != scope.id() &&
scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) {
if_scope_write_flags |= 1 << next_ifelse_nesting_depth;
current_unpaired_if_write_scope = &scope;
next_ifelse_nesting_depth++;
}
}
void RegisterCompAccess::record_else_write(const ProgramScope& scope)
{
int mask = 1 << (next_ifelse_nesting_depth - 1);
/* If the temporary was written in an IF branch on the same scope level
* and this branch is the sibling of this ELSE branch, then we have a
* pair of writes that makes write access to this temporary unconditional
* in the enclosing scope.
*/
if ((if_scope_write_flags & mask) &&
(scope.id() == current_unpaired_if_write_scope->id())) {
--next_ifelse_nesting_depth;
if_scope_write_flags &= ~mask;
/* The following code deals with propagating unconditionality from
* inner levels of nested IF/ELSE to the outer levels like in
*
* 1: var t;
* 2: if (a) { <- start scope A
* 3: if (b)
* 4: t = ...
* 5: else
* 6: t = ...
* 7: } else { <- start scope B
* 8: if (c)
* 9: t = ...
* A: else <- start scope C
* B: t = ...
* C: }
*
*/
const ProgramScope *parent_ifelse = scope.parent()->in_ifelse_scope();
if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) {
/* We are at the end of scope C and already recorded a write
* within an IF scope (A), the sibling of the parent ELSE scope B,
* and it is not yet resolved. Mark that as the last relevant
* IF scope. Below the write will be resolved for the A/B
* scope pair.
*/
current_unpaired_if_write_scope = parent_ifelse;
} else {
current_unpaired_if_write_scope = nullptr;
}
/* Promote the first write scope to the enclosing scope because
* the current IF/ELSE pair is now irrelevant for the analysis.
* This is also required to evaluate the minimum life time for t in
* {
* var t;
* if (a)
* t = ...
* else
* t = ...
* x = t;
* ...
* }
*/
first_write_scope = scope.parent();
/* If some parent is IF/ELSE and in a loop then propagate the
* write to that scope. Otherwise the write is unconditional
* because it happens in both corresponding IF/ELSE branches
* in this loop, and hence, record the loop id to signal the
* resolution.
*/
if (parent_ifelse && parent_ifelse->is_in_loop()) {
record_ifelse_write(*parent_ifelse);
} else {
conditionality_in_loop_id = scope.innermost_loop()->id();
}
} else {
/* The temporary was not written in the IF branch corresponding
* to this ELSE branch, hence the write is conditional.
*/
conditionality_in_loop_id = write_is_conditional;
}
}
bool RegisterCompAccess::conditional_ifelse_write_in_loop() const
{
return conditionality_in_loop_id <= conditionality_unresolved;
}
void RegisterCompAccess::propagate_live_range_to_dominant_write_scope()
{
first_write = first_write_scope->begin();
int lr = first_write_scope->end();
if (last_read < lr)
last_read = lr;
}
void RegisterCompAccess::update_required_live_range()
{
bool keep_for_full_loop = false;
/* If this register component is not used at all, or is only read,
* mark it as unused and ignore it when renaming.
* glsl_to_tgsi_visitor::renumber_registers will take care of
* eliminating registers that are not written to.
*/
if (last_write < 0) {
m_range.start = -1;
m_range.end = -1;
return;
}
/* The component is only written to; just make sure it is not
* reused within the range in which it is written.
*/
if (!last_read_scope) {
m_range.start = first_write;
m_range.end = last_write + 1;
return;
}
assert(first_write_scope || m_range.start >= 0);
/* The register was pre-defined, so the first write scope is the outermost scope */
if (!first_write_scope) {
first_write_scope = first_read_scope;
while (first_write_scope->parent())
first_write_scope = first_write_scope->parent();
}
const ProgramScope *enclosing_scope_first_read = first_read_scope;
const ProgramScope *enclosing_scope_first_write = first_write_scope;
/* We read before writing in a loop
* hence the value must survive the loops
*/
if ((first_read <= first_write) &&
first_read_scope->is_in_loop()) {
keep_for_full_loop = true;
enclosing_scope_first_read = first_read_scope->outermost_loop();
}
/* A conditional write within a (nested) loop must survive the outermost
* loop if the last read was not within the same scope.
*/
const ProgramScope *conditional = enclosing_scope_first_write->enclosing_conditional();
if (conditional && !conditional->contains_range_of(*last_read_scope) &&
(conditional->is_switchcase_scope_in_loop() ||
conditional_ifelse_write_in_loop())) {
keep_for_full_loop = true;
enclosing_scope_first_write = conditional->outermost_loop();
}
/* Evaluate the scope that is shared by all: required first write scope,
* required first read before write scope, and last read scope.
*/
const ProgramScope *enclosing_scope = enclosing_scope_first_read;
if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
enclosing_scope = enclosing_scope_first_write;
if (last_read_scope->contains_range_of(*enclosing_scope))
enclosing_scope = last_read_scope;
while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
!enclosing_scope->contains_range_of(*last_read_scope)) {
enclosing_scope = enclosing_scope->parent();
assert(enclosing_scope);
}
/* Propagate the last read scope to the target scope */
while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
/* If the read is in a loop and we have to move up the scope we need to
* extend the live range to the end of this current loop because at this
* point we don't know whether the component was previously written
* unconditionally in the same loop.
*/
if (last_read_scope->is_loop())
last_read = last_read_scope->end();
last_read_scope = last_read_scope->parent();
}
/* If the variable has to be kept for the whole loop, and we
* are currently in a loop, then propagate the live range.
*/
if (keep_for_full_loop && first_write_scope->is_loop())
propagate_live_range_to_dominant_write_scope();
/* Propagate the first_dominant_write scope to the target scope */
while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
/* Propagate live_range if there was a break in a loop and the write was
* after the break inside that loop. Note that this is only needed if
* we move up in the scopes.
*/
if (first_write_scope->loop_break_line() < first_write) {
keep_for_full_loop = true;
propagate_live_range_to_dominant_write_scope();
}
first_write_scope = first_write_scope->parent();
/* Propagate live_range if we are now in a loop */
if (keep_for_full_loop && first_write_scope->is_loop())
propagate_live_range_to_dominant_write_scope();
}
/* The last write past the last read is dead code, but we have to
* ensure that the component is not reused too early, hence extend the
* live_range past the last write.
*/
if (last_write >= last_read)
last_read = last_write + 1;
/* Here we are at the same scope, all is resolved */
m_range.start = first_write;
m_range.end = last_read;
}
const int
RegisterCompAccess::conditionality_untouched = std::numeric_limits<int>::max();
const int
RegisterCompAccess::write_is_unconditional = std::numeric_limits<int>::max() - 1;
RegisterAccess::RegisterAccess(const std::array<size_t, 4>& sizes)
{
for (int i = 0; i < 4; ++i)
m_access_record[i].resize(sizes[i]);
}
RegisterCompAccess& RegisterAccess::operator() (const Register& reg)
{
assert(reg.chan() < 4);
assert(m_access_record[reg.chan()].size() > (size_t)reg.index());
return m_access_record[reg.chan()][reg.index()];
}
}

View File

@ -0,0 +1,162 @@
#ifndef SFN_LIFERANGEEVALUATOR_HELPERS_H
#define SFN_LIFERANGEEVALUATOR_HELPERS_H
#include "sfn_valuefactory.h"
namespace r600 {
enum ProgramScopeType {
outer_scope, /* Outer program scope */
loop_body, /* Inside a loop */
if_branch, /* Inside if branch */
else_branch, /* Inside else branch */
switch_body, /* Inside switch statement */
switch_case_branch, /* Inside switch case statement */
switch_default_branch, /* Inside switch default statement */
undefined_scope
};
class ProgramScope {
public:
ProgramScope();
ProgramScope(ProgramScope *parent, ProgramScopeType type, int id,
int depth, int begin);
ProgramScopeType type() const;
ProgramScope *parent() const;
int nesting_depth() const;
int id() const;
int end() const;
int begin() const;
int loop_break_line() const;
const ProgramScope *in_else_scope() const;
const ProgramScope *in_ifelse_scope() const;
const ProgramScope *in_parent_ifelse_scope() const;
const ProgramScope *innermost_loop() const;
const ProgramScope *outermost_loop() const;
const ProgramScope *enclosing_conditional() const;
bool is_loop() const;
bool is_in_loop() const;
bool is_switchcase_scope_in_loop() const;
bool is_conditional() const;
bool is_child_of(const ProgramScope *scope) const;
bool is_child_of_ifelse_id_sibling(const ProgramScope *scope) const;
bool break_is_for_switchcase() const;
bool contains_range_of(const ProgramScope& other) const;
void set_end(int end);
void set_loop_break_line(int line);
private:
ProgramScopeType scope_type;
int scope_id;
int scope_nesting_depth;
int scope_begin;
int scope_end;
int break_loop_line;
ProgramScope *parent_scope;
};
/* Class to track the access to a component of a temporary register. */
struct LiveRange;
class RegisterCompAccess {
public:
RegisterCompAccess();
RegisterCompAccess(LiveRange range);
void record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use);
void record_write(int line, ProgramScope *scope);
void update_required_live_range();
const auto& range() { return m_range;}
const auto& use_type() { return m_use_type; }
private:
void propagate_live_range_to_dominant_write_scope();
bool conditional_ifelse_write_in_loop() const;
void record_ifelse_write(const ProgramScope& scope);
void record_if_write(const ProgramScope& scope);
void record_else_write(const ProgramScope& scope);
ProgramScope *last_read_scope;
ProgramScope *first_read_scope;
ProgramScope *first_write_scope;
int first_write;
int last_read;
int last_write;
int first_read;
/* This member variable tracks the current resolution of conditional writing
* to this temporary in IF/ELSE clauses.
*
* The initial value "conditionality_untouched" indicates that this
* temporary has not yet been written to within an if clause.
*
* A positive (other than "conditionality_untouched") number refers to the
* last loop id for which the write was resolved as unconditional. With each
new loop this value will be overwritten by "conditionality_unresolved"
* on entering the first IF clause writing this temporary.
*
* The value "conditionality_unresolved" indicates that no resolution has
* been achieved so far. If the variable is set to this value at the end of
* the processing of the whole shader it also indicates a conditional write.
*
* The value "write_is_conditional" marks that the variable is written
* conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
* least one loop.
*/
int conditionality_in_loop_id;
/* Helper constants to make the tracking code more readable. */
static const int write_is_conditional = -1;
static const int conditionality_unresolved = 0;
static const int conditionality_untouched;
static const int write_is_unconditional;
/* A bit field tracking the nesting levels of if-else clauses where the
* temporary has (so far) been written to in the if branch, but not in the
* else branch.
*/
unsigned int if_scope_write_flags;
int next_ifelse_nesting_depth;
static const int supported_ifelse_nesting_depth = 32;
/* Tracks the last if scope in which the temporary was written to
* without a write in the corresponding else branch. It is also used
* to track read-before-write in the corresponding scope.
*/
const ProgramScope *current_unpaired_if_write_scope;
/* Flag to resolve read-before-write in the else scope. */
bool was_written_in_current_else_scope;
LiveRange m_range;
std::bitset<LiveRangeEntry::use_unspecified> m_use_type;
};
class RegisterAccess {
public:
using RegisterCompAccessVector = std::vector<RegisterCompAccess>;
RegisterAccess(const std::array<size_t, 4>& sizes);
RegisterCompAccess& operator() (const Register& reg);
auto& component(int i) { return m_access_record[i]; }
private:
std::array<RegisterCompAccessVector, 4> m_access_record;
};
}
#endif // SFN_LIFERANGEEVALUATOR_HELPERS_H

View File

@ -0,0 +1,86 @@
#include "sfn_memorypool.h"
#include <cassert>
#include <iostream>
namespace r600 {
struct MemoryPoolImpl {
public:
MemoryPoolImpl();
~MemoryPoolImpl();
using MemoryBacking = ::std::pmr::monotonic_buffer_resource;
MemoryBacking *pool;
};
MemoryPool::MemoryPool() noexcept : impl(nullptr)
{
}
MemoryPool& MemoryPool::instance()
{
static thread_local MemoryPool me;
me.initialize();
return me;
}
void MemoryPool::free()
{
delete impl;
impl = nullptr;
}
void MemoryPool::initialize()
{
if (!impl)
impl = new MemoryPoolImpl();
}
void *MemoryPool::allocate(size_t size)
{
return impl->pool->allocate(size);
}
void *MemoryPool::allocate(size_t size, size_t align)
{
return impl->pool->allocate(size, align);
}
void MemoryPool::release_all()
{
instance().free();
}
void init_pool()
{
MemoryPool::instance();
}
void release_pool()
{
MemoryPool::release_all();
}
void *Allocate::operator new(size_t size)
{
return MemoryPool::instance().allocate(size);
}
void Allocate::operator delete (void *p, size_t size)
{
// MemoryPool::instance().deallocate(p, size);
}
MemoryPoolImpl::MemoryPoolImpl()
{
pool = new MemoryBacking();
}
MemoryPoolImpl::~MemoryPoolImpl()
{
delete pool;
}
}

View File

@ -0,0 +1,69 @@
#ifndef MEMORYPOOL_H
#define MEMORYPOOL_H
#include <cstdlib>
#include <memory>
#include <stack>
#if __cplusplus >= 201703L
#include <memory_resource>
#define R600_POINTER_TYPE(X) X *
#else
#error Need C++17
#endif
namespace r600 {
void init_pool();
void release_pool();
class Allocate
{
public:
void * operator new(size_t size);
void operator delete (void *p, size_t size);
};
class MemoryPool {
public:
static MemoryPool& instance();
static void release_all();
void free();
void initialize();
void *allocate(size_t size);
void *allocate(size_t size, size_t align);
private:
MemoryPool() noexcept;
struct MemoryPoolImpl* impl;
};
template <typename T>
struct Allocator {
using value_type = T;
Allocator() = default;
Allocator(const Allocator& other) = default;
template <typename U>
Allocator(const Allocator<U>& other) {(void)other;}
T *allocate(size_t n) {
return (T *)MemoryPool::instance().allocate(n * sizeof(T), alignof(T));
}
void deallocate(void *p, size_t n) {
(void)p; (void)n;
//MemoryPool::instance().deallocate(p, n * sizeof(T), alignof(T));
}
friend bool operator == (const Allocator<T>& lhs, const Allocator<T>& rhs) {
(void)lhs; (void)rhs; return true;}
};
}
#endif // MEMORYPOOL_H
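For context, a hypothetical usage sketch of the pool interface declared above; the Foo type and the values are invented, only init_pool(), release_pool(), Allocate and Allocator<> come from the header.
/* Hypothetical usage of the pool API declared above (illustration only). */
#include "sfn_memorypool.h"
#include <vector>
struct Foo : public r600::Allocate {
   int value;
   Foo(int v): value(v) {}
};
int main()
{
   r600::init_pool();                /* create the thread-local pool */
   {
      Foo *f = new Foo(42);          /* Allocate::operator new -> pool */
      /* STL containers can draw from the same pool via r600::Allocator */
      std::vector<int, r600::Allocator<int>> ints;
      ints.push_back(f->value);
   }                                 /* deallocations are no-ops */
   r600::release_pool();             /* everything is dropped at once */
   return 0;
}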

View File

@ -30,19 +30,19 @@
#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "util/u_prim.h"
#include "sfn_instruction_tex.h"
#include "sfn_shader_vertex.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_shader_compute.h"
#include "sfn_shader_tcs.h"
#include "sfn_shader_tess_eval.h"
#include "sfn_shader.h"
#include "sfn_assembler.h"
#include "sfn_debug.h"
#include "sfn_liverangeevaluator.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir_lower_alu.h"
#include "sfn_nir_lower_tex.h"
#include "sfn_optimizer.h"
#include "sfn_ra.h"
#include "sfn_scheduler.h"
#include <vector>
@ -78,264 +78,11 @@ bool NirLowerInstruction::run(nir_shader *shader)
(void *)this);
}
ShaderFromNir::ShaderFromNir():sh(nullptr),
gfx_level(CLASS_UNKNOWN),
m_current_if_id(0),
m_current_loop_id(0),
scratch_size(0)
{
}
bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
r600_pipe_shader_selector *sel, r600_shader_key& key,
struct r600_shader* gs_shader, enum amd_gfx_level _chip_class)
{
sh = shader;
gfx_level = _chip_class;
assert(sh);
switch (shader->info.stage) {
case MESA_SHADER_VERTEX:
impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level));
break;
case MESA_SHADER_TESS_CTRL:
sfn_log << SfnLog::trans << "Start TCS\n";
impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, gfx_level));
break;
case MESA_SHADER_TESS_EVAL:
sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level));
break;
case MESA_SHADER_GEOMETRY:
sfn_log << SfnLog::trans << "Start GS\n";
impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, gfx_level));
break;
case MESA_SHADER_FRAGMENT:
sfn_log << SfnLog::trans << "Start FS\n";
impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, gfx_level));
break;
case MESA_SHADER_COMPUTE:
sfn_log << SfnLog::trans << "Start CS\n";
impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, gfx_level));
break;
default:
return false;
}
sfn_log << SfnLog::trans << "Process declarations\n";
if (!process_declaration())
return false;
// at this point all functions should be inlined
const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
sfn_log << SfnLog::trans << "Scan shader\n";
if (sfn_log.has_debug_flag(SfnLog::instr))
nir_print_shader(const_cast<nir_shader *>(shader), stderr);
nir_foreach_block(block, func->impl) {
nir_foreach_instr(instr, block) {
if (!impl->scan_instruction(instr)) {
fprintf(stderr, "Unhandled sysvalue access ");
nir_print_instr(instr, stderr);
fprintf(stderr, "\n");
return false;
}
}
}
sfn_log << SfnLog::trans << "Reserve registers\n";
if (!impl->allocate_reserved_registers()) {
return false;
}
ValuePool::array_list arrays;
sfn_log << SfnLog::trans << "Allocate local registers\n";
foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
impl->allocate_local_register(*reg, arrays);
}
sfn_log << SfnLog::trans << "Emit shader start\n";
impl->allocate_arrays(arrays);
impl->emit_shader_start();
sfn_log << SfnLog::trans << "Process shader \n";
foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
if (!process_cf_node(node))
return false;
}
// Add optimizations here
sfn_log << SfnLog::trans << "Finalize\n";
impl->finalize();
impl->get_array_info(pipe_shader->shader);
if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
sfn_log << SfnLog::trans << "Merge registers\n";
impl->remap_registers();
}
sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
return true;
}
Shader ShaderFromNir::shader() const
{
return Shader{impl->m_output, impl->get_temp_registers()};
}
bool ShaderFromNir::process_cf_node(nir_cf_node *node)
{
SFN_TRACE_FUNC(SfnLog::flow, "CF");
switch (node->type) {
case nir_cf_node_block:
return process_block(nir_cf_node_as_block(node));
case nir_cf_node_if:
return process_if(nir_cf_node_as_if(node));
case nir_cf_node_loop:
return process_loop(nir_cf_node_as_loop(node));
default:
return false;
}
}
bool ShaderFromNir::process_if(nir_if *if_stmt)
{
SFN_TRACE_FUNC(SfnLog::flow, "IF");
if (!impl->emit_if_start(m_current_if_id, if_stmt))
return false;
int if_id = m_current_if_id++;
m_if_stack.push(if_id);
foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
if (!process_cf_node(n)) return false;
if (!if_stmt->then_list.is_empty()) {
if (!impl->emit_else_start(if_id))
return false;
foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
if (!process_cf_node(n)) return false;
}
if (!impl->emit_ifelse_end(if_id))
return false;
m_if_stack.pop();
return true;
}
bool ShaderFromNir::process_loop(nir_loop *node)
{
SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
int loop_id = m_current_loop_id++;
if (!impl->emit_loop_start(loop_id))
return false;
foreach_list_typed(nir_cf_node, n, node, &node->body)
if (!process_cf_node(n)) return false;
if (!impl->emit_loop_end(loop_id))
return false;
return true;
}
bool ShaderFromNir::process_block(nir_block *block)
{
SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
nir_foreach_instr(instr, block) {
int r = emit_instruction(instr);
if (!r) {
sfn_log << SfnLog::err << "R600: Unsupported instruction: "
<< *instr << "\n";
return false;
}
}
return true;
}
ShaderFromNir::~ShaderFromNir()
{
}
pipe_shader_type ShaderFromNir::processor_type() const
{
return impl->m_processor_type;
}
bool ShaderFromNir::emit_instruction(nir_instr *instr)
{
assert(impl);
sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";
switch (instr->type) {
case nir_instr_type_alu:
return impl->emit_alu_instruction(instr);
case nir_instr_type_deref:
return impl->emit_deref_instruction(nir_instr_as_deref(instr));
case nir_instr_type_intrinsic:
return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
case nir_instr_type_load_const: /* const values are loaded when needed */
return true;
case nir_instr_type_tex:
return impl->emit_tex_instruction(instr);
case nir_instr_type_jump:
return impl->emit_jump_instruction(nir_instr_as_jump(instr));
default:
fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
nir_print_instr(instr, stderr);
fprintf(stderr, "'\n");
return false;
case nir_instr_type_ssa_undef:
return impl->create_undef(nir_instr_as_ssa_undef(instr));
return true;
}
}
bool ShaderFromNir::process_declaration()
{
impl->set_shader_info(sh);
if (!impl->scan_inputs_read(sh))
return false;
// scan declarations
nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
nir_var_mem_ubo |
nir_var_mem_ssbo) {
if (!impl->process_uniforms(variable)) {
fprintf(stderr, "R600: error parsing outputs variable %s\n", variable->name);
return false;
}
}
return true;
}
const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
{
assert(impl);
return impl->m_output;
}
AssemblyFromShader::~AssemblyFromShader()
{
}
bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
bool AssemblyFromShader::lower(const Shader& ir)
{
return do_lower(ir);
}
@ -557,7 +304,6 @@ r600_nir_lower_atomics(nir_shader *shader)
nir_metadata_dominance,
NULL);
}
using r600::r600_nir_lower_int_tg4;
using r600::r600_lower_scratch_addresses;
using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_ubo_to_align16;
@ -676,6 +422,7 @@ r600_lower_shared_io(nir_shader *nir)
static nir_ssa_def *
r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
{
(void)_options;
auto old_ir = nir_instr_as_intrinsic(instr);
auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
nir_ssa_dest_init(&load->instr, &load->dest,
@ -693,6 +440,8 @@ r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
{
(void)_options;
if (instr->type != nir_instr_type_intrinsic)
return false;
@ -713,7 +462,7 @@ bool r600_lower_fs_pos_input(nir_shader *shader)
};
static bool
optimize_once(nir_shader *shader, bool vectorize)
optimize_once(nir_shader *shader)
{
bool progress = false;
NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
@ -722,9 +471,6 @@ optimize_once(nir_shader *shader, bool vectorize)
NIR_PASS(progress, shader, nir_opt_algebraic);
NIR_PASS(progress, shader, nir_opt_constant_folding);
NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
if (vectorize)
NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
NIR_PASS(progress, shader, nir_opt_remove_phis);
if (nir_opt_trivial_continues(shader)) {
@ -777,13 +523,9 @@ bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
case nir_op_fdot2:
case nir_op_fdot3:
case nir_op_fdot4:
return nir_src_bit_size(alu->src[0].src) == 64;
case nir_op_cube_r600:
return false;
case nir_op_bany_fnequal2:
case nir_op_ball_fequal2:
case nir_op_bany_inequal2:
case nir_op_ball_iequal2:
return nir_src_bit_size(alu->src[0].src) != 64;
default:
return true;
}
@ -793,15 +535,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
r600_shader_key *key)
{
char filename[4000];
struct r600_pipe_shader_selector *sel = pipeshader->selector;
bool lower_64bit = ((sel->nir->options->lower_int64_options ||
bool lower_64bit = (rctx->b.gfx_level < CAYMAN &&
(sel->nir->options->lower_int64_options ||
sel->nir->options->lower_doubles_options) &&
(sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64);
r600::ShaderFromNir convert;
if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
nir_print_shader(sel->nir, stderr);
@ -813,10 +553,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
/* Cayman seems very crashy about accessing images that don't exist or are
* accessed out of range, this lowering seems to help (but it can also be
* another problem) */
if (sel->nir->info.num_images > 0 && rctx->b.gfx_level == CAYMAN)
NIR_PASS_V(sel->nir, r600_legalize_image_load_store);
NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
nir_lower_idiv_options idiv_options = {0};
idiv_options.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE;
@ -828,7 +565,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
if (lower_64bit)
NIR_PASS_V(sel->nir, nir_lower_int64);
while(optimize_once(sel->nir, false));
while(optimize_once(sel->nir));
NIR_PASS_V(sel->nir, r600_lower_shared_io);
NIR_PASS_V(sel->nir, r600_nir_lower_atomics);
@ -839,8 +576,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
lower_tex_options.lower_invalid_implicit_lod = true;
NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray);
NIR_PASS_V(sel->nir, r600_nir_lower_txl_txf_array_or_cube);
NIR_PASS_V(sel->nir, r600_nir_lower_cube_to_2darray);
NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
@ -851,30 +588,11 @@ int r600_shader_from_nir(struct r600_context *rctx,
NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans);
NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
}
nir_variable_mode io_modes = nir_var_uniform |
nir_var_shader_in |
nir_var_shader_out;
nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in;
//if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
io_modes |= nir_var_shader_out;
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
/* Lower IO to temporaries late, because otherwise we get into trouble
* with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug
* somewhere that results in the input always reading from the same temp
* regardless of interpolation when the lowering is done early */
NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
true, true);
/* Since we're doing nir_lower_io_to_temporaries late, we need
* to lower all the copy_deref's introduced by
* lower_io_to_temporaries before calling nir_lower_io.
*/
NIR_PASS_V(sel->nir, nir_split_var_copies);
NIR_PASS_V(sel->nir, nir_lower_var_copies);
NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
}
NIR_PASS_V(sel->nir, nir_opt_combine_stores, nir_var_shader_out);
NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
nir_lower_io_lower_64bit_to_32);
@ -916,14 +634,27 @@ int r600_shader_from_nir(struct r600_context *rctx,
NIR_PASS_V(sh, r600_lower_tess_coord, u_tess_prim_from_shader(sh->info.tess._primitive_mode));
}
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
NIR_PASS_V(sh, nir_split_64bit_vec3_and_vec4);
NIR_PASS_V(sh, nir_lower_int64);
NIR_PASS_V(sh, nir_lower_ubo_vec4);
if (lower_64bit)
NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);
NIR_PASS_V(sh, r600::r600_split_64bit_uniforms_and_ubo);
/* Lower to scalar to let some optimization work out better */
while(optimize_once(sh, false));
while(optimize_once(sh));
NIR_PASS_V(sh, r600::r600_merge_vec2_stores);
if (lower_64bit)
NIR_PASS_V(sh, r600::r600_merge_vec2_stores);
NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL);
@ -934,7 +665,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
40,
r600_get_natural_size_align_bytes);
while (optimize_once(sh, true));
while (optimize_once(sh));
NIR_PASS_V(sh, nir_lower_bool_to_int32);
NIR_PASS_V(sh, r600_nir_lower_int_tg4);
@ -945,8 +676,6 @@ int r600_shader_from_nir(struct r600_context *rctx,
NIR_PASS_V(sh, nir_lower_locals_to_regs);
//NIR_PASS_V(sh, nir_opt_algebraic);
//NIR_PASS_V(sh, nir_copy_prop);
NIR_PASS_V(sh, nir_lower_to_source_mods,
(nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
nir_lower_64bit_source_mods));
@ -974,33 +703,66 @@ int r600_shader_from_nir(struct r600_context *rctx,
pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size +
sh->info.clip_distance_array_size)) - 1;
}
struct r600_shader* gs_shader = nullptr;
if (rctx->gs_shader)
gs_shader = &rctx->gs_shader->current->shader;
r600_screen *rscreen = rctx->screen;
bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.gfx_level);
if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
static int shnr = 0;
r600::Shader *shader = r600::Shader::translate_from_nir(sh, &sel->so, gs_shader,
*key, rctx->isa->hw_class);
snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++);
assert(shader);
if (!shader)
return -2;
if (access(filename, F_OK) == -1) {
FILE *f = fopen(filename, "w");
pipeshader->enabled_stream_buffers_mask = shader->enabled_stream_buffers_mask();
pipeshader->selector->info.file_count[TGSI_FILE_HW_ATOMIC] += shader->atomic_file_count();
pipeshader->selector->info.writes_memory = shader->has_flag(r600::Shader::sh_writes_memory);
if (f) {
fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name);
nir_print_shader(sh, f);
fprintf(f, ")\";\n");
fclose(f);
}
}
if (!r)
return -2;
if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
std::cerr << "Shader after conversion from nir\n";
shader->print(std::cerr);
}
auto shader = convert.shader();
if (!r600::sfn_log.has_debug_flag(r600::SfnLog::noopt)) {
optimize(*shader);
if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
std::cerr << "Shader after optimization\n";
shader->print(std::cerr);
}
}
auto scheduled_shader = r600::schedule(shader);
if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
std::cerr << "Shader after scheduling\n";
shader->print(std::cerr);
}
if (!r600::sfn_log.has_debug_flag(r600::SfnLog::nomerge)) {
if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge)) {
r600::sfn_log << r600::SfnLog::merge << "Shader before RA\n";
scheduled_shader->print(std::cerr);
}
r600::sfn_log << r600::SfnLog::trans << "Merge registers\n";
auto lrm = r600::LiveRangeEvaluator().run(*scheduled_shader);
if (!r600::register_allocation(lrm)) {
R600_ERR("%s: Register allocation failed\n", __func__);
/* For now crash if the shader could not be generated */
assert(0);
return -1;
} else if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge) ||
r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
r600::sfn_log << "Shader after RA\n";
scheduled_shader->print(std::cerr);
}
}
scheduled_shader->get_shader_info(&pipeshader->shader);
pipeshader->shader.uses_doubles = sh->info.bit_sizes_float & 64 ? 1 : 0;
r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.gfx_level, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
@ -1012,9 +774,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
pipeshader->shader.bc.type = pipeshader->shader.processor_type;
pipeshader->shader.bc.isa = rctx->isa;
r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
if (!afs.lower(shader.m_ir)) {
r600::Assembler afs(&pipeshader->shader, *key);
if (!afs.lower(scheduled_shader)) {
R600_ERR("%s: Lowering to assembly failed\n", __func__);
scheduled_shader->print(std::cerr);
/* For now crash if the shader could not be generated */
assert(0);
return -1;
}
@ -1025,8 +791,5 @@ int r600_shader_from_nir(struct r600_context *rctx,
} else {
r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
}
if (pipeshader->shader.bc.ngpr < 6)
pipeshader->shader.bc.ngpr = 6;
return 0;
}

View File

@ -31,7 +31,7 @@
#include "nir_builder.h"
#ifdef __cplusplus
#include "sfn_shader_base.h"
#include "sfn_shader.h"
#include <vector>
namespace r600 {
@ -64,56 +64,16 @@ bool r600_nir_64_to_vec2(nir_shader *sh);
bool r600_merge_vec2_stores(nir_shader *shader);
class Shader {
public:
std::vector<InstructionBlock>& m_ir;
ValueMap m_temp;
};
class ShaderFromNir {
public:
ShaderFromNir();
~ShaderFromNir();
unsigned ninputs() const;
bool lower(const nir_shader *shader, r600_pipe_shader *sh,
r600_pipe_shader_selector *sel, r600_shader_key &key,
r600_shader *gs_shader, enum amd_gfx_level gfx_level);
bool process_declaration();
pipe_shader_type processor_type() const;
bool emit_instruction(nir_instr *instr);
const std::vector<InstructionBlock> &shader_ir() const;
Shader shader() const;
private:
bool process_block();
bool process_cf_node(nir_cf_node *node);
bool process_if(nir_if *node);
bool process_loop(nir_loop *node);
bool process_block(nir_block *node);
std::unique_ptr<ShaderFromNirProcessor> impl;
const nir_shader *sh;
enum amd_gfx_level gfx_level;
int m_current_if_id;
int m_current_loop_id;
std::stack<int> m_if_stack;
int scratch_size;
};
bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh);
bool r600_lower_64bit_to_vec2(nir_shader *sh);
bool r600_split_64bit_alu_and_phi(nir_shader *sh);
class AssemblyFromShader {
public:
virtual ~AssemblyFromShader();
bool lower(const std::vector<InstructionBlock> &ir);
bool lower(const Shader& s);
private:
virtual bool do_lower(const std::vector<InstructionBlock>& ir) = 0 ;
virtual bool do_lower(const Shader& s) = 0 ;
};
}

View File

@ -32,7 +32,8 @@
static nir_ssa_def *
r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_options)
r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr,
UNUSED void *_options)
{
b->cursor = nir_before_instr(instr);
auto ir = nir_instr_as_intrinsic(instr);
@ -143,7 +144,8 @@ r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_opt
}
static bool
r600_legalize_image_load_store_filter(const nir_instr *instr, const void *_options)
r600_legalize_image_load_store_filter(const nir_instr *instr,
UNUSED const void *_options)
{
if (instr->type != nir_instr_type_intrinsic)
return false;

View File

@ -108,6 +108,193 @@ private:
};
class LowerLoad64Uniform : public NirLowerInstruction {
bool filter(const nir_instr *instr) const override;
nir_ssa_def *lower(nir_instr *instr) override;
};
bool LowerLoad64Uniform::filter(const nir_instr *instr) const
{
if (instr->type != nir_instr_type_intrinsic)
return false;
auto intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_load_uniform &&
intr->intrinsic != nir_intrinsic_load_ubo &&
intr->intrinsic != nir_intrinsic_load_ubo_vec4)
return false;
return nir_dest_bit_size(intr->dest) == 64;
}
nir_ssa_def *LowerLoad64Uniform::lower(nir_instr *instr)
{
auto intr = nir_instr_as_intrinsic(instr);
int old_components = nir_dest_num_components(intr->dest);
assert(old_components <= 2);
assert(intr->dest.is_ssa);
intr->dest.ssa.num_components *= 2;
intr->dest.ssa.bit_size = 32;
intr->num_components *= 2;
if (intr->intrinsic == nir_intrinsic_load_ubo ||
intr->intrinsic == nir_intrinsic_load_ubo_vec4)
nir_intrinsic_set_component(intr, 2 * nir_intrinsic_component(intr));
nir_ssa_def *result_vec[2] = {nullptr, nullptr};
for (int i = 0; i < old_components; ++i) {
result_vec[i] = nir_pack_64_2x32_split(b,
nir_channel(b, &intr->dest.ssa, 2 * i),
nir_channel(b, &intr->dest.ssa, 2 * i + 1));
}
if (old_components == 1)
return result_vec[0];
return nir_vec2(b, result_vec[0], result_vec[1]);
}
bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh)
{
return LowerLoad64Uniform().run(sh);
}
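To make the widening step above concrete: the load now returns 2·N 32-bit channels, and each 64-bit result is reassembled from a low and a high dword with nir_pack_64_2x32_split. The same packing, modelled on the CPU (illustration only):
/* What nir_pack_64_2x32_split(lo, hi) computes, expressed on the CPU
 * (illustration only). */
#include <cassert>
#include <cstdint>
static uint64_t pack_64_2x32_split(uint32_t lo, uint32_t hi)
{
   return ((uint64_t)hi << 32) | lo;
}
int main()
{
   /* a 64-bit constant loaded as two dwords ... */
   uint32_t lo = 0x55667788u;
   uint32_t hi = 0x11223344u;
   /* ... packs back into the original value */
   assert(pack_64_2x32_split(lo, hi) == 0x1122334455667788ull);
   return 0;
}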
class LowerSplit64op : public NirLowerInstruction {
bool filter(const nir_instr *instr) const override {
switch (instr->type) {
case nir_instr_type_alu: {
auto alu = nir_instr_as_alu(instr);
switch (alu->op) {
case nir_op_bcsel:
return nir_dest_bit_size(alu->dest.dest) == 64;
case nir_op_f2b1:
case nir_op_f2i32:
case nir_op_f2u32:
case nir_op_f2i64:
case nir_op_f2u64:
case nir_op_u2f64:
case nir_op_i2f64:
return nir_src_bit_size(alu->src[0].src) == 64;
default:
return false;
}
}
case nir_instr_type_phi: {
auto phi = nir_instr_as_phi(instr);
return nir_dest_bit_size(phi->dest) == 64;
}
default:
return false;
}
}
nir_ssa_def *lower(nir_instr *instr) override {
switch (instr->type) {
case nir_instr_type_alu: {
auto alu = nir_instr_as_alu(instr);
switch (alu->op) {
case nir_op_bcsel: {
auto lo = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1),
nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 1)),
nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 2)));
auto hi = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1),
nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 1)),
nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 2)));
return nir_pack_64_2x32_split(b, lo, hi);
}
case nir_op_f2b1: {
auto mask = nir_component_mask(nir_dest_num_components(alu->dest.dest));
return nir_fneu(b, nir_channels(b, nir_ssa_for_alu_src(b, alu, 0), mask),
nir_imm_zero(b, nir_dest_num_components(alu->dest.dest), 64));
}
case nir_op_f2i32: {
auto src = nir_ssa_for_alu_src(b, alu, 0);
auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
auto abs_src = nir_fabs(b, src);
auto value = nir_f2u32(b, abs_src);
return nir_bcsel(b, gt0, value, nir_ineg(b, value));
}
case nir_op_f2u32: {
/* fp32 doesn't hold sufficient bits to represent the full range of
* u32, therefore we have to split the values, and because f2f32
* rounds, we have to remove the fractional part in the hi bits.
* For values > UINT_MAX the result is undefined */
auto src = nir_ssa_for_alu_src(b, alu, 0);
auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
auto highval = nir_fmul_imm(b, src, 1.0/65536.0);
auto fract = nir_ffract(b, highval);
auto high = nir_f2u32(b, nir_f2f32(b, nir_fsub(b, highval, fract)));
auto lowval = nir_fmul_imm(b, fract, 65536.0);
auto low = nir_f2u32(b, nir_f2f32(b, lowval));
return nir_bcsel(b, gt0, nir_ior(b, nir_ishl_imm(b, high, 16), low),
nir_imm_int(b, 0));
}
case nir_op_f2i64: {
auto src = nir_ssa_for_alu_src(b, alu, 0);
auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
auto abs_src = nir_fabs(b, src);
auto value = nir_f2u64(b, abs_src);
return nir_bcsel(b, gt0, value, nir_isub(b, nir_imm_zero(b, 1, 64), value));
}
case nir_op_f2u64: {
auto src = nir_ssa_for_alu_src(b, alu, 0);
auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
auto highval = nir_fmul_imm(b, src, 1.0/(65536.0 * 65536.0));
auto fract = nir_ffract(b, highval);
auto high = nir_f2u32(b, nir_fsub(b, highval, fract));
auto low = nir_f2u32(b, nir_fmul_imm(b, fract, 65536.0 * 65536.0));
return nir_bcsel(b, gt0, nir_pack_64_2x32_split(b, low, high),
nir_imm_zero(b, 1, 64));
}
case nir_op_u2f64: {
auto src = nir_ssa_for_alu_src(b, alu, 0);
auto low = nir_unpack_64_2x32_split_x(b, src);
auto high = nir_unpack_64_2x32_split_y(b, src);
auto flow = nir_u2f64(b, low);
auto fhigh = nir_u2f64(b, high);
return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
}
case nir_op_i2f64: {
auto src = nir_ssa_for_alu_src(b, alu, 0);
auto low = nir_unpack_64_2x32_split_x(b, src);
auto high = nir_unpack_64_2x32_split_y(b, src);
auto flow = nir_u2f64(b, low);
auto fhigh = nir_i2f64(b, high);
return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
}
default:
unreachable("trying to lower instruction that was not in filter");
}
}
case nir_instr_type_phi: {
auto phi = nir_instr_as_phi(instr);
auto phi_lo = nir_phi_instr_create(b->shader);
auto phi_hi = nir_phi_instr_create(b->shader);
nir_ssa_dest_init(&phi_lo->instr, &phi_lo->dest, phi->dest.ssa.num_components, 32, "");
nir_ssa_dest_init(&phi_hi->instr, &phi_hi->dest, phi->dest.ssa.num_components, 32, "");
nir_foreach_phi_src(s, phi) {
auto lo = nir_unpack_64_2x32_split_x(b, nir_ssa_for_src(b, s->src, 1));
auto hi = nir_unpack_64_2x32_split_y(b, nir_ssa_for_src(b, s->src, 1));
nir_phi_instr_add_src(phi_lo, s->pred, nir_src_for_ssa(lo));
nir_phi_instr_add_src(phi_hi, s->pred, nir_src_for_ssa(hi));
}
return nir_pack_64_2x32_split(b, &phi_lo->dest.ssa, &phi_hi->dest.ssa);
}
default:
unreachable("Trying to lower instruction that was not in filter");
}
}
};
bool r600_split_64bit_alu_and_phi(nir_shader *sh)
{
return LowerSplit64op().run(sh);
}
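The f2u32/f2u64 lowerings above split the double into a scaled high part and a low remainder so that each partial conversion stays within the precision of the 32-bit hardware path. A CPU model of the f2u32 decomposition (illustration only; it mirrors the math, not the NIR builder calls):
/* CPU model of the f2u32 split above: convert via 16-bit halves so each
 * partial conversion stays exactly representable (illustration only). */
#include <cassert>
#include <cmath>
#include <cstdint>
static uint32_t f2u32_split(double src)
{
   if (!(src > 0.0))
      return 0;                                  /* negatives clamp to 0 */
   double highval = src * (1.0 / 65536.0);
   double fract = highval - std::floor(highval); /* ffract(highval) */
   uint32_t high = (uint32_t)(highval - fract);  /* top 16 bits */
   uint32_t low = (uint32_t)(fract * 65536.0);   /* bottom 16 bits */
   return (high << 16) | low;
}
int main()
{
   assert(f2u32_split(3000000000.0) == 3000000000u);  /* > INT32_MAX */
   assert(f2u32_split(65535.25) == 65535u);
   assert(f2u32_split(-5.0) == 0u);
   return 0;
}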
bool
LowerSplit64BitVar::filter(const nir_instr *instr) const
{
@ -271,7 +458,7 @@ LowerSplit64BitVar::split_store_deref_array(nir_intrinsic_instr *intr, nir_deref
}
nir_ssa_def *
LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, nir_deref_instr *deref)
LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, UNUSED nir_deref_instr *deref)
{
auto old_var = nir_intrinsic_get_var(intr, 0);
unsigned old_components = old_var->type->without_array()->components();
@ -556,8 +743,6 @@ LowerSplit64BitVar::lower(nir_instr *instr)
}
case nir_instr_type_alu: {
auto alu = nir_instr_as_alu(instr);
nir_print_instr(instr, stderr);
fprintf(stderr, "\n");
switch (alu->op) {
case nir_op_bany_fnequal3:
return split_reduction3(alu, nir_op_bany_fnequal2, nir_op_fneu, nir_op_ior);
@ -845,7 +1030,7 @@ static bool store_64bit_intr(nir_src *src, void *state)
return !*s;
}
static bool double2vec2(nir_src *src, void *state)
static bool double2vec2(nir_src *src, UNUSED void *state)
{
if (nir_src_bit_size(*src) != 64)
return true;
@ -1058,6 +1243,206 @@ bool r600_merge_vec2_stores(nir_shader *shader)
return merger.combine();
}
static bool
r600_lower_64bit_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
{
b->cursor = nir_after_instr(&instr->instr);
switch (instr->intrinsic) {
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_vec4:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_input:
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_load_per_vertex_input:
case nir_intrinsic_store_output:
case nir_intrinsic_store_per_vertex_output:
case nir_intrinsic_store_ssbo:
break;
default:
return false;
}
if (instr->num_components <= 2)
return false;
bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest;
if (has_dest) {
if (nir_dest_bit_size(instr->dest) != 64)
return false;
} else {
if (nir_src_bit_size(instr->src[0]) != 64)
return false;
}
nir_intrinsic_instr *first =
nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
nir_intrinsic_instr *second =
nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
switch (instr->intrinsic) {
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_vec4:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_store_ssbo:
break;
default: {
nir_io_semantics semantics = nir_intrinsic_io_semantics(second);
semantics.location++;
semantics.num_slots--;
nir_intrinsic_set_io_semantics(second, semantics);
nir_intrinsic_set_base(second, nir_intrinsic_base(second) + 1);
break;
}
}
first->num_components = 2;
second->num_components -= 2;
if (has_dest) {
first->dest.ssa.num_components = 2;
second->dest.ssa.num_components -= 2;
}
nir_builder_instr_insert(b, &first->instr);
nir_builder_instr_insert(b, &second->instr);
if (has_dest) {
/* Merge the two loads' results back into a vector. */
nir_ssa_scalar channels[4] = {
nir_get_ssa_scalar(&first->dest.ssa, 0),
nir_get_ssa_scalar(&first->dest.ssa, 1),
nir_get_ssa_scalar(&second->dest.ssa, 0),
nir_get_ssa_scalar(&second->dest.ssa, second->num_components > 1 ? 1 : 0),
};
nir_ssa_def *new_ir = nir_vec_scalars(b, channels, instr->num_components);
nir_ssa_def_rewrite_uses(&instr->dest.ssa, new_ir);
} else {
/* Split the src value across the two stores. */
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *src0 = instr->src[0].ssa;
nir_ssa_scalar channels[4] = { 0 };
for (int i = 0; i < instr->num_components; i++)
channels[i] = nir_get_ssa_scalar(src0, i);
nir_intrinsic_set_write_mask(first, nir_intrinsic_write_mask(instr) & 3);
nir_intrinsic_set_write_mask(second, nir_intrinsic_write_mask(instr) >> 2);
nir_instr_rewrite_src(&first->instr, &first->src[0],
nir_src_for_ssa(nir_vec_scalars(b, channels, 2)));
nir_instr_rewrite_src(&second->instr, &second->src[0],
nir_src_for_ssa(nir_vec_scalars(b, &channels[2],
second->num_components)));
}
int offset_src = -1;
uint32_t offset_amount = 16;
switch (instr->intrinsic) {
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_ubo:
offset_src = 1;
break;
case nir_intrinsic_load_ubo_vec4:
case nir_intrinsic_load_uniform:
offset_src = 0;
offset_amount = 1;
break;
case nir_intrinsic_store_ssbo:
offset_src = 2;
break;
default:
break;
}
if (offset_src != -1) {
b->cursor = nir_before_instr(&second->instr);
nir_ssa_def *second_offset =
nir_iadd_imm(b, second->src[offset_src].ssa, offset_amount);
nir_instr_rewrite_src(&second->instr, &second->src[offset_src],
nir_src_for_ssa(second_offset));
}
/* DCE stores we generated with no writemask (nothing else does this
* currently).
*/
if (!has_dest) {
if (nir_intrinsic_write_mask(first) == 0)
nir_instr_remove(&first->instr);
if (nir_intrinsic_write_mask(second) == 0)
nir_instr_remove(&second->instr);
}
nir_instr_remove(&instr->instr);
return true;
}
static bool
r600_lower_64bit_load_const(nir_builder *b, nir_load_const_instr *instr)
{
int num_components = instr->def.num_components;
if (instr->def.bit_size != 64 || num_components <= 2)
return false;
b->cursor = nir_before_instr(&instr->instr);
nir_load_const_instr *first =
nir_load_const_instr_create(b->shader, 2, 64);
nir_load_const_instr *second =
nir_load_const_instr_create(b->shader, num_components - 2, 64);
first->value[0] = instr->value[0];
first->value[1] = instr->value[1];
second->value[0] = instr->value[2];
if (num_components == 4)
second->value[1] = instr->value[3];
nir_builder_instr_insert(b, &first->instr);
nir_builder_instr_insert(b, &second->instr);
nir_ssa_def *channels[4] = {
nir_channel(b, &first->def, 0),
nir_channel(b, &first->def, 1),
nir_channel(b, &second->def, 0),
num_components == 4 ? nir_channel(b, &second->def, 1) : NULL,
};
nir_ssa_def *new_ir = nir_vec(b, channels, num_components);
nir_ssa_def_rewrite_uses(&instr->def, new_ir);
nir_instr_remove(&instr->instr);
return true;
}
static bool
r600_lower_64bit_to_vec2_instr(nir_builder *b, nir_instr *instr, void *data)
{
switch (instr->type) {
case nir_instr_type_load_const:
return r600_lower_64bit_load_const(b, nir_instr_as_load_const(instr));
case nir_instr_type_intrinsic:
return r600_lower_64bit_intrinsic(b, nir_instr_as_intrinsic(instr));
default:
return false;
}
}
bool
r600_lower_64bit_to_vec2(nir_shader *s)
{
return nir_shader_instructions_pass(s,
r600_lower_64bit_to_vec2_instr,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
}
} // end namespace r600

View File

@ -95,13 +95,14 @@ emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_i
auto idx2 = nir_src_as_const_value(op->src[1]);
if (!idx2 || idx2->u32 != 0)
offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));
offset = nir_iadd(b, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)), offset);
return nir_iadd(b, addr, offset);
}
static nir_ssa_def *
emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op,
UNUSED nir_variable_mode mode, int src_offset)
{
nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
@ -552,7 +553,7 @@ r600_lower_tess_coord_filter(const nir_instr *instr, UNUSED const void *_options
}
static nir_ssa_def *
r600_lower_tess_coord_impl(nir_builder *b, nir_instr *instr, void *_options)
r600_lower_tess_coord_impl(nir_builder *b, UNUSED nir_instr *instr, void *_options)
{
pipe_prim_type prim_type = *(pipe_prim_type *)_options;

View File

@ -1,142 +1,9 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_nir_lower_tex.h"
#include "sfn_instruction_tex.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
namespace r600 {
TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
unsigned sid, unsigned rid, PValue sampler_offset):
Instruction(tex),
m_opcode(op),
m_dst(dest),
m_src(src),
m_sampler_id(sid),
m_resource_id(rid),
m_flags(0),
m_inst_mode(0),
m_dest_swizzle{0,1,2,3},
m_sampler_offset(sampler_offset)
{
memset(m_offset, 0, sizeof (m_offset));
add_remappable_src_value(&m_src);
add_remappable_src_value(&m_sampler_offset);
add_remappable_dst_value(&m_dst);
}
void TexInstruction::set_gather_comp(int cmp)
{
m_inst_mode = cmp;
}
void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value)
{
// I wonder whether we can actually end up here ...
for (auto c: candidates) {
if (*c == *m_src.reg_i(c->chan()))
m_src.set_reg_i(c->chan(), new_value);
if (*c == *m_dst.reg_i(c->chan()))
m_dst.set_reg_i(c->chan(), new_value);
}
}
void TexInstruction::set_offset(unsigned index, int32_t val)
{
assert(index < 3);
m_offset[index] = val;
}
int TexInstruction::get_offset(unsigned index) const
{
assert(index < 3);
return (m_offset[index] << 1 & 0x1f);
}
bool TexInstruction::is_equal_to(const Instruction& rhs) const
{
assert(rhs.type() == tex);
const auto& r = static_cast<const TexInstruction&>(rhs);
return (m_opcode == r.m_opcode &&
m_dst == r.m_dst &&
m_src == r.m_src &&
m_sampler_id == r.m_sampler_id &&
m_resource_id == r.m_resource_id);
}
void TexInstruction::do_print(std::ostream& os) const
{
const char *map_swz = "xyzw01?_";
os << opname(m_opcode) << " R" << m_dst.sel() << ".";
for (int i = 0; i < 4; ++i)
os << map_swz[m_dest_swizzle[i]];
os << " " << m_src
<< " RESID:" << m_resource_id << " SAMPLER:"
<< m_sampler_id;
}
const char *TexInstruction::opname(Opcode op)
{
switch (op) {
case ld: return "LD";
case get_resinfo: return "GET_TEXTURE_RESINFO";
case get_nsampled: return "GET_NUMBER_OF_SAMPLES";
case get_tex_lod: return "GET_LOD";
case get_gradient_h: return "GET_GRADIENTS_H";
case get_gradient_v: return "GET_GRADIENTS_V";
case set_offsets: return "SET_TEXTURE_OFFSETS";
case keep_gradients: return "KEEP_GRADIENTS";
case set_gradient_h: return "SET_GRADIENTS_H";
case set_gradient_v: return "SET_GRADIENTS_V";
case sample: return "SAMPLE";
case sample_l: return "SAMPLE_L";
case sample_lb: return "SAMPLE_LB";
case sample_lz: return "SAMPLE_LZ";
case sample_g: return "SAMPLE_G";
case sample_g_lb: return "SAMPLE_G_L";
case gather4: return "GATHER4";
case gather4_o: return "GATHER4_O";
case sample_c: return "SAMPLE_C";
case sample_c_l: return "SAMPLE_C_L";
case sample_c_lb: return "SAMPLE_C_LB";
case sample_c_lz: return "SAMPLE_C_LZ";
case sample_c_g: return "SAMPLE_C_G";
case sample_c_g_lb: return "SAMPLE_C_G_L";
case gather4_c: return "GATHER4_C";
case gather4_c_o: return "OP_GATHER4_C_O";
}
return "ERROR";
}
static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
{
b->cursor = nir_before_instr(&tex->instr);
@ -273,14 +140,14 @@ bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
nir_ssa_def *lambda_exp = nir_fexp2(b, lod);
nir_ssa_def *scale = NULL;
if (tex->is_array) {
if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
scale = nir_frcp(b, nir_channels(b, size, 1));
scale = nir_swizzle(b, scale, swizzle, 3);
} else if (tex->is_array) {
int cmp_mask = (1 << (size->num_components - 1)) - 1;
scale = nir_frcp(b, nir_channels(b, size,
(nir_component_mask_t)cmp_mask));
} else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
scale = nir_frcp(b, nir_channels(b, size, 1));
scale = nir_swizzle(b, scale, swizzle, 3);
}
nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);
@ -408,7 +275,3 @@ r600_nir_lower_cube_to_2darray(nir_shader *shader)
r600_nir_lower_cube_to_2darray_filer,
r600_nir_lower_cube_to_2darray_impl, nullptr);
}
}

View File

@ -0,0 +1,10 @@
#ifndef SFN_NIR_LOWER_TEX_H
#define SFN_NIR_LOWER_TEX_H
struct nir_shader;
bool r600_nir_lower_int_tg4(nir_shader *nir);
bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
#endif // SFN_NIR_LOWER_TEX_H

View File

@ -0,0 +1,627 @@
#include "sfn_optimizer.h"
#include "sfn_instr_alugroup.h"
#include "sfn_instr_controlflow.h"
#include "sfn_instr_export.h"
#include "sfn_instr_tex.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_lds.h"
#include "sfn_peephole.h"
#include "sfn_debug.h"
#include <sstream>
namespace r600 {
bool optimize(Shader& shader)
{
bool progress;
sfn_log << SfnLog::opt << "Shader before optimization\n";
if (sfn_log.has_debug_flag(SfnLog::opt)) {
std::stringstream ss;
shader.print(ss);
sfn_log << ss.str() << "\n\n";
}
do {
progress = false;
progress |= copy_propagation_fwd(shader);
progress |= dead_code_elimination(shader);
progress |= copy_propagation_backward(shader);
progress |= dead_code_elimination(shader);
progress |= simplify_source_vectors(shader);
progress |= peephole(shader);
progress |= dead_code_elimination(shader);
} while (progress);
return progress;
}
class DCEVisitor : public InstrVisitor {
public:
DCEVisitor();
void visit(AluInstr *instr) override;
void visit(AluGroup *instr) override;
void visit(TexInstr *instr) override;
void visit(ExportInstr *instr) override {(void)instr;};
void visit(FetchInstr *instr) override;
void visit(Block *instr) override;
void visit(ControlFlowInstr *instr) override {(void)instr;};
void visit(IfInstr *instr) override {(void)instr;};
void visit(WriteScratchInstr *instr) override {(void)instr;};
void visit(StreamOutInstr *instr) override {(void)instr;};
void visit(MemRingOutInstr *instr) override {(void)instr;};
void visit(EmitVertexInstr *instr) override {(void)instr;};
void visit(GDSInstr *instr) override {(void)instr;};
void visit(WriteTFInstr *instr) override {(void)instr;};
void visit(LDSAtomicInstr *instr) override {(void)instr;};
void visit(LDSReadInstr *instr) override;
void visit(RatInstr *instr) override {(void)instr;};
bool progress;
};
bool dead_code_elimination(Shader& shader)
{
DCEVisitor dce;
do {
sfn_log << SfnLog::opt << "start dce run\n";
dce.progress = false;
for (auto& b : shader.func())
b->accept(dce);
sfn_log << SfnLog::opt << "finished dce run\n\n";
} while (dce.progress);
sfn_log << SfnLog::opt << "Shader after DCE\n";
if (sfn_log.has_debug_flag(SfnLog::opt)) {
std::stringstream ss;
shader.print(ss);
sfn_log << ss.str() << "\n\n";
}
return dce.progress;
}
DCEVisitor::DCEVisitor():progress(false)
{
}
void DCEVisitor::visit(AluInstr *instr)
{
sfn_log << SfnLog::opt << "DCE: visit '" << *instr;
if (instr->has_instr_flag(Instr::dead))
return;
if (instr->dest() && instr->dest()->has_uses()) {
sfn_log << SfnLog::opt << " dest used\n";
return;
}
switch (instr->opcode()) {
case op2_kille:
case op2_killne:
case op2_kille_int:
case op2_killne_int:
case op2_killge:
case op2_killge_int:
case op2_killge_uint:
case op2_killgt:
case op2_killgt_int:
case op2_killgt_uint:
case op0_group_barrier:
sfn_log << SfnLog::opt << " never kill\n";
return;
default:
;
}
bool dead = instr->set_dead();
sfn_log << SfnLog::opt << (dead ? "dead" : "alive") << "\n";
progress |= dead;
}
void DCEVisitor::visit(LDSReadInstr *instr)
{
sfn_log << SfnLog::opt << "visit " << *instr << "\n";
progress |= instr->remove_unused_components();
}
void DCEVisitor::visit(AluGroup *instr)
{
/* Groups are created because the instructions are used together
* so don't try to eliminate code there */
(void)instr;
}
void DCEVisitor::visit(TexInstr *instr)
{
auto& dest = instr->dst();
bool has_uses = false;
RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
for (int i = 0; i < 4; ++i) {
if (!dest[i]->has_uses())
swz[i] = 7;
else
has_uses |= true;
}
instr->set_dest_swizzle(swz);
if (has_uses)
return;
progress |= instr->set_dead();
}
void DCEVisitor::visit(FetchInstr *instr)
{
auto& dest = instr->dst();
bool has_uses = false;
RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
for (int i = 0; i < 4; ++i) {
if (!dest[i]->has_uses())
swz[i] = 7;
else
has_uses |= true;
}
instr->set_dest_swizzle(swz);
if (has_uses)
return;
sfn_log << SfnLog::opt << "set dead: " << *instr << "\n";
progress |= instr->set_dead();
}
void DCEVisitor::visit(Block *block)
{
auto i = block->begin();
auto e = block->end();
while (i != e) {
auto n = i++;
if (!(*n)->keep()) {
(*n)->accept(*this);
if ((*n)->is_dead()) {
block->erase(n);
}
}
}
}
void visit(ControlFlowInstr *instr)
{
(void)instr;
}
void visit(IfInstr *instr)
{
(void)instr;
}
class CopyPropFwdVisitor : public InstrVisitor {
public:
CopyPropFwdVisitor();
void visit(AluInstr *instr) override;
void visit(AluGroup *instr) override;
void visit(TexInstr *instr) override;
void visit(ExportInstr *instr) override {(void)instr;}
void visit(FetchInstr *instr) override;
void visit(Block *instr) override;
void visit(ControlFlowInstr *instr) override {(void)instr;}
void visit(IfInstr *instr) override {(void)instr;}
void visit(WriteScratchInstr *instr) override {(void)instr;}
void visit(StreamOutInstr *instr) override {(void)instr;}
void visit(MemRingOutInstr *instr) override {(void)instr;}
void visit(EmitVertexInstr *instr) override {(void)instr;}
void visit(GDSInstr *instr) override {(void)instr;};
void visit(WriteTFInstr *instr) override {(void)instr;};
void visit(RatInstr *instr) override {(void)instr;};
// TODO: these two should use copy propagation
void visit(LDSAtomicInstr *instr) override {(void)instr;};
void visit(LDSReadInstr *instr) override {(void)instr;};
bool progress;
};
class CopyPropBackVisitor : public InstrVisitor {
public:
CopyPropBackVisitor();
void visit(AluInstr *instr) override;
void visit(AluGroup *instr) override;
void visit(TexInstr *instr) override;
void visit(ExportInstr *instr) override {(void)instr;}
void visit(FetchInstr *instr) override;
void visit(Block *instr) override;
void visit(ControlFlowInstr *instr) override {(void)instr;}
void visit(IfInstr *instr) override {(void)instr;}
void visit(WriteScratchInstr *instr) override {(void)instr;}
void visit(StreamOutInstr *instr) override {(void)instr;}
void visit(MemRingOutInstr *instr) override {(void)instr;}
void visit(EmitVertexInstr *instr) override {(void)instr;}
void visit(GDSInstr *instr) override {(void)instr;};
void visit(WriteTFInstr *instr) override {(void)instr;};
void visit(LDSAtomicInstr *instr) override {(void)instr;};
void visit(LDSReadInstr *instr) override {(void)instr;};
void visit(RatInstr *instr) override {(void)instr;};
bool progress;
};
bool copy_propagation_fwd(Shader& shader)
{
auto& root = shader.func();
CopyPropFwdVisitor copy_prop;
do {
copy_prop.progress = false;
for (auto b : root)
b->accept(copy_prop);
} while (copy_prop.progress);
sfn_log << SfnLog::opt << "Shader after Copy Prop forward\n";
if (sfn_log.has_debug_flag(SfnLog::opt)) {
std::stringstream ss;
shader.print(ss);
sfn_log << ss.str() << "\n\n";
}
return copy_prop.progress;
}
bool copy_propagation_backward(Shader& shader)
{
CopyPropBackVisitor copy_prop;
do {
copy_prop.progress = false;
for (auto b: shader.func())
b->accept(copy_prop);
} while (copy_prop.progress);
sfn_log << SfnLog::opt << "Shader after Copy Prop backwards\n";
if (sfn_log.has_debug_flag(SfnLog::opt)) {
std::stringstream ss;
shader.print(ss);
sfn_log << ss.str() << "\n\n";
}
return copy_prop.progress;
}
CopyPropFwdVisitor::CopyPropFwdVisitor():
progress(false)
{}
void CopyPropFwdVisitor::visit(AluInstr *instr)
{
sfn_log << SfnLog::opt << "CopyPropFwdVisitor:["
<< instr->block_id() << ":" << instr->index() << "] " << *instr
<< " dset=" << instr->dest() << " ";
if (instr->dest()) {
sfn_log << SfnLog::opt << "has uses; "
<< instr->dest()->uses().size();
}
sfn_log << SfnLog::opt << "\n";
if (!instr->can_propagate_src()) {
return;
}
auto src = instr->psrc(0);
auto dest = instr->dest();
for (auto& i : instr->dest()->uses()) {
/* SSA can always be propagated, registers only in the same block
* and only if they are not assigned more than once */
if (dest->is_ssa() ||
(instr->block_id() == i->block_id() &&
instr->index() < i->index() &&
dest->uses().size() == 1)) {
sfn_log << SfnLog::opt << " Try replace in "
<< i->block_id() << ":" << i->index()
<< *i << "\n";
progress |= i->replace_source(dest, src);
}
}
if (instr->dest()) {
sfn_log << SfnLog::opt << "has uses; "
<< instr->dest()->uses().size();
}
sfn_log << SfnLog::opt << " done\n";
}
void CopyPropFwdVisitor::visit(AluGroup *instr)
{
(void)instr;
}
void CopyPropFwdVisitor::visit(TexInstr *instr)
{
(void)instr;
}
void CopyPropFwdVisitor::visit(FetchInstr *instr)
{
(void)instr;
}
void CopyPropFwdVisitor::visit(Block *instr)
{
for (auto& i: *instr)
i->accept(*this);
}
CopyPropBackVisitor::CopyPropBackVisitor():
progress(false)
{
}
void CopyPropBackVisitor::visit(AluInstr *instr)
{
bool local_progress = false;
sfn_log << SfnLog::opt << "CopyPropBackVisitor:["
<< instr->block_id() << ":" << instr->index() << "] " << *instr << "\n";
if (!instr->can_propagate_dest()) {
return;
}
auto src_reg = instr->psrc(0)->as_register();
if (!src_reg) {
return;
}
if (src_reg->uses().size() > 1)
return;
auto dest = instr->dest();
if (!dest ||
!instr->has_alu_flag(alu_write)) {
return;
}
if (!dest->is_ssa() && dest->parents().size() > 1)
return;
for (auto& i: src_reg->parents()) {
sfn_log << SfnLog::opt << "Try replace dest in "
<< i->block_id() << ":" << i->index()
<< *i << "\n";
if (i->replace_dest(dest, instr)) {
dest->del_parent(instr);
dest->add_parent(i);
for (auto d : instr->dependend_instr()) {
d->add_required_instr(i);
}
local_progress = true;
}
}
if (local_progress)
instr->set_dead();
progress |= local_progress;
}
void CopyPropBackVisitor::visit(AluGroup *instr)
{
for (auto& i: *instr) {
if (i)
i->accept(*this);
}
}
void CopyPropBackVisitor::visit(TexInstr *instr)
{
(void)instr;
}
void CopyPropBackVisitor::visit(FetchInstr *instr)
{
(void)instr;
}
void CopyPropBackVisitor::visit(Block *instr)
{
for (auto i = instr->rbegin(); i != instr->rend(); ++i)
if (!(*i)->is_dead())
(*i)->accept(*this);
}
class SimplifySourceVecVisitor : public InstrVisitor {
public:
SimplifySourceVecVisitor():progress(false) {}
void visit(AluInstr *instr) override{(void)instr;}
void visit(AluGroup *instr) override{(void)instr;}
void visit(TexInstr *instr) override;
void visit(ExportInstr *instr) override;
void visit(FetchInstr *instr) override;
void visit(Block *instr) override;
void visit(ControlFlowInstr *instr) override;
void visit(IfInstr *instr) override;
void visit(WriteScratchInstr *instr) override;
void visit(StreamOutInstr *instr) override;
void visit(MemRingOutInstr *instr) override;
void visit(EmitVertexInstr *instr) override {(void)instr;}
void visit(GDSInstr *instr) override {(void)instr;};
void visit(WriteTFInstr *instr) override {(void)instr;};
void visit(LDSAtomicInstr *instr) override {(void)instr;};
void visit(LDSReadInstr *instr) override {(void)instr;};
void visit(RatInstr *instr) override {(void)instr;};
void replace_src(Instr *instr, RegisterVec4& reg4);
bool progress;
};
bool simplify_source_vectors(Shader& sh)
{
SimplifySourceVecVisitor visitor;
for (auto b: sh.func())
b->accept(visitor);
return visitor.progress;
}
void SimplifySourceVecVisitor::visit(TexInstr *instr)
{
if (instr->opcode() != TexInstr::get_resinfo) {
replace_src(instr, instr->src());
}
}
void SimplifySourceVecVisitor::visit(WriteScratchInstr *instr)
{
(void) instr;
}
class ReplaceConstSource : public AluInstrVisitor {
public:
ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i):
old_use(old_use_), vreg(vreg_), index(i),success(false) {}
using AluInstrVisitor::visit;
void visit(AluInstr *alu) override;
Instr *old_use;
RegisterVec4& vreg;
int index;
bool success;
};
void SimplifySourceVecVisitor::visit(ExportInstr *instr)
{
replace_src(instr, instr->value());
}
void SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4)
{
for (int i = 0; i < 4; ++i) {
auto s = reg4[i];
if (s->chan() > 3)
continue;
if (!s->is_ssa())
continue;
/* Cayman trans ops have more than one parent for
* one dest */
if (s->parents().size() != 1)
continue;
auto& op = *s->parents().begin();
ReplaceConstSource visitor(instr, reg4, i);
op->accept(visitor);
progress |= visitor.success;
}
}
void SimplifySourceVecVisitor::visit(StreamOutInstr *instr)
{
(void)instr;
}
void SimplifySourceVecVisitor::visit(MemRingOutInstr *instr)
{
(void)instr;
}
void ReplaceConstSource::visit(AluInstr *alu)
{
if (alu->opcode() != op1_mov)
return;
if (alu->has_alu_flag(alu_src0_abs) ||
alu->has_alu_flag(alu_src0_neg))
return;
auto src = alu->psrc(0);
assert(src);
int override_chan = -1;
auto ic = src->as_inline_const();
if (ic) {
if (ic->sel() == ALU_SRC_0)
override_chan = 4;
if (ic->sel() == ALU_SRC_1)
override_chan = 5;
}
auto literal = src->as_literal();
if (literal) {
if (literal->value() == 0)
override_chan = 4;
if (literal->value() == 0x3F800000)
override_chan = 5;
}
if (override_chan >= 0) {
vreg[index]->del_use(old_use);
auto reg = new Register(vreg.sel(), override_chan, vreg[index]->pin());
vreg.set_value(index, reg);
success = true;
}
}
void SimplifySourceVecVisitor::visit(FetchInstr *instr)
{
(void) instr;
}
void SimplifySourceVecVisitor::visit(Block *instr)
{
for (auto i = instr->rbegin(); i != instr->rend(); ++i)
if (!(*i)->is_dead())
(*i)->accept(*this);
}
void SimplifySourceVecVisitor::visit(ControlFlowInstr *instr)
{
(void) instr;
}
void SimplifySourceVecVisitor::visit(IfInstr *instr)
{
(void) instr;
}
}

View File

@ -0,0 +1,17 @@
#ifndef OPTIMIZER_H
#define OPTIMIZER_H
#include "sfn_shader.h"
namespace r600 {
bool dead_code_elimination(Shader& shader);
bool copy_propagation_fwd(Shader& shader);
bool copy_propagation_backward(Shader& shader);
bool simplify_source_vectors(Shader& sh);
bool optimize(Shader& shader);
}
#endif // OPTIMIZER_H
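/* Editor's note (not part of the patch): the header above only declares the
 * individual passes; the body of optimize() is in a part of the diff that is
 * not shown here. Below is a plausible fixed-point driver, sketched under
 * the assumption that optimize() simply iterates the declared passes (plus
 * peephole() from sfn_peephole.h) until none reports progress. The include
 * name sfn_optimizer.h is inferred from the OPTIMIZER_H guard. */

#include "sfn_optimizer.h"   /* assumed file name of the header above */
#include "sfn_peephole.h"

namespace r600 {

/* Sketch only, not the actual optimize() body: run the scalar passes to a
 * fixed point and report whether anything changed at all. */
static bool optimize_sketch(Shader& shader)
{
   bool progress = false;
   bool repeat;
   do {
      repeat = false;
      repeat |= copy_propagation_fwd(shader);
      repeat |= dead_code_elimination(shader);
      repeat |= copy_propagation_backward(shader);
      repeat |= simplify_source_vectors(shader);
      repeat |= peephole(shader);
      progress |= repeat;
   } while (repeat);
   return progress;
}

}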

View File

@ -1,12 +0,0 @@
#include "sfn_optimizers.h"
#include "sfn_instruction_block.h"
namespace r600 {
std::vector<PInstruction>
flatten_shader(const std::vector<InstructionBlock> &ir)
{
}
}

View File

@ -1,14 +0,0 @@
#ifndef SFN_OPTIMIZERS_H
#define SFN_OPTIMIZERS_H
#include "sfn_instruction_base.h"
namespace r600 {
std::vector<PInstruction>
flatten_alu_ops(const std::vector<InstructionBlock> &ir);
}
#endif // SFN_OPTIMIZERS_H

View File

@ -0,0 +1,212 @@
#include "sfn_peephole.h"
namespace r600 {
class PeepholeVisitor : public InstrVisitor {
public:
void visit(AluInstr *instr) override;
void visit(AluGroup *instr) override;
void visit(TexInstr *instr) override {(void)instr;};
void visit(ExportInstr *instr) override {(void)instr;}
void visit(FetchInstr *instr) override {(void)instr;}
void visit(Block *instr) override;
void visit(ControlFlowInstr *instr) override {(void)instr;}
void visit(IfInstr *instr) override;
void visit(WriteScratchInstr *instr) override {(void)instr;}
void visit(StreamOutInstr *instr) override {(void)instr;}
void visit(MemRingOutInstr *instr) override {(void)instr;}
void visit(EmitVertexInstr *instr) override {(void)instr;}
void visit(GDSInstr *instr) override {(void)instr;};
void visit(WriteTFInstr *instr) override {(void)instr;};
void visit(LDSAtomicInstr *instr) override {(void)instr;};
void visit(LDSReadInstr *instr) override {(void)instr;};
void visit(RatInstr *instr) override {(void)instr;};
bool src_is_zero(PVirtualValue value);
bool src_is_one(PVirtualValue value);
void convert_to_mov(AluInstr *alu, int src_idx);
bool progress{false};
};
bool peephole(Shader& sh)
{
PeepholeVisitor peephole;
for(auto b : sh.func())
b->accept(peephole);
return peephole.progress;
}
void PeepholeVisitor::visit(AluInstr *instr)
{
switch (instr->opcode()) {
case op2_add:
case op2_add_int:
if (src_is_zero(instr->psrc(0)))
convert_to_mov(instr, 1);
else if (src_is_zero(instr->psrc(1)))
convert_to_mov(instr, 0);
break;
case op2_mul:
case op2_mul_ieee:
if (src_is_one(instr->psrc(0)))
convert_to_mov(instr, 1);
else if (src_is_one(instr->psrc(1)))
convert_to_mov(instr, 0);
break;
case op3_muladd:
case op3_muladd_ieee:
if (src_is_zero(instr->psrc(0)) ||
src_is_zero(instr->psrc(1)))
convert_to_mov(instr, 2);
break;
default:
;
}
}
bool PeepholeVisitor::src_is_zero(PVirtualValue value)
{
if (value->as_inline_const() &&
value->as_inline_const()->sel() == ALU_SRC_0)
return true;
if (value->as_literal() &&
value->as_literal()->value() == 0)
return true;
return false;
}
bool PeepholeVisitor::src_is_one(PVirtualValue value)
{
if (value->as_inline_const() &&
value->as_inline_const()->sel() == ALU_SRC_1)
return true;
if (value->as_literal() &&
value->as_literal()->value() == 0x3f800000)
return true;
return false;
}
void PeepholeVisitor::convert_to_mov(AluInstr *alu, int src_idx)
{
AluInstr::SrcValues new_src{alu->psrc(src_idx)};
alu->set_sources(new_src);
alu->set_op(op1_mov);
progress = true;
}
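/* Editor's note (not part of the patch): the dispatch above folds the
 * neutral-element identities x+0, 0+x, x*1, 1*x, and MULADD with a zero
 * factor into plain MOVs, ignoring signed-zero subtleties just as the pass
 * itself does. A tiny standalone model of the same rewrite follows; the
 * types and names are invented, while the real pass rewrites the AluInstr
 * source list to the single surviving operand and switches the opcode to
 * op1_mov. */

/* Illustrative model: an ALU op with two float sources given as bit
 * patterns; 0x00000000 and 0x3F800000 stand in for ALU_SRC_0/ALU_SRC_1.
 * A MOV reads only src[0]. */
enum MiniOp { MINI_ADD, MINI_MUL, MINI_MOV };

struct MiniAlu {
   MiniOp op;
   uint32_t src[2];
};

static void fold_neutral_sketch(MiniAlu& i)
{
   const uint32_t zero = 0x00000000u;   /* 0.0f */
   const uint32_t one  = 0x3F800000u;   /* 1.0f */

   if (i.op == MINI_ADD) {
      if (i.src[0] == zero)      { i.src[0] = i.src[1]; i.op = MINI_MOV; }
      else if (i.src[1] == zero) { i.op = MINI_MOV; }
   } else if (i.op == MINI_MUL) {
      if (i.src[0] == one)       { i.src[0] = i.src[1]; i.op = MINI_MOV; }
      else if (i.src[1] == one)  { i.op = MINI_MOV; }
   }
}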
void PeepholeVisitor::visit(AluGroup *instr)
{
}
void PeepholeVisitor::visit(Block *instr)
{
for (auto& i: *instr)
i->accept(*this);
}
class ReplaceIfPredicate : public AluInstrVisitor {
public:
ReplaceIfPredicate(AluInstr *pred):
m_pred(pred) {}
using AluInstrVisitor::visit;
void visit(AluInstr *alu) override;
AluInstr *m_pred;
bool success{false};
};
void PeepholeVisitor::visit(IfInstr *instr)
{
auto pred = instr->predicate();
auto& src1 = pred->src(1);
if (src1.as_inline_const() &&
src1.as_inline_const()->sel() == ALU_SRC_0) {
auto src0 = pred->src(0).as_register();
if (src0 && src0->is_ssa()) {
assert(!src0->parents().empty());
auto parent = *src0->parents().begin();
ReplaceIfPredicate visitor(pred);
parent->accept(visitor);
progress |= visitor.success;
}
}
}
static EAluOp pred_from_op(EAluOp pred_op, EAluOp op)
{
switch (pred_op) {
case op2_pred_setne_int:
switch (op) {
/*
case op2_setge_dx10 : return op2_pred_setge_int;
case op2_setgt_dx10 : return op2_pred_setgt_int;
case op2_sete_dx10 : return op2_prede_int;
case op2_setne_dx10 : return op2_pred_setne_int;
*/
case op2_setge_int : return op2_pred_setge_int;
case op2_setgt_int : return op2_pred_setgt_int;
case op2_setge_uint : return op2_pred_setge_uint;
case op2_setgt_uint : return op2_pred_setgt_uint;
case op2_sete_int : return op2_prede_int;
case op2_setne_int : return op2_pred_setne_int;
default:
return op0_nop;
}
case op2_prede_int:
switch (op) {
case op2_sete_int : return op2_pred_setne_int;
case op2_setne_int : return op2_prede_int;
default:
return op0_nop;
}
default:
return op0_nop;
}
}
void ReplaceIfPredicate::visit(AluInstr *alu)
{
auto new_op = pred_from_op(m_pred->opcode(), alu->opcode());
if (new_op == op0_nop)
return;
/* Have to figure out how to pass the dependency correctly */
/*for (auto& s : alu->sources()) {
if (s->as_register() && s->as_register()->addr())
return;
}*/
m_pred->set_op(new_op);
m_pred->set_sources(alu->sources());
if (alu->has_alu_flag(alu_src0_abs))
m_pred->set_alu_flag(alu_src0_abs);
if (alu->has_alu_flag(alu_src1_abs))
m_pred->set_alu_flag(alu_src1_abs);
if (alu->has_alu_flag(alu_src0_neg))
m_pred->set_alu_flag(alu_src0_neg);
if (alu->has_alu_flag(alu_src1_neg))
m_pred->set_alu_flag(alu_src1_neg);
success = true;
}
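/* Editor's note (not part of the patch): the folding above targets the
 * common pattern where NIR produces a boolean with a SETxx instruction and
 * the branch then tests that boolean with PRED_SETNE_INT against zero. A
 * worked before/after at the IR level, with invented register names:
 *
 *    Before (two instructions, the IF tests the SET result):
 *       R1.x = SETGT_INT R2.x, R3.x
 *       IF PRED_SETNE_INT(R1.x, 0)
 *
 *    After (the comparison is folded straight into the predicate):
 *       IF PRED_SETGT_INT(R2.x, R3.x)
 *
 * For PRED_SETE_INT (branch taken when the boolean is zero) the table in
 * pred_from_op inverts the condition instead, e.g. SETE_INT becomes
 * PRED_SETNE_INT. The original SETxx stays in place and is removed later by
 * dead-code elimination if the predicate was its only user. */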
}

View File

@ -0,0 +1,13 @@
#ifndef PEEPHOLE_H
#define PEEPHOLE_H
#include "sfn_shader.h"
namespace r600 {
bool peephole(Shader& sh);
}
#endif // PEEPHOLE_H

View File

@ -0,0 +1,268 @@
#include "sfn_debug.h"
#include "sfn_ra.h"
#include <cassert>
#include <queue>
namespace r600 {
void ComponentInterference::prepare_row(int row)
{
m_rows.resize(row + 1);
}
void ComponentInterference::add(size_t idx1, size_t idx2)
{
assert(idx1 > idx2);
assert(m_rows.size() > idx1);
m_rows[idx1].push_back(idx2);
m_rows[idx2].push_back(idx1);
}
Interference::Interference(LiveRangeMap& map):
m_map(map)
{
initialize();
}
void Interference::initialize()
{
for(int i = 0; i < 4; ++i) {
initialize(m_components_maps[i], m_map.component(i));
}
}
void Interference::initialize(ComponentInterference& comp_interference,
LiveRangeMap::ChannelLiveRange& clr)
{
for (size_t row = 0; row < clr.size(); ++row) {
auto& row_entry = clr[row];
comp_interference.prepare_row(row);
for (size_t col = 0; col < row; ++col) {
auto& col_entry = clr[col];
if (row_entry.m_end >= col_entry.m_start &&
row_entry.m_start <= col_entry.m_end)
comp_interference.add(row, col);
}
}
}
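/* Editor's note (not part of the patch): the interference test above is a
 * closed-interval overlap check on the live ranges of two values within the
 * same channel. A standalone sketch with the same convention: two ranges
 * interfere when neither one ends before the other starts. */

struct RangeSketch { int start, end; };   /* closed interval of program points */

static bool ranges_interfere_sketch(const RangeSketch& a, const RangeSketch& b)
{
   /* Same condition as above: a.end >= b.start && a.start <= b.end */
   return a.end >= b.start && a.start <= b.end;
}

static void overlap_examples_sketch()
{
   RangeSketch a{2, 7}, b{5, 9}, c{2, 4};
   assert(ranges_interfere_sketch(a, b));    /* overlap at points 5..7 */
   assert(!ranges_interfere_sketch(c, b));   /* disjoint */
   assert(ranges_interfere_sketch(b, a));    /* the relation is symmetric */
}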
struct Group {
int priority;
std::array<PRegister, 4> channels;
};
static inline bool operator < (const Group& lhs, const Group& rhs)
{
return lhs.priority < rhs.priority;
}
using GroupRegisters = std::priority_queue<Group>;
static bool
group_allocation (LiveRangeMap& lrm, const Interference& interference, GroupRegisters& groups)
{
int color = 0;
// allocate grouped registers
while (!groups.empty()) {
auto group = groups.top();
groups.pop();
int start_comp = 0;
while (!group.channels[start_comp])
++start_comp;
sfn_log << SfnLog::merge << "Color group with " << *group.channels[start_comp] << "\n";
// don't restart registers for exports, we may be able to merge the
// export calls if the registers are consecutive
if (group.priority > 0)
color = 0;
while (color < 124) {
/* Find the coloring for the first channel */
bool color_in_use = false;
int comp = start_comp;
auto& adjecency = interference.row(start_comp, group.channels[comp]->index());
auto& regs = lrm.component(comp);
sfn_log << SfnLog::merge << "Try color "<< color;
for (auto adj : adjecency) {
if (regs[adj].m_color == color) {
color_in_use = true;
sfn_log << SfnLog::merge << " in use\n";
break;
}
}
if (color_in_use) {
++color;
continue;
}
/* First channel color found, check whether it can be used for all channels */
while (comp < 4) {
sfn_log << SfnLog::merge << " interference: ";
if (group.channels[comp]) {
auto& component_life_ranges = lrm.component(comp);
auto& adjecencies = interference.row(comp, group.channels[comp]->index());
for (auto adj_index : adjecencies) {
sfn_log << SfnLog::merge << *component_life_ranges[adj_index].m_register << " ";
if (component_life_ranges[adj_index].m_color == color) {
color_in_use = true;
sfn_log << SfnLog::merge << "used";
break;
}
}
if (color_in_use)
break;
}
++comp;
}
/* We couldn't allocate all channels with this color, so try next */
if (color_in_use) {
++color;
sfn_log << SfnLog::merge << "\n";
continue;
}
sfn_log << SfnLog::merge << " success\n";
/* Coloring successful */
for (auto reg : group.channels) {
if (reg) {
auto& vregs = lrm.component(reg->chan());
auto& vreg_cmp = vregs[reg->index()];
assert(vreg_cmp.m_start != -1 || vreg_cmp.m_end != -1);
vreg_cmp.m_color = color;
}
}
break;
}
if (color == 124)
return false;
}
return true;
}
static bool
scalar_allocation (LiveRangeMap& lrm, const Interference& interference)
{
for (int comp = 0; comp < 4; ++comp) {
auto& live_ranges = lrm.component(comp);
for (auto& r : live_ranges) {
if (r.m_color != -1)
continue;
if (r.m_start == -1 &&
r.m_end == -1)
continue;
sfn_log << SfnLog::merge << "Color " << *r.m_register << "\n";
auto& adjecency = interference.row(comp, r.m_register->index());
int color = 0;
while (color < 124) {
bool color_in_use = false;
for (auto adj : adjecency) {
if (live_ranges[adj].m_color == color) {
color_in_use = true;
break;
}
}
if (color_in_use) {
++color;
continue;
}
r.m_color = color;
break;
}
if (color == 124)
return false;
}
}
return true;
}
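/* Editor's note (not part of the patch): both allocation loops above are
 * greedy colorings over the per-channel interference rows, with 124 playing
 * the role of the number of usable GPR slots. The inner search, extracted
 * into a standalone sketch with simplified container types: */

/* Return the lowest color in [0, 124) that none of the already-colored
 * neighbours uses, or -1 when every candidate is taken and allocation
 * fails. */
static int pick_lowest_free_color_sketch(const std::vector<int>& neighbour_colors)
{
   for (int color = 0; color < 124; ++color) {
      bool in_use = false;
      for (int c : neighbour_colors) {
         if (c == color) {
            in_use = true;
            break;
         }
      }
      if (!in_use)
         return color;
   }
   return -1;
}

/* group_allocation additionally requires the chosen color to be free in all
 * channels of the vec4 group, and it only restarts the search at color 0 for
 * non-export groups (priority > 0): export groups keep counting upward so
 * that their registers come out consecutive and the exports can later be
 * merged. */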
bool register_allocation(LiveRangeMap& lrm)
{
Interference interference(lrm);
std::map<int, Group> groups;
// setup fixed colors and group relationships
for (int i = 0; i < 4; ++i) {
auto& comp = lrm.component(i);
for (auto& entry : comp) {
sfn_log << SfnLog::merge << "Prepare RA for "
<< *entry.m_register
<< " [" << entry.m_start << ", " << entry.m_end << "]\n";
auto pin = entry.m_register->pin();
if (entry.m_start == -1 && entry.m_end == -1) {
if (pin == pin_group || pin == pin_chgr)
entry.m_register->set_chan(7);
continue;
}
auto sel = entry.m_register->sel();
/* fully pinned registers contain system values with a
* fixed register index, and array values are allocated
* right after the system registers, so just reuse the IDs (for now) */
if (pin == pin_fully || pin == pin_array) {
/* Must set all array element entries */
sfn_log << SfnLog::merge << "Pin color " << sel << " to " << *entry.m_register << "\n";
entry.m_color = sel;
} else if (pin == pin_group || pin == pin_chgr) {
/* Groups must all have the same sel() value, because they are used
* as vec4 registers */
auto igroup = groups.find(sel);
if (igroup != groups.end()) {
igroup->second.channels[i] = entry.m_register;
assert(comp[entry.m_register->index()].m_register->index() == entry.m_register->index());
} else {
int priority = entry.m_use.test(LiveRangeEntry::use_export) ? - entry.m_end : entry.m_start;
Group group{priority, {nullptr, nullptr, nullptr, nullptr}};
group.channels[i] = entry.m_register;
assert(comp[group.channels[i]->index()].m_register->index() == entry.m_register->index());
groups[sel] = group;
}
}
}
}
GroupRegisters groups_sorted;
for (auto& [sel, group] : groups)
groups_sorted.push(group);
if (!group_allocation (lrm, interference, groups_sorted))
return false;
if (!scalar_allocation(lrm, interference))
return false;
for (int i = 0; i < 4; ++i) {
auto& comp = lrm.component(i);
for (auto& entry : comp) {
sfn_log << SfnLog::merge << "Set " << *entry.m_register << " to ";
entry.m_register->set_sel(entry.m_color);
entry.m_register->set_pin(pin_none);
sfn_log << SfnLog::merge << *entry.m_register << "\n";
}
}
return true;
}
}

View File

@ -0,0 +1,51 @@
#ifndef INTERFERENCE_H
#define INTERFERENCE_H
#include "sfn_valuefactory.h"
#include <vector>
namespace r600 {
class ComponentInterference
{
public:
using Row = std::vector<int>;
void prepare_row(int row);
void add(size_t idx1, size_t idx2);
auto row(int idx) const -> const Row& {
assert((size_t)idx < m_rows.size()); return m_rows[idx];}
private:
std::vector<Row> m_rows;
};
class Interference {
public:
Interference(LiveRangeMap& map);
const auto& row(int comp, int index) const {
assert(comp < 4);
return m_components_maps[comp].row(index);
}
private:
void initialize();
void initialize(ComponentInterference& comp, LiveRangeMap::ChannelLiveRange& clr);
LiveRangeMap& m_map;
std::array<ComponentInterference, 4> m_components_maps;
};
bool register_allocation(LiveRangeMap& lrm);
}
#endif // INTERFERENCE_H

View File

@ -0,0 +1,890 @@
#include "sfn_scheduler.h"
#include "sfn_instr_alugroup.h"
#include "sfn_instr_controlflow.h"
#include "sfn_instr_export.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_mem.h"
#include "sfn_instr_lds.h"
#include "sfn_instr_tex.h"
#include "sfn_debug.h"
#include <algorithm>
#include <sstream>
namespace r600 {
class CollectInstructions : public InstrVisitor {
public:
CollectInstructions(ValueFactory& vf):
m_value_factory(vf) {}
void visit(AluInstr *instr) override {
if (instr->has_alu_flag(alu_is_trans))
alu_trans.push_back(instr);
else {
if (instr->alu_slots() == 1)
alu_vec.push_back(instr);
else
alu_groups.push_back(instr->split(m_value_factory));
}
}
void visit(AluGroup *instr) override {
alu_groups.push_back(instr);
}
void visit(TexInstr *instr) override {
tex.push_back(instr);
}
void visit(ExportInstr *instr) override {
exports.push_back(instr);
}
void visit(FetchInstr *instr) override {
fetches.push_back(instr);
}
void visit(Block *instr) override {
for (auto& i: *instr)
i->accept(*this);
}
void visit(ControlFlowInstr *instr) override {
assert(!m_cf_instr);
m_cf_instr = instr;
}
void visit(IfInstr *instr) override {
assert(!m_cf_instr);
m_cf_instr = instr;
}
void visit(EmitVertexInstr *instr) override {
assert(!m_cf_instr);
m_cf_instr = instr;
}
void visit(WriteScratchInstr *instr) override {
mem_write_instr.push_back(instr);
}
void visit(StreamOutInstr *instr) override {
mem_write_instr.push_back(instr);
}
void visit(MemRingOutInstr *instr) override {
mem_ring_writes.push_back(instr);
}
void visit(GDSInstr *instr) override {
gds_op.push_back(instr);
}
void visit(WriteTFInstr *instr) override {
write_tf.push_back(instr);
}
void visit(LDSReadInstr *instr) override {
std::vector<AluInstr*> buffer;
m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
for (auto& i: buffer) {
i->accept(*this);
}
}
void visit(LDSAtomicInstr *instr) override {
std::vector<AluInstr*> buffer;
m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
for (auto& i: buffer) {
i->accept(*this);
}
}
void visit(RatInstr *instr) override {
rat_instr.push_back(instr);
}
std::list<AluInstr *> alu_trans;
std::list<AluInstr *> alu_vec;
std::list<TexInstr *> tex;
std::list<AluGroup *> alu_groups;
std::list<ExportInstr *> exports;
std::list<FetchInstr *> fetches;
std::list<WriteOutInstr *> mem_write_instr;
std::list<MemRingOutInstr *> mem_ring_writes;
std::list<GDSInstr *> gds_op;
std::list<WriteTFInstr *> write_tf;
std::list<RatInstr *> rat_instr;
Instr *m_cf_instr{nullptr};
ValueFactory& m_value_factory;
AluInstr *m_last_lds_instr{nullptr};
};
class BlockSheduler {
public:
BlockSheduler();
void run(Shader *shader);
void finalize();
private:
void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf);
bool collect_ready(CollectInstructions &available);
template <typename T>
bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available);
bool schedule_tex(Shader::ShaderBlocks& out_blocks);
bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
template <typename I>
bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
template <typename I>
bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
bool schedule_alu(Shader::ShaderBlocks& out_blocks);
void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type);
bool schedule_alu_to_group_vec(AluGroup *group);
bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist);
bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list);
template <typename I>
bool schedule(std::list<I *>& ready_list);
template <typename I>
bool schedule_block(std::list<I *>& ready_list);
std::list<AluInstr *> alu_vec_ready;
std::list<AluInstr *> alu_trans_ready;
std::list<AluGroup *> alu_groups_ready;
std::list<TexInstr *> tex_ready;
std::list<ExportInstr *> exports_ready;
std::list<FetchInstr *> fetches_ready;
std::list<WriteOutInstr *> memops_ready;
std::list<MemRingOutInstr *> mem_ring_writes_ready;
std::list<GDSInstr *> gds_ready;
std::list<WriteTFInstr *> write_tf_ready;
std::list<RatInstr *> rat_instr_ready;
enum {
sched_alu,
sched_tex,
sched_fetch,
sched_free,
sched_mem_ring,
sched_gds,
sched_write_tf,
sched_rat,
} current_shed;
ExportInstr *m_last_pos;
ExportInstr *m_last_pixel;
ExportInstr *m_last_param;
Block *m_current_block;
int m_lds_addr_count{0};
int m_alu_groups_schduled{0};
};
Shader *schedule(Shader *original)
{
AluGroup::set_chipclass(original->chip_class());
sfn_log << SfnLog::schedule << "Original shader\n";
if (sfn_log.has_debug_flag(SfnLog::schedule)) {
std::stringstream ss;
original->print(ss);
sfn_log << ss.str() << "\n\n";
}
// TODO later it might be necessary to clone the shader
// to be able to re-start scheduling
auto scheduled_shader = original;
BlockSheduler s;
s.run(scheduled_shader);
s.finalize();
sfn_log << SfnLog::schedule << "Scheduled shader\n";
if (sfn_log.has_debug_flag(SfnLog::schedule)) {
std::stringstream ss;
scheduled_shader->print(ss);
sfn_log << ss.str() << "\n\n";
}
return scheduled_shader;
}
BlockSheduler::BlockSheduler():
current_shed(sched_alu),
m_last_pos(nullptr),
m_last_pixel(nullptr),
m_last_param(nullptr),
m_current_block(nullptr)
{
}
void BlockSheduler::run( Shader *shader)
{
Shader::ShaderBlocks scheduled_blocks;
for (auto& block : shader->func()) {
sfn_log << SfnLog::schedule << "Process block " << block->id() <<"\n";
if (sfn_log.has_debug_flag(SfnLog::schedule)) {
std::stringstream ss;
block->print(ss);
sfn_log << ss.str() << "\n";
}
schedule_block(*block, scheduled_blocks, shader->value_factory());
}
shader->reset_function(scheduled_blocks);
}
void BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)
{
assert(in_block.id() >= 0);
current_shed = sched_fetch;
auto last_shed = sched_fetch;
CollectInstructions cir(vf);
in_block.accept(cir);
bool have_instr = collect_ready(cir);
m_current_block = new Block(in_block.nesting_depth(), in_block.id());
assert(m_current_block->id() >= 0);
while (have_instr) {
sfn_log << SfnLog::schedule << "Have ready instructions\n";
if (alu_vec_ready.size())
sfn_log << SfnLog::schedule << " ALU V:" << alu_vec_ready.size() << "\n";
if (alu_trans_ready.size())
sfn_log << SfnLog::schedule << " ALU T:" << alu_trans_ready.size() << "\n";
if (alu_groups_ready.size())
sfn_log << SfnLog::schedule << " ALU G:" << alu_groups_ready.size() << "\n";
if (exports_ready.size())
sfn_log << SfnLog::schedule << " EXP:" << exports_ready.size()
<< "\n";
if (tex_ready.size())
sfn_log << SfnLog::schedule << " TEX:" << tex_ready.size()
<< "\n";
if (fetches_ready.size())
sfn_log << SfnLog::schedule << " FETCH:" << fetches_ready.size()
<< "\n";
if (mem_ring_writes_ready.size())
sfn_log << SfnLog::schedule << " MEM_RING:" << mem_ring_writes_ready.size()
<< "\n";
if (memops_ready.size())
sfn_log << SfnLog::schedule << " MEM_OPS:" << memops_ready.size()
<< "\n";
if (!m_current_block->lds_group_active()) {
if (last_shed != sched_free && memops_ready.size() > 8)
current_shed = sched_free;
else if (mem_ring_writes_ready.size() > 5)
current_shed = sched_mem_ring;
else if (rat_instr_ready.size() > 3)
current_shed = sched_rat;
else if (gds_ready.size() > 3)
current_shed = sched_gds;
else if (tex_ready.size() > 3)
current_shed = sched_tex;
}
switch (current_shed) {
case sched_alu:
if (!schedule_alu(out_blocks)) {
assert(!m_current_block->lds_group_active());
current_shed = sched_tex;
continue;
}
last_shed = current_shed;
break;
case sched_tex:
if (tex_ready.empty() || !schedule_tex(out_blocks)) {
current_shed = sched_fetch;
continue;
}
last_shed = current_shed;
break;
case sched_fetch:
if (!fetches_ready.empty()) {
schedule_vtx(out_blocks);
last_shed = current_shed;
}
current_shed = sched_gds;
continue;
case sched_gds:
if (!gds_ready.empty()) {
schedule_gds(out_blocks, gds_ready);
last_shed = current_shed;
}
current_shed = sched_mem_ring;
continue;
case sched_mem_ring:
if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) {
current_shed = sched_write_tf;
continue;
}
last_shed = current_shed;
break;
case sched_write_tf:
if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) {
current_shed = sched_rat;
continue;
}
last_shed = current_shed;
break;
case sched_rat:
if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) {
current_shed = sched_free;
continue;
}
last_shed = current_shed;
break;
case sched_free:
if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) {
current_shed = sched_alu;
break;
}
last_shed = current_shed;
}
have_instr = collect_ready(cir);
}
/* Always emit exports at the end of a block */
while (collect_ready_type(exports_ready, cir.exports))
schedule_exports(out_blocks, exports_ready);
bool fail = false;
if (!cir.alu_groups.empty()) {
std::cerr << "Unscheduled ALU groups:\n";
for (auto& a : cir.alu_groups) {
std::cerr << " " << *a << "\n";
}
fail = true;
}
if (!cir.alu_vec.empty()){
std::cerr << "Unscheduled ALU vec ops:\n";
for (auto& a : cir.alu_vec) {
std::cerr << " " << *a << "\n";
}
fail = true;
}
if (!cir.alu_trans.empty()){
std::cerr << "Unscheduled ALU trans ops:\n";
for (auto& a : cir.alu_trans) {
std::cerr << " " << *a << "\n";
}
fail = true;
}
if (!cir.mem_write_instr.empty()){
std::cerr << "Unscheduled MEM ops:\n";
for (auto& a : cir.mem_write_instr) {
std::cerr << " " << *a << "\n";
}
fail = true;
}
if (!cir.fetches.empty()){
std::cerr << "Unscheduled Fetch ops:\n";
for (auto& a : cir.fetches) {
std::cerr << " " << *a << "\n";
}
fail = true;
}
if (!cir.tex.empty()){
std::cerr << "Unscheduled Tex ops:\n";
for (auto& a : cir.tex) {
std::cerr << " " << *a << "\n";
}
fail = true;
}
assert(cir.tex.empty());
assert(cir.exports.empty());
assert(cir.fetches.empty());
assert(cir.alu_vec.empty());
assert(cir.mem_write_instr.empty());
assert(cir.mem_ring_writes.empty());
assert (!fail);
if (cir.m_cf_instr) {
// Assert that the 'if' condition is ready
m_current_block->push_back(cir.m_cf_instr);
cir.m_cf_instr->set_scheduled();
}
out_blocks.push_back(m_current_block);
}
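/* Editor's note (not part of the patch): the loop above prefers to keep
 * filling the current clause and only forces a switch when a backlog of
 * non-ALU work has built up, and never while an LDS fetch/read group is
 * open. The threshold part of that decision, extracted into a standalone
 * sketch with the same limits: */

enum SchedKindSketch { SKETCH_KEEP, SKETCH_FREE, SKETCH_MEM_RING,
                       SKETCH_RAT, SKETCH_GDS, SKETCH_TEX };

static SchedKindSketch maybe_switch_clause_sketch(bool lds_group_active,
                                                  bool last_was_free_clause,
                                                  int n_memops, int n_ring,
                                                  int n_rat, int n_gds,
                                                  int n_tex)
{
   if (lds_group_active)
      return SKETCH_KEEP;                 /* never split an open LDS group */
   if (!last_was_free_clause && n_memops > 8)
      return SKETCH_FREE;                 /* scratch/stream-out writes */
   if (n_ring > 5)
      return SKETCH_MEM_RING;
   if (n_rat > 3)
      return SKETCH_RAT;
   if (n_gds > 3)
      return SKETCH_GDS;
   if (n_tex > 3)
      return SKETCH_TEX;
   return SKETCH_KEEP;                    /* keep the current clause kind */
}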
void BlockSheduler::finalize()
{
if (m_last_pos)
m_last_pos->set_is_last_export(true);
if (m_last_pixel)
m_last_pixel->set_is_last_export(true);
if (m_last_param)
m_last_param->set_is_last_export(true);
}
bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
{
bool success = false;
AluGroup *group = nullptr;
bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty();
bool has_lds_ready = !alu_vec_ready.empty() &&
(*alu_vec_ready.begin())->has_lds_access();
/* Schedule groups first, unless we have a pending LDS instruction.
* We don't want the LDS instructions to be too far apart, because the
* fetch + read from the queue have to be in the same ALU CF block */
if (!alu_groups_ready.empty() && !has_lds_ready) {
group = *alu_groups_ready.begin();
alu_groups_ready.erase(alu_groups_ready.begin());
sfn_log << SfnLog::schedule << "Schedule ALU group\n";
success = true;
} else {
if (has_alu_ready) {
group = new AluGroup();
sfn_log << SfnLog::schedule << "START new ALU group\n";
}
}
if (group) {
int free_slots = group->free_slots();
if (free_slots && has_alu_ready) {
if (!alu_vec_ready.empty())
success |= schedule_alu_to_group_vec(group);
/* Apparently one can't schedule a t-slot if there is already
* an LDS instruction scheduled.
* TODO: check whether this is only relevant for actual LDS instructions
* or also for instructions that read from the LDS return value queue */
if (free_slots & 0x10 && !has_lds_ready) {
sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
if (!alu_trans_ready.empty())
success |= schedule_alu_to_group_trans(group, alu_trans_ready);
if (!alu_vec_ready.empty())
success |= schedule_alu_to_group_trans(group, alu_vec_ready);
}
}
sfn_log << SfnLog::schedule << "Finalize ALU group\n";
group->set_scheduled();
group->fix_last_flag();
group->set_nesting_depth(m_current_block->nesting_depth());
if (m_current_block->type() != Block::alu) {
start_new_block(out_blocks, Block::alu);
m_alu_groups_schduled = 0;
}
/* Pessimistic hack: If we have started an LDS group,
* make sure 8 instruction groups still fit into the CF
* TODO: take care of Address slot emission
* TODO: maybe do this CF split only in the assembler
*/
/*if (group->slots() > m_current_block->remaining_slots() ||
(group->has_lds_group_start() &&
m_current_block->remaining_slots() < 7 * 8)) {
//assert(!m_current_block->lds_group_active());
start_new_block(out_blocks, Block::alu);
}*/
if (!m_current_block->try_reserve_kcache(*group)) {
assert(!m_current_block->lds_group_active());
start_new_block(out_blocks, Block::alu);
m_current_block->set_instr_flag(Instr::force_cf);
/* Retry outside of assert(): the call has the side effect of actually
* reserving the kcache banks, so it must not be compiled away in
* NDEBUG builds. */
bool kcache_reserved = m_current_block->try_reserve_kcache(*group);
assert(kcache_reserved);
(void)kcache_reserved;
}
if (group->has_lds_group_start())
m_current_block->lds_group_start(*group->begin());
m_current_block->push_back(group);
if (group->has_lds_group_end())
m_current_block->lds_group_end();
}
if (success)
++m_alu_groups_schduled;
return success;
}
bool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
{
if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() == 0) {
start_new_block(out_blocks, Block::tex);
m_current_block->set_instr_flag(Instr::force_cf);
}
if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) {
auto ii = tex_ready.begin();
sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
start_new_block(out_blocks, Block::tex);
for (auto prep : (*ii)->prepare_instr()) {
prep->set_scheduled();
m_current_block->push_back(prep);
}
(*ii)->set_scheduled();
m_current_block->push_back(*ii);
tex_ready.erase(ii);
return true;
}
return false;
}
bool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks)
{
if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) {
start_new_block(out_blocks, Block::vtx);
m_current_block->set_instr_flag(Instr::force_cf);
}
return schedule_block(fetches_ready);
}
template <typename I>
bool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
{
bool was_full = m_current_block->remaining_slots() == 0;
if (m_current_block->type() != Block::gds || was_full) {
start_new_block(out_blocks, Block::gds);
if (was_full)
m_current_block->set_instr_flag(Instr::force_cf);
}
return schedule_block(ready_list);
}
void BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)
{
if (!m_current_block->empty()) {
sfn_log << SfnLog::schedule << "Start new block\n";
assert(!m_current_block->lds_group_active());
out_blocks.push_back(m_current_block);
m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id());
}
m_current_block->set_type(type);
}
template <typename I>
bool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
{
if (ready_list.empty())
return false;
if (m_current_block->type() != Block::cf)
start_new_block(out_blocks, Block::cf);
return schedule(ready_list);
}
bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
{
assert(group);
assert(!alu_vec_ready.empty());
bool success = false;
auto i = alu_vec_ready.begin();
auto e = alu_vec_ready.end();
while (i != e) {
sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
if (group->add_vec_instructions(*i)) {
auto old_i = i;
++i;
if ((*old_i)->has_alu_flag(alu_is_lds)) {
--m_lds_addr_count;
}
alu_vec_ready.erase(old_i);
success = true;
sfn_log << SfnLog::schedule << " success\n";
} else {
++i;
sfn_log << SfnLog::schedule << " failed\n";
}
}
return success;
}
bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)
{
assert(group);
bool success = false;
auto i = readylist.begin();
auto e = readylist.end();
while (i != e) {
sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
if (group->add_trans_instructions(*i)) {
auto old_i = i;
++i;
readylist.erase(old_i);
success = true;
sfn_log << SfnLog::schedule << " success\n";
break;
} else {
++i;
sfn_log << SfnLog::schedule << " failed\n";
}
}
return success;
}
template <typename I>
bool BlockSheduler::schedule(std::list<I *>& ready_list)
{
if (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
auto ii = ready_list.begin();
sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
(*ii)->set_scheduled();
m_current_block->push_back(*ii);
ready_list.erase(ii);
return true;
}
return false;
}
template <typename I>
bool BlockSheduler::schedule_block(std::list<I *>& ready_list)
{
bool success = false;
while (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
auto ii = ready_list.begin();
sfn_log << SfnLog::schedule << "Schedule: " << **ii << " "
<< m_current_block->remaining_slots() << "\n";
(*ii)->set_scheduled();
m_current_block->push_back(*ii);
ready_list.erase(ii);
success = true;
}
return success;
}
bool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)
{
if (m_current_block->type() != Block::cf)
start_new_block(out_blocks, Block::cf);
if (!ready_list.empty()) {
auto ii = ready_list.begin();
sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
(*ii)->set_scheduled();
m_current_block->push_back(*ii);
switch ((*ii)->export_type()) {
case ExportInstr::pos: m_last_pos = *ii; break;
case ExportInstr::param: m_last_param = *ii; break;
case ExportInstr::pixel: m_last_pixel = *ii; break;
}
(*ii)->set_is_last_export(false);
ready_list.erase(ii);
return true;
}
return false;
}
bool BlockSheduler::collect_ready(CollectInstructions &available)
{
sfn_log << SfnLog::schedule << "Ready instructions\n";
bool result = false;
result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
result |= collect_ready_type(alu_trans_ready, available.alu_trans);
result |= collect_ready_type(alu_groups_ready, available.alu_groups);
result |= collect_ready_type(gds_ready, available.gds_op);
result |= collect_ready_type(tex_ready, available.tex);
result |= collect_ready_type(fetches_ready, available.fetches);
result |= collect_ready_type(memops_ready, available.mem_write_instr);
result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes);
result |= collect_ready_type(write_tf_ready, available.write_tf);
result |= collect_ready_type(rat_instr_ready, available.rat_instr);
sfn_log << SfnLog::schedule << "\n";
return result;
}
bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available)
{
auto i = available.begin();
auto e = available.end();
for (auto alu : ready) {
alu->add_priority(100 * alu->register_priority());
}
int max_check = 0;
while (i != e && max_check++ < 32) {
if (ready.size() < 32 && (*i)->ready()) {
int priority = 0;
/* LDS fetches that use static offsets are usually ready very fast,
* so they would get scheduled early, and this leaves the problem
* that we allocate too many registers with just constant values,
* and this will cause problems with RA. So limit the number of LDS
* address registers.
*/
if ((*i)->has_alu_flag(alu_lds_address)) {
if (m_lds_addr_count > 64) {
++i;
continue;
} else {
++m_lds_addr_count;
}
}
/* LDS instructions are scheduled with high priority.
* Instructions that can go into the t slot and don't have
* indirect access are put last, so that they don't block
* vec-only instructions when scheduling to the vector slots;
* for everything else we look at the register use */
if ((*i)->has_lds_access())
priority = 100000;
else if (AluGroup::has_t()) {
auto opinfo = alu_ops.find((*i)->opcode());
assert(opinfo != alu_ops.end());
if (opinfo->second.can_channel(AluOp::t) && !(*i)->indirect_addr().first)
priority = -1;
}
priority += 100 * (*i)->register_priority();
(*i)->add_priority(priority);
ready.push_back(*i);
auto old_i = i;
++i;
available.erase(old_i);
} else
++i;
}
for (auto& i: ready)
sfn_log << SfnLog::schedule << "V: " << *i << "\n";
ready.sort([](const AluInstr *lhs, const AluInstr *rhs) {
return lhs->priority() > rhs->priority();});
for (auto& i: ready)
sfn_log << SfnLog::schedule << "V (S): " << *i << "\n";
return !ready.empty();
}
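/* Editor's note (not part of the patch): the ordering above boils down to a
 * scalar priority per ALU instruction: LDS accesses first, everything else
 * weighted by register pressure, and trans-capable instructions without
 * indirect addressing pushed to the back so they do not occupy vector slots
 * needed by vec-only work. The weighting as a standalone sketch, using the
 * constants from the code above: */

static int alu_ready_priority_sketch(bool has_lds_access, bool fits_trans_slot,
                                     bool has_indirect_addr, int register_priority)
{
   int priority = 0;
   if (has_lds_access)
      priority = 100000;                  /* keep LDS groups tightly packed */
   else if (fits_trans_slot && !has_indirect_addr)
      priority = -1;                      /* leave these for the t slot */
   return priority + 100 * register_priority;
}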
template <typename T>
struct type_char {
};
template <>
struct type_char<AluInstr> {
static constexpr const char value = 'A';
};
template <>
struct type_char<AluGroup> {
static constexpr const char value = 'G';
};
template <>
struct type_char<ExportInstr> {
static constexpr const char value = 'E';
};
template <>
struct type_char<TexInstr> {
static constexpr const char value = 'T';
};
template <>
struct type_char<FetchInstr> {
static constexpr const char value = 'F';
};
template <>
struct type_char<WriteOutInstr> {
static constexpr const char value = 'M';
};
template <>
struct type_char<MemRingOutInstr> {
static constexpr const char value = 'R';
};
template <>
struct type_char<WriteTFInstr> {
static constexpr const char value = 'X';
};
template <>
struct type_char<GDSInstr> {
static constexpr const char value = 'S';
};
template <>
struct type_char<RatInstr> {
static constexpr const char value = 'I';
};
template <typename T>
bool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available)
{
auto i = available.begin();
auto e = available.end();
while (i != e) {
if ((*i)->ready()) {
ready.push_back(*i);
auto old_i = i;
++i;
available.erase(old_i);
} else
++i;
}
for (auto& i: ready)
sfn_log << SfnLog::schedule << type_char<T>::value << "; " << *i << "\n";
return !ready.empty();
}
}

View File

@ -0,0 +1,13 @@
#ifndef SHEDULER_H
#define SHEDULER_H
#include "sfn_shader.h"
namespace r600 {
Shader *schedule(Shader *original);
}
#endif // SHEDULER_H

File diff suppressed because it is too large

View File

@ -0,0 +1,365 @@
#ifndef SHADER_H
#define SHADER_H
#include "sfn_instr.h"
#include "sfn_instrfactory.h"
#include "sfn_instr_controlflow.h"
#include "gallium/drivers/r600/r600_shader.h"
#include "sfn_liverangeevaluator.h"
#include <bitset>
#include <memory>
#include <stack>
#include <vector>
struct nir_shader;
struct nir_cf_node;
struct nir_if;
struct nir_block;
struct nir_instr;
namespace r600 {
class ShaderIO {
public:
void set_sid(int sid);
void override_spi_sid(int spi_sid);
void print(std::ostream& os) const;
int spi_sid() const { return m_spi_sid;}
unsigned sid() const { return m_sid;}
int location() const {return m_location;}
unsigned name() const { return m_name;}
int pos() const { return m_pos;}
void set_pos(int pos) {m_pos = pos;}
bool is_param() const { return m_is_param;}
void set_is_param(bool val) { m_is_param = val;}
void set_gpr(int gpr) {m_gpr = gpr;}
int gpr() const {return m_gpr;}
protected:
ShaderIO(const char *type, int loc, int name);
private:
virtual void do_print(std::ostream& os) const = 0;
const char *m_type;
int m_location{-1};
int m_name{-1};
int m_sid{0};
int m_spi_sid{0};
int m_pos{0};
bool m_is_param{false};
int m_gpr{0};
};
class ShaderOutput : public ShaderIO {
public:
ShaderOutput();
ShaderOutput(int location, int name, int writemask);
int writemask() const { return m_writemask;}
private:
void do_print(std::ostream& os) const override;
int m_writemask{0};
};
class ShaderInput : public ShaderIO {
public:
ShaderInput();
ShaderInput(int location, int name);
void set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid);
void set_uses_interpolate_at_centroid();
void set_need_lds_pos() { m_need_lds_pos = true;}
int ij_index() const { return m_ij_index;}
int interpolator() const{return m_interpolator;}
int interpolate_loc() const {return m_interpolate_loc;}
bool need_lds_pos() const {return m_need_lds_pos;}
int lds_pos() const {return m_lds_pos;}
void set_lds_pos(int pos) {m_lds_pos = pos;}
int ring_offset() const {return m_ring_offset;}
void set_ring_offset(int offs) {m_ring_offset = offs;}
bool uses_interpolate_at_centroid() const {return m_uses_interpolate_at_centroid;}
private:
void do_print(std::ostream& os) const override;
int m_interpolator{0};
int m_interpolate_loc{0};
int m_ij_index{0};
bool m_uses_interpolate_at_centroid{false};
bool m_need_lds_pos{false};
int m_lds_pos{0};
int m_ring_offset{0};
};
class Shader : public Allocate {
public:
using InputIterator = std::map<int, ShaderInput>::iterator;
using OutputIterator = std::map<int, ShaderOutput>::iterator;
using ShaderBlocks = std::list<Block::Pointer, Allocator<Block::Pointer>>;
Shader(const Shader& orig) = delete;
virtual ~Shader() {}
bool add_info_from_string(std::istream& is);
static Shader *translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info, r600_shader *gs_shader,
r600_shader_key& key, r600_chip_class chip_class);
bool process(nir_shader *nir);
bool process_cf_node(nir_cf_node *node);
bool process_if(nir_if *node);
bool process_loop(nir_loop *node);
bool process_block(nir_block *node);
bool process_instr(nir_instr *instr);
void emit_instruction(PInst instr);
bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
void print(std::ostream& os ) const;
void print_header(std::ostream& os ) const;
bool process_intrinsic(nir_intrinsic_instr *intr);
virtual bool load_input(nir_intrinsic_instr *intr) = 0;
virtual bool store_output(nir_intrinsic_instr *intr) = 0;
bool load_uniform(nir_intrinsic_instr *intr);
bool load_ubo(nir_intrinsic_instr *intr);
ValueFactory& value_factory();
void add_output(const ShaderOutput& output) {
m_outputs[output.location()] = output;
}
void add_input(const ShaderInput& input) {
m_inputs[input.location()] = input;
}
void set_input_gpr(int driver_location, int gpr);
InputIterator find_input(int location) { return m_inputs.find(location);}
InputIterator input_not_found() {return m_inputs.end();}
OutputIterator find_output(int location);
OutputIterator output_not_found() {return m_outputs.end();}
ShaderBlocks& func() { return m_root; }
void reset_function(ShaderBlocks& new_root);
void emit_instruction_from_string(const std::string &s);
void set_info(nir_shader *nir);
void get_shader_info(r600_shader *sh_info);
r600_chip_class chip_class() const {return m_chip_class;};
void set_chip_class(r600_chip_class cls) {m_chip_class = cls;};
void start_new_block(int nesting_depth);
const ShaderOutput& output(int base) const;
LiveRangeMap prepare_live_range_map();
void set_last_txd(Instr *txd){m_last_txd = txd;}
Instr *last_txd(){return m_last_txd;}
// Needed for keeping the memory access in order
void chain_scratch_read(Instr *instr);
void chain_ssbo_read(Instr *instr);
virtual uint32_t enabled_stream_buffers_mask() const {return 0;};
size_t noutputs() const { return m_outputs.size();}
size_t ninputs() const { return m_inputs.size();}
enum Flags {
sh_indirect_const_file,
sh_needs_scratch_space,
sh_needs_sbo_ret_address,
sh_uses_atomics,
sh_uses_images,
sh_uses_tex_buffer,
sh_writes_memory,
sh_txs_cube_array_comp,
sh_indirect_atomic,
sh_mem_barrier,
sh_flags_count
};
void set_flag(Flags f) {m_flags.set(f);}
bool has_flag(Flags f) const {return m_flags.test(f);}
int atomic_file_count() const { return m_atomic_file_count; }
PRegister atomic_update();
int remap_atomic_base(int base);
auto evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair<int, PRegister>;
int ssbo_image_offset() const {return m_ssbo_image_offset;}
PRegister rat_return_address() {assert(m_rat_return_address); return m_rat_return_address;}
PRegister emit_load_to_register(PVirtualValue src);
protected:
enum ESlots {
es_face,
es_instanceid,
es_invocation_id,
es_patch_id,
es_pos,
es_rel_patch_id,
es_sample_mask_in,
es_sample_id,
es_sample_pos,
es_tess_factor_base,
es_vertexid,
es_tess_coord,
es_primitive_id,
es_helper_invocation,
es_last
};
std::bitset<es_last> m_sv_values;
Shader(const char *type_id);
const ShaderInput& input(int base) const;
bool emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin = pin_free);
private:
virtual bool process_stage_intrinsic(nir_intrinsic_instr *intr) = 0;
bool allocate_registers_from_string(std::istream& is, Pin pin);
bool allocate_arrays_from_string(std::istream& is);
bool read_chipclass(std::istream& is);
bool load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr, int offset , int buffer_id);
bool scan_shader(const nir_function *impl);
bool scan_uniforms(nir_variable *uniform);
void allocate_reserved_registers();
void allocate_local_registers(const exec_list *registers);
virtual int do_allocate_reserved_registers() = 0;
bool scan_instruction(nir_instr *instr);
virtual bool do_scan_instruction(nir_instr *instr) = 0;
void print_properties(std::ostream& os) const;
virtual void do_print_properties(std::ostream& os) const = 0;
bool read_output(std::istream& is);
bool read_input(std::istream& is);
virtual bool read_prop(std::istream& is) = 0;
bool emit_if_start(nir_if *if_stmt);
bool emit_control_flow(ControlFlowInstr::CFType type);
bool emit_store_scratch(nir_intrinsic_instr *intr);
bool emit_load_scratch(nir_intrinsic_instr *intr);
bool emit_local_store(nir_intrinsic_instr *intr);
bool emit_local_load(nir_intrinsic_instr* instr);
bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
bool emit_barrier(nir_intrinsic_instr* intr);
bool emit_shader_clock(nir_intrinsic_instr* instr);
bool emit_wait_ack();
bool equal_to(const Shader& other) const;
void finalize();
virtual void do_finalize();
virtual void do_get_shader_info(r600_shader *sh_info);
ShaderBlocks m_root;
Block::Pointer m_current_block;
InstrFactory *m_instr_factory;
const char *m_type_id;
template <typename T>
using IOMap = std::map<int, T, std::less<int>, Allocator<std::pair<const int, T>>>;
IOMap<ShaderOutput> m_outputs;
IOMap<ShaderInput> m_inputs;
r600_chip_class m_chip_class;
int m_scratch_size;
int m_next_block;
bool m_indirect_const_file{false};
Instr *m_last_txd {nullptr};
uint32_t m_indirect_files{0};
std::bitset<sh_flags_count> m_flags;
uint32_t nhwatomic_ranges{0};
std::vector<r600_shader_atomic> m_atomics;
uint32_t m_nhwatomic{0};
uint32_t m_atomic_base{0};
uint32_t m_next_hwatomic_loc{0};
std::unordered_map<int, int> m_atomic_base_map;
uint32_t m_atomic_file_count{0};
PRegister m_atomic_update{nullptr};
PRegister m_rat_return_address{nullptr};
int32_t m_ssbo_image_offset{0};
uint32_t m_nloops{0};
class InstructionChain : public InstrVisitor {
public:
void visit(AluInstr *instr) override {(void) instr;}
void visit(AluGroup *instr) override {(void) instr;}
void visit(TexInstr *instr) override {(void) instr;}
void visit(ExportInstr *instr) override {(void) instr;}
void visit(FetchInstr *instr) override {(void) instr;}
void visit(Block *instr) override {(void) instr;}
void visit(ControlFlowInstr *instr) override {(void) instr;}
void visit(IfInstr *instr) override {(void) instr;}
void visit(StreamOutInstr *instr) override {(void) instr;}
void visit(MemRingOutInstr *instr) override {(void) instr;}
void visit(EmitVertexInstr *instr) override {(void) instr;}
void visit(WriteTFInstr *instr) override {(void) instr;}
void visit(LDSAtomicInstr *instr) override {(void) instr;}
void visit(LDSReadInstr *instr) override {(void) instr;}
void visit(WriteScratchInstr *instr) override;
void visit(GDSInstr *instr) override;
void visit(RatInstr *instr) override;
void apply(Instr *current, Instr **last);
Shader *this_shader{nullptr};
Instr *last_scratch_instr{nullptr};
Instr *last_gds_instr{nullptr};
Instr *last_ssbo_instr{nullptr};
bool prepare_mem_barrier{false};
};
InstructionChain m_chain_instr;
std::vector<Instr *> m_loops;
};
std::pair<unsigned, unsigned>
r600_get_varying_semantic(unsigned varying_location);
}
#endif // SHADER_H
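/* Editor's note (not part of the patch): a sketch of how the entry points
 * declared in this header and in the new pass headers fit together. The
 * real call sequence lives in sfn_nir.cpp, which is not shown in this
 * excerpt, so the ordering below and the LiveRangeEvaluator call are
 * assumptions; all other functions are declared in the diff above. */

#include "sfn_shader.h"
#include "sfn_optimizer.h"   /* assumed name, inferred from the OPTIMIZER_H guard */
#include "sfn_scheduler.h"
#include "sfn_ra.h"

/* Hypothetical compile pipeline, error handling mostly elided. */
static r600::Shader *compile_sketch(nir_shader *nir,
                                    const pipe_stream_output_info *so_info,
                                    r600_shader *gs_shader,
                                    r600_shader_key& key,
                                    r600_chip_class chip_class,
                                    r600_shader *sh_info)
{
   auto *sh = r600::Shader::translate_from_nir(nir, so_info, gs_shader,
                                               key, chip_class);

   r600::optimize(*sh);                 /* copy-prop, DCE, peephole, ... */
   sh = r600::schedule(sh);             /* form ALU/TEX/FETCH/CF clauses */

   /* Live ranges: assumed API of the evaluator from sfn_liverangeevaluator.h,
    * which is included by sfn_shader.h but not shown in this excerpt. */
   auto lrm = r600::LiveRangeEvaluator().run(*sh);
   if (!r600::register_allocation(lrm)) /* greedy coloring, see sfn_ra.cpp */
      return nullptr;

   sh->get_shader_info(sh_info);        /* fill the gallium r600_shader struct */
   return sh;
}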

File diff suppressed because it is too large

View File

@ -1,231 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef sfn_shader_from_nir_h
#define sfn_shader_from_nir_h
#include "gallium/drivers/r600/r600_shader.h"
#include "compiler/nir/nir.h"
#include "compiler/nir_types.h"
#include "sfn_instruction_block.h"
#include "sfn_instruction_export.h"
#include "sfn_alu_defines.h"
#include "sfn_valuepool.h"
#include "sfn_debug.h"
#include "sfn_instruction_cf.h"
#include "sfn_emittexinstruction.h"
#include "sfn_emitaluinstruction.h"
#include "sfn_emitssboinstruction.h"
#include <vector>
#include <set>
#include <stack>
#include <unordered_map>
struct nir_instr;
namespace r600 {
extern SfnLog sfn_log;
class ShaderFromNirProcessor : public ValuePool {
public:
ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel,
r600_shader& sh_info, int scratch_size, enum amd_gfx_level _chip_class,
int atomic_base);
virtual ~ShaderFromNirProcessor();
void emit_instruction(Instruction *ir);
PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
const GPRVector::Swizzle& swizzle, bool match = false);
bool emit_instruction(EAluOp opcode, PValue dest,
std::vector<PValue> src0,
const std::set<AluModifiers>& m_flags);
void emit_export_instruction(WriteoutInstruction *ir);
void emit_instruction(AluInstruction *ir);
bool use_legacy_math_rules(void) {
return m_sel.nir->info.use_legacy_math_rules;
};
void split_constants(nir_alu_instr* instr);
void remap_registers();
const nir_variable *get_deref_location(const nir_src& src) const;
r600_shader& sh_info() {return m_sh_info;}
void add_param_output_reg(int loc, const GPRVector *gpr);
void set_output(unsigned pos, int sel);
const GPRVector *output_register(unsigned location) const;
void evaluate_spi_sid(r600_shader_io &io);
enum amd_gfx_level get_chip_class() const;
int remap_atomic_base(int base) {
return m_atomic_base_map[base];
}
void get_array_info(r600_shader& shader) const;
virtual bool scan_inputs_read(const nir_shader *sh);
void set_shader_info(const nir_shader *sh);
protected:
void set_var_address(nir_deref_instr *instr);
void set_input(unsigned pos, PValue var);
bool scan_instruction(nir_instr *instr);
virtual bool scan_sysvalue_access(nir_instr *instr) = 0;
bool emit_if_start(int if_id, nir_if *if_stmt);
bool emit_else_start(int if_id);
bool emit_ifelse_end(int if_id);
bool emit_loop_start(int loop_id);
bool emit_loop_end(int loop_id);
bool emit_jump_instruction(nir_jump_instr *instr);
bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
bool emit_load_local_shared(nir_intrinsic_instr* instr);
bool emit_store_local_shared(nir_intrinsic_instr* instr);
bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
bool emit_barrier(nir_intrinsic_instr* instr);
bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
bool as_last = true);
void inc_atomic_file_count();
virtual void do_set_shader_info(const nir_shader *sh);
enum ESlots {
es_face,
es_instanceid,
es_invocation_id,
es_patch_id,
es_pos,
es_rel_patch_id,
es_sample_mask_in,
es_sample_id,
es_sample_pos,
es_tess_factor_base,
es_vertexid,
es_tess_coord,
es_primitive_id,
es_helper_invocation,
es_last
};
std::bitset<es_last> m_sv_values;
bool allocate_reserved_registers();
private:
virtual bool do_allocate_reserved_registers() = 0;
void emit_instruction_internal(Instruction *ir);
bool emit_alu_instruction(nir_instr *instr);
bool emit_deref_instruction(nir_deref_instr* instr);
bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr);
bool emit_tex_instruction(nir_instr* instr);
bool emit_discard_if(nir_intrinsic_instr* instr);
bool emit_load_ubo_vec4(nir_intrinsic_instr* instr);
bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr);
bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufid);
/* Code creating functions */
bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr);
AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask);
bool load_uniform(nir_intrinsic_instr* instr);
bool process_uniforms(nir_variable *uniform);
void append_block(int nesting_change);
virtual void emit_shader_start();
virtual bool emit_deref_instruction_override(nir_deref_instr* instr);
bool emit_store_scratch(nir_intrinsic_instr* instr);
bool emit_load_scratch(nir_intrinsic_instr* instr);
bool emit_shader_clock(nir_intrinsic_instr* instr);
virtual void do_finalize() = 0;
void finalize();
friend class ShaderFromNir;
std::set<nir_variable*> m_arrays;
std::map<unsigned, PValue> m_inputs;
std::map<unsigned, int> m_outputs;
std::map<unsigned, nir_variable*> m_var_derefs;
std::map<const nir_variable *, nir_variable_mode> m_var_mode;
std::map<unsigned, const glsl_type*> m_uniform_type_map;
std::map<int, IfElseInstruction *> m_if_block_start_map;
std::map<int, LoopBeginInstruction *> m_loop_begin_block_map;
pipe_shader_type m_processor_type;
std::vector<InstructionBlock> m_output;
unsigned m_nesting_depth;
unsigned m_block_number;
InstructionBlock m_export_output;
r600_shader& m_sh_info;
enum amd_gfx_level m_chip_class;
EmitTexInstruction m_tex_instr;
EmitAluInstruction m_alu_instr;
EmitSSBOInstruction m_ssbo_instr;
OutputRegisterMap m_output_register_map;
IfElseInstruction *m_pending_else;
int m_scratch_size;
int m_next_hwatomic_loc;
r600_pipe_shader_selector& m_sel;
int m_atomic_base ;
int m_image_count;
std::unordered_map<int, int> m_atomic_base_map;
AluInstruction *last_emitted_alu;
};
}
#endif

View File

@ -1,112 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2018 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sfn_shader_compute.h"
#include "sfn_instruction_fetch.h"
namespace r600 {
ComputeShaderFromNir::ComputeShaderFromNir(r600_pipe_shader *sh,
r600_pipe_shader_selector& sel,
UNUSED const r600_shader_key& key,
enum amd_gfx_level gfx_level):
ShaderFromNirProcessor (PIPE_SHADER_COMPUTE, sel, sh->shader,
sh->scratch_space_needed, gfx_level, 0),
m_reserved_registers(0)
{
}
bool ComputeShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
{
return true;
}
bool ComputeShaderFromNir::do_allocate_reserved_registers()
{
int thread_id_sel = m_reserved_registers++;
int wg_id_sel = m_reserved_registers++;
for (int i = 0; i < 3; ++i) {
auto tmp = new GPRValue(thread_id_sel, i);
tmp->set_as_input();
tmp->set_keep_alive();
m_local_invocation_id[i] = PValue(tmp);
inject_register(tmp->sel(), i, m_local_invocation_id[i], false);
tmp = new GPRValue(wg_id_sel, i);
tmp->set_as_input();
tmp->set_keep_alive();
m_workgroup_id[i] = PValue(tmp);
inject_register(tmp->sel(), i, m_workgroup_id[i], false);
}
return true;
}
bool ComputeShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
{
switch (instr->intrinsic) {
case nir_intrinsic_load_local_invocation_id:
return emit_load_3vec(instr, m_local_invocation_id);
case nir_intrinsic_load_workgroup_id:
return emit_load_3vec(instr, m_workgroup_id);
case nir_intrinsic_load_num_workgroups:
return emit_load_num_workgroups(instr);
default:
return false;
}
}
bool ComputeShaderFromNir::emit_load_3vec(nir_intrinsic_instr* instr,
const std::array<PValue,3>& src)
{
for (int i = 0; i < 3; ++i)
load_preloaded_value(instr->dest, i, src[i], i == 2);
return true;
}
bool ComputeShaderFromNir::emit_load_num_workgroups(nir_intrinsic_instr* instr)
{
PValue a_zero = get_temp_register(1);
emit_instruction(new AluInstruction(op1_mov, a_zero, Value::zero, EmitInstruction::last_write));
GPRVector dest;
for (int i = 0; i < 3; ++i)
dest.set_reg_i(i, from_nir(instr->dest, i));
dest.set_reg_i(3, from_nir(instr->dest, 7));
auto ir = new FetchInstruction(vc_fetch, no_index_offset,
fmt_32_32_32_32, vtx_nf_int, vtx_es_none, a_zero, dest, 16,
false, 16, R600_BUFFER_INFO_CONST_BUFFER, 0,
bim_none, false, false, 0, 0, 0, PValue(), {0,1,2,7});
ir->set_flag(vtx_srf_mode);
emit_instruction(ir);
return true;
}
void ComputeShaderFromNir::do_finalize()
{
}
}

View File

@ -1,62 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SFN_COMPUTE_SHADER_FROM_NIR_H
#define SFN_COMPUTE_SHADER_FROM_NIR_H
#include "sfn_shader_base.h"
#include "sfn_shaderio.h"
#include <bitset>
namespace r600 {
class ComputeShaderFromNir : public ShaderFromNirProcessor
{
public:
ComputeShaderFromNir(r600_pipe_shader *sh,
r600_pipe_shader_selector& sel,
const r600_shader_key &key,
enum amd_gfx_level gfx_level);
bool scan_sysvalue_access(nir_instr *instr) override;
private:
bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
bool do_allocate_reserved_registers() override;
void do_finalize() override;
bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PValue,3>& src);
bool emit_load_num_workgroups(nir_intrinsic_instr* instr);
int m_reserved_registers;
std::array<PValue,3> m_workgroup_id;
std::array<PValue,3> m_local_invocation_id;
};
}
#endif // SFN_COMPUTE_SHADER_FROM_NIR_H

View File

@ -0,0 +1,95 @@
#include "sfn_shader_cs.h"
#include "sfn_instr_fetch.h"
namespace r600 {
ComputeShader::ComputeShader(UNUSED const r600_shader_key& key):
Shader("CS")
{
}
bool ComputeShader::do_scan_instruction(UNUSED nir_instr *instr)
{
return false;
}
int ComputeShader::do_allocate_reserved_registers()
{
auto& vf = value_factory();
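   // On r600-class compute dispatches the hardware preloads the local
   // invocation ID and the workgroup ID into the first two GPRs; the
   // selectors 0 and 1 below assume that preload convention, so the
   // registers are only pinned here rather than loaded explicitly.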
const int thread_id_sel = 0;
const int wg_id_sel = 1;
for (int i = 0; i < 3; ++i) {
m_local_invocation_id[i] = vf.allocate_pinned_register(thread_id_sel, i);
m_local_invocation_id[i]->pin_live_range(true);
m_workgroup_id[i] = vf.allocate_pinned_register(wg_id_sel, i);
m_workgroup_id[i]->pin_live_range(true);
}
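   // One reserved GPR each for the thread-ID and workgroup-ID vectors.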
return 2;
}
bool ComputeShader::process_stage_intrinsic(nir_intrinsic_instr *instr)
{
switch (instr->intrinsic) {
case nir_intrinsic_load_local_invocation_id:
return emit_load_3vec(instr, m_local_invocation_id);
case nir_intrinsic_load_workgroup_id:
return emit_load_3vec(instr, m_workgroup_id);
case nir_intrinsic_load_num_workgroups:
return emit_load_num_workgroups(instr);
default:
return false;
}
}
void ComputeShader::do_get_shader_info(r600_shader *sh_info)
{
sh_info->processor_type = PIPE_SHADER_COMPUTE;
}
bool ComputeShader::read_prop(UNUSED std::istream& is)
{
return true;
}
void ComputeShader::do_print_properties(UNUSED std::ostream& os) const
{
}
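// As in the old implementation, the workgroup count is fetched from the
// driver's buffer-info constant buffer instead of a preloaded register;
// the '7' in the destination swizzle presumably masks the unused w component.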
bool ComputeShader::emit_load_num_workgroups(nir_intrinsic_instr* instr)
{
auto zero = value_factory().temp_register();
emit_instruction(new AluInstr(op1_mov, zero, value_factory().inline_const(ALU_SRC_0, 0),
AluInstr::last_write));
auto dest = value_factory().dest_vec4(instr->dest, pin_group);
auto ir = new LoadFromBuffer(dest, {0,1,2,7}, zero, 16,
R600_BUFFER_INFO_CONST_BUFFER,
nullptr, fmt_32_32_32_32);
ir->set_fetch_flag(LoadFromBuffer::srf_mode);
ir->reset_fetch_flag(LoadFromBuffer::format_comp_signed);
ir->set_num_format(vtx_nf_int);
emit_instruction(ir);
return true;
}
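// Copy one of the preloaded three-component system values into the NIR
// destination; only the final move is tagged AluInstr::last_write.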
bool ComputeShader::emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PRegister,3>& src)
{
auto& vf = value_factory();
for (int i = 0; i < 3; ++i) {
auto dest = vf.dest(instr->dest, i, pin_none);
emit_instruction(new AluInstr(op1_mov, dest, src[i], i == 2 ? AluInstr::last_write : AluInstr::write));
}
return true;
}
}

View File

@ -0,0 +1,39 @@
#ifndef COMPUTE_H
#define COMPUTE_H
#include "sfn_shader.h"
namespace r600 {
class ComputeShader : public Shader
{
public:
ComputeShader(const r600_shader_key& key);
private:
bool do_scan_instruction(nir_instr *instr) override;
int do_allocate_reserved_registers() override;
bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
void do_get_shader_info(r600_shader *sh_info) override;
bool load_input(UNUSED nir_intrinsic_instr *intr) override {
unreachable("compute shaders have bno inputs");
};
bool store_output(UNUSED nir_intrinsic_instr *intr) override {
unreachable("compute shaders have no outputs");
};
bool read_prop(std::istream& is) override;
void do_print_properties(std::ostream& os) const override;
bool emit_load_num_workgroups(nir_intrinsic_instr* instr);
bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PRegister,3>& src);
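   // Registers preloaded with system values, pinned in
   // do_allocate_reserved_registers().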
std::array<PRegister,3> m_workgroup_id{nullptr};
std::array<PRegister,3> m_local_invocation_id{nullptr};
};
}
#endif // COMPUTE_H

File diff suppressed because it is too large

View File

@ -1,117 +0,0 @@
/* -*- mesa-c++ -*-
*
* Copyright (c) 2019 Collabora LTD
*
* Author: Gert Wollny <gert.wollny@collabora.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef sfn_fragment_shader_from_nir_h
#define sfn_fragment_shader_from_nir_h
#include "sfn_shader_base.h"
#include "sfn_shaderio.h"
#include <bitset>
namespace r600 {
class FragmentShaderFromNir : public ShaderFromNirProcessor {
public:
FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info,
r600_pipe_shader_selector &sel, const r600_shader_key &key,
enum amd_gfx_level gfx_level);
bool scan_sysvalue_access(nir_instr *instr) override;
private:
struct Interpolator {
bool enabled;
unsigned ij_index;
PValue i;
PValue j;
};
void emit_shader_start() override;
bool do_allocate_reserved_registers() override;
bool process_store_output(nir_intrinsic_instr *instr);
bool emit_store_output(nir_intrinsic_instr* instr);
bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs);
bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs);
bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip,
int num_components, int start_comp);
bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op);
   bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op, int writemask);
bool load_interpolated_two_comp_for_one(GPRVector &dest,
ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp);
bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
void do_finalize() override;
void load_front_face();
bool emit_load_input(nir_intrinsic_instr* instr);
bool emit_load_front_face(nir_intrinsic_instr* instr);
bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
bool emit_load_sample_pos(nir_intrinsic_instr* instr);
bool emit_load_sample_id(nir_intrinsic_instr* instr);
bool process_load_input(nir_intrinsic_instr *instr, bool interpolated);
bool emit_load_interpolated_input(nir_intrinsic_instr* instr);
bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
unsigned m_max_color_exports;
unsigned m_max_counted_color_exports;
bool m_two_sided_color;
ExportInstruction *m_last_pixel_export;
const nir_shader& m_nir;
std::array<Interpolator, 6> m_interpolator;
unsigned m_reserved_registers;
unsigned m_frag_pos_index;
PGPRValue m_front_face_reg;
PGPRValue m_sample_mask_reg;
PGPRValue m_sample_id_reg;
PGPRValue m_helper_invocation;
GPRVector m_frag_pos;
bool m_need_back_color;
bool m_front_face_loaded;
ShaderIO m_shaderio;
unsigned m_depth_exports;
std::map<unsigned, PValue> m_input_cache;
static const int s_max_interpolators = 6;
std::bitset<s_max_interpolators> m_interpolators_used;
unsigned m_apply_sample_mask;
bool m_dual_source_blend;
ShaderInput *m_pos_input;
};
}
#endif

Some files were not shown because too many files have changed in this diff