r600/sfn: rewrite NIR backend

This is a rewrite of the NIR backend. It adds some optimizations and a scheduler. v2: - replace some magic numbers by constants - make sure constructor is always used with new - use default initialization in more places (changes suggested by Filip Gawin) Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Filip Gawin <filip@gawin.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17076>
2024-11-27 12:14:10 +08:00 · 2021-06-19 13:03:32 +02:00 · 2021-06-19 13:03:32 +02:00 · 79ca456b48
commit 79ca456b48
parent ab06b00c63
140 changed files with 27360 additions and 16187 deletions
--- a/src/gallium/drivers/r600/meson.build
+++ b/src/gallium/drivers/r600/meson.build
@ -107,6 +107,10 @@ files_r600 = files(
  'sb/sb_valtable.cpp',
  'sfn/sfn_alu_defines.cpp',
  'sfn/sfn_alu_defines.h',
+  'sfn/sfn_alu_readport_validation.cpp',
+  'sfn/sfn_alu_readport_validation.h',
+  'sfn/sfn_assembler.cpp',
+  'sfn/sfn_assembler.h',
  'sfn/sfn_callstack.cpp',
  'sfn/sfn_callstack.h',
  'sfn/sfn_conditionaljumptracker.cpp',
@ -114,73 +118,66 @@ files_r600 = files(
  'sfn/sfn_defines.h',
  'sfn/sfn_debug.cpp',
  'sfn/sfn_debug.h',
-  'sfn/sfn_emitaluinstruction.cpp',
-  'sfn/sfn_emitaluinstruction.h',
-  'sfn/sfn_emitinstruction.cpp',
-  'sfn/sfn_emitinstruction.h',
-  'sfn/sfn_emitssboinstruction.cpp',
-  'sfn/sfn_emitssboinstruction.h',
-  'sfn/sfn_emittexinstruction.cpp',
-  'sfn/sfn_emittexinstruction.h',
-  'sfn/sfn_emitinstruction.h',
-  'sfn/sfn_instruction_alu.cpp',
-  'sfn/sfn_instruction_alu.h',
-  'sfn/sfn_instruction_base.cpp',
-  'sfn/sfn_instruction_base.h',
-  'sfn/sfn_instruction_block.cpp',
-  'sfn/sfn_instruction_block.h',
-  'sfn/sfn_instruction_cf.cpp',
-  'sfn/sfn_instruction_cf.h',
-  'sfn/sfn_instruction_export.cpp',
-  'sfn/sfn_instruction_export.h',
-  'sfn/sfn_instruction_fetch.cpp',
-  'sfn/sfn_instruction_fetch.h',
-  'sfn/sfn_instruction_gds.cpp',
-  'sfn/sfn_instruction_gds.h',
-  'sfn/sfn_instruction_lds.cpp',
-  'sfn/sfn_instruction_lds.h',
-  'sfn/sfn_instruction_misc.cpp',
-  'sfn/sfn_instruction_misc.h',
-  'sfn/sfn_instruction_tex.cpp',
-  'sfn/sfn_instruction_tex.h',
-  'sfn/sfn_ir_to_assembly.cpp',
-  'sfn/sfn_ir_to_assembly.h',
-  'sfn/sfn_liverange.cpp',
-  'sfn/sfn_liverange.h',
+  'sfn/sfn_instr.cpp',
+  'sfn/sfn_instr.h',
+  'sfn/sfn_instr_alu.cpp',
+  'sfn/sfn_instr_alu.h',
+  'sfn/sfn_instr_alugroup.cpp',
+  'sfn/sfn_instr_alugroup.h',
+  'sfn/sfn_instr_controlflow.cpp',
+  'sfn/sfn_instr_controlflow.h',
+  'sfn/sfn_instr_export.cpp',
+  'sfn/sfn_instr_export.h',
+  'sfn/sfn_instr_fetch.cpp',
+  'sfn/sfn_instr_fetch.h',
+  'sfn/sfn_instr_mem.cpp',
+  'sfn/sfn_instr_mem.h',
+  'sfn/sfn_instr_lds.cpp',
+  'sfn/sfn_instr_lds.h',
+  'sfn/sfn_instr_tex.cpp',
+  'sfn/sfn_instr_tex.h',
+  'sfn/sfn_instrfactory.cpp',
+  'sfn/sfn_instrfactory.h',
+  'sfn/sfn_liverangeevaluator.cpp',
+  'sfn/sfn_liverangeevaluator.h',
+  'sfn/sfn_liverangeevaluator_helpers.cpp',
+  'sfn/sfn_liverangeevaluator_helpers.h',
+  'sfn/sfn_memorypool.cpp',
+  'sfn/sfn_memorypool.h',
  'sfn/sfn_nir.cpp',
  'sfn/sfn_nir.h',
  'sfn/sfn_nir_legalize_image_load_store.cpp',
  'sfn/sfn_nir_lower_64bit.cpp',
  'sfn/sfn_nir_lower_alu.cpp',
  'sfn/sfn_nir_lower_alu.h',
+  'sfn/sfn_nir_lower_tex.cpp',
+  'sfn/sfn_nir_lower_tex.h',
  'sfn/sfn_nir_lower_fs_out_to_vector.cpp',
  'sfn/sfn_nir_lower_fs_out_to_vector.h',
  'sfn/sfn_nir_lower_tess_io.cpp',
  'sfn/sfn_nir_vectorize_vs_inputs.c',
-  'sfn/sfn_shader_base.cpp',
-  'sfn/sfn_shader_base.h',
-  'sfn/sfn_shader_compute.cpp',
-  'sfn/sfn_shader_compute.h',
-  'sfn/sfn_shader_fragment.cpp',
-  'sfn/sfn_shader_fragment.h',
-  'sfn/sfn_shader_geometry.cpp',
-  'sfn/sfn_shader_geometry.h',
-  'sfn/sfn_shader_tcs.cpp',
-  'sfn/sfn_shader_tcs.h',
-  'sfn/sfn_shader_tess_eval.cpp',
-  'sfn/sfn_shader_tess_eval.h',
-  'sfn/sfn_shader_vertex.cpp',
-  'sfn/sfn_shader_vertex.h',
-  'sfn/sfn_shaderio.cpp',
-  'sfn/sfn_shaderio.h',
-  'sfn/sfn_value.cpp',
-  'sfn/sfn_value.h',
-  'sfn/sfn_value_gpr.cpp',
-  'sfn/sfn_value_gpr.h',
-  'sfn/sfn_valuepool.cpp',
-  'sfn/sfn_valuepool.h',
-  'sfn/sfn_vertexstageexport.cpp',
-  'sfn/sfn_vertexstageexport.h',
+  'sfn/sfn_optimizer.cpp',
+  'sfn/sfn_peephole.cpp',
+  'sfn/sfn_ra.cpp',
+  'sfn/sfn_ra.h',
+  'sfn/sfn_scheduler.cpp',
+  'sfn/sfn_scheduler.h',
+  'sfn/sfn_shader.cpp',
+  'sfn/sfn_shader.h',
+  'sfn/sfn_shader_cs.cpp',
+  'sfn/sfn_shader_cs.h',
+  'sfn/sfn_shader_fs.cpp',
+  'sfn/sfn_shader_fs.h',
+  'sfn/sfn_shader_gs.cpp',
+  'sfn/sfn_shader_gs.h',
+  'sfn/sfn_shader_tess.cpp',
+  'sfn/sfn_shader_tess.h',
+  'sfn/sfn_shader_vs.cpp',
+  'sfn/sfn_shader_vs.h',
+  'sfn/sfn_valuefactory.cpp',
+  'sfn/sfn_valuefactory.h',
+  'sfn/sfn_virtualvalues.cpp',
+  'sfn/sfn_virtualvalues.h',
  )

 egd_tables_h = custom_target(
@ -200,11 +197,13 @@ libr600 = static_library(
  'r600',
  [files_r600, egd_tables_h],
  c_args : [r600_c_args, '-Wstrict-overflow=0'],
+  cpp_args: '-std=c++17',
  gnu_symbol_visibility : 'hidden',
  include_directories : [
    inc_src, inc_mapi, inc_mesa, inc_include, inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common,
    inc_gallium_drivers,
  ],
+
  dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm, idep_nir, idep_nir_headers],
 )

@ -212,3 +211,9 @@ driver_r600 = declare_dependency(
  compile_args : '-DGALLIUM_R600',
  link_with : [libr600, libmesa, libradeonwinsys],
 )
+
+if with_tests
+  subdir('sfn/tests')
+endif
+
+
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@ -407,8 +407,8 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *
 			}
 			assignment[4] = alu;
 		} else {
-			if (assignment[chan]) {                           
-				assert(0); /* ALU.chan has already been allocated. */
+                        if (assignment[chan]) {
+			 	assert(0); /* ALU.chan has already been allocated. */
 				return -1;
 			}
 			assignment[chan] = alu;
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@ -355,6 +355,8 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
 void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 			   unsigned *num_format, unsigned *format_comp, unsigned *endian);

+int r600_load_ar(struct r600_bytecode *bc);
+
 static inline int fp64_switch(int i)
 {
 	switch (i) {
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@ -420,11 +420,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                if (is_nir_enabled(&rscreen->b))
                   return 1;
 		return 0;
-        case PIPE_CAP_INT64_DIVMOD:
-           /* it is actually not supported, but the nir lowering hdanles this corectly wheras
-            * the glsl lowering path seems to not initialize the buildins correctly.
-            */
-           return is_nir_enabled(&rscreen->b);
+
+	case PIPE_CAP_TWO_SIDED_COLOR:
+		return !is_nir_enabled(&rscreen->b);
+	case PIPE_CAP_INT64_DIVMOD:
+		/* it is actually not supported, but the nir lowering handles this correctly whereas
+		 * the glsl lowering path seems to not initialize the builtins correctly.
+		 */
+		return is_nir_enabled(&rscreen->b);
 	case PIPE_CAP_CULL_DISTANCE:
 		return 1;

--- a/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp
@ -46,7 +46,7 @@ const std::map<EAluOp, AluOp> alu_ops = {
   {op1_cos                 ,AluOp(1, 1, AluOp::t,"COS")},
   {op1_exp_ieee            ,AluOp(1, 1, AluOp::t,"EXP_IEEE")},
   {op1_floor               ,AluOp(1, 1, AluOp::a,"FLOOR")},
-   {op1_flt_to_int          ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")},
+   {op1_flt_to_int          ,AluOp(1, 0, AluOp::v,"FLT_TO_INT")},
   {op1_flt_to_uint         ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")},
   {op1_flt_to_int_rpi      ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")},
   {op1_flt_to_int_floor    ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")},
@ -84,15 +84,15 @@ const std::map<EAluOp, AluOp> alu_ops = {
   {op1_recipsqrt_ieee1     ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")},
   {op1_recip_int           ,AluOp(1, 0, AluOp::t,"RECIP_INT")},
   {op1_recip_uint          ,AluOp(1, 0, AluOp::t,"RECIP_UINT")},
-   {op1_recip_64            ,AluOp(1, 1, AluOp::t,"RECIP_64")},
-   {op1_recip_clamped_64    ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")},
-   {op1_recipsqrt_64        ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")},
-   {op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
+   {op1_recip_64            ,AluOp(2, 1, AluOp::t,"RECIP_64")},
+   {op1_recip_clamped_64    ,AluOp(2, 1, AluOp::t,"RECIP_CLAMPED_64")},
+   {op1_recipsqrt_64        ,AluOp(2, 1, AluOp::t,"RECIPSQRT_64")},
+   {op1_recipsqrt_clamped_64,AluOp(2, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
   {op1_rndne               ,AluOp(1, 1, AluOp::a,"RNDNE")},
   {op1_sqrt_ieee           ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")},
   {op1_sin                 ,AluOp(1, 1, AluOp::t,"SIN")},
   {op1_trunc               ,AluOp(1, 1, AluOp::a,"TRUNC")},
-   {op1_sqrt_64             ,AluOp(1, 1, AluOp::t,"SQRT_64")},
+   {op1_sqrt_64             ,AluOp(2, 1, AluOp::t,"SQRT_64")},
   {op1_ubyte0_flt          ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")},
   {op1_ubyte1_flt          ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")},
   {op1_ubyte2_flt          ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")},
@ -273,53 +273,73 @@ const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = {
 };

 const std::map<ESDOp, LDSOp> lds_ops = {
-   {DS_OP_ADD           , {2, "DS_ADD"}},
-   {DS_OP_SUB           , {2, "DS_SUB"}},
-   {DS_OP_RSUB          , {2, "DS_RSUB"}},
-   {DS_OP_INC           , {2, "DS_INC"}},
-   {DS_OP_DEC           , {2, "DS_DEC"}},
-   {DS_OP_MIN_INT       , {2, "DS_MIN_INT"}},
-   {DS_OP_MAX_INT       , {2, "DS_MAX_INT"}},
-   {DS_OP_MIN_UINT      , {2, "DS_MIN_UINT"}},
-   {DS_OP_MAX_UINT      , {2, "DS_MAX_UINT"}},
-   {DS_OP_AND           , {2, "DS_AND"}},
-   {DS_OP_OR            , {2, "DS_OR"}},
-   {DS_OP_XOR           , {2, "DS_XOR"}},
-   {DS_OP_MSKOR         , {3, "DS_MSKOR"}},
-   {DS_OP_WRITE         , {2, "DS_WRITE"}},
-   {DS_OP_WRITE_REL     , {3, "DS_WRITE_REL"}},
-   {DS_OP_WRITE2        , {3, "DS_WRITE2"}},
-   {DS_OP_CMP_STORE     , {3, "DS_CMP_STORE"}},
-   {DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}},
-   {DS_OP_BYTE_WRITE    , {2, "DS_BYTE_WRITE"}},
-   {DS_OP_SHORT_WRITE   , {2, "DS_SHORT_WRITE"}},
-   {DS_OP_ADD_RET       , {2, "DS_ADD_RET"}},
-   {DS_OP_SUB_RET       , {2, "DS_SUB_RET"}},
-   {DS_OP_RSUB_RET      , {2, "DS_RSUB_RET"}},
-   {DS_OP_INC_RET       , {2, "DS_INC_RET"}},
-   {DS_OP_DEC_RET       , {2, "DS_DEC_RET"}},
-   {DS_OP_MIN_INT_RET   , {2, "DS_MIN_INT_RET"}},
-   {DS_OP_MAX_INT_RET   , {2, "DS_MAX_INT_RET"}},
-   {DS_OP_MIN_UINT_RET  , {2, "DS_MIN_UINT_RET"}},
-   {DS_OP_MAX_UINT_RET  , {2, "DS_MAX_UINT_RET"}},
-   {DS_OP_AND_RET       , {2, "DS_AND_RET"}},
-   {DS_OP_OR_RET        , {2, "DS_OR_RET"}},
-   {DS_OP_XOR_RET       , {2, "DS_XOR_RET"}},
-   {DS_OP_MSKOR_RET     , {3, "DS_MSKOR_RET"}},
-   {DS_OP_XCHG_RET      , {2, "DS_XCHG_RET"}},
-   {DS_OP_XCHG_REL_RET  , {3, "DS_XCHG_REL_RET"}},
-   {DS_OP_XCHG2_RET     , {3, "DS_XCHG2_RET"}},
-   {DS_OP_CMP_XCHG_RET  , {3, "DS_CMP_XCHG_RET"}},
-   {DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}},
-   {DS_OP_READ_RET      , {1, "DS_READ_RET"}},
-   {DS_OP_READ_REL_RET  , {1, "DS_READ_REL_RET"}},
-   {DS_OP_READ2_RET     , {2, "DS_READ2_RET"}},
-   {DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}},
-   {DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}},
-   {DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}},
-   {DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}},
-   {DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}},
-   {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}}
+   /* Each entry maps an opcode to {number of source operands, printable name}.
+    * Names must be unique per opcode so that printed IR stays unambiguous. */
+   {DS_OP_ADD           , {2, "ADD"}},
+   {DS_OP_SUB           , {2, "SUB"}},
+   {DS_OP_RSUB          , {2, "RSUB"}},
+   {DS_OP_INC           , {2, "INC"}},
+   {DS_OP_DEC           , {2, "DEC"}},
+   {DS_OP_MIN_INT       , {2, "MIN_INT"}},
+   {DS_OP_MAX_INT       , {2, "MAX_INT"}},
+   {DS_OP_MIN_UINT      , {2, "MIN_UINT"}},
+   {DS_OP_MAX_UINT      , {2, "MAX_UINT"}},
+   {DS_OP_AND           , {2, "AND"}},
+   {DS_OP_OR            , {2, "OR"}},
+   {DS_OP_XOR           , {2, "XOR"}},
+   {DS_OP_MSKOR         , {3, "MSKOR"}},
+   {DS_OP_WRITE         , {2, "WRITE"}},
+   {DS_OP_WRITE_REL     , {3, "WRITE_REL"}},
+   {DS_OP_WRITE2        , {3, "WRITE2"}},
+   {DS_OP_CMP_STORE     , {3, "CMP_STORE"}},
+   {DS_OP_CMP_STORE_SPF , {3, "CMP_STORE_SPF"}},
+   {DS_OP_BYTE_WRITE    , {2, "BYTE_WRITE"}},
+   {DS_OP_SHORT_WRITE   , {2, "SHORT_WRITE"}},
+   {DS_OP_ADD_RET       , {2, "ADD_RET"}},
+   {DS_OP_SUB_RET       , {2, "SUB_RET"}},
+   {DS_OP_RSUB_RET      , {2, "RSUB_RET"}},
+   {DS_OP_INC_RET       , {2, "INC_RET"}},
+   {DS_OP_DEC_RET       , {2, "DEC_RET"}},
+   {DS_OP_MIN_INT_RET   , {2, "MIN_INT_RET"}},
+   {DS_OP_MAX_INT_RET   , {2, "MAX_INT_RET"}},
+   {DS_OP_MIN_UINT_RET  , {2, "MIN_UINT_RET"}},
+   {DS_OP_MAX_UINT_RET  , {2, "MAX_UINT_RET"}},
+   {DS_OP_AND_RET       , {2, "AND_RET"}},
+   {DS_OP_OR_RET        , {2, "OR_RET"}},
+   {DS_OP_XOR_RET       , {2, "XOR_RET"}},
+   {DS_OP_MSKOR_RET     , {3, "MSKOR_RET"}},
+   {DS_OP_XCHG_RET      , {2, "XCHG_RET"}},
+   {DS_OP_XCHG_REL_RET  , {3, "XCHG_REL_RET"}},
+   {DS_OP_XCHG2_RET     , {3, "XCHG2_RET"}},
+   {DS_OP_CMP_XCHG_RET  , {3, "CMP_XCHG_RET"}},
+   {DS_OP_CMP_XCHG_SPF_RET, {3, "CMP_XCHG_SPF_RET"}},
+   {DS_OP_READ_RET      , {1, "READ_RET"}},
+   {DS_OP_READ_REL_RET  , {1, "READ_REL_RET"}},
+   {DS_OP_READ2_RET     , {2, "READ2_RET"}},
+   {DS_OP_READWRITE_RET , {3, "READWRITE_RET"}},
+   {DS_OP_BYTE_READ_RET , {1, "BYTE_READ_RET"}},
+   {DS_OP_UBYTE_READ_RET, {1, "UBYTE_READ_RET"}},
+   {DS_OP_SHORT_READ_RET, {1, "SHORT_READ_RET"}},
+   {DS_OP_USHORT_READ_RET, {1, "USHORT_READ_RET"}},
+   {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "ATOMIC_ORDERED_ALLOC_RET"}},
+   {LDS_ADD_RET, {2, "LDS_ADD_RET"}},
+   {LDS_ADD, {2, "LDS_ADD"}},
+   {LDS_AND_RET, {2, "LDS_AND_RET"}},
+   {LDS_AND, {2, "LDS_AND"}},
+   {LDS_WRITE, {2, "LDS_WRITE"}},
+   {LDS_OR_RET, {2, "LDS_OR_RET"}},
+   {LDS_OR, {2, "LDS_OR"}},
+   {LDS_MAX_INT_RET, {2, "LDS_MAX_INT_RET"}},
+   {LDS_MAX_INT, {2, "LDS_MAX_INT"}},
+   {LDS_MAX_UINT_RET, {2, "LDS_MAX_UINT_RET"}},
+   {LDS_MAX_UINT, {2, "LDS_MAX_UINT"}},
+   {LDS_MIN_INT_RET, {2, "LDS_MIN_INT_RET"}},
+   {LDS_MIN_INT, {2, "LDS_MIN_INT"}},
+   {LDS_MIN_UINT_RET, {2, "LDS_MIN_UINT_RET"}},
+   {LDS_MIN_UINT, {2, "LDS_MIN_UINT"}},
+   /* was "LDS_XOR": the returning variant must not share the name of LDS_XOR */
+   {LDS_XOR_RET, {2, "LDS_XOR_RET"}},
+   {LDS_XOR, {2, "LDS_XOR"}},
+   {LDS_XCHG_RET, {2, "LDS_XCHG_RET"}},
+   {LDS_CMP_XCHG_RET, {3, "LDS_CMP_XCHG_RET"}},
+   {LDS_WRITE_REL, {3, "LDS_WRITE_REL"}},
 };

 }
--- a/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
@ -27,6 +27,8 @@
 #ifndef r600_sfn_alu_defines_h
 #define r600_sfn_alu_defines_h

+#include "../r600_isa.h"
+
 #include <map>
 #include <bitset>

@ -235,12 +237,71 @@ enum EAluOp {
   op3_cnde_int = 28<< 6,
   op3_cndgt_int = 29<< 6,
   op3_cndge_int = 30<< 6,
-   op3_mul_lit = 31<< 6
+   op3_mul_lit = 31<< 6,
+   op_invalid = 0xffff
 };

+enum AluModifiers { // ALU instruction flags; presumably bit positions in AluOpFlags (std::bitset<alu_flag_count>) — TODO confirm
+   alu_src0_neg,
+   alu_src0_abs,
+   alu_src0_rel,
+   alu_src1_neg,
+   alu_src1_abs,
+   alu_src1_rel,
+   alu_src2_neg, // note: there is no alu_src2_abs enumerator
+   alu_src2_rel,
+   alu_dst_clamp,
+   alu_dst_rel,
+   alu_last_instr,
+   alu_update_exec,
+   alu_update_pred,
+   alu_write,
+   alu_op3,
+   alu_is_trans,
+   alu_is_cayman_trans,
+   alu_is_lds,
+   alu_lds_group_start,
+   alu_lds_group_end,
+   alu_lds_address,
+   alu_no_schedule_bias,
+   alu_64bit_op,
+   alu_flag_count // number of flags above; used as the size of the AluOpFlags bitset
+};

+enum AluDstModifiers { // destination modifier selectors with fixed numeric values 0..3; presumably hardware encodings — TODO confirm
+   omod_off = 0,
+   omod_mul2 = 1,
+   omod_mul4 = 2,
+   omod_divl2 = 3
+};

-using AluOpFlags=std::bitset<32>;
+enum AluPredSel { // predicate select values; note that the value 1 is not assigned to any enumerator
+   pred_off = 0,
+   pred_zero = 2,
+   pred_one = 3
+};
+
+enum AluBankSwizzle { // two naming sets (alu_vec_* and sq_alu_scl_*) that share the numeric values 0..4
+   alu_vec_012 = 0,
+   sq_alu_scl_201 = 0, // NOTE(review): cycle_trans maps value 0 to cycles {2, 1, 0}; name may be meant as "210" — confirm
+   alu_vec_021 = 1,
+   sq_alu_scl_122 = 1,
+   alu_vec_120 = 2,
+   sq_alu_scl_212 = 2,
+   alu_vec_102 = 3,
+   sq_alu_scl_221 = 3,
+   alu_vec_201 = 4,
+   sq_alu_scl_unknown  = 4, // one past the last scalar swizzle; same value as alu_vec_201
+   alu_vec_210 = 5,
+   alu_vec_unknown = 6 // one past the last vector swizzle
+};
+
+inline AluBankSwizzle operator ++(AluBankSwizzle& x) {
+   // Pre-increment: step to the next numeric swizzle value and return the
+   // updated value by copy. No range check is done here.
+   const int next_value = x + 1;
+   x = static_cast<AluBankSwizzle>(next_value);
+   return x;
+}
+
+using AluOpFlags=std::bitset<alu_flag_count>;

 struct AluOp {
   static constexpr int x = 1;
@ -314,6 +375,8 @@ struct AluInlineConstantDescr {

 extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;

+#define LDSOP2(X) LDS_ ## X = LDS_OP2_LDS_ ## X
+
 enum ESDOp {
   DS_OP_ADD = 0,
   DS_OP_SUB = 1,
@ -362,9 +425,31 @@ enum ESDOp {
   DS_OP_SHORT_READ_RET = 56,
   DS_OP_USHORT_READ_RET = 57,
   DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
-   DS_OP_INVALID = 64
+   DS_OP_INVALID = 64,
+   LDSOP2(ADD_RET),
+   LDSOP2(ADD),
+   LDSOP2(AND_RET),
+   LDSOP2(AND),
+   LDSOP2(WRITE),
+   LDSOP2(OR_RET),
+   LDSOP2(OR),
+   LDSOP2(MAX_INT_RET),
+   LDSOP2(MAX_INT),
+   LDSOP2(MAX_UINT_RET),
+   LDSOP2(MAX_UINT),
+   LDSOP2(MIN_INT_RET),
+   LDSOP2(MIN_INT),
+   LDSOP2(MIN_UINT_RET),
+   LDSOP2(MIN_UINT),
+   LDSOP2(XOR_RET),
+   LDSOP2(XOR),
+   LDSOP2(XCHG_RET),
+   LDS_CMP_XCHG_RET = LDS_OP3_LDS_CMP_XCHG_RET,
+   LDS_WRITE_REL = LDS_OP3_LDS_WRITE_REL
 };

+#undef LDSOP2
+
 struct LDSOp {
   int nsrc;
   const char *name;
@ -372,6 +457,18 @@ struct LDSOp {

 extern const std::map<ESDOp, LDSOp> lds_ops;

+struct KCacheLine { // plain aggregate describing one kcache line; every member has a default initializer
+   int bank{0};
+   int addr{0};
+   int len{0};
+   enum KCacheLockMode {
+      free, // default mode
+      lock_1,
+      lock_2
+   } mode{free};
+};
+
+
 }

 #endif // ALU_DEFINES_H
--- a/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp
@ -0,0 +1,329 @@
+#include "sfn_alu_readport_validation.h"
+
+#include <cstring>
+
+namespace r600 {
+
+class ReserveReadport : public ConstRegisterVisitor { // base visitor: forwards the visited source's read-port needs to `reserver`
+public:
+   ReserveReadport(AluReadportReservation& reserv);
+
+   void visit(const LocalArray& value) override; // never valid here (implementation is unreachable)
+   void visit(const LiteralConstant& value) override; // records the literal via reserver.add_literal
+   void visit(const InlineConstant& value) override; // no-op in this base class
+
+   void reserve_gpr(int sel, int chan); // reserves (sel, chan) at `cycle`, skipping source 1 when it equals source 0
+
+   AluReadportReservation& reserver; // reservation state that gets updated
+   int cycle = -1; // read cycle used for the source currently visited; set by the caller
+   int isrc = -1; // index of the source currently visited; set by the caller
+   int src0_sel = -1; // sel of source 0, compared against in reserve_gpr
+   int src0_chan = -1; // chan of source 0, compared against in reserve_gpr
+   bool success = true; // becomes false once any reservation fails
+
+   static const int max_const_readports = 2; // limit on constant slots / constant operands
+};
+
+
+class ReserveReadportVec : public ReserveReadport { // visitor used by schedule_vec_src / schedule_vec_instruction
+public:
+   using ReserveReadport::ReserveReadport;
+
+   void visit(const Register& value) override; // reserves a GPR read port
+   void visit(const LocalArrayValue& value) override; // reserves a GPR read port with a marker bit set in sel
+   void visit(const UniformValue& value) override; // reserves a constant slot via reserve_const
+};
+
+class ReserveReadportTrans : public ReserveReadport // common base of the two passes run by schedule_trans_instruction
+{
+public:
+   ReserveReadportTrans(AluReadportReservation& reserv);
+
+   int n_consts; // number of constant operands counted by pass 1; copied into pass 2
+};
+
+class ReserveReadportTransPass1 : public ReserveReadportTrans { // pass 1: counts and reserves constant operands, ignores GPRs
+public:
+   using ReserveReadportTrans::ReserveReadportTrans;
+
+   void visit(const Register& value) override; // no-op
+   void visit(const LocalArrayValue& value) override; // no-op
+   void visit(const UniformValue& value) override; // counts the operand and reserves a constant slot
+   void visit(const InlineConstant& value) override; // counts the operand only
+   void visit(const LiteralConstant& value) override; // counts the operand and records the literal
+};
+
+
+class ReserveReadportTransPass2 : public ReserveReadportTrans { // pass 2: reserves GPR read ports, rejecting cycles below n_consts
+public:
+   using ReserveReadportTrans::ReserveReadportTrans;
+
+   void visit(const Register& value) override; // fails if cycle < n_consts, else reserves the GPR
+   void visit(const LocalArrayValue& value) override; // same check, with a marker bit set in sel
+   void visit(const UniformValue& value) override; // no-op (already handled in pass 1)
+};
+
+bool AluReadportReservation::schedule_vec_src(PVirtualValue src[3],  int nsrc, AluBankSwizzle swz)
+{
+   // Try to reserve the read ports for the first nsrc entries of src using
+   // bank swizzle swz. Returns false if any reservation failed.
+   ReserveReadportVec visitor(*this);
+
+   // Remember source 0 so that an identical source 1 does not reserve twice;
+   // a non-register source 0 gets sentinel values instead.
+   if (!src[0]->as_register()) {
+      visitor.src0_sel = 0xffff;
+      visitor.src0_chan = 8;
+   } else {
+      visitor.src0_sel = src[0]->sel();
+      visitor.src0_chan = src[0]->chan();
+   }
+
+   int idx = 0;
+   while (idx < nsrc) {
+      visitor.cycle = cycle_vec(swz, idx);
+      visitor.isrc = idx;
+      src[idx]->accept(visitor);
+      ++idx;
+   }
+
+   return visitor.success;
+}
+
+bool AluReadportReservation::schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz)
+{
+   // Reserve read ports for every source of alu under bank swizzle swz,
+   // stopping at the first failure. Returns whether all reservations worked.
+   ReserveReadportVec visitor(*this);
+
+   for (unsigned s = 0; s < alu.n_sources() && visitor.success; ++s) {
+      visitor.cycle = cycle_vec(swz, s);
+      visitor.isrc = s;
+
+      // A second source equal to the first one needs no reservation of its own.
+      const bool same_as_src0 = (s == 1) && alu.src(s).equal_to(alu.src(0));
+      if (!same_as_src0)
+         alu.src(s).accept(visitor);
+   }
+   return visitor.success;
+}
+
+bool AluReadportReservation::schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz)
+{
+   // Pass 1: count the constant operands and reserve their slots.
+   ReserveReadportTransPass1 const_pass(*this);
+   for (unsigned s = 0; s < alu.n_sources(); ++s) {
+      const_pass.cycle = cycle_trans(swz, s);
+      alu.src(s).accept(const_pass);
+   }
+
+   if (!const_pass.success)
+      return false;
+
+   // Pass 2: reserve the GPR read ports; it needs the constant count of
+   // pass 1 to reject cycles below that count.
+   ReserveReadportTransPass2 gpr_pass(*this);
+   gpr_pass.n_consts = const_pass.n_consts;
+   for (unsigned s = 0; s < alu.n_sources(); ++s) {
+      gpr_pass.cycle = cycle_trans(swz, s);
+      alu.src(s).accept(gpr_pass);
+   }
+
+   return gpr_pass.success;
+}
+
+
+AluReadportReservation::AluReadportReservation()
+{
+   // -1 marks a free slot in all reservation tables.
+   for (auto& ports_of_cycle : m_hw_gpr)
+      ports_of_cycle.fill(-1);
+
+   m_hw_const_addr.fill(-1);
+   m_hw_const_chan.fill(-1);
+   m_hw_const_bank.fill(-1);
+}
+
+
+bool AluReadportReservation::reserve_gpr(int sel, int chan, int cycle)
+{
+   // A (cycle, chan) port can serve exactly one GPR selector: claim it when
+   // it is free (-1), otherwise succeed only if it already holds sel.
+   int& port = m_hw_gpr[cycle][chan];
+   if (port == -1) {
+      port = sel;
+      return true;
+   }
+   return port == sel;
+}
+
+bool AluReadportReservation::reserve_const(const UniformValue& value)
+{
+   // Look for a slot that already holds this constant (same sel, kcache bank
+   // and channel pair); remember the last free slot seen on the way.
+   int free_slot = -1;
+
+   for (int slot = 0; slot < ReserveReadport::max_const_readports; ++slot) {
+      if (m_hw_const_addr[slot] == -1) {
+         free_slot = slot;
+         continue;
+      }
+      if ((m_hw_const_addr[slot] == value.sel()) &&
+          (m_hw_const_bank[slot] == value.kcache_bank()) &&
+          (m_hw_const_chan[slot] == (value.chan() >> 1)))
+         return true;
+   }
+
+   // No matching reservation: fail unless a free slot can be claimed.
+   if (free_slot < 0)
+      return false;
+
+   m_hw_const_addr[free_slot] = value.sel();
+   m_hw_const_bank[free_slot] = value.kcache_bank();
+   m_hw_const_chan[free_slot] = value.chan() >> 1;
+   return true;
+}
+
+bool AluReadportReservation::add_literal(uint32_t value)
+{
+   // A literal that is already recorded can be shared.
+   for (unsigned k = 0; k < m_nliterals; ++k) {
+      if (m_literals[k] == value)
+         return true;
+   }
+
+   // Otherwise it needs a new entry; fail when the table is full.
+   if (m_nliterals >= m_literals.size())
+      return false;
+
+   m_literals[m_nliterals++] = value;
+   return true;
+}
+
+int AluReadportReservation::cycle_vec(AluBankSwizzle swz, int src)
+{
+   // Row index: vector bank swizzle (alu_vec_012 .. alu_vec_210).
+   // Column index: source operand. The entry is the read cycle of that source,
+   // i.e. digit k of the swizzle name is the cycle in which source k is read.
+   // The rows for alu_vec_120 and alu_vec_102 were swapped before; they now
+   // follow the same naming rule as all other rows.
+   // Callers must pass swz < alu_vec_unknown and src < max_gpr_readports.
+   static const int mapping[AluBankSwizzle::alu_vec_unknown][max_gpr_readports] = {
+      {0, 1, 2}, // alu_vec_012
+      {0, 2, 1}, // alu_vec_021
+      {1, 2, 0}, // alu_vec_120
+      {1, 0, 2}, // alu_vec_102
+      {2, 0, 1}, // alu_vec_201
+      {2, 1, 0}  // alu_vec_210
+   };
+   return mapping[swz][src];
+}
+
+int AluReadportReservation::cycle_trans(AluBankSwizzle swz, int src)
+{
+   // Row index: scalar bank swizzle value (0 .. sq_alu_scl_unknown - 1).
+   // Column index: source operand. The entry is the read cycle of that source.
+   static const int cycle_of_src[AluBankSwizzle::sq_alu_scl_unknown][max_gpr_readports] = {
+      {2, 1, 0},
+      {1, 2, 2},
+      {2, 1, 2},
+      {2, 2, 1},
+   };
+   const int *row = cycle_of_src[swz];
+   return row[src];
+}
+
+
+ReserveReadport::ReserveReadport(AluReadportReservation& reserv)
+   : reserver(reserv)
+{
+   // All other members keep their in-class default initializers.
+}
+
+void ReserveReadport::visit(const LocalArray& value) // a whole array is never expected as a source here
+{
+   (void)value; // silence the unused-parameter warning
+   unreachable("a full array is not available here");
+}
+
+void ReserveReadport::visit(const LiteralConstant& value)
+{
+   // Record the literal; a failure is sticky in `success`.
+   const bool literal_added = reserver.add_literal(value.value());
+   success = success && literal_added;
+}
+
+void ReserveReadport::visit(const InlineConstant& value) // no reservation is made for inline constants here
+{
+   (void)value; // silence the unused-parameter warning
+}
+
+void ReserveReadportVec::visit(const Register& value) // reserve the GPR read port for this register at the current cycle
+{
+   reserve_gpr(value.sel(), value.chan());
+}
+
+void ReserveReadportVec::visit(const LocalArrayValue& value)
+{
+   // OR a high marker bit (0x4000000) into the selector to indicate that
+   // we use the AR register
+   reserve_gpr(0x4000000 | value.sel(), value.chan()); // NOTE(review): 0x4000000 is bit 26, not the highest non-sign bit — confirm intent
+}
+
+void ReserveReadport::reserve_gpr(int sel, int chan)
+{
+   // Source 1 reading the very same register channel as source 0 does not
+   // need a read port of its own.
+   const bool same_as_src0 = isrc == 1 && src0_sel == sel && src0_chan == chan;
+   if (!same_as_src0)
+      success &= reserver.reserve_gpr(sel, chan, cycle);
+}
+
+void ReserveReadportVec::visit(const UniformValue& value)
+{
+   // Open question left by the author: "kcache bank?" (reserve_const does compare kcache_bank())
+   success &= reserver.reserve_const(value);
+}
+
+ReserveReadportTrans::ReserveReadportTrans(AluReadportReservation& reserv): // starts with no constant operands counted
+   ReserveReadport(reserv),
+   n_consts(0)
+{}
+
+void ReserveReadportTransPass1::visit(const Register& value) // pass 1 ignores registers; ReserveReadportTransPass2 reserves them
+{
+   (void)value; // silence the unused-parameter warning
+}
+
+void ReserveReadportTransPass1::visit(const LocalArrayValue& value) // pass 1 ignores array values; ReserveReadportTransPass2 reserves them
+{
+   (void)value; // silence the unused-parameter warning
+}
+
+void ReserveReadportTransPass1::visit(const UniformValue& value)
+{
+   // Count the constant operand and reserve its slot; fail once the
+   // limit of constant operands is already reached.
+   if (n_consts < max_const_readports) {
+      ++n_consts;
+      success &= reserver.reserve_const(value);
+   } else {
+      success = false;
+   }
+}
+
+void ReserveReadportTransPass1::visit(const InlineConstant& value)
+{
+   // An inline constant only counts as a constant operand; nothing is
+   // reserved for it. Fail once the limit is already reached.
+   (void)value;
+   if (n_consts < max_const_readports)
+      ++n_consts;
+   else
+      success = false;
+}
+
+void ReserveReadportTransPass1::visit(const LiteralConstant& value)
+{
+   // Count the constant operand and record the literal; fail once the
+   // limit of constant operands is already reached.
+   if (n_consts < max_const_readports) {
+      ++n_consts;
+      success &= reserver.add_literal(value.value());
+   } else {
+      success = false;
+   }
+}
+
+void ReserveReadportTransPass2::visit(const Register& value)
+{
+   // A register may only be read in a cycle that is not below the number
+   // of constant operands counted in pass 1.
+   if (cycle >= n_consts)
+      reserve_gpr(value.sel(), value.chan());
+   else
+      success = false;
+}
+
+void ReserveReadportTransPass2::visit(const LocalArrayValue& value)
+{
+   // Same cycle rule as for plain registers; the selector additionally
+   // carries the marker bit 0x4000000.
+   if (cycle >= n_consts)
+      reserve_gpr(0x4000000 | value.sel(), value.chan());
+   else
+      success = false;
+}
+
+void ReserveReadportTransPass2::visit(const UniformValue& value) // nothing to do: uniforms are reserved by ReserveReadportTransPass1
+{
+   (void)value; // silence the unused-parameter warning
+}
+
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h
@ -0,0 +1,41 @@
+#ifndef ALUREADPORTVALIDATION_H
+#define ALUREADPORTVALIDATION_H
+
+#include "sfn_instr_alu.h"
+
+namespace r600 {
+
+class AluReadportReservation { // bookkeeping of reserved GPR read ports, constant slots and literal values
+public:
+   AluReadportReservation(); // marks all GPR ports and constant slots as free (-1)
+   AluReadportReservation(const AluReadportReservation& orig) = default;
+   AluReadportReservation& operator = (const AluReadportReservation& orig) = default;
+
+   bool schedule_vec_src(PVirtualValue src[3],  int nsrc, AluBankSwizzle swz); // false if any of the nsrc sources cannot be reserved
+
+   bool schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz); // false if a source of alu cannot be reserved
+   bool schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz); // two-pass variant: constants first, then GPRs
+
+   bool reserve_gpr(int sel, int chan, int cycle); // true if port (cycle, chan) was free or already holds sel
+   bool reserve_const(const UniformValue& value); // true if the constant is already reserved or a free slot was claimed
+
+   bool add_literal(uint32_t value); // true if value is already recorded or could be appended
+
+   static int cycle_vec(AluBankSwizzle swz, int src); // read cycle of source src under a vector swizzle
+   static int cycle_trans(AluBankSwizzle swz, int src); // read cycle of source src under a scalar swizzle
+
+   static const int max_chan_channels = 4;
+   static const int max_gpr_readports = 3;
+
+   std::array<std::array<int, max_chan_channels>, max_gpr_readports> m_hw_gpr; // [cycle][chan] -> reserved sel, -1 when free
+   std::array<int, max_chan_channels> m_hw_const_addr; // per slot: sel of the reserved constant, -1 when free
+   std::array<int, max_chan_channels>  m_hw_const_chan; // per slot: chan() >> 1 of the reserved constant
+   std::array<int, max_chan_channels>  m_hw_const_bank; // per slot: kcache bank of the reserved constant
+   std::array<uint32_t, max_chan_channels> m_literals; // recorded literals; only the first m_nliterals entries are valid
+   uint32_t m_nliterals{0}; // number of valid entries in m_literals
+};
+
+
+}
+
+#endif // ALUREADPORTVALIDATION_H
--- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_assembler.h
+++ b/src/gallium/drivers/r600/sfn/sfn_assembler.h
@ -0,0 +1,26 @@
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+#include "../r600_pipe.h"
+#include "../r600_shader.h"
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+class Assembler // constructed from an r600_shader and a shader key; lower() takes the sfn Shader
+{
+public:
+   Assembler(r600_shader *sh, const r600_shader_key& key);
+
+   bool lower(Shader *shader); // NOTE(review): presumably returns true on success — confirm in sfn_assembler.cpp
+private:
+   r600_shader *m_sh; // raw pointer; ownership is not visible from this header
+   const r600_shader_key& m_key; // reference member: if bound to the constructor argument, that key must outlive this object
+};
+
+
+
+}
+
+#endif // ASSEMBLER_H
--- a/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h
+++ b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h
@ -38,10 +38,7 @@ enum JumpType {

 /**
  Class to link the jump locations
-
 */
-
-
 class ConditionalJumpTracker
 {
 public:
@ -49,7 +46,6 @@ public:
   ~ConditionalJumpTracker();

   /* Mark the start of a loop or a if/else */
-
   void push(r600_bytecode_cf *start, JumpType type);

   /* Mark the end of a loop or a if/else and fixup the jump sites */
--- a/src/gallium/drivers/r600/sfn/sfn_debug.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_debug.cpp
@ -61,6 +61,10 @@ static const struct debug_named_value sfn_debug_options[] = {
   {"nomerge", SfnLog::nomerge, "Skip register merge step"},
   {"tex", SfnLog::tex, "Log texture ops"},
   {"trans", SfnLog::trans, "Log generic translation messages"},
+   {"schedule", SfnLog::schedule, "Log scheduling"},
+   {"opt", SfnLog::opt, "Log optimization"},
+   {"steps", SfnLog::steps, "Log shaders at transformation steps"},
+   {"noopt", SfnLog::noopt, "Don't run backend optimizations"},
   DEBUG_NAMED_VALUE_END
 };

--- a/src/gallium/drivers/r600/sfn/sfn_debug.h
+++ b/src/gallium/drivers/r600/sfn/sfn_debug.h
@ -64,8 +64,12 @@ public:
      merge = 1 << 10,
      tex = 1 << 11,
      trans = 1 << 12,
-      all = (1 << 13) - 1,
+      schedule = 1 << 13,
+      opt = 1 << 14,
+      all = (1 << 15) - 1,
      nomerge = 1 << 16,
+      steps = 1 << 17,
+      noopt = 1 << 18
   };

   SfnLog();
--- a/src/gallium/drivers/r600/sfn/sfn_defines.h
+++ b/src/gallium/drivers/r600/sfn/sfn_defines.h
@ -303,6 +303,9 @@ enum EVFetchFlagShift {
   vtx_alt_const,
   vtx_use_tc,
   vtx_vpm,
+   vtx_is_mega_fetch,
+   vtx_uncached,
+   vtx_indexed,
   vtx_unknown
 };

--- a/src/gallium/drivers/r600/sfn/sfn_docu.txt
+++ b/src/gallium/drivers/r600/sfn/sfn_docu.txt
@ -2,44 +2,33 @@

 This code is an attempt to implement a NIR backend for r600.

+Supported hardware: Cayman, Evergreen and NI (tested on CAYMAN, CEDAR and BARTS)
+
+Thanks to soft fp64 the OpenGL version is now 4.5 also for EG.
+
+sb can be enabled for nir, it still gives some improvements, e.g. with Xonotic
+The aim is still to get rid of it.
+
+
 ## State

-Supported hardware: Evergreen and NI (tested on CEDAR and BARTS)
-
-Thanks to soft fp64 the OpenGL version is now 4.5
-
-sb has been enabled for nir to be able to run some more demanding work loads. The aim is
-still to get rid of it.
-
+TODO:

 piglits gpu passes mostly like with TGSI, there are some fixes but also a few regressions.

-CTS gles
- - 2 passes like with TGSI
- - 3 no regressions, a few fixes compared to TGSI
- - 31
-    * a few fixes with interpolation specifiers
-    * synchronization has some unstable tests, this might be because global synchronization is missing (in both)
-
-GL CTS:
-  * a few regressions and a hang with KHR-GL43.compute_shader.shared-max
-
 piglit:
-  * spilling arrays is broken on Barts (but it works on Cedar)
-  * a few tests fail because the register limit is exhausted, and needlessly so, because
-    with better RA it would work
+  * spilling arrays is broken on Barts and CAYMAN (but it works on Cedar)

 ## Needed optimizations:

-  - Register allocator and scheduler (Could the sb allocator and scheduler
-    be ported?)
-
  - peepholes:
-    - compare + set predicate
+    - compare + set predicate / kill
+  - use clause local registers
+  - reduce register usage
+  - don't rely on the backend to schedule addr load and Index load as well
+  - don't rely on the backend to merge some alu groups
+
+## There are still some hangs
+
+

-  - copy propagation:
-    - Moves from inputs are usually not required, they could be forwarded
-    - texture operations often move additional parameters in extra registers
-      but they are actually needed in the same registers they come from and
-      could just be swizzled into the right place
-      (lower in NIR like it is done in e.g. in ETNAVIV)
--- a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h
+++ b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h
@ -1,116 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_EMITALUINSTRUCTION_H
-#define SFN_EMITALUINSTRUCTION_H
-
-#include "sfn_emitinstruction.h"
-
-#include "sfn_alu_defines.h"
-#include "sfn_instruction_alu.h"
-#include "sfn_instruction_tex.h"
-
-namespace r600  {
-
-
-class EmitAluInstruction : public EmitInstruction
-{
-public:
-   EmitAluInstruction(ShaderFromNirProcessor& processor);
-
-private:
-
-   enum AluOp2Opts {
-      op2_opt_none = 0,
-      op2_opt_reverse = 1,
-      op2_opt_neg_src1 = 1 << 1
-   };
-
-   bool do_emit(nir_instr* instr) override;
-
-   void split_constants(const nir_alu_instr& instr, unsigned nsrc_comp);
-
-   bool emit_mov(const nir_alu_instr& instr);
-   bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0);
-   bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
-
-   bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode);
-   bool emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode);
-
-   bool emit_alu_inot(const nir_alu_instr& instr);
-   bool emit_alu_ineg(const nir_alu_instr& instr);
-   bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
-
-   bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2});
-   bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false);
-
-   bool emit_alu_b2f(const nir_alu_instr& instr);
-   bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op);
-   bool emit_dot(const nir_alu_instr& instr, int n);
-   bool emit_create_vec(const nir_alu_instr& instr, unsigned nc);
-   bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op,  unsigned nc, bool all);
-   bool emit_any_iequal(const nir_alu_instr& instr, unsigned nc);
-
-   bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
-   bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all);
-
-   bool emit_fdph(const nir_alu_instr &instr);
-   bool emit_discard_if(const nir_intrinsic_instr *instr);
-
-   bool emit_alu_f2b32(const nir_alu_instr& instr);
-   bool emit_b2i32(const nir_alu_instr& instr);
-   bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op);
-   bool emit_pack_64_2x32_split(const nir_alu_instr& instr);
-   bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp);
-
-   bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine);
-   bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr);
-   bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr);
-   bool emit_pack_32_2x16_split(const nir_alu_instr& instr);
-
-   bool emit_cube(const nir_alu_instr& instr);
-private:
-   void make_last(AluInstruction *ir) const;
-   void split_alu_modifiers(const nir_alu_src &src, const GPRVector::Values& v,
-                            GPRVector::Values& out, int ncomp);
-
-   void preload_src(const nir_alu_instr& instr);
-   unsigned num_src_comp(const nir_alu_instr& instr);
-
-   using vreg = std::array<PValue, 4>;
-
-   std::array<PValue, 4> m_src[4];
-};
-
-inline void EmitAluInstruction::make_last(AluInstruction *ir) const
-{
-   if (ir)
-      ir->set_flag(alu_last_instr);
-}
-
-}
-
-#endif // SFN_EMITALUINSTRUCTION_H
--- a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp
@ -1,169 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_emitinstruction.h"
-
-#include "sfn_shader_base.h"
-
-namespace r600 {
-
-EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor):
-   m_proc(processor)
-{
-
-}
-
-EmitInstruction::~EmitInstruction()
-{
-}
-
-bool EmitInstruction::emit(nir_instr* instr)
-{
-   return do_emit(instr);
-}
-
-bool EmitInstruction::use_legacy_math_rules(void)
-{
-   return m_proc.use_legacy_math_rules();
-}
-
-PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
-{
-   return m_proc.from_nir(v, component, swizzled);
-}
-
-PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-PValue EmitInstruction::from_nir(const nir_src& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-void EmitInstruction::emit_instruction(Instruction *ir)
-{
-   return m_proc.emit_instruction(ir);
-}
-
-void EmitInstruction::emit_instruction(AluInstruction *ir)
-{
-   return m_proc.emit_instruction(ir);
-}
-
-bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest,
-                                       std::vector<PValue> src0,
-                                       const std::set<AluModifiers>& m_flags)
-{
-   return m_proc.emit_instruction(opcode, dest,src0, m_flags);
-}
-
-const nir_variable *
-EmitInstruction::get_deref_location(const nir_src& v) const
-{
-   return m_proc.get_deref_location(v);
-}
-
-PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
-{
-   return m_proc.from_nir_with_fetch_constant(src, component, channel);
-}
-
-GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
-                                                            const GPRVector::Swizzle& swizzle, bool match)
-{
-   return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match);
-}
-
-PGPRValue EmitInstruction::get_temp_register(int channel)
-{
-   return m_proc.get_temp_register(channel);
-}
-
-GPRVector EmitInstruction::get_temp_vec4(const GPRVector::Swizzle& swizzle)
-{
-   return m_proc.get_temp_vec4(swizzle);
-}
-
-PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle)
-{
-   return m_proc.create_register_from_nir_src(src, swizzle);
-}
-
-enum amd_gfx_level EmitInstruction::get_chip_class(void) const
-{
-   return m_proc.get_chip_class();
-}
-
-PValue EmitInstruction::literal(uint32_t value)
-{
-   return m_proc.literal(value);
-}
-
-GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components)
-{
-   return m_proc.vec_from_nir(dst, num_components);
-}
-
-bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle,
-                                      const PValue& reg, bool map)
-{
-   return m_proc.inject_register(sel, swizzle, reg, map);
-}
-
-int EmitInstruction::remap_atomic_base(int base)
-{
-	return m_proc.remap_atomic_base(base);
-}
-
-void EmitInstruction::set_has_txs_cube_array_comp()
-{
-   m_proc.sh_info().has_txq_cube_array_z_comp = 1;
-}
-
-const std::set<AluModifiers> EmitInstruction::empty = {};
-const std::set<AluModifiers> EmitInstruction::write = {alu_write};
-const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};
-const std::set<AluModifiers> EmitInstruction::last = {alu_last_instr};
-
-}
-
--- a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h
+++ b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h
@ -1,102 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef EMITINSTRUCTION_H
-#define EMITINSTRUCTION_H
-
-#include "compiler/nir/nir.h"
-#include "sfn_defines.h"
-#include "sfn_value.h"
-#include "sfn_instruction_alu.h"
-
-namespace r600 {
-
-class ShaderFromNirProcessor;
-
-class EmitInstruction
-{
-public:
-   EmitInstruction(ShaderFromNirProcessor& processor);
-   virtual ~EmitInstruction();
-   bool emit(nir_instr* instr);
-
-   static const std::set<AluModifiers> empty;
-   static const std::set<AluModifiers> write;
-   static const std::set<AluModifiers> last_write;
-   static const std::set<AluModifiers> last;
-
-protected:
-   virtual bool do_emit(nir_instr* instr) = 0;
-
-   // forwards from ValuePool
-   PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
-   PValue from_nir(const nir_src& v, unsigned component);
-   PValue from_nir(const nir_alu_src& v, unsigned component);
-   PValue from_nir(const nir_tex_src& v, unsigned component);
-   PValue from_nir(const nir_alu_dest& v, unsigned component);
-   PValue from_nir(const nir_dest& v, unsigned component);
-
-   PValue create_register_from_nir_src(const nir_src& src, unsigned comp);
-
-   PGPRValue get_temp_register(int channel = -1);
-   GPRVector get_temp_vec4(const GPRVector::Swizzle& swizzle = {0,1,2,3});
-
-   // forwards from ShaderFromNirProcessor
-   void emit_instruction(Instruction *ir);
-   void emit_instruction(AluInstruction *ir);
-   bool emit_instruction(EAluOp opcode, PValue dest,
-                         std::vector<PValue> src0,
-                         const std::set<AluModifiers>& m_flags);
-   bool use_legacy_math_rules(void);
-
-   PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
-   GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
-                                              const GPRVector::Swizzle& swizzle, bool match = false);
-
-   const nir_variable *get_deref_location(const nir_src& v) const;
-
-   enum amd_gfx_level get_chip_class(void) const;
-
-   PValue literal(uint32_t value);
-
-   GPRVector vec_from_nir(const nir_dest& dst, int num_components);
-
-   bool inject_register(unsigned sel, unsigned swizzle,
-                        const PValue& reg, bool map);
-
-   int remap_atomic_base(int base);
-
-   void set_has_txs_cube_array_comp();
-private:
-
-   ShaderFromNirProcessor& m_proc;
-};
-
-}
-
-
-
-#endif // EMITINSTRUCTION_H
--- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
@ -1,741 +0,0 @@
-#include "sfn_emitssboinstruction.h"
-
-#include "sfn_instruction_fetch.h"
-#include "sfn_instruction_gds.h"
-#include "sfn_instruction_misc.h"
-#include "sfn_instruction_tex.h"
-#include "../r600_pipe.h"
-#include "../r600_asm.h"
-
-namespace r600 {
-
-#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
-
-EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
-   EmitInstruction(processor),
-   m_require_rat_return_address(false),
-   m_ssbo_image_offset(0)
-{
-}
-
-void EmitSSBOInstruction::set_ssbo_offset(int offset)
-{
-   m_ssbo_image_offset = offset;
-}
-
-
-void EmitSSBOInstruction::set_require_rat_return_address()
-{
-   m_require_rat_return_address = true;
-}
-
-bool
-EmitSSBOInstruction::load_rat_return_address()
-{
-   if (m_require_rat_return_address) {
-      m_rat_return_address = get_temp_vec4();
-      emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
-      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
-                                          literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
-      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
-                                          m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
-      {alu_write, alu_last_instr}));
-      m_require_rat_return_address = false;
-   }
-   return true;
-}
-
-
-bool EmitSSBOInstruction::do_emit(nir_instr* instr)
-{
-   const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   switch (intr->intrinsic) {
-   case nir_intrinsic_atomic_counter_add:
-   case nir_intrinsic_atomic_counter_and:
-   case nir_intrinsic_atomic_counter_exchange:
-   case nir_intrinsic_atomic_counter_max:
-   case nir_intrinsic_atomic_counter_min:
-   case nir_intrinsic_atomic_counter_or:
-   case nir_intrinsic_atomic_counter_xor:
-   case nir_intrinsic_atomic_counter_comp_swap:
-      return emit_atomic(intr);
-   case nir_intrinsic_atomic_counter_read:
-   case nir_intrinsic_atomic_counter_post_dec:
-      return emit_unary_atomic(intr);
-   case nir_intrinsic_atomic_counter_inc:
-      return emit_atomic_inc(intr);
-   case nir_intrinsic_atomic_counter_pre_dec:
-      return emit_atomic_pre_dec(intr);
-   case nir_intrinsic_load_ssbo:
-       return emit_load_ssbo(intr);
-   case nir_intrinsic_store_ssbo:
-      return emit_store_ssbo(intr);
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_ssbo_atomic_exchange:
-      return emit_ssbo_atomic_op(intr);
-   case nir_intrinsic_image_store:
-      return emit_image_store(intr);
-   case nir_intrinsic_image_load:
-   case nir_intrinsic_image_atomic_add:
-   case nir_intrinsic_image_atomic_and:
-   case nir_intrinsic_image_atomic_or:
-   case nir_intrinsic_image_atomic_xor:
-   case nir_intrinsic_image_atomic_exchange:
-   case nir_intrinsic_image_atomic_comp_swap:
-   case nir_intrinsic_image_atomic_umin:
-   case nir_intrinsic_image_atomic_umax:
-   case nir_intrinsic_image_atomic_imin:
-   case nir_intrinsic_image_atomic_imax:
-      return emit_image_load(intr);
-   case nir_intrinsic_image_size:
-      return emit_image_size(intr);
-   case nir_intrinsic_get_ssbo_size:
-      return emit_buffer_size(intr);
-   case nir_intrinsic_memory_barrier:
-   case nir_intrinsic_memory_barrier_image:
-   case nir_intrinsic_memory_barrier_buffer:
-   case nir_intrinsic_group_memory_barrier:
-      return make_stores_ack_and_waitack();
-   default:
-      return false;
-   }
-}
-
-bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
-{
-   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
-
-   ESDOp op = read_result ? get_opcode(instr->intrinsic) :
-                            get_opcode_wo(instr->intrinsic);
-
-   if (DS_OP_INVALID == op)
-      return false;
-
-
-
-   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
-
-   int base = remap_atomic_base(nir_intrinsic_base(instr));
-
-   PValue uav_id = from_nir(instr->src[0], 0);
-
-   PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
-
-   GDSInstr *ir = nullptr;
-   if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap)  {
-      PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
-      ir = new GDSInstr(op, dest, value, value2, uav_id, base);
-   } else {
-      ir = new GDSInstr(op, dest, value, uav_id, base);
-   }
-
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
-{
-   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
-
-   ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);
-
-   if (DS_OP_INVALID == op)
-      return false;
-
-   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
-
-   PValue uav_id = from_nir(instr->src[0], 0);
-
-   auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
-
-   emit_instruction(ir);
-   return true;
-}
-
-ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const
-{
-   switch (opcode) {
-   case nir_intrinsic_atomic_counter_add:
-      return DS_OP_ADD_RET;
-   case nir_intrinsic_atomic_counter_and:
-      return DS_OP_AND_RET;
-   case nir_intrinsic_atomic_counter_exchange:
-      return DS_OP_XCHG_RET;
-   case nir_intrinsic_atomic_counter_inc:
-      return DS_OP_INC_RET;
-   case nir_intrinsic_atomic_counter_max:
-      return DS_OP_MAX_UINT_RET;
-   case nir_intrinsic_atomic_counter_min:
-      return DS_OP_MIN_UINT_RET;
-   case nir_intrinsic_atomic_counter_or:
-      return DS_OP_OR_RET;
-   case nir_intrinsic_atomic_counter_read:
-      return DS_OP_READ_RET;
-   case nir_intrinsic_atomic_counter_xor:
-      return DS_OP_XOR_RET;
-   case nir_intrinsic_atomic_counter_post_dec:
-      return DS_OP_DEC_RET;
-   case nir_intrinsic_atomic_counter_comp_swap:
-      return DS_OP_CMP_XCHG_RET;
-   case nir_intrinsic_atomic_counter_pre_dec:
-   default:
-      return DS_OP_INVALID;
-   }
-}
-
-ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const
-{
-   switch (opcode) {
-   case nir_intrinsic_atomic_counter_add:
-      return DS_OP_ADD;
-   case nir_intrinsic_atomic_counter_and:
-      return DS_OP_AND;
-   case nir_intrinsic_atomic_counter_inc:
-      return DS_OP_INC;
-   case nir_intrinsic_atomic_counter_max:
-      return DS_OP_MAX_UINT;
-   case nir_intrinsic_atomic_counter_min:
-      return DS_OP_MIN_UINT;
-   case nir_intrinsic_atomic_counter_or:
-      return DS_OP_OR;
-   case nir_intrinsic_atomic_counter_xor:
-      return DS_OP_XOR;
-   case nir_intrinsic_atomic_counter_post_dec:
-      return DS_OP_DEC;
-   case nir_intrinsic_atomic_counter_comp_swap:
-      return DS_OP_CMP_XCHG_RET;
-   case nir_intrinsic_atomic_counter_exchange:
-      return DS_OP_XCHG_RET;
-   case nir_intrinsic_atomic_counter_pre_dec:
-   default:
-      return DS_OP_INVALID;
-   }
-}
-
-RatInstruction::ERatOp
-EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
-{
-   switch (opcode) {
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_image_atomic_add:
-      return RatInstruction::ADD_RTN;
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_image_atomic_and:
-      return RatInstruction::AND_RTN;
-   case nir_intrinsic_ssbo_atomic_exchange:
-   case nir_intrinsic_image_atomic_exchange:
-      return RatInstruction::XCHG_RTN;
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_image_atomic_or:
-      return RatInstruction::OR_RTN;
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_image_atomic_imin:
-      return RatInstruction::MIN_INT_RTN;
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_image_atomic_imax:
-      return RatInstruction::MAX_INT_RTN;
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_image_atomic_umin:
-      return RatInstruction::MIN_UINT_RTN;
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_image_atomic_umax:
-      return RatInstruction::MAX_UINT_RTN;
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_image_atomic_xor:
-      return RatInstruction::XOR_RTN;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-   case nir_intrinsic_image_atomic_comp_swap:
-      if (util_format_is_float(format))
-         return RatInstruction::CMPXCHG_FLT_RTN;
-      else
-         return RatInstruction::CMPXCHG_INT_RTN;
-   case nir_intrinsic_image_load:
-      return RatInstruction::NOP_RTN;
-   default:
-      unreachable("Unsupported RAT instruction");
-   }
-}
-
-RatInstruction::ERatOp
-EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const
-{
-	switch (opcode) {
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_image_atomic_add:
-      return RatInstruction::ADD;
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_image_atomic_and:
-      return RatInstruction::AND;
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_image_atomic_or:
-      return RatInstruction::OR;
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_image_atomic_imin:
-      return RatInstruction::MIN_INT;
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_image_atomic_imax:
-      return RatInstruction::MAX_INT;
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_image_atomic_umin:
-      return RatInstruction::MIN_UINT;
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_image_atomic_umax:
-      return RatInstruction::MAX_UINT;
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_image_atomic_xor:
-      return RatInstruction::XOR;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-   case nir_intrinsic_image_atomic_comp_swap:
-      if (util_format_is_float(format))
-         return RatInstruction::CMPXCHG_FLT;
-      else
-         return RatInstruction::CMPXCHG_INT;
-   default:
-      unreachable("Unsupported WO RAT instruction");
-   }
-}
-
-bool EmitSSBOInstruction::load_atomic_inc_limits()
-{
-   m_atomic_update = get_temp_register();
-   m_atomic_update->set_keep_alive();
-   emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
-   {alu_write, alu_last_instr}));
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
-{
-   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
-   PValue uav_id = from_nir(instr->src[0], 0);
-   GPRVector dest = read_result ? make_dest(instr): GPRVector(0, {7,7,7,7});
-   auto ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest,
-                          m_atomic_update, uav_id,
-                          remap_atomic_base(nir_intrinsic_base(instr)));
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
-{
-   GPRVector dest = make_dest(instr);
-
-   PValue uav_id = from_nir(instr->src[0], 0);
-
-   auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
-                          remap_atomic_base(nir_intrinsic_base(instr)));
-   emit_instruction(ir);
-
-   emit_instruction(new AluInstruction(op2_sub_int,  dest.x(), dest.x(), literal(1), last_write));
-
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
-{
-   GPRVector dest = make_dest(instr);
-
-   /** src0 not used, should be some offset */
-   auto addr = from_nir(instr->src[1], 0);
-   PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
-
-   /** Should be lowered in nir */
-   emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
-                    {alu_write, alu_last_instr}));
-
-   const EVTXDataFormat formats[4] = {
-      fmt_32,
-      fmt_32_32,
-      fmt_32_32_32,
-      fmt_32_32_32_32
-   };
-
-   const std::array<int,4> dest_swt[4] = {
-      {0,7,7,7},
-      {0,1,7,7},
-      {0,1,2,7},
-      {0,1,2,3}
-   };
-
-   /* TODO fix resource index */
-   auto ir = new FetchInstruction(dest, addr_temp,
-                                  R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset
-                                  , from_nir(instr->src[0], 0),
-                                  formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
-   ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
-   ir->set_flag(vtx_use_tc);
-
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
-{
-
-   GPRVector::Swizzle swz = {7,7,7,7};
-   for (unsigned i = 0; i <  nir_src_num_components(instr->src[0]); ++i)
-      swz[i] = i;
-
-   auto orig_addr = from_nir(instr->src[2], 0);
-
-   GPRVector addr_vec = get_temp_vec4({0,1,2,7});
-
-   auto temp2 = get_temp_vec4();
-
-   auto rat_id = from_nir(instr->src[1], 0);
-
-   emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
-                                       PValue(new LiteralValue(2)), write));
-   emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
-   emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
-
-
-   auto values = vec_from_nir_with_fetch_constant(instr->src[0],
-         (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
-
-   auto cf_op = cf_mem_rat;
-   //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
-   auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
-                                   values, addr_vec, m_ssbo_image_offset, rat_id, 1,
-                                   1, 0, false);
-   emit_instruction(store);
-   m_store_ops.push_back(store);
-
-   for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
-      emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN  ?  last_write : write));
-      emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
-                                          {addr_vec.reg_i(0), Value::one_i}, last_write));
-      store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
-                                 temp2, addr_vec, m_ssbo_image_offset, rat_id, 1,
-                                 1, 0, false);
-      emit_instruction(store);
-      if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
-         m_store_ops.push_back(store);
-   }
-
-   return true;
-}
-
-bool
-EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
-{
-   int imageid = 0;
-   PValue image_offset;
-
-   if (nir_src_is_const(intrin->src[0]))
-      imageid = nir_src_as_int(intrin->src[0]);
-   else
-      image_offset = from_nir(intrin->src[0], 0);
-
-   auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
-   auto undef = from_nir(intrin->src[2], 0);
-   auto value = vec_from_nir_with_fetch_constant(intrin->src[3],  0xf, {0,1,2,3});
-   auto unknown  = from_nir(intrin->src[4], 0);
-
-   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
-       nir_intrinsic_image_array(intrin)) {
-      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
-   }
-
-   auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
-   auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
-                                   image_offset, 1, 0xf, 0, false);
-
-   //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
-      m_store_ops.push_back(store);
-
-   emit_instruction(store);
-   return true;
-}
-
-/* Emit an SSBO atomic operation as a RAT instruction.
- *
- * src[0] selects the buffer (a constant id or a dynamic offset), src[1] is
- * the address, and src[2] (plus src[3] for compare-and-swap) carries the
- * operand(s). When the result is consumed, the RAT instruction is followed
- * by a WaitAck and a vc_fetch that reads the value back.
- *
- * Always returns true.
- */
-bool
-EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
-{
-   int imageid = 0;
-   PValue image_offset;
-
-   if (nir_src_is_const(intrin->src[0]))
-      imageid = nir_src_as_int(intrin->src[0]);
-   else
-      image_offset = from_nir(intrin->src[0], 0);
-
-   /* A non-SSA destination is always treated as "result needed"; an SSA
-    * destination only when it has uses. Otherwise the write-only opcode
-    * variant is selected and no read-back is emitted. */
-   bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
-   auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
-                               get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
-
-   auto coord_orig =  from_nir(intrin->src[1], 0, 0);
-   auto coord = get_temp_register(0);
-
-   /* The address is shifted right by two bits before it is used
-    * (presumably byte offset -> dword index; confirm against the caller). */
-   emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
-
-   if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
-      /* src[3] goes to component 0, src[2] to component 2 on CAYMAN and to
-       * component 3 on all other chips. */
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
-                                          from_nir(intrin->src[3], 0), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
-                                          from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
-   } else {
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
-                                          from_nir(intrin->src[2], 0), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
-   }
-
-
-   GPRVector out_vec({coord, coord, coord, coord});
-
-   auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
-                                   image_offset, 1, 0xf, 0, true);
-   emit_instruction(atomic);
-
-   if (read_result) {
-      emit_instruction(new WaitAck(0));
-
-      /* read_result may be true for a non-SSA destination, so the component
-       * count must not be read from dest.ssa directly;
-       * nir_dest_num_components() handles both destination kinds. */
-      GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
-      auto fetch = new FetchInstruction(vc_fetch,
-                                        no_index_offset,
-                                        fmt_32,
-                                        vtx_nf_int,
-                                        vtx_es_none,
-                                        m_rat_return_address.reg_i(1),
-                                        dest,
-                                        0,
-                                        false,
-                                        0xf,
-                                        R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
-                                        0,
-                                        bim_none,
-                                        false,
-                                        false,
-                                        0,
-                                        0,
-                                        0,
-                                        image_offset,
-                                        {0,7,7,7});
-      fetch->set_flag(vtx_srf_mode);
-      fetch->set_flag(vtx_use_tc);
-      fetch->set_flag(vtx_vpm);
-      emit_instruction(fetch);
-   }
-
-   return true;
-
-}
-
-/* Emit an image load or image atomic as a RAT instruction.
- *
- * Handles every intrinsic routed here: a plain image_load has no data
- * operand, compare-and-swap has two, all other atomics have one. When the
- * result is used, the value is read back through fetch_return_value().
- */
-bool
-EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
-{
-   int imageid = 0;
-   PValue image_offset;
-
-   /* The image is addressed either by a constant id or by a dynamic offset. */
-   if (nir_src_is_const(intrin->src[0]))
-      imageid = nir_src_as_int(intrin->src[0]);
-   else
-      image_offset = from_nir(intrin->src[0], 0);
-
-   const bool result_is_used = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
-   const auto format = nir_intrinsic_format(intrin);
-   auto rat_op = result_is_used ? get_rat_opcode(intrin->intrinsic, format) :
-                                  get_rat_opcode_wo(intrin->intrinsic, format);
-
-   GPRVector::Swizzle swz = {0,1,2,3};
-   auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
-
-   const bool is_1d_array = nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
-                            nir_intrinsic_image_array(intrin);
-   if (is_1d_array) {
-      /* Exchange the y and z components of the coordinate; only the second
-       * move carries alu_last_instr. */
-      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
-   }
-
-   switch (intrin->intrinsic) {
-   case nir_intrinsic_image_load:
-      /* A plain load has no data operand to set up. */
-      break;
-   case nir_intrinsic_image_atomic_comp_swap:
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
-                                          from_nir(intrin->src[4], 0), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
-                                          from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
-      break;
-   default:
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
-                                          from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
-      break;
-   }
-
-   /* The CF opcode is always cf_mem_rat; ACCESS_COHERENT is not evaluated here. */
-   auto rat = new RatInstruction(cf_mem_rat, rat_op, m_rat_return_address, coord, imageid,
-                                 image_offset, 1, 0xf, 0, true);
-   emit_instruction(rat);
-
-   if (!result_is_used)
-      return true;
-   return fetch_return_value(intrin);
-}
-
-/* Wait for the acknowledgement of the preceding RAT operation and fetch its
- * result into the destination of the intrinsic.
- *
- * The fetch reads from component 1 of m_rat_return_address and uses the data
- * format that r600_vertex_data_type() derives from the image format.
- * Always returns true.
- */
-bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
-{
-   emit_instruction(new WaitAck(0));
-
-   int imageid = 0;
-   PValue image_offset;
-   if (nir_src_is_const(intrin->src[0]))
-      imageid = nir_src_as_int(intrin->src[0]);
-   else
-      image_offset = from_nir(intrin->src[0], 0);
-
-   /* fmt_32 is the starting value; r600_vertex_data_type() may overwrite it. */
-   unsigned data_format = fmt_32;
-   unsigned num_format = 0;
-   unsigned format_comp = 0;
-   unsigned endian_swap = 0;
-   r600_vertex_data_type(nir_intrinsic_format(intrin), &data_format, &num_format,
-                         &format_comp, &endian_swap);
-
-   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
-
-   auto fetch = new FetchInstruction(vc_fetch,
-                                     no_index_offset,
-                                     (EVTXDataFormat)data_format,
-                                     (EVFetchNumFormat)num_format,
-                                     (EVFetchEndianSwap)endian_swap,
-                                     m_rat_return_address.reg_i(1),
-                                     dest,
-                                     0,
-                                     false,
-                                     0x3,
-                                     R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
-                                     0,
-                                     bim_none,
-                                     false,
-                                     false,
-                                     0,
-                                     0,
-                                     0,
-                                     image_offset, {0,1,2,3});
-   fetch->set_flag(vtx_srf_mode);
-   fetch->set_flag(vtx_use_tc);
-   fetch->set_flag(vtx_vpm);
-   if (format_comp != 0)
-      fetch->set_flag(vtx_format_comp_signed);
-
-   emit_instruction(fetch);
-   return true;
-}
-
-/* Emit the size query for an image.
- *
- * Buffer images are queried with a FetchInstruction, all other dimensions
- * with a get_resinfo TexInstruction. For cube arrays with more than two
- * result components, the third component is additionally loaded from the
- * buffer-info constant buffer. Always returns true.
- */
-bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
-{
-   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
-   GPRVector src{0,{4,4,4,4}};
-
-   assert(nir_src_as_uint(intrin->src[1]) == 0);
-
-   auto const_offset = nir_src_as_const_value(intrin->src[0]);
-   PValue dyn_offset;
-   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
-   if (const_offset)
-      res_id += const_offset[0].u32;
-   else
-      dyn_offset = from_nir(intrin->src[0], 0);
-
-   const auto dim = nir_intrinsic_image_dim(intrin);
-
-   if (dim == GLSL_SAMPLER_DIM_BUF) {
-      emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
-                       res_id,
-                       bim_none));
-      return true;
-   }
-
-   emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
-                                          0/* ?? */,
-                                          res_id, dyn_offset));
-
-   const bool needs_layer_count = dim == GLSL_SAMPLER_DIM_CUBE &&
-                                  nir_intrinsic_image_array(intrin) &&
-                                  nir_dest_num_components(intrin->dest) > 2;
-   if (!needs_layer_count)
-      return true;
-
-   /* Need to load the layers from a const buffer */
-   set_has_txs_cube_array_comp();
-
-   if (const_offset) {
-      unsigned lookup_resid = const_offset[0].u32;
-      emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
-                                          PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
-                                                                  R600_BUFFER_INFO_CONST_BUFFER)),
-                                          EmitInstruction::last_write));
-      return true;
-   }
-
-   /* With indirect addressing the z-value is fetched as part of a vec4 and
-    * the right component is then picked with conditional moves. */
-   GPRVector trgt;
-   GPRVector help;
-
-   auto addr = help.reg_i(0);
-   auto comp = help.reg_i(1);
-   auto low_bit = help.reg_i(2);
-   auto high_bit = help.reg_i(3);
-
-   /* addr = index >> 2 selects the vec4, comp = index & 3 the component */
-   emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0),
-                    literal(2), EmitInstruction::write));
-   emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0),
-                    literal(3), EmitInstruction::last_write));
-
-   emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL,
-                                         R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none));
-
-   /* NOTE(review): low_bit and high_bit are read as select conditions below,
-    * but no instruction in this function writes them first - confirm. */
-   emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2),
-                                       EmitInstruction::write));
-   emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3),
-                                       EmitInstruction::last_write));
-
-   emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write));
-   return true;
-}
-
-/* Emit the size query for a buffer resource.
- *
- * Only a constant buffer index (src[0]) is supported. For a dynamic index
- * the function asserts in debug builds and returns false, so that release
- * builds (where the assert compiles out) no longer emit a fetch against the
- * unadjusted base resource id.
- *
- * Destination components beyond the width of the NIR destination are
- * requested with component index 7.
- *
- * Returns true when the fetch was emitted, false for a dynamic buffer index.
- */
-bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
-{
-   auto const_offset = nir_src_as_const_value(intr->src[0]);
-   if (!const_offset) {
-      assert(0 && "dynamic buffer offset not supported in buffer_size");
-      return false;
-   }
-
-   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
-   res_id += const_offset[0].u32;
-
-   /* nir_dest_num_components() works for SSA and register destinations alike */
-   const unsigned ncomp = nir_dest_num_components(intr->dest);
-
-   std::array<PValue,4> dst_elms;
-   for (uint16_t i = 0; i < 4; ++i) {
-      dst_elms[i] = from_nir(intr->dest, (i < ncomp) ? i : 7);
-   }
-
-   GPRVector dst(dst_elms);
-
-   emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
-                    res_id, bim_none));
-
-   return true;
-}
-
-/* Request an acknowledgement from every recorded store, emit one WaitAck
- * for them and forget the recorded stores. Without recorded stores nothing
- * is emitted. Always returns true.
- */
-bool EmitSSBOInstruction::make_stores_ack_and_waitack()
-{
-   if (m_store_ops.empty())
-      return true;
-
-   for (RatInstruction *pending_store : m_store_ops)
-      pending_store->set_ack();
-
-   emit_instruction(new WaitAck(0));
-   m_store_ops.clear();
-
-   return true;
-}
-
-/* Build a four-component vector from components 0..3 of the intrinsic's
- * destination. */
-GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
-{
-   GPRVector::Values components;
-   for (int chan = 0; chan < 4; ++chan) {
-      components[chan] = from_nir(ir->dest, chan);
-   }
-   return GPRVector(components);
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
+++ b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
@ -1,60 +0,0 @@
-#ifndef SFN_EMITSSBOINSTRUCTION_H
-#define SFN_EMITSSBOINSTRUCTION_H
-
-#include "sfn_emitinstruction.h"
-#include "sfn_instruction_gds.h"
-#include "sfn_value_gpr.h"
-
-namespace r600 {
-
-/* Emitter for SSBO, image and atomic intrinsics; derives from
- * EmitInstruction and is constructed with the ShaderFromNirProcessor. */
-class EmitSSBOInstruction: public EmitInstruction {
-public:
-   EmitSSBOInstruction(ShaderFromNirProcessor& processor);
-
-   void set_ssbo_offset(int offset);
-
-   void set_require_rat_return_address();
-   bool load_rat_return_address();
-   bool load_atomic_inc_limits();
-
-private:
-   /* Entry point for a NIR instruction (presumably the hook called by the
-    * EmitInstruction base class - confirm). */
-   bool do_emit(nir_instr *instr);
-
-   bool emit_atomic(const nir_intrinsic_instr* instr);
-   bool emit_unary_atomic(const nir_intrinsic_instr* instr);
-   bool emit_atomic_inc(const nir_intrinsic_instr* instr);
-   bool emit_atomic_pre_dec(const nir_intrinsic_instr* instr);
-
-   bool emit_load_ssbo(const nir_intrinsic_instr* instr);
-   bool emit_store_ssbo(const nir_intrinsic_instr* instr);
-
-   /* Image and SSBO operations that are emitted as RAT/fetch/tex instructions */
-   bool emit_image_size(const nir_intrinsic_instr *intrin);
-   bool emit_image_load(const nir_intrinsic_instr *intrin);
-   bool emit_image_store(const nir_intrinsic_instr *intrin);
-   bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin);
-   bool emit_buffer_size(const nir_intrinsic_instr *intrin);
-
-   /* Emits a WaitAck followed by the fetch that reads back the result of a
-    * RAT operation */
-   bool fetch_return_value(const nir_intrinsic_instr *intrin);
-
-   /* Sets the ack flag on all stores in m_store_ops, emits one WaitAck if
-    * there were any, and clears the list */
-   bool make_stores_ack_and_waitack();
-
-   ESDOp get_opcode(nir_intrinsic_op opcode) const;
-   ESDOp get_opcode_wo(const nir_intrinsic_op opcode) const;
-
-   /* RAT opcode lookup; the _wo variant is used when the result of the
-    * operation is not read back */
-   RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const;
-   RatInstruction::ERatOp get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const;
-
-
-   /* Builds a four-component vector from the destination of the intrinsic */
-   GPRVector make_dest(const nir_intrinsic_instr* instr);
-
-   PGPRValue m_atomic_update;
-
-   bool m_require_rat_return_address;
-   /* Register vector that carries the operands of RAT operations; results
-    * are fetched using its component 1 */
-   GPRVector m_rat_return_address;
-   /* Added to the buffer id when an SSBO atomic is emitted as RAT operation */
-   int m_ssbo_image_offset;
-   /* Store instructions that still need the ack flag, see
-    * make_stores_ack_and_waitack() */
-   std::vector<RatInstruction *> m_store_ops;
-};
-
-}
-
-#endif // SFN_EMITSSBOINSTRUCTION_H
--- a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp
@ -1,671 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_emittexinstruction.h"
-#include "sfn_shader_base.h"
-#include "sfn_instruction_fetch.h"
-
-namespace r600 {
-
-/* Forward the processor to the EmitInstruction base; no own state is set up. */
-EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor)
-   : EmitInstruction(processor)
-{
-}
-
-/* Dispatch a NIR texture instruction to the emitter for its opcode.
- *
- * Returns false when the sources cannot be collected or when the opcode is
- * not handled for the sampler dimension; otherwise the result of the
- * selected emitter.
- */
-bool EmitTexInstruction::do_emit(nir_instr* instr)
-{
-   nir_tex_instr* tex = nir_instr_as_tex(instr);
-
-   TexInputs inputs;
-   if (!get_inputs(*tex, inputs))
-      return false;
-
-   /* Buffer textures only support texel fetch and size query. */
-   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-      if (tex->op == nir_texop_txf)
-         return emit_buf_txf(tex, inputs);
-      if (tex->op == nir_texop_txs)
-         return emit_tex_txs(tex, inputs, {0,1,2,3});
-      return false;
-   }
-
-   switch (tex->op) {
-   case nir_texop_tex:
-      return emit_tex_tex(tex, inputs);
-   case nir_texop_txf:
-      return emit_tex_txf(tex, inputs);
-   case nir_texop_txb:
-      return emit_tex_txb(tex, inputs);
-   case nir_texop_txl:
-      return emit_tex_txl(tex, inputs);
-   case nir_texop_txd:
-      return emit_tex_txd(tex, inputs);
-   case nir_texop_txs:
-      return emit_tex_txs(tex, inputs, {0,1,2,3});
-   case nir_texop_lod:
-      return emit_tex_lod(tex, inputs);
-   case nir_texop_tg4:
-      return emit_tex_tg4(tex, inputs);
-   case nir_texop_txf_ms:
-      return emit_tex_txf_ms(tex, inputs);
-   case nir_texop_query_levels:
-      /* The level count is taken from component 3 of the size query. */
-      return emit_tex_txs(tex, inputs, {3,7,7,7});
-   case nir_texop_texture_samples:
-      return emit_tex_texture_samples(tex, inputs, {3,7,7,7});
-   default:
-      return false;
-   }
-}
-
-/* Emit a texel fetch from a buffer texture as a vc_fetch that is addressed
- * by the x component of the coordinate. Always returns true. */
-bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src)
-{
-   auto dest = make_dest(*instr);
-   const auto resource_id = instr->texture_index +  R600_MAX_CONST_BUFFERS;
-
-   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest,
-                                     src.coord.reg_i(0), 0, resource_id,
-                                     src.texture_offset, bim_none);
-   fetch->set_flag(vtx_use_const_field);
-
-   emit_instruction(fetch);
-   return true;
-}
-
-/* Emit a regular texture sample; shadow samplers use sample_c with the
- * comparator moved into the w component of the coordinate.
- * Always returns true. */
-bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src)
-{
-
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect);
-
-   const bool is_shadow = instr->is_shadow;
-   if (is_shadow)  {
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
-                       {alu_last_instr, alu_write}));
-   }
-   auto tex_op = is_shadow ? TexInstruction::sample_c : TexInstruction::sample;
-
-   auto dest = make_dest(*instr);
-   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;
-   auto tex = new TexInstruction(tex_op, dest, src.coord, sampler.id,
-                                 resource_id, src.sampler_offset);
-   if (instr->is_array)
-      handle_array_index(*instr, src.coord, tex);
-
-   set_rect_coordinate_flags(instr, tex);
-   set_offsets(tex, src.offset);
-
-   emit_instruction(tex);
-   return true;
-}
-
-/* Emit a sample with explicit derivatives: the horizontal and vertical
- * gradients are set with two helper instructions that write no destination,
- * followed by the actual sample_g (or sample_c_g for shadow samplers).
- * Always returns true. */
-bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto dest = make_dest(*instr);
-
-   /* The gradient setters have all destination components masked out. */
-   GPRVector no_dest(0,{7,7,7,7});
-
-   const bool is_shadow = instr->is_shadow;
-   if (is_shadow)  {
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
-                       {alu_last_instr, alu_write}));
-   }
-   auto tex_op = is_shadow ? TexInstruction::sample_c_g : TexInstruction::sample_g;
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;
-
-   TexInstruction *grad_h = new TexInstruction(TexInstruction::set_gradient_h, no_dest, src.ddx,
-                                               sampler.id, resource_id, src.sampler_offset);
-   grad_h->set_dest_swizzle({7,7,7,7});
-
-   TexInstruction *grad_v = new TexInstruction(TexInstruction::set_gradient_v, no_dest, src.ddy,
-                                               sampler.id, resource_id, src.sampler_offset);
-   grad_v->set_dest_swizzle({7,7,7,7});
-
-   TexInstruction *sample = new TexInstruction(tex_op, dest, src.coord, sampler.id,
-                                               resource_id, src.sampler_offset);
-   if (instr->is_array)
-      handle_array_index(*instr, src.coord, sample);
-
-   set_rect_coordinate_flags(instr, sample);
-   set_offsets(sample, src.offset);
-
-   emit_instruction(grad_h);
-   emit_instruction(grad_v);
-   emit_instruction(sample);
-   return true;
-}
-
-/* Emit a texel fetch (ld). The LOD is placed in the w component of the
- * coordinate and texel offsets are added to the coordinate with integer
- * adds before the fetch. Always returns true. */
-bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto dest = make_dest(*instr);
-
-   /* Move the LOD into coord.w, or just reference it when it already lives
-    * in the same register as the coordinate. */
-   const bool lod_in_place = *src.coord.reg_i(3) == *src.lod;
-   if (!lod_in_place) {
-      if (src.coord.sel() == src.lod->sel())
-         src.coord.set_reg_i(3, src.lod);
-      else
-         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr}));
-   }
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect);
-
-   /* txf doesn't need rounding for the array index, but 1D has the array index
-    * in the z component */
-   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
-      src.coord.set_reg_i(2, src.coord.reg_i(1));
-
-   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;
-   auto fetch = new TexInstruction(TexInstruction::ld, dest, src.coord,
-                                   sampler.id, resource_id, src.sampler_offset);
-
-   if (src.offset) {
-      assert(src.offset->is_ssa);
-      AluInstruction *last_add = nullptr;
-      const unsigned noffsets = src.offset->ssa->num_components;
-      for (unsigned c = 0; c < noffsets; ++c) {
-         last_add = new AluInstruction(op2_add_int, src.coord.reg_i(c),
-                        {src.coord.reg_i(c), from_nir(*src.offset, c, c)}, {alu_write});
-         emit_instruction(last_add);
-      }
-      /* Close the ALU group on the last emitted add. */
-      if (last_add)
-         last_add->set_flag(alu_last_instr);
-   }
-
-   if (instr->is_array)
-      fetch->set_flag(TexInstruction::z_unnormalized);
-
-   emit_instruction(fetch);
-   return true;
-}
-
-/* Emit a LOD query (get_tex_lod); the destination swizzle exchanges the
- * first two result components and masks out the others.
- * Always returns true. */
-bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src)
-{
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   auto dest = make_dest(*instr);
-   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;
-
-   auto query = new TexInstruction(TexInstruction::get_tex_lod, dest, src.coord,
-                                   sampler.id, resource_id, src.sampler_offset);
-   query->set_dest_swizzle({1,0,7,7});
-   emit_instruction(query);
-
-   return true;
-
-}
-
-/* Emit a sample with explicit LOD (sample_l, or sample_c_l for shadow
- * samplers). The comparator goes to coord.z and the LOD to coord.w; a move
- * is only emitted when the value lives in a different register than the
- * coordinate. Always returns true. */
-bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   const bool is_shadow = instr->is_shadow;
-   if (is_shadow)  {
-      if (src.coord.sel() == src.comperator->sel())
-         src.coord.set_reg_i(2, src.comperator);
-      else
-         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
-   }
-   auto tex_op = is_shadow ? TexInstruction::sample_c_l : TexInstruction::sample_l;
-
-   if (src.coord.sel() == src.lod->sel())
-      src.coord.set_reg_i(3, src.lod);
-   else
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {last_write}));
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   auto dest = make_dest(*instr);
-   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;
-   auto tex = new TexInstruction(tex_op, dest, src.coord, sampler.id,
-                                 resource_id, src.sampler_offset);
-
-   if (instr->is_array)
-      handle_array_index(*instr, src.coord, tex);
-
-   set_rect_coordinate_flags(instr, tex);
-   set_offsets(tex, src.offset);
-
-   emit_instruction(tex);
-   return true;
-}
-
-/* Emit a sample with LOD bias (sample_lb, or sample_c_lb for shadow
- * samplers). The comparator goes to coord.z and the bias to coord.w; a move
- * is only emitted when the value lives in a different register than the
- * coordinate. Always returns true. */
-bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src)
-{
-   std::array<uint8_t, 4> identity_swizzle = {0,1,2,3};
-
-   const bool is_shadow = instr->is_shadow;
-   if (is_shadow) {
-      if (src.coord.sel() == src.comperator->sel())
-         src.coord.set_reg_i(2, src.comperator);
-      else
-         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
-   }
-   auto tex_op = is_shadow ? TexInstruction::sample_c_lb : TexInstruction::sample_lb;
-
-   if (src.coord.sel() == src.bias->sel())
-      src.coord.set_reg_i(3, src.bias);
-   else
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, {last_write}));
-
-   /* The texture source is built only after coord.z/w were finalized. */
-   GPRVector tex_src(src.coord, identity_swizzle);
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   auto dest = make_dest(*instr);
-   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;
-   auto tex = new TexInstruction(tex_op, dest, tex_src, sampler.id,
-                                 resource_id, src.sampler_offset);
-   if (instr->is_array)
-      handle_array_index(*instr, tex_src, tex);
-
-   set_rect_coordinate_flags(instr, tex);
-   set_offsets(tex, src.offset);
-
-   emit_instruction(tex);
-   return true;
-}
-
-/* Emit a texture size query.
- *
- * Buffer textures are queried with a FetchInstruction; everything else uses
- * get_resinfo with the LOD replicated into all four source components and
- * the result swizzled by dest_swz. For cube arrays the third result
- * component is overwritten with a value read from the buffer-info constant
- * buffer. Always returns true.
- */
-bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src,
-                                      const std::array<int,4>& dest_swz)
-{
-   /* Destination components beyond the NIR destination width use index 7. */
-   std::array<PValue,4> dst_elms;
-   for (uint16_t i = 0; i < 4; ++i)
-      dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? i : 7);
-
-   GPRVector dst(dst_elms);
-
-   if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-      emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
-                       instr->sampler_index + R600_MAX_CONST_BUFFERS,
-                       bim_none));
-      return true;
-   }
-
-   std::array<PValue,4> lod_elms;
-   lod_elms.fill(tex_src.lod);
-   GPRVector lod_vec(lod_elms);
-
-   auto sampler = get_sampler_id(instr->sampler_index, tex_src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   auto resinfo = new TexInstruction(TexInstruction::get_resinfo, dst, lod_vec,
-                                     sampler.id,
-                                     sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
-   resinfo->set_dest_swizzle(dest_swz);
-   emit_instruction(resinfo);
-
-   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
-      PValue layer_count(new UniformValue(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2),
-                                          sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER));
-
-      emit_instruction(new AluInstruction(op1_mov, dst[2], layer_count, {last_write}));
-      set_has_txs_cube_array_comp();
-   }
-
-   return true;
-
-}
-
-/* Emit the query for the number of samples of a texture (get_nsampled).
- *
- * The resource id is derived from instr->sampler_index directly and the
- * result is swizzled according to dest_swz. Always returns true.
- */
-bool EmitTexInstruction::emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
-                                                  const std::array<int, 4> &dest_swz)
-{
-   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
-   GPRVector help{0,{4,4,4,4}};
-
-   int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index;
-
-   auto ir = new TexInstruction(TexInstruction::get_nsampled, dest, help,
-                                0, res_id, src.sampler_offset);
-   ir->set_dest_swizzle(dest_swz);
-   emit_instruction(ir);
-   return true;
-}
-
-/* Emit a texture gather (gather4 / gather4_c).
- *
- * Constant offsets are attached to the gather instruction itself; varying
- * offsets switch to the *_o opcode and are passed with a preceding
- * set_offsets instruction. Always returns true.
- */
-bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   TexInstruction *offset_setter = nullptr;
-
-   const bool is_shadow = instr->is_shadow;
-   if (is_shadow)  {
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
-                       {alu_last_instr, alu_write}));
-   }
-   auto tex_op = is_shadow ? TexInstruction::gather4_c : TexInstruction::gather4;
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;
-
-   bool literal_offset = false;
-   if (src.offset) {
-      literal_offset = nir_src_as_const_value(*src.offset) != nullptr;
-      r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
-                       (literal_offset ? "literal" : "varying") <<
-                       "\n";
-
-      if (!literal_offset) {
-         GPRVector::Swizzle swizzle = {4,4,4,4};
-         for (unsigned c = 0; c < instr->coord_components; ++c)
-            swizzle[c] = c;
-
-         /* The array index is a coordinate component but takes no offset. */
-         int noffsets = instr->coord_components;
-         noffsets -= instr->is_array ? 1 : 0;
-
-         auto ofs = vec_from_nir_with_fetch_constant(*src.offset,
-                                                     ( 1 << noffsets) - 1,
-                                                     swizzle);
-         GPRVector no_dest(0, {7,7,7,7});
-         tex_op = is_shadow ? TexInstruction::gather4_c_o : TexInstruction::gather4_o;
-
-         offset_setter = new TexInstruction(TexInstruction::set_offsets, no_dest,
-                                            ofs, sampler.id,
-                                            resource_id, src.sampler_offset);
-         offset_setter->set_dest_swizzle({7,7,7,7});
-      }
-   }
-
-   auto dest = make_dest(*instr);
-   auto gather = new TexInstruction(tex_op, dest, src.coord, sampler.id,
-                                    resource_id, src.sampler_offset);
-
-   /* pre CAYMAN needs swizzle */
-   if (get_chip_class() != CAYMAN)
-      gather->set_dest_swizzle({1,2,0,3});
-   gather->set_gather_comp(instr->component);
-
-   if (instr->is_array)
-      handle_array_index(*instr, src.coord, gather);
-
-   if (literal_offset) {
-      r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
-      set_offsets(gather, src.offset);
-   }
-
-   set_rect_coordinate_flags(instr, gather);
-
-   /* The offset setter, if any, has to precede the gather itself. */
-   if (offset_setter)
-      emit_instruction(offset_setter);
-
-   emit_instruction(gather);
-   return true;
-}
-
-/* Emit a texel fetch from a multisample texture.
- *
- * A first ld with inst_mode 1 loads a word into a temporary register, which
- * is then shifted right by 4 * ms_index (unless ms_index is the literal 0)
- * and masked with 15; the result is written to coord.w and used by the
- * second ld that fetches the texel. Always returns true.
- */
-bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src)
-{
-   assert(instr->src[0].src.is_ssa);
-
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   const auto resource_id = sampler.id + R600_MAX_CONST_BUFFERS;
-
-   /* Only the channel of the temporary register receives the first result
-    * component; all other destination channels are masked. */
-   PGPRValue sample_id_reg = get_temp_register();
-   GPRVector sample_id_vec(sample_id_reg->sel(), {7,7,7,7});
-   sample_id_vec.set_reg_i(sample_id_reg->chan(), sample_id_reg);
-   std::array<int,4> sample_id_swz = {7,7,7,7};
-   sample_id_swz[sample_id_reg->chan()] = 0;
-
-   emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3),
-                                       src.ms_index,
-                                       {alu_write, alu_last_instr}));
-
-   auto sample_id_fetch = new TexInstruction(TexInstruction::ld, sample_id_vec, src.coord,
-                                             sampler.id,
-                                             resource_id, src.sampler_offset);
-   sample_id_fetch->set_flag(TexInstruction::x_unnormalized);
-   sample_id_fetch->set_flag(TexInstruction::y_unnormalized);
-   sample_id_fetch->set_flag(TexInstruction::z_unnormalized);
-   sample_id_fetch->set_flag(TexInstruction::w_unnormalized);
-   sample_id_fetch->set_inst_mode(1);
-
-   sample_id_fetch->set_dest_swizzle(sample_id_swz);
-
-   emit_instruction(sample_id_fetch);
-
-   /* The shift can be skipped when the sample index is the literal zero. */
-   const bool index_is_literal_zero =
-         src.ms_index->type() == Value::literal &&
-         static_cast<const LiteralValue&>(*src.ms_index).value() == 0;
-   if (!index_is_literal_zero) {
-      PValue shift = get_temp_register();
-
-      emit_instruction(new AluInstruction(op2_lshl_int, shift,
-                                          src.ms_index, literal(2),
-                                          {alu_write, alu_last_instr}));
-
-      emit_instruction(new AluInstruction(op2_lshr_int, sample_id_reg,
-                                          {sample_id_reg, shift},
-                                          {alu_write, alu_last_instr}));
-   }
-
-   emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3),
-                                       {sample_id_reg, PValue(new LiteralValue(15))},
-                                       {alu_write, alu_last_instr}));
-
-   auto dest = make_dest(*instr);
-
-   /* txf doesn't need rounding for the array index, but 1D has the array index
-    * in the z component */
-   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
-      src.coord.set_reg_i(2, src.coord.reg_i(1));
-
-   auto texel_fetch = new TexInstruction(TexInstruction::ld, dest, src.coord,
-                                         sampler.id,
-                                         resource_id, src.sampler_offset);
-
-   if (src.offset) {
-      assert(src.offset->is_ssa);
-      AluInstruction *last_add = nullptr;
-      const unsigned noffsets = src.offset->ssa->num_components;
-      for (unsigned c = 0; c < noffsets; ++c) {
-         last_add = new AluInstruction(op2_add_int, src.coord.reg_i(c),
-                        {src.coord.reg_i(c), from_nir(*src.offset, c, c)}, {alu_write});
-         emit_instruction(last_add);
-      }
-      /* Close the ALU group on the last emitted add. */
-      if (last_add)
-         last_add->set_flag(alu_last_instr);
-   }
-
-   emit_instruction(texel_fetch);
-   return true;
-}
-
-bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src)
-{
-   sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
-
-   unsigned grad_components = instr.coord_components;
-   if (instr.is_array && !instr.array_is_lowered_cube)
-      --grad_components;
-
-
-   src.offset = nullptr;
-   bool retval = true;
-   for (unsigned i = 0; i < instr.num_srcs; ++i) {
-      switch (instr.src[i].src_type) {
-      case nir_tex_src_bias:
-         src.bias = from_nir(instr.src[i], 0);
-         break;
-
-      case nir_tex_src_coord: {
-         src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src,
-                                                      (1 << instr.coord_components) - 1,
-         {0,1,2,3});
-      } break;
-      case nir_tex_src_comparator:
-         src.comperator = from_nir(instr.src[i], 0);
-         break;
-      case nir_tex_src_ddx: {
-         sfn_log << SfnLog::tex << "Get DDX ";
-         src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src,
-                                                    (1 << grad_components) - 1,
-                                                    swizzle_from_comps(grad_components));
-         sfn_log << SfnLog::tex << src.ddx << "\n";
-      } break;
-      case nir_tex_src_ddy:{
-         sfn_log << SfnLog::tex << "Get DDY ";
-         src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src,
-                                                    (1 << grad_components) - 1,
-                                                    swizzle_from_comps(grad_components));
-         sfn_log << SfnLog::tex << src.ddy << "\n";
-      }  break;
-      case nir_tex_src_lod:
-         src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0);
-         break;
-      case nir_tex_src_offset:
-         sfn_log << SfnLog::tex << "  -- Find offset\n";
-         src.offset = &instr.src[i].src;
-         break;
-      case nir_tex_src_sampler_deref:
-         src.sampler_deref = get_deref_location(instr.src[i].src);
-         break;
-      case nir_tex_src_texture_deref:
-         src.texture_deref = get_deref_location(instr.src[i].src);
-         break;
-      case nir_tex_src_ms_index:
-         src.ms_index = from_nir(instr.src[i], 0);
-         break;
-      case nir_tex_src_texture_offset:
-         src.texture_offset = from_nir(instr.src[i], 0);
-         break;
-      case nir_tex_src_sampler_offset:
-         src.sampler_offset = from_nir(instr.src[i], 0);
-         break;
-      case nir_tex_src_plane:
-      case nir_tex_src_projector:
-      case nir_tex_src_min_lod:
-      default:
-         sfn_log << SfnLog::tex << "Texture source type " <<  instr.src[i].src_type << " not supported\n";
-         retval = false;
-      }
-   }
-   return retval;
-}
-
-GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr)
-{
-   int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
-                                                 instr.dest.reg.reg->num_components;
-   std::array<PValue,4> dst_elms;
-   for (uint16_t i = 0; i < 4; ++i)
-      dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7);
-   return GPRVector(dst_elms);
-}
-
-
-GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr,
-                                        const std::array<int, 4>& swizzle)
-{
-   int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
-                                                 instr.dest.reg.reg->num_components;
-   std::array<PValue,4> dst_elms;
-   for (uint16_t i = 0; i < 4; ++i) {
-      int k = swizzle[i];
-      dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? k : 7);
-   }
-   return GPRVector(dst_elms);
-}
-
-void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr,
-                                                   TexInstruction* ir) const
-{
-   if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
-      ir->set_flag(TexInstruction::x_unnormalized);
-      ir->set_flag(TexInstruction::y_unnormalized);
-   }
-}
-
-void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset)
-{
-   if (!offset)
-      return;
-
-   assert(offset->is_ssa);
-   auto literal = nir_src_as_const_value(*offset);
-   assert(literal);
-
-   for (int i = 0; i < offset->ssa->num_components; ++i) {
-      ir->set_offset(i, literal[i].i32);
-   }
-}
-
-void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir)
-{
-   int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2;
-   emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx),
-                                       {alu_last_instr, alu_write}));
-   ir->set_flag(TexInstruction::z_unnormalized);
-}
-
-EmitTexInstruction::SamplerId
-EmitTexInstruction::get_sampler_id(int sampler_id, const nir_variable *deref)
-{
-   EmitTexInstruction::SamplerId result = {sampler_id, false};
-
-   if (deref) {
-      assert(glsl_type_is_sampler(deref->type));
-      result.id = deref->data.binding;
-   }
-   return result;
-}
-
-EmitTexInstruction::TexInputs::TexInputs():
-   sampler_deref(nullptr),
-   texture_deref(nullptr),
-   offset(nullptr)
-{
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h
+++ b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h
@ -1,96 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_EMITTEXINSTRUCTION_H
-#define SFN_EMITTEXINSTRUCTION_H
-
-#include "sfn_emitinstruction.h"
-#include "sfn_instruction_tex.h"
-
-namespace r600  {
-
-class EmitTexInstruction : public EmitInstruction
-{
-public:
-   EmitTexInstruction(ShaderFromNirProcessor& processor);
-
-private:
-   struct TexInputs {
-      TexInputs();
-      const nir_variable *sampler_deref;
-      const nir_variable *texture_deref;
-      GPRVector coord;
-      PValue bias;
-      PValue comperator;
-      PValue lod;
-      GPRVector ddx;
-      GPRVector ddy;
-      nir_src *offset;
-      PValue gather_comp;
-      PValue ms_index;
-      PValue sampler_offset;
-      PValue texture_offset;
-   };
-
-   bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src);
-
-   bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src);
-   bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src,
-                     const std::array<int, 4> &dest_swz);
-   bool emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
-                                 const std::array<int, 4> &dest_swz);
-   bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src);
-   bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src);
-
-   bool get_inputs(const nir_tex_instr& instr, TexInputs &src);
-
-   void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const;
-
-   bool do_emit(nir_instr* instr) override;
-
-   GPRVector make_dest(nir_tex_instr& instr);
-   GPRVector make_dest(nir_tex_instr &instr, const std::array<int, 4> &swizzle);
-
-   void set_offsets(TexInstruction* ir, nir_src *offset);
-   void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir);
-
-   struct SamplerId {
-      int id;
-      bool indirect;
-   };
-
-   SamplerId get_sampler_id(int sampler_id, const nir_variable *deref);
-
-};
-
-}
-
-#endif // SFN_EMITTEXINSTRUCTION_H
--- a/src/gallium/drivers/r600/sfn/sfn_instr.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr.cpp
@ -0,0 +1,522 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_lds.h"
+#include "sfn_instr_tex.h"
+#include "sfn_instr_controlflow.h"
+
+#include <iostream>
+#include <sstream>
+#include <numeric>
+
+namespace r600 {
+
+using std::string;
+using std::vector;
+
+Instr::Instr():
+   m_use_count(0),
+   m_block_id(std::numeric_limits<int>::max()),
+   m_index(std::numeric_limits<int>::max())
+{
+}
+
+/* Out-of-line virtual destructor; there is nothing to release here. */
+Instr::~Instr() = default;
+
+/* Write a textual representation of this instruction to `os` by
+ * forwarding to the virtual do_print() of the concrete class. */
+void Instr::print(std::ostream& os) const
+{
+   do_print(os);
+}
+
+/* An instruction is ready when every instruction it requires is ready
+ * (checked recursively) and its own do_ready() check passes. */
+bool Instr::ready() const
+{
+   for (auto it = m_required_instr.begin(); it != m_required_instr.end(); ++it) {
+      const Instr *required = *it;
+      if (!required->ready())
+         return false;
+   }
+   return do_ready();
+}
+
+/* Parse the integer that follows `prefix` in `str`.
+ *
+ * If `str` does not start with `prefix`, a message is written to std::cerr
+ * and assert(0) is hit; when asserts are compiled out the text after the
+ * first prefix.length() characters is parsed anyway. */
+int int_from_string_with_prefix(const std::string& str, const std::string& prefix)
+{
+   const auto prefix_len = prefix.length();
+
+   if (str.compare(0, prefix_len, prefix) != 0) {
+      std::cerr << "Expect '" << prefix << "' as start of '" << str << "'\n";
+      assert(0);
+   }
+
+   std::stringstream number_stream(str.substr(prefix_len));
+   int value;
+   number_stream >> value;
+   return value;
+}
+
+/* Parse a register vector description such as "R12.xyzw", "S3.x_0_" or
+ * "____.xyzw".
+ *
+ * str:    leading 'R' or 'S' followed by the decimal select index, or a run
+ *         of '_' for an unused vector; then '.' and the swizzle characters
+ *         (x, y, z, w, 0, 1, _).
+ * swz:    receives one entry per swizzle character (x..w -> 0..3, '0' -> 4,
+ *         '1' -> 5, '_' -> 7); entries without a character are left untouched.
+ * is_ssa: set to true when the string starts with 'S'.
+ *
+ * Returns the select index, or INT_MAX for the '_' form.
+ *
+ * Malformed input (empty string, missing '.', too many swizzle characters)
+ * triggers an assert; with asserts compiled out the parser stops at the
+ * offending position instead of reading past the string or writing past
+ * the swizzle array. */
+int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle &swz, bool& is_ssa)
+{
+   /* NOTE(review): four components assumed, matching the 0..3 loops over
+    * swizzles elsewhere in this file - confirm against RegisterVec4::Swizzle. */
+   const int max_swizzle_comps = 4;
+
+   assert(!str.empty());
+   assert(str[0] == 'R' || str[0] == '_' || str[0] == 'S');
+   int sel = 0;
+
+   /* Never form begin() + 1 on an empty string. */
+   auto istr = str.empty() ? str.end() : str.begin() + 1;
+
+   if (str[0] == '_') {
+      while (istr != str.end() && *istr == '_')
+         ++istr;
+      sel = std::numeric_limits<int>::max();
+   } else {
+      /* isdigit() requires a value representable as unsigned char */
+      while (istr != str.end() && isdigit(static_cast<unsigned char>(*istr))) {
+         sel *= 10;
+         sel += *istr - '0';
+         ++istr;
+      }
+   }
+
+   /* Check for the end before dereferencing: a string without '.' would
+    * otherwise read the past-the-end iterator. */
+   assert(istr != str.end() && *istr == '.');
+   if (istr != str.end())
+      istr++;
+
+   int i = 0;
+   while (istr != str.end()) {
+      assert(i < max_swizzle_comps);
+      if (i >= max_swizzle_comps)
+         break;
+
+      switch (*istr) {
+      case 'x': swz[i] = 0; break;
+      case 'y': swz[i] = 1; break;
+      case 'z': swz[i] = 2; break;
+      case 'w': swz[i] = 3; break;
+      case '0': swz[i] = 4; break;
+      case '1': swz[i] = 5; break;
+      case '_': swz[i] = 7; break;
+      default:
+         unreachable("Unknown swizzle character");
+      }
+      ++istr;
+      ++i;
+   }
+
+   is_ssa = str[0] == 'S';
+
+   return sel;
+}
+
+/* Default implementation of the virtual is_last() query: always true. */
+bool Instr::is_last() const
+{
+   return true;
+}
+
+/* Try to mark this instruction as dead.
+ *
+ * Instructions flagged always_keep are left untouched and false is returned.
+ * Otherwise propagate_death() is run first, the dead flag is set
+ * regardless of its outcome, and its result is returned. */
+bool Instr::set_dead()
+{
+   const bool may_die = !m_instr_flags.test(always_keep);
+   if (may_die) {
+      const bool propagated = propagate_death();
+      m_instr_flags.set(dead);
+      return propagated;
+   }
+   return false;
+}
+
+/* Default implementation of the hook called by set_dead(): nothing to
+ * propagate, report success. */
+bool Instr::propagate_death()
+{
+   return true;
+}
+
+/* Default implementation: ignores both arguments, replaces nothing and
+ * returns false. */
+bool Instr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   (void)old_src;
+   (void)new_src;
+   return false;
+}
+
+/* Register `instr` as a prerequisite of this instruction. The link is
+ * recorded in both directions: `instr` lists this instruction as a
+ * dependent, and this instruction lists `instr` as required. */
+void Instr::add_required_instr(Instr *instr)
+{
+   assert(instr);
+   instr->m_dependend_instr.push_back(this);
+   m_required_instr.push_back(instr);
+}
+
+/* Replace every occurrence of `old_instr` in the list of required
+ * instructions with `new_instr`. Only this instruction's required list is
+ * touched; the dependent lists of the two arguments are not updated here. */
+void Instr::replace_required_instr(Instr *old_instr, Instr *new_instr)
+{
+   for (auto& required : m_required_instr) {
+      if (required == old_instr)
+         required = new_instr;
+   }
+}
+
+/* Default implementation: ignores both arguments, replaces nothing and
+ * returns false. */
+bool Instr::replace_dest(PRegister new_dest, r600::AluInstr *move_instr)
+{
+   (void)new_dest;
+   (void)move_instr;
+   return false;
+}
+
+/* Store the id of the containing block and this instruction's index in
+ * it, then pass both on to the forward_set_blockid() hook. */
+void Instr::set_blockid(int id, int index)
+{
+   m_block_id = id;
+   m_index = index;
+   forward_set_blockid(id, index);
+}
+
+
+/* Default implementation of the hook invoked by set_blockid(): does
+ * nothing. */
+void Instr::forward_set_blockid(int id, int index)
+{
+   (void)id;
+   (void)index;
+}
+
+/* Store destination vector and swizzle, and register this instruction as
+ * parent of each destination component whose swizzle value is below 6. */
+InstrWithVectorResult::InstrWithVectorResult(const RegisterVec4& dest,
+                                             const RegisterVec4::Swizzle& dest_swizzle):
+   m_dest(dest),
+   m_dest_swizzle(dest_swizzle)
+{
+   for (int chan = 0; chan < 4; ++chan) {
+      if (m_dest_swizzle[chan] >= 6)
+         continue;
+      m_dest[chan]->add_parent(this);
+   }
+}
+
+/* Print the destination as "<S|R><sel>.<swizzle>": 'S' if the first
+ * component is an SSA value, 'R' otherwise, followed by one character per
+ * swizzle entry taken from VirtualValue::chanchar. */
+void InstrWithVectorResult::print_dest(std::ostream& os) const
+{
+   const char reg_kind = m_dest[0]->is_ssa() ? 'S' : 'R';
+   os << reg_kind << m_dest.sel() << ".";
+
+   for (int chan = 0; chan < 4; ++chan) {
+      os << VirtualValue::chanchar[m_dest_swizzle[chan]];
+   }
+}
+
+/* Return true when all four destination components equal those of `dest`
+ * and all four swizzle entries equal those of `dest_swizzle`. */
+bool InstrWithVectorResult::comp_dest(const RegisterVec4& dest,
+                                      const RegisterVec4::Swizzle& dest_swizzle) const
+{
+   for (int chan = 0; chan < 4; ++chan) {
+      const bool same_reg = m_dest[chan]->equal_to(*dest[chan]);
+      if (!same_reg || m_dest_swizzle[chan] != dest_swizzle[chan])
+         return false;
+   }
+   return true;
+}
+
+/* Print the block framed by "BLOCK START" / "BLOCK END". The frame is
+ * indented by two spaces per nesting level; each instruction is indented
+ * one step further, adjusted by its own nesting_corr(). */
+void Block::do_print(std::ostream& os) const
+{
+   auto indent = [&os](int width) {
+      for (int k = 0; k < width; ++k)
+         os << ' ';
+   };
+
+   indent(2 * m_nesting_depth);
+   os << "BLOCK START\n";
+
+   for (auto& instr : m_instructions) {
+      indent(2 * (m_nesting_depth + instr->nesting_corr()) + 2);
+      os << *instr << "\n";
+   }
+
+   indent(2 * m_nesting_depth);
+   os << "BLOCK END\n";
+}
+
+/* Compare this block with `lhs` (the other block).
+ *
+ * The blocks are equal when id, nesting depth and instruction count match
+ * and the instructions compare equal pairwise, in order, via
+ * Instr::equal_to(). The comparison stops at the first mismatching pair. */
+bool Block::is_equal_to(const Block& lhs) const
+{
+   if (m_id != lhs.m_id || m_nesting_depth != lhs.m_nesting_depth)
+      return false;
+
+   if (m_instructions.size() != lhs.m_instructions.size())
+      return false;
+
+   /* Both lists have the same length here, so the second iterator cannot
+    * run past its end. */
+   auto other = lhs.m_instructions.begin();
+   for (const auto& instr : m_instructions) {
+      if (!instr->equal_to(**other))
+         return false;
+      ++other;
+   }
+   return true;
+}
+
+/* Two blocks differ exactly when Block::is_equal_to() reports false. */
+inline bool operator != (const Block& lhs, const Block& rhs)
+{
+   const bool same = lhs.is_equal_to(rhs);
+   return !same;
+}
+
+/* Remove the instruction that `node` points to from the block's
+ * instruction list. */
+void Block::erase(iterator node)
+{
+   m_instructions.erase(node);
+}
+
+/* Set the block type and reset the slot budget: vtx, gds and tex blocks
+ * get 8 slots, every other type gets 0xffff, the value push_back() treats
+ * as "no limit". */
+void Block::set_type(Type t)
+{
+   m_blocK_type = t;
+
+   const bool has_slot_limit = (t == vtx || t == gds || t == tex);
+   if (has_slot_limit)
+      m_remaining_slots = 8; /* TODO: 16 for >= EVERGREEN */
+   else
+      m_remaining_slots = 0xffff;
+}
+
+/* Create an empty block with the given nesting depth and id; instruction
+ * indices handed out by push_back() start at 0. */
+Block::Block(int nesting_depth, int id):
+   m_nesting_depth(nesting_depth),
+   m_id(id),
+   m_next_index(0)
+{
+   /* a new block must not start out with the force_cf flag set */
+   assert(!has_instr_flag(force_cf));
+}
+
+/* Visitor dispatch (read-only): hands this block to the visitor's
+ * visit(const Block&) overload. */
+void Block::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+/* Visitor dispatch (mutable): hands this block to the visitor's
+ * visit(Block *) overload. */
+void Block::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+/* Append `instr` to the block.
+ *
+ * The instruction is given this block's id and the next free index. If
+ * the block has a slot limit (remaining slots != 0xffff), the
+ * instruction's slot count is subtracted from it. While an LDS group is
+ * open, the slot count is also added to the group's requirement. */
+void Block::push_back(PInst instr)
+{
+   instr->set_blockid(m_id, m_next_index++);
+
+   const bool slots_limited = (m_remaining_slots != 0xffff);
+   if (slots_limited)
+      m_remaining_slots -= instr->slots();
+
+   if (m_lds_group_start != nullptr)
+      m_lds_group_requirement += instr->slots();
+
+   m_instructions.push_back(instr);
+}
+
+/* Try to reserve kcache lines for all constants used by `group`.
+ *
+ * Returns true when every constant could be reserved. On failure the
+ * kcache table is restored to its state on entry, so a rejected group does
+ * not leave lines locked for constants that were reserved before the
+ * failing one. */
+bool Block::try_reserve_kcache(const AluGroup& group)
+{
+   const auto kcache_on_entry = m_kcache;
+
+   auto kcache_constants = group.get_kconsts();
+   for (auto& kc : kcache_constants)  {
+      auto u = kc->as_uniform();
+      assert(u);
+      if (!try_reserve_kcache(*u)) {
+         /* roll back the partial reservation */
+         m_kcache = kcache_on_entry;
+         return false;
+      }
+   }
+   return true;
+}
+
+/* Try to make the kcache line that holds uniform `u` available in this
+ * block's kcache table.
+ *
+ * The line number is (u.sel() - 512) >> 4, the bank is u.kcache_bank().
+ * Occupied entries (mode != 0) are kept ordered by bank and address: the
+ * loop skips entries of lower banks, inserts a new lock_1 entry in front of
+ * an entry that lies behind the wanted line, extends an adjacent entry to
+ * lock_2, or takes the first free entry.
+ *
+ * Returns true when the line is covered afterwards, false when no entry
+ * is left or an entry is in an unsupported mode. The table is modified in
+ * place, including on some failing paths (the d == -1 case). */
+bool Block::try_reserve_kcache(const UniformValue& u)
+{
+   const int kcache_banks = 4; // TODO: handle pre-evergreen
+
+   int bank = u.kcache_bank();
+   int sel  = (u.sel() - 512);
+   int line = sel >> 4;
+
+   /* NOTE(review): `found` is never set to true, so the `!found` part of
+    * the loop condition has no effect. */
+   bool found = false;
+
+   for (int i = 0; i < kcache_banks && !found; ++i) {
+      if (m_kcache[i].mode) {
+         if (m_kcache[i].bank < bank)
+            continue;
+
+         /* The entry lies behind the requested line (same bank but more
+          * than one line further on, or a higher bank): insert here. */
+         if ((m_kcache[i].bank == bank &&
+              m_kcache[i].addr > line  + 1) ||
+             m_kcache[i].bank > bank) {
+            /* last entry occupied: nothing can be shifted up */
+            if (m_kcache[kcache_banks - 1].mode)
+               return false;
+
+            /* shift entries i.. up by one to make room at position i */
+            memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
+            m_kcache[i].mode = KCacheLine::lock_1;
+            m_kcache[i].bank = bank;
+            m_kcache[i].addr = line;
+            return true;
+         }
+
+         /* distance of the requested line from the start of this entry */
+         int d = line - m_kcache[i].addr;
+
+         if (d == -1) {
+            m_kcache[i].addr--;
+            if (m_kcache[i].mode == KCacheLine::lock_2) {
+               /* we are prepending the line to the current set,
+                * discarding the existing second line,
+                * so we'll have to insert line+2 after it */
+               line += 2;
+               continue;
+            } else if (m_kcache[i].mode == KCacheLine::lock_1) {
+               m_kcache[i].mode = KCacheLine::lock_2;
+               return true;
+            } else {
+               /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
+               return false;
+            }
+         } else if (d == 1) {
+            /* requested line directly follows: lock two lines */
+            m_kcache[i].mode = KCacheLine::lock_2;
+            return true;
+         } else if (d == 0)
+            return true;
+      } else { /* free kcache set - use it */
+         m_kcache[i].mode = KCacheLine::lock_1;
+         m_kcache[i].bank = bank;
+         m_kcache[i].addr = line;
+         return true;
+      }
+   }
+   return false;
+}
+
+/* Open an LDS group that starts at `alu` and reset the slot requirement
+ * accumulated by push_back(). Asserts that no group is already open. */
+void Block::lds_group_start(AluInstr *alu)
+{
+   assert(!m_lds_group_start);
+
+   m_lds_group_requirement = 0;
+   m_lds_group_start = alu;
+}
+
+/* Close the open LDS group: pass the accumulated slot requirement to the
+ * instruction that started the group, then forget the group start. */
+void Block::lds_group_end()
+{
+   assert(m_lds_group_start);
+
+   AluInstr *group_start = m_lds_group_start;
+   group_start->set_required_slots(m_lds_group_requirement);
+   m_lds_group_start = nullptr;
+}
+
+/* Copy constructor: copies destination vector and swizzle.
+ * The Instr base is not named in the initializer list, so it is
+ * default-constructed rather than copied from `orig`.
+ * NOTE(review): presumably intentional so that the copy starts without
+ * the original's flags and dependencies - confirm. Unlike the main
+ * constructor, no add_parent() calls are made here. */
+InstrWithVectorResult::InstrWithVectorResult(const InstrWithVectorResult& orig):
+   m_dest(orig.m_dest),
+   m_dest_swizzle(orig.m_dest_swizzle)
+{
+}
+
+/* Second half of the double dispatch used by Instr::equal_to().
+ *
+ * An InstrComparer is constructed from a pointer to one concrete
+ * instruction and then handed to the accept() of another instruction. The
+ * visit() overload for the visited instruction's type sets `result` to
+ * is_equal_to() if the stored instruction has that same type, and to
+ * false otherwise (the pointer for that type is still null).
+ *
+ * For each instruction type DECLARE_MEMBER generates
+ *  - a constructor taking a pointer to that type,
+ *  - the visit() override for that type,
+ *  - the pointer member this_<TYPE>, initialised to nullptr.
+ */
+class InstrComparer : public ConstInstrVisitor {
+public:
+   InstrComparer() = default;
+   bool result {false};
+
+#define DECLARE_MEMBER(TYPE)                                           \
+   InstrComparer(const TYPE *instr)                                    \
+   {                                                                   \
+      this_ ## TYPE = instr;                                           \
+   }                                                                   \
+                                                                       \
+   void visit(const TYPE& instr) override                              \
+   {                                                                   \
+      result = this_ ## TYPE && this_ ## TYPE->is_equal_to(instr);     \
+   }                                                                   \
+                                                                       \
+   const TYPE *this_ ## TYPE{nullptr};
+
+   DECLARE_MEMBER(AluInstr);
+   DECLARE_MEMBER(AluGroup);
+   DECLARE_MEMBER(TexInstr);
+   DECLARE_MEMBER(ExportInstr);
+   DECLARE_MEMBER(FetchInstr);
+   DECLARE_MEMBER(Block);
+   DECLARE_MEMBER(ControlFlowInstr);
+   DECLARE_MEMBER(IfInstr);
+   DECLARE_MEMBER(WriteScratchInstr);
+   DECLARE_MEMBER(StreamOutInstr);
+   DECLARE_MEMBER(MemRingOutInstr);
+   DECLARE_MEMBER(EmitVertexInstr);
+   DECLARE_MEMBER(GDSInstr);
+   DECLARE_MEMBER(WriteTFInstr);
+   DECLARE_MEMBER(LDSAtomicInstr);
+   DECLARE_MEMBER(LDSReadInstr);
+   DECLARE_MEMBER(RatInstr);
+
+/* keep the helper macro local to this class */
+#undef DECLARE_MEMBER
+};
+
+/* First half of the double dispatch used by Instr::equal_to(): visiting
+ * an instruction stores an InstrComparer bound to that instruction's
+ * concrete type in m_comparer. */
+class InstrCompareForward: public ConstInstrVisitor {
+public:
+
+   void visit(const AluInstr& instr) override { bind(instr); }
+   void visit(const AluGroup& instr) override { bind(instr); }
+   void visit(const TexInstr& instr) override { bind(instr); }
+   void visit(const ExportInstr& instr) override { bind(instr); }
+   void visit(const FetchInstr& instr) override { bind(instr); }
+   void visit(const Block& instr) override { bind(instr); }
+   void visit(const ControlFlowInstr& instr) override { bind(instr); }
+   void visit(const IfInstr& instr) override { bind(instr); }
+   void visit(const WriteScratchInstr& instr) override { bind(instr); }
+   void visit(const StreamOutInstr& instr) override { bind(instr); }
+   void visit(const MemRingOutInstr& instr) override { bind(instr); }
+   void visit(const EmitVertexInstr& instr) override { bind(instr); }
+   void visit(const GDSInstr& instr) override { bind(instr); }
+   void visit(const WriteTFInstr& instr) override { bind(instr); }
+   void visit(const LDSAtomicInstr& instr) override { bind(instr); }
+   void visit(const LDSReadInstr& instr) override { bind(instr); }
+   void visit(const RatInstr& instr) override { bind(instr); }
+
+   InstrComparer m_comparer;
+
+private:
+   /* Replace m_comparer with a comparer constructed from a pointer to the
+    * visited instruction; the static type T selects the matching
+    * InstrComparer constructor. */
+   template <typename T>
+   void bind(const T& instr) {
+      m_comparer = InstrComparer(&instr);
+   }
+};
+
+
+/* Compare this instruction with `lhs` (the other instruction) by double
+ * dispatch: the first accept() binds a comparer to this instruction's
+ * concrete type, the second lets the other instruction visit that
+ * comparer, which stores the outcome in `result`. */
+bool Instr::equal_to(const Instr& lhs) const
+{
+   InstrCompareForward forward;
+   this->accept(forward);
+
+   InstrComparer& comparer = forward.m_comparer;
+   lhs.accept(comparer);
+
+   return comparer.result;
+}
+
+
+
+
+} // ns r600
--- a/src/gallium/drivers/r600/sfn/sfn_instr.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr.h
@ -0,0 +1,314 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "sfn_virtualvalues.h"
+#include "sfn_alu_defines.h"
+#include "sfn_defines.h"
+#include <set>
+#include <list>
+#include <iostream>
+
+namespace r600 {
+
+class ConstInstrVisitor;
+
+class InstrVisitor;
+class AluInstr;
+class AluGroup;
+class TexInstr;
+class ExportInstr;
+class FetchInstr;
+class ControlFlowInstr;
+class IfInstr;
+class WriteScratchInstr;
+class StreamOutInstr;
+class MemRingOutInstr;
+class EmitVertexInstr;
+class GDSInstr;
+class WriteTFInstr;
+class LDSAtomicInstr;
+class LDSReadInstr;
+class RatInstr;
+
+
+/* Parse the integer that follows `prefix` at the start of `str`; a missing
+ * prefix is reported on std::cerr and trips an assert. */
+int int_from_string_with_prefix(const std::string& str, const std::string& prefix);
+/* Parse a register vector string ("R<sel>.<swizzle>", "S<sel>.<swizzle>"
+ * or "_....<swizzle>"): returns the select index, fills `swz` from the
+ * swizzle characters and sets `is_ssa` when the string starts with 'S'. */
+int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle& swz, bool& is_ssa);
+
+/* Abstract base class of everything that can be placed in a Block.
+ *
+ * An Instr carries a set of flags, a use count, the id of its block and
+ * its index within it, and two dependency lists: the instructions it
+ * requires and the instructions that depend on it. Concrete classes
+ * implement accept(), do_ready() and do_print(). */
+class Instr : public Allocate {
+public:
+
+   /* Bit positions in m_instr_flags; nflags is the number of flags. */
+   enum Flags {
+      always_keep,
+      dead,
+      scheduled,
+      vpm,
+      force_cf,
+      ack_rat_return_write,
+      nflags
+      };
+
+   Instr();
+
+   Instr(const Instr& orig) = default;
+
+   virtual ~Instr();
+
+   using Pointer = R600_POINTER_TYPE(Instr);
+
+   /* Print via the virtual do_print(). */
+   void print(std::ostream& os) const;
+   /* Type-aware comparison with another instruction. */
+   bool equal_to(const Instr& lhs) const;
+
+   /* Visitor dispatch; implemented by every concrete class. */
+   virtual void accept(ConstInstrVisitor& visitor) const = 0;
+   virtual void accept(InstrVisitor& visitor) = 0;
+   virtual bool end_group() const { return true;}
+
+   virtual bool is_last() const;
+
+   void set_always_keep() {m_instr_flags.set(always_keep);}
+   /* Marks the instruction dead unless always_keep is set; returns the
+    * result of propagate_death(), or false if the instruction is kept. */
+   bool set_dead();
+   virtual void set_scheduled() { m_instr_flags.set(scheduled); forward_set_scheduled();}
+   void add_use() {++m_use_count;}
+   void dec_use() {assert(m_use_count > 0); --m_use_count;}
+   bool is_dead() const {return m_instr_flags.test(dead);}
+   bool is_scheduled() const {return m_instr_flags.test(scheduled);}
+   bool keep() const {return m_instr_flags.test(always_keep);}
+   bool has_uses() const {return m_use_count > 0;}
+
+   bool has_instr_flag(Flags f) const  {return m_instr_flags.test(f);}
+   void set_instr_flag(Flags f) { m_instr_flags.set(f);}
+
+   /* The base implementations replace nothing and return false. */
+   virtual bool replace_source(PRegister old_src, PVirtualValue new_src);
+   virtual bool replace_dest(PRegister new_dest, AluInstr *move_instr);
+
+   /* Indentation correction applied by Block::do_print(). */
+   virtual int nesting_corr() const { return 0;}
+
+   virtual bool end_block() const { return false;}
+   virtual int nesting_offset() const { return 0;}
+
+   /* Block id and index are INT_MAX until set_blockid() is called. */
+   void set_blockid(int id, int index);
+   int block_id() const {return m_block_id;}
+   int index() const { return m_index;}
+
+   /* add_required_instr() also records this instruction in the dependent
+    * list of `instr`; replace_required_instr() only rewrites the required
+    * list of this instruction. */
+   void add_required_instr(Instr *instr);
+   void replace_required_instr(Instr *old_instr, Instr *new_instr);
+
+   /* True when all required instructions are ready and do_ready() holds. */
+   bool ready() const;
+
+   /* Slot count used by Block::push_back() for its slot accounting. */
+   virtual uint32_t slots() const {return 0;};
+
+   using InstrList = std::list<Instr *, Allocator<Instr *>>;
+
+   const InstrList& dependend_instr() { return m_dependend_instr;}
+
+protected:
+
+   const InstrList& required_instr() const {return m_required_instr; }
+
+private:
+   /* Hooks with empty/trivial defaults that derived classes may override. */
+   virtual void forward_set_blockid(int id, int index);
+
+   virtual bool do_ready() const = 0;
+
+   virtual void do_print(std::ostream& os) const = 0;
+   virtual bool propagate_death();
+   virtual void forward_set_scheduled() {}
+
+   InstrList m_required_instr;
+   InstrList m_dependend_instr;
+
+   int m_use_count;
+   int m_block_id;
+   int m_index;
+   std::bitset<nflags> m_instr_flags{0};
+
+};
+using PInst = Instr::Pointer;
+
+/* An ordered list of instructions that is itself an Instr.
+ *
+ * Besides the instructions a block tracks its nesting depth and id, a
+ * type with an associated slot budget, a table of four kcache lines and
+ * the state of an open LDS group. Blocks cannot be copied. */
+class Block : public Instr {
+public:
+
+   enum Type {
+      cf,
+      alu,
+      tex,
+      vtx,
+      gds,
+      unknown
+   };
+
+   using Instructions = std::list<Instr *, Allocator<Instr *>>;
+   using Pointer = R600_POINTER_TYPE(Block);
+   using iterator = Instructions::iterator;
+   using reverse_iterator = Instructions::reverse_iterator;
+   using const_iterator = Instructions::const_iterator;
+
+   Block(int nesting_depth, int id);
+   Block(const Block& orig) = delete;
+
+   /* Appends the instruction, assigns it block id and index and updates
+    * the slot accounting. */
+   void push_back(PInst instr);
+   iterator begin() { return m_instructions.begin(); }
+   iterator end() { return m_instructions.end(); }
+   reverse_iterator rbegin() { return m_instructions.rbegin(); }
+   reverse_iterator rend() { return m_instructions.rend(); }
+
+   const_iterator begin() const { return m_instructions.begin();}
+   const_iterator end() const { return m_instructions.end();}
+
+   bool empty() const { return m_instructions.empty();}
+
+   void erase(iterator node);
+
+   /* Equal when id, nesting depth and all instructions match. */
+   bool is_equal_to(const Block& lhs) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   int nesting_depth() const { return m_nesting_depth;}
+
+   int id() const {return m_id;}
+
+   auto type() const {return m_blocK_type; }
+   /* Also resets the remaining slot count: 8 for vtx/gds/tex blocks,
+    * 0xffff (no limit) otherwise. */
+   void set_type(Type t);
+   uint32_t remaining_slots() const { return m_remaining_slots;}
+
+   /* Try to reserve kcache lines for all constants used by the group. */
+   bool try_reserve_kcache(const AluGroup& group);
+
+   auto last_lds_instr() {return m_last_lds_instr;}
+   void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;}
+
+   /* Between lds_group_start() and lds_group_end() push_back() sums up
+    * the slots of the pushed instructions; lds_group_end() passes the sum
+    * to the starting instruction. */
+   void lds_group_start(AluInstr *alu);
+   void lds_group_end();
+   bool lds_group_active() { return m_lds_group_start != nullptr;}
+
+   size_t size() const { return m_instructions.size();}
+
+private:
+   bool try_reserve_kcache(const UniformValue& u);
+
+   /* A block is always considered ready. */
+   bool do_ready() const override {return true;};
+   void do_print(std::ostream& os) const override;
+   Instructions m_instructions;
+   int m_nesting_depth;
+   int m_id;
+   /* index given to the next instruction added with push_back() */
+   int m_next_index;
+
+   Type m_blocK_type{unknown};
+   /* 0xffff means "no slot limit" */
+   uint32_t m_remaining_slots{0xffff};
+
+   std::array<KCacheLine, 4> m_kcache;
+
+   Instr *m_last_lds_instr{nullptr};
+
+   int m_lds_group_requirement{0};
+   /* non-null while an LDS group is open */
+   AluInstr *m_lds_group_start{nullptr};
+};
+
+/* Base class for instructions that write a four-component register
+ * vector through a destination swizzle. */
+class InstrWithVectorResult : public Instr {
+public:
+   /* Registers this instruction as parent of every destination component
+    * whose swizzle value is below 6. */
+   InstrWithVectorResult(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle);
+
+   void set_dest_swizzle(const RegisterVec4::Swizzle& swz) {m_dest_swizzle = swz;}
+   int dest_swizzle(int i) const { return m_dest_swizzle[i];}
+   const RegisterVec4::Swizzle&  all_dest_swizzle() const { return m_dest_swizzle;}
+   const RegisterVec4& dst() const {return m_dest;}
+
+protected:
+   /* Copies destination and swizzle only. */
+   InstrWithVectorResult(const InstrWithVectorResult& orig);
+
+   /* Prints "<S|R><sel>.<swizzle>". */
+   void print_dest(std::ostream& os) const;
+   /* True when all components and swizzle entries match the arguments. */
+   bool comp_dest(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle) const;
+
+private:
+   RegisterVec4 m_dest;
+   RegisterVec4::Swizzle m_dest_swizzle;
+};
+
+/* Instructions compare equal when Instr::equal_to() says so. */
+inline bool operator == (const Instr& lhs, const Instr& rhs) {
+   return lhs.equal_to(rhs);
+}
+
+/* Negation of the Instr::equal_to() comparison. */
+inline bool operator != (const Instr& lhs, const Instr& rhs) {
+   const bool same = lhs.equal_to(rhs);
+   return !same;
+}
+
+/* Stream an instruction using Instr::print(). */
+inline std::ostream& operator << (std::ostream& os, const Instr& instr)
+{
+   instr.print(os);
+   return os;
+}
+
+/* Same as above for any type derived from Instr (enabled only when T has
+ * Instr as base), so streaming a derived object needs no conversion to
+ * the base class. */
+template <typename T, typename = std::enable_if_t<std::is_base_of_v<Instr, T>>>
+std::ostream& operator<<(std::ostream& os, const T& instr) {
+  instr.print(os);
+  return os;
+}
+
+/* Visitor interface for read-only traversal: one pure virtual visit()
+ * overload per concrete instruction type, taking a const reference.
+ * NOTE(review): the class declares no virtual destructor, so objects of
+ * derived classes must not be deleted through a ConstInstrVisitor
+ * pointer. */
+class ConstInstrVisitor {
+public:
+   virtual void visit(const AluInstr& instr) = 0;
+   virtual void visit(const AluGroup& instr) = 0;
+   virtual void visit(const TexInstr& instr) = 0;
+   virtual void visit(const ExportInstr& instr) = 0;
+   virtual void visit(const FetchInstr& instr) = 0;
+   virtual void visit(const Block& instr) = 0;
+   virtual void visit(const ControlFlowInstr& instr) = 0;
+   virtual void visit(const IfInstr& instr) = 0;
+   virtual void visit(const WriteScratchInstr& instr) = 0;
+   virtual void visit(const StreamOutInstr& instr) = 0;
+   virtual void visit(const MemRingOutInstr& instr) = 0;
+   virtual void visit(const EmitVertexInstr& instr) = 0;
+   virtual void visit(const GDSInstr& instr) = 0;
+   virtual void visit(const WriteTFInstr& instr) = 0;
+   virtual void visit(const LDSAtomicInstr& instr) = 0;
+   virtual void visit(const LDSReadInstr& instr) = 0;
+   virtual void visit(const RatInstr& instr) = 0;
+};
+
+/* Visitor interface for traversal that may modify the instructions: one
+ * pure virtual visit() overload per concrete instruction type, taking a
+ * non-const pointer.
+ * NOTE(review): the class declares no virtual destructor, so objects of
+ * derived classes must not be deleted through an InstrVisitor pointer. */
+class InstrVisitor {
+public:
+   virtual void visit(AluInstr  *instr) = 0;
+   virtual void visit(AluGroup *instr) = 0;
+   virtual void visit(TexInstr *instr) = 0;
+   virtual void visit(ExportInstr *instr) = 0;
+   virtual void visit(FetchInstr *instr) = 0;
+   virtual void visit(Block *instr) = 0;
+   virtual void visit(ControlFlowInstr *instr) = 0;
+   virtual void visit(IfInstr *instr) = 0;
+   virtual void visit(WriteScratchInstr *instr) = 0;
+   virtual void visit(StreamOutInstr *instr) = 0;
+   virtual void visit(MemRingOutInstr *instr) = 0;
+   virtual void visit(EmitVertexInstr *instr) = 0;
+   virtual void visit(GDSInstr *instr) = 0;
+   virtual void visit(WriteTFInstr *instr) = 0;
+   virtual void visit(LDSAtomicInstr *instr) = 0;
+   virtual void visit(LDSReadInstr *instr) = 0;
+   virtual void visit(RatInstr *instr) = 0;
+};
+
+
+} // ns r600
--- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h
@ -0,0 +1,193 @@
+#ifndef INSTRALU_H
+#define INSTRALU_H
+
+#include "sfn_instr.h"
+
+#include <unordered_set>
+
+struct nir_alu_instr;
+
+namespace r600 {
+
+class Shader;
+class ValueFactory;
+
+class AluInstr : public Instr { // one ALU operation; holds either an EAluOp or, when flagged alu_is_lds, an ESDOp
+public:
+
+   using SrcValues = std::vector<PVirtualValue, Allocator<PVirtualValue>>;
+   // Option values are distinct bits (0, 1, 1 << 1, 1 << 2)
+   enum Op2Options {
+      op2_opt_none = 0,
+      op2_opt_reverse = 1,
+      op2_opt_neg_src1 = 1 << 1,
+      op2_opt_abs_src0 = 1 << 2
+   };
+   // Table of the six alu_vec_* bank swizzle values
+   static constexpr const AluBankSwizzle bs[6] = {
+      alu_vec_012,
+      alu_vec_021,
+      alu_vec_120,
+      alu_vec_102,
+      alu_vec_201,
+      alu_vec_210
+   };
+
+   static const AluModifiers src_abs_flags[2];
+   static const AluModifiers src_neg_flags[3];
+   static const AluModifiers src_rel_flags[3];
+
+   AluInstr(EAluOp opcode);
+   AluInstr(EAluOp opcode, int chan);
+   AluInstr(EAluOp opcode, PRegister dest,
+            SrcValues src0,
+            const std::set<AluModifiers>& flags, int alu_slot);
+
+   AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0,
+            const std::set<AluModifiers>& flags);
+
+   AluInstr(EAluOp opcode, PRegister dest,
+            PVirtualValue src0, PVirtualValue src1,
+            const std::set<AluModifiers>& flags);
+
+   AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1,
+            PVirtualValue src2,
+            const std::set<AluModifiers>& flags);
+
+   AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address);
+   AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+   // Each accessor asserts that the alu_is_lds flag matches the kind of opcode requested
+   auto opcode() const {assert(!has_alu_flag(alu_is_lds)); return m_opcode;}
+   auto lds_opcode() const {assert(has_alu_flag(alu_is_lds)); return m_lds_opcode;}
+
+   bool can_propagate_src() const;
+   bool can_propagate_dest() const;
+
+   bool replace_source(PRegister old_src, PVirtualValue new_src) override;
+   bool replace_dest(PRegister new_dest, AluInstr *move_instr) override;
+
+   void set_op(EAluOp op) {m_opcode = op;}
+
+   PRegister dest() const {return m_dest;}
+   unsigned n_sources() const {return m_src.size();}
+   // Channel of the destination register, or m_fallback_chan when there is no destination
+   int dest_chan() const {return m_dest ? m_dest->chan() : m_fallback_chan;}
+   // psrc() yields nullptr for an out-of-range index; src() asserts a valid index and a non-null entry
+   PVirtualValue psrc(unsigned i) {return i < m_src.size() ? m_src[i] : nullptr;}
+   VirtualValue& src(unsigned i) {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
+   const VirtualValue& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
+
+   void set_sources(SrcValues src);
+   const SrcValues& sources() const {return m_src;}
+   void pin_sources_to_chan();
+
+   int register_priority() const;
+
+   void reset_alu_flag(AluModifiers flag) {m_alu_flags.reset(flag);}
+   void set_alu_flag(AluModifiers flag) {m_alu_flags.set(flag);}
+   bool has_alu_flag(AluModifiers f) const {return m_alu_flags.test(f);}
+
+   ECFAluOpCode cf_type() const {return m_cf_type;}
+   void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
+   void set_bank_swizzle(AluBankSwizzle swz) {m_bank_swizzle = swz;}
+   AluBankSwizzle bank_swizzle() const {return m_bank_swizzle;}
+
+   void set_index_offset(unsigned offs) {m_idx_offset = offs;}
+   auto  index_offset() const {return m_idx_offset;}
+
+   bool is_equal_to(const AluInstr& lhs) const;
+
+   bool has_lds_access() const;
+
+   static const std::map<ECFAluOpCode, std::string> cf_map;
+   static const std::map<AluBankSwizzle, std::string> bank_swizzle_map;
+   static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory, AluGroup *);
+   static bool from_nir(nir_alu_instr *alu, Shader& shader);
+
+   int alu_slots() const {return m_alu_slots;}
+
+   AluGroup *split(ValueFactory &vf);
+   // The instruction ends its group when the alu_last_instr flag is set
+   bool end_group() const override { return m_alu_flags.test(alu_last_instr);}
+
+   static const std::set<AluModifiers> empty;
+   static const std::set<AluModifiers> write;
+   static const std::set<AluModifiers> last;
+   static const std::set<AluModifiers> last_write;
+
+   std::pair<PRegister, bool> indirect_addr() const;
+
+   void add_extra_dependency(PVirtualValue reg);
+
+   void set_required_slots(int nslots) { m_required_slots = nslots;}
+   unsigned  required_slots() const { return m_required_slots;}
+
+   void add_priority(int priority) { m_priority += priority;}
+   int priority() const { return m_priority;}
+   void inc_priority() { ++m_priority;}
+
+   void set_parent_group(AluGroup *group) { m_parent_group = group;}
+
+private:
+   friend class AluGroup;
+
+   void update_uses();
+
+   bool do_ready() const override;
+
+   bool can_copy_propagate() const;
+
+   bool check_readport_validation(PRegister old_src, PVirtualValue new_src) const;
+
+   void set_alu_flags(const AluOpFlags& flags) { m_alu_flags = flags; }
+   bool propagate_death() override;
+
+   void do_print(std::ostream& os) const override;
+   // m_opcode and m_lds_opcode share storage; opcode() and lds_opcode() pick one based on alu_is_lds
+   union {
+      EAluOp m_opcode;
+      ESDOp m_lds_opcode;
+   };
+
+   PRegister m_dest{nullptr};
+   SrcValues m_src;
+
+   AluOpFlags m_alu_flags;
+   AluBankSwizzle m_bank_swizzle{alu_vec_unknown};
+   ECFAluOpCode m_cf_type{cf_alu};
+   int m_alu_slots{1};
+   int m_fallback_chan{0};
+   unsigned m_idx_offset{0};
+   unsigned m_required_slots{0};
+   int m_priority{0};
+   std::set<PRegister, std::less<PRegister>, Allocator<PRegister>> m_extra_dependencies;
+   AluGroup *m_parent_group{nullptr};
+};
+
+/* Visitor base for passes that are only interested in ALU instructions.
+ * AluGroup, Block and IfInstr are handled by out-of-line overloads, every
+ * other instruction type listed here is ignored. The AluInstr overload is
+ * not provided and has to be implemented by the derived class. */
+class AluInstrVisitor : public InstrVisitor {
+public:
+   void visit(AluGroup *instr) override;
+   void visit(Block *instr) override;
+   void visit(IfInstr *instr) override;
+
+   /* Instruction types without ALU content: nothing to do */
+   void visit(TexInstr *) override {}
+   void visit(ExportInstr *) override {}
+   void visit(FetchInstr *) override {}
+   void visit(ControlFlowInstr *) override {}
+   void visit(WriteScratchInstr *) override {}
+   void visit(StreamOutInstr *) override {}
+   void visit(MemRingOutInstr *) override {}
+   void visit(EmitVertexInstr *) override {}
+   void visit(GDSInstr *) override {}
+   void visit(WriteTFInstr *) override {}
+   void visit(LDSAtomicInstr *) override {}
+   void visit(LDSReadInstr *) override {}
+   void visit(RatInstr *) override {}
+};
+
+
+}
+#endif // INSTRALU_H
--- a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp
@ -0,0 +1,361 @@
+#include "sfn_instr_alugroup.h"
+#include "sfn_debug.h"
+#include <algorithm>
+
+namespace r600 {
+
+/* Create a group in which all slots are empty. */
+AluGroup::AluGroup()
+{
+   m_slots.fill(nullptr);
+}
+
+/* Try to add instr to this group.
+ *
+ * Instructions flagged alu_is_trans are tried in the trans slot first. Then
+ * the vector slots are tried, and finally - if the group has a fifth slot and
+ * the opcode is allowed there - the trans slot.
+ *
+ * Returns true if the instruction was placed, false otherwise. */
+bool AluGroup::add_instruction(AluInstr *instr)
+{
+   /* we can only schedule one op that accesses LDS or
+     the LDS read queue */
+   if (m_has_lds_op && instr->has_lds_access())
+      return false;
+
+   if (instr->has_alu_flag(alu_is_trans) && add_trans_instructions(instr))
+      return true;
+
+   if (add_vec_instructions(instr)) {
+      instr->set_parent_group(this);
+      return true;
+   }
+
+   /* No trans slot to fall back to. LDS ops are rejected by
+    * add_trans_instructions anyway, and they must be filtered out before
+    * opcode() is called because that accessor is only valid for non-LDS
+    * instructions. */
+   if (s_max_slots < 5 || instr->has_alu_flag(alu_is_lds))
+      return false;
+
+   auto opinfo = alu_ops.find(instr->opcode());
+   assert(opinfo != alu_ops.end());
+   if (opinfo == alu_ops.end())
+      return false;
+
+   if (opinfo->second.can_channel(AluOp::t) &&
+       add_trans_instructions(instr)) {
+      instr->set_parent_group(this);
+      return true;
+   }
+
+   return false;
+}
+
+/* Try to place instr into the trans slot (slot index 4).
+ *
+ * The attempt fails if the slot is taken or not available, if the indirect
+ * address conflicts with the one already used by the group, if instr is an
+ * LDS op, if the opcode can't be executed in the trans slot, or if no
+ * bank swizzle satisfies the read port reservation.
+ *
+ * Returns true if the instruction was placed. */
+bool AluGroup::add_trans_instructions(AluInstr *instr)
+{
+   if (m_slots[4] || s_max_slots < 5)
+      return false;
+
+   if (!update_indirect_access(instr))
+      return false;
+
+   /* LDS instructions have to be scheduled in X */
+   if (instr->has_alu_flag(alu_is_lds))
+      return false;
+
+   auto opinfo = alu_ops.find(instr->opcode());
+   assert(opinfo != alu_ops.end());
+
+   if (!opinfo->second.can_channel(AluOp::t))
+      return false;
+
+
+   /* if we schedule a non-trans instr into the trans slot, we have to make
+    * sure that the corresponding vector slot is already occupied, otherwise
+    * the hardware will schedule it as vector op and the bank-swizzle as
+    * checked here (and in r600_asm.c) will not catch conflicts.
+    */
+   if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
+      if (instr->dest() && instr->dest()->pin() == pin_free) {
+         /* Test the index before using it, otherwise slot -1 would be read
+          * when none of the vector slots is occupied */
+         int used_slot = 3;
+         while (used_slot >= 0 && !m_slots[used_slot])
+            --used_slot;
+
+         // if we schedule a non-trans instr into the trans slot,
+         // there should always be some slot that is already used;
+         // if there is none, the instruction can't be placed here
+         if (used_slot < 0)
+            return false;
+
+         instr->dest()->set_chan(used_slot);
+      }
+   }
+
+   for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown ; ++i) {
+      /* Evaluate on a copy so that a failed attempt doesn't change the
+       * reservation of the group */
+      AluReadportReservation readports_evaluator = m_readports_evaluator;
+      if (readports_evaluator.schedule_trans_instruction(*instr, i)) {
+         m_readports_evaluator = readports_evaluator;
+         m_slots[4] = instr;
+         instr->pin_sources_to_chan();
+         sfn_log << SfnLog::schedule << "T: " << *instr << "\n";
+
+         /* We added a vector op in the trans channel, so we have to
+          * make sure the corresponding vector channel is used */
+         if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
+            m_slots[instr->dest_chan()] =
+                  new AluInstr(op0_nop, instr->dest_chan());
+         return true;
+      }
+   }
+   return false;
+}
+
+/* Return a bit mask in which bit i is set if slot i is still empty; only
+ * the first s_max_slots slots are considered. */
+int AluGroup::free_slots() const
+{
+   int mask = 0;
+   int slot_bit = 1;
+   for (auto slot = m_slots.begin(); slot != m_slots.begin() + s_max_slots; ++slot) {
+      if (*slot == nullptr)
+         mask |= slot_bit;
+      slot_bit <<= 1;
+   }
+   return mask;
+}
+
+/* Visitor that sets `yes` when the visited ALU instruction uses exactly one
+ * ALU slot or is flagged alu_is_cayman_trans. add_vec_instructions applies it
+ * to the users of a destination register before changing its channel. */
+class AluAllowSlotSwitch : public AluInstrVisitor {
+public:
+   using AluInstrVisitor::visit;
+
+   /* Marked override so that a signature mismatch with the base class is
+    * caught at compile time */
+   void visit(AluInstr *alu) override {
+      yes = (alu->alu_slots() == 1 || alu->has_alu_flag(alu_is_cayman_trans));
+   }
+
+   bool yes{false};
+
+};
+
+/* Try to place instr into one of the four vector slots.
+ *
+ * The slot given by the destination channel is preferred. If it is taken
+ * and the destination register is not pinned (pin_free), the first empty
+ * vector slot is used instead, provided that all users of the register
+ * allow the channel switch.
+ *
+ * Returns true if the instruction was placed. */
+bool AluGroup::add_vec_instructions(AluInstr *instr)
+{
+   if (!update_indirect_access(instr))
+      return false;
+
+   /* Only inline constants located at or above ALU_SRC_PARAM_BASE select a
+    * param; any other inline constant must not overwrite a param index
+    * that was found in an earlier source. */
+   int param_src = -1;
+   for (auto& s : instr->sources()) {
+      auto is = s->as_inline_const();
+      if (!is)
+         continue;
+
+      int param = is->sel() - ALU_SRC_PARAM_BASE;
+      if (param >= 0)
+         param_src = param;
+   }
+
+   if (param_src >= 0) {
+      if (m_param_used < 0)
+         m_param_used = param_src;
+      else if (m_param_used != param_src)
+         return false;
+   }
+
+   if (m_has_lds_op && instr->has_lds_access())
+      return false;
+
+   /* Use the bank swizzle requested by the instruction or, if none is
+    * requested, try all vector swizzles in turn */
+   auto try_swizzles = [this, instr]() -> bool {
+      if (instr->bank_swizzle() != alu_vec_unknown)
+         return try_readport(instr, instr->bank_swizzle());
+
+      for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
+         if (try_readport(instr, i))
+            return true;
+      }
+      return false;
+   };
+
+   if (!m_slots[instr->dest_chan()])
+      return try_swizzles();
+
+   /* The preferred slot is taken, moving to another channel is only
+    * possible with a free destination register */
+   auto dest = instr->dest();
+   if (!dest || dest->pin() != pin_free)
+      return false;
+
+   for (auto u : dest->uses()) {
+      AluAllowSlotSwitch swich_allowed;
+      u->accept(swich_allowed);
+      if (!swich_allowed.yes)
+         return false;
+   }
+
+   int free_chan = 0;
+   while (free_chan < 4 && m_slots[free_chan])
+      free_chan++;
+
+   if (free_chan >= 4)
+      return false;
+
+   sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
+   dest->set_chan(free_chan);
+   return try_swizzles();
+}
+
+/* Check whether instr can be scheduled as vector op with the bank swizzle
+ * `cycle`. On success the instruction is stored in the slot of its
+ * destination channel, a free destination is pinned to its channel, and
+ * true is returned. On failure the group is left unchanged. */
+bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
+{
+   const int slot = instr->dest_chan();
+
+   /* Evaluate on a copy, it is only committed if scheduling succeeds */
+   AluReadportReservation trial = m_readports_evaluator;
+   if (!trial.schedule_vec_instruction(*instr, cycle))
+      return false;
+
+   m_readports_evaluator = trial;
+   m_slots[slot] = instr;
+   m_has_lds_op |= instr->has_lds_access();
+   sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
+
+   auto dest = instr->dest();
+   if (dest && dest->pin() == pin_free)
+      dest->set_pin(pin_chan);
+
+   instr->pin_sources_to_chan();
+   return true;
+}
+
+/* Record the indirect address register used by instr. The first such
+ * register seen becomes the address register of the group; afterwards only
+ * instructions that use an equal register (or none) are accepted.
+ * Returns false if instr uses a different address register. */
+bool AluGroup::update_indirect_access(AluInstr *instr)
+{
+   const auto access = instr->indirect_addr();
+   auto addr_reg = access.first;
+
+   if (!addr_reg)
+      return true;
+
+   if (m_addr_used)
+      return addr_reg->equal_to(*m_addr_used);
+
+   m_addr_used = addr_reg;
+   m_addr_is_index = access.second;
+   return true;
+}
+
+/* Dispatch to the AluGroup overload of the read-only visitor. */
+void AluGroup::accept(ConstInstrVisitor& visitor) const
+{
+   const AluGroup& self = *this;
+   visitor.visit(self);
+}
+
+/* Dispatch to the AluGroup overload of the modifying visitor. */
+void AluGroup::accept(InstrVisitor& visitor)
+{
+   AluGroup *self = this;
+   visitor.visit(self);
+}
+
+/* Forward set_scheduled() to every instruction held by the group. */
+void AluGroup::set_scheduled()
+{
+   for (auto slot = m_slots.begin(); slot != m_slots.begin() + s_max_slots; ++slot) {
+      AluInstr *alu = *slot;
+      if (alu)
+         alu->set_scheduled();
+   }
+}
+
+/* Make sure that alu_last_instr is set on the instruction in the highest
+ * occupied slot and cleared on all other instructions of the group. */
+void AluGroup::fix_last_flag()
+{
+   bool is_last = true;
+   for (int i = s_max_slots; i-- > 0;) {
+      AluInstr *alu = m_slots[i];
+      if (!alu)
+         continue;
+
+      if (is_last)
+         alu->set_alu_flag(alu_last_instr);
+      else
+         alu->reset_alu_flag(alu_last_instr);
+
+      is_last = false;
+   }
+}
+
+/* Two groups are equal if, slot by slot, both are empty or both hold
+ * instructions that compare equal. A slot that is occupied in only one of
+ * the groups makes them unequal, regardless of which group it is. */
+bool AluGroup::is_equal_to(const AluGroup& other) const
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      const AluInstr *mine = m_slots[i];
+      const AluInstr *theirs = other.m_slots[i];
+
+      if (!mine && !theirs)
+         continue;
+
+      if (!mine || !theirs)
+         return false;
+
+      if (!mine->is_equal_to(*theirs))
+         return false;
+   }
+   return true;
+}
+
+/* Return true if any instruction in the group carries alu_lds_group_end. */
+bool AluGroup::has_lds_group_end() const
+{
+   return std::any_of(m_slots.begin(), m_slots.begin() + s_max_slots,
+                      [](const AluInstr *alu) {
+                         return alu && alu->has_alu_flag(alu_lds_group_end);
+                      });
+}
+
+/* The group is ready when every instruction it holds is ready. */
+bool AluGroup::do_ready() const
+{
+   for (auto slot = m_slots.begin(); slot != m_slots.begin() + s_max_slots; ++slot) {
+      AluInstr *alu = *slot;
+      if (alu && !alu->ready())
+         return false;
+   }
+   return true;
+}
+
+/* Pass block id and index on to every instruction held by the group. */
+void AluGroup::forward_set_blockid(int id, int index)
+{
+   for (auto slot = m_slots.begin(); slot != m_slots.begin() + s_max_slots; ++slot) {
+      AluInstr *alu = *slot;
+      if (!alu)
+         continue;
+      alu->set_blockid(id, index);
+   }
+}
+
+/* Return the slot count of the group: one per instruction, one per pair
+ * of literals (rounded up), plus one if an indirect address is used and a
+ * second one if that address is an index. */
+uint32_t AluGroup::slots() const
+{
+   uint32_t count = (m_readports_evaluator.m_nliterals + 1) >> 1;
+
+   for (auto slot = m_slots.begin(); slot != m_slots.begin() + s_max_slots; ++slot) {
+      if (*slot)
+         ++count;
+   }
+
+   if (m_addr_used)
+      count += m_addr_is_index ? 2 : 1;
+
+   return count;
+}
+
+/* Print the group: a begin marker, one indented line per occupied slot
+ * prefixed with the slot name, and an indented end marker. */
+void AluGroup::do_print(std::ostream& os) const
+{
+   const char slotname[] = "xyzwt";
+
+   const auto indent = [&os](int width) {
+      for (int k = 0; k < width; ++k)
+         os << ' ';
+   };
+
+   os << "ALU_GROUP_BEGIN\n";
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (!m_slots[i])
+         continue;
+
+      indent(2 * m_nesting_depth + 4);
+      os << slotname[i] << ": ";
+      m_slots[i]->print(os);
+      os << "\n";
+   }
+
+   indent(2 * m_nesting_depth + 2);
+   os << "ALU_GROUP_END";
+}
+
+/* Collect all sources of the instructions in this group for which
+ * as_uniform() yields a value, in slot order. */
+AluInstr::SrcValues AluGroup::get_kconsts() const
+{
+   AluInstr::SrcValues kconsts;
+
+   for (auto slot = m_slots.begin(); slot != m_slots.begin() + s_max_slots; ++slot) {
+      if (!*slot)
+         continue;
+
+      for (auto src : (*slot)->sources()) {
+         if (src->as_uniform())
+            kconsts.push_back(src);
+      }
+   }
+   return kconsts;
+}
+
+/* Select the number of ALU slots per group: four for ISA_CC_CAYMAN, five
+ * for every other chip class. */
+void AluGroup::set_chipclass(r600_chip_class chip_class)
+{
+   s_max_slots = (chip_class == ISA_CC_CAYMAN) ? 4 : 5;
+}
+
+/* Five slots are assumed until set_chipclass() says otherwise */
+int AluGroup::s_max_slots = 5;
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h
@ -0,0 +1,89 @@
+#ifndef ALUGROUP_H
+#define ALUGROUP_H
+
+#include "sfn_instr_alu.h"
+#include "sfn_alu_readport_validation.h"
+
+namespace r600 {
+
+class AluGroup : public Instr // instruction that holds up to five AluInstr pointers, one per slot
+{
+public:
+   using Slots = std::array<AluInstr *, 5>;
+
+   AluGroup();
+
+   using iterator = Slots::iterator;
+   using const_iterator = Slots::const_iterator;
+   // The add_* methods return true if the instruction was placed in the group
+   bool add_instruction(AluInstr *instr);
+   bool add_trans_instructions(AluInstr *instr);
+   bool add_vec_instructions(AluInstr *instr);
+
+   bool is_equal_to(const AluGroup& other) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+   // Iteration covers only the first s_max_slots entries of m_slots
+   auto begin() {return m_slots.begin(); }
+   auto end() {return m_slots.begin() + s_max_slots; }
+   auto begin() const {return m_slots.begin(); }
+   auto end() const {return m_slots.begin() + s_max_slots; }
+
+   bool end_group() const override { return true; }
+
+   void set_scheduled() override;
+
+   void set_nesting_depth(int depth) {m_nesting_depth = depth;}
+
+   void fix_last_flag();
+
+   static void set_chipclass(r600_chip_class chip_class);
+
+   int free_slots() const;
+   // Pair of the address register in use (may be nullptr) and the m_addr_is_index flag
+   auto addr() const {return std::make_pair(m_addr_used, m_addr_is_index);}
+
+   uint32_t slots() const override;
+
+   AluInstr::SrcValues get_kconsts() const;
+   // Only slot 0 is inspected for the alu_lds_group_start flag
+   bool has_lds_group_start() const { return m_slots[0] ?
+            m_slots[0]->has_alu_flag(alu_lds_group_start) : false;}
+
+   bool has_lds_group_end() const;
+
+   const auto& readport_reserer() const { return m_readports_evaluator; }
+   void set_readport_reserer(const AluReadportReservation& rr) {
+       m_readports_evaluator = rr;
+   };
+   // True if five slots are in use, i.e. s_max_slots == 5
+   static bool has_t() { return s_max_slots == 5;}
+
+private:
+   void forward_set_blockid(int id, int index) override;
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   bool update_indirect_access(AluInstr *instr);
+   bool try_readport(AluInstr *instr, AluBankSwizzle cycle);
+
+   Slots m_slots;
+
+   AluReadportReservation m_readports_evaluator;
+   // Number of usable slots, shared by all groups; changed via set_chipclass()
+   static int s_max_slots;
+
+   PRegister m_addr_used{nullptr};
+
+   int m_param_used{-1};
+
+   int m_nesting_depth{0};
+   bool m_has_lds_op{false};
+   bool m_addr_is_index{false};
+};
+
+
+}
+
+#endif // ALUGROUP_H
--- a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp
@ -0,0 +1,176 @@
+#include "sfn_instr_controlflow.h"
+
+#include <sstream>
+
+namespace r600 {
+
+/* Create a control flow instruction of the given type. */
+ControlFlowInstr::ControlFlowInstr(CFType type):
+   m_type(type)
+{}
+
+/* Control flow instructions are currently always reported as ready;
+ * the original note marks this as something that still has to be reworked. */
+bool ControlFlowInstr::do_ready() const
+{
+   const bool always_ready = true;
+   return always_ready;
+}
+
+
+/* Control flow instructions are equal when their types match. */
+bool ControlFlowInstr::is_equal_to(const ControlFlowInstr& rhs) const
+{
+   return !(m_type != rhs.m_type);
+}
+
+/* Dispatch to the ControlFlowInstr overload of the read-only visitor. */
+void ControlFlowInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const ControlFlowInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* Dispatch to the ControlFlowInstr overload of the modifying visitor. */
+void ControlFlowInstr::accept(InstrVisitor& visitor)
+{
+   ControlFlowInstr *self = this;
+   visitor.visit(self);
+}
+
+/* Print the keyword of the instruction type. Types without a keyword
+ * (e.g. cf_stream_write) are treated as unreachable. */
+void ControlFlowInstr::do_print(std::ostream& os) const
+{
+   const char *keyword = nullptr;
+
+   switch (m_type) {
+   case cf_else:
+      keyword = "ELSE";
+      break;
+   case cf_endif:
+      keyword = "ENDIF";
+      break;
+   case cf_loop_begin:
+      keyword = "LOOP_BEGIN";
+      break;
+   case cf_loop_end:
+      keyword = "LOOP_END";
+      break;
+   case cf_loop_break:
+      keyword = "BREAK";
+      break;
+   case cf_loop_continue:
+      keyword = "CONTINUE";
+      break;
+   case cf_wait_ack:
+      keyword = "WAIT_ACK";
+      break;
+   default:
+      unreachable("Unknown CF type");
+   }
+
+   os << keyword;
+}
+
+/* Create the control flow instruction that corresponds to the keyword
+ * type_str; returns nullptr if the keyword is not known. */
+Instr::Pointer ControlFlowInstr::from_string(std::string type_str)
+{
+   static const struct {
+      const char *keyword;
+      CFType type;
+   } known_types[] = {
+      {"ELSE", cf_else},
+      {"ENDIF", cf_endif},
+      {"LOOP_BEGIN", cf_loop_begin},
+      {"LOOP_END", cf_loop_end},
+      {"BREAK", cf_loop_break},
+      {"CONTINUE", cf_loop_continue},
+      {"WAIT_ACK", cf_wait_ack},
+   };
+
+   for (const auto& entry : known_types) {
+      if (type_str == entry.keyword)
+         return new ControlFlowInstr(entry.type);
+   }
+   return nullptr;
+}
+
+/* Nesting correction: -1 for ELSE, ENDIF and LOOP_END, 0 otherwise. */
+int ControlFlowInstr::nesting_corr() const
+{
+   const bool steps_back = m_type == cf_else ||
+                           m_type == cf_endif ||
+                           m_type == cf_loop_end;
+   return steps_back ? -1 : 0;
+}
+
+/* Nesting offset: +1 for LOOP_BEGIN, -1 for ENDIF and LOOP_END,
+ * 0 for all other types. */
+int ControlFlowInstr::nesting_offset() const
+{
+   if (m_type == cf_loop_begin)
+      return 1;
+
+   if (m_type == cf_endif || m_type == cf_loop_end)
+      return -1;
+
+   return 0;
+}
+
+/* Create an IF that is controlled by the predicate instruction pred,
+ * which must not be null. */
+IfInstr::IfInstr(AluInstr *pred):
+   m_predicate(pred)
+{
+   assert(pred);
+}
+
+/* Copy an IF; the copy gets its own copy of the predicate instruction. */
+IfInstr::IfInstr(const IfInstr& orig)
+{
+   AluInstr *predicate_copy = new AluInstr(*orig.m_predicate);
+   m_predicate = predicate_copy;
+}
+
+/* Two IF instructions are equal when their predicates are equal. */
+bool IfInstr::is_equal_to(const IfInstr& rhs) const
+{
+   const AluInstr& other_predicate = *rhs.m_predicate;
+   return m_predicate->equal_to(other_predicate);
+}
+
+/* Dispatch to the IfInstr overload of the read-only visitor. */
+void IfInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const IfInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* Dispatch to the IfInstr overload of the modifying visitor. */
+void IfInstr::accept(InstrVisitor& visitor)
+{
+   IfInstr *self = this;
+   visitor.visit(self);
+}
+
+/* Source replacement is delegated to the predicate instruction. */
+bool IfInstr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   const bool replaced = m_predicate->replace_source(old_src, new_src);
+   return replaced;
+}
+
+/* The IF is ready as soon as its predicate is ready. */
+bool IfInstr::do_ready() const
+{
+   const bool predicate_ready = m_predicate->ready();
+   return predicate_ready;
+}
+
+/* Mark the predicate as scheduled as well. */
+void IfInstr::forward_set_scheduled()
+{
+   AluInstr *predicate = m_predicate;
+   predicate->set_scheduled();
+}
+
+/* Hand block id and index on to the predicate. */
+void IfInstr::forward_set_blockid(int id, int index)
+{
+   AluInstr *predicate = m_predicate;
+   predicate->set_blockid(id, index);
+}
+
+/* Print the IF with the predicate enclosed in double parentheses; this is
+ * the form from_string() expects. */
+void IfInstr::do_print(std::ostream& os) const
+{
+   os << "IF (( ";
+   os << *m_predicate;
+   os << " ))";
+}
+
+/* Replace the predicate and give the new one the block id and index of
+ * this instruction. */
+void IfInstr::set_predicate(AluInstr *new_predicate)
+{
+   m_predicate = new_predicate;
+
+   const auto my_block = block_id();
+   const auto my_index = index();
+   m_predicate->set_blockid(my_block, my_index);
+}
+
+/* Parse an IF instruction from the text that follows the IF keyword.
+ *
+ * The expected form is "(( ALU <alu instruction> ))". The text between the
+ * opening marker and the first ')' is handed to AluInstr::from_string to
+ * create the predicate.
+ *
+ * Returns the new instruction, or nullptr if the markers are missing, the
+ * predicate is not an ALU instruction, or the predicate can't be parsed. */
+Instr::Pointer IfInstr::from_string(std::istream &is, ValueFactory& value_factory)
+{
+   std::string pred_start;
+   is >> pred_start;
+   if (pred_start != "((")
+      return nullptr;
+
+   char buf[2048];
+   is.get(buf, sizeof(buf), ')');
+
+   std::string pred_end;
+   is >> pred_end;
+   if (pred_end != "))")
+      return nullptr;
+
+   std::istringstream bufstr(buf);
+
+   std::string instr_type;
+   bufstr >> instr_type;
+
+   if (instr_type != "ALU")
+      return nullptr;
+
+   /* The constructor requires a valid predicate, so a parse failure has
+    * to be reported here instead of being passed on */
+   auto pred = AluInstr::from_string(bufstr, value_factory, nullptr);
+   if (!pred)
+      return nullptr;
+
+   return new IfInstr(static_cast<AluInstr*>(pred));
+}
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h
@ -0,0 +1,81 @@
+#ifndef CONTROLFLOWINSTR_H
+#define CONTROLFLOWINSTR_H
+
+#include "sfn_instr_alu.h"
+
+namespace r600 {
+
+/* Control flow instruction that is fully described by its type, e.g.
+ * ELSE, ENDIF, or the loop control operations. */
+class ControlFlowInstr : public Instr {
+public:
+   enum CFType {
+      cf_else,
+      cf_endif,
+      cf_loop_begin,
+      cf_loop_end,
+      cf_loop_break,
+      cf_loop_continue,
+      cf_stream_write,
+      cf_wait_ack
+   };
+
+   ControlFlowInstr(CFType type);
+   ControlFlowInstr(const ControlFlowInstr& orig) = default;
+
+   /* Creates the instruction for a keyword, nullptr if it is unknown */
+   static Instr::Pointer from_string(std::string type_str);
+
+   bool is_equal_to(const ControlFlowInstr& lhs) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   CFType cf_type() const { return m_type; }
+
+   int nesting_corr() const override;
+   int nesting_offset() const override;
+
+   /* Every control flow instruction ends the current block */
+   bool end_block() const override { return true; }
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   CFType m_type;
+};
+
+/* IF instruction; the condition is given by an ALU instruction that the
+ * IF refers to as its predicate. */
+class IfInstr : public Instr {
+public:
+   IfInstr(AluInstr *pred);
+   IfInstr(const IfInstr& orig);
+
+   /* Parses the text following the IF keyword, nullptr on failure */
+   static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory);
+
+   bool is_equal_to(const IfInstr& lhs) const;
+
+   AluInstr *predicate() const { return m_predicate; }
+   void set_predicate(AluInstr *new_predicate);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool replace_source(PRegister old_src, PVirtualValue new_src) override;
+
+   /* An IF ends the current block and increases the nesting by one */
+   bool end_block() const override { return true; }
+   int nesting_offset() const override { return 1; }
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+   void forward_set_blockid(int id, int index) override;
+   void forward_set_scheduled() override;
+
+   AluInstr *m_predicate;
+};
+
+}
+
+#endif // CONTROLFLOWINSTR_H
--- a/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp
@ -0,0 +1,524 @@
+#include "sfn_instr_export.h"
+#include "sfn_valuefactory.h"
+
+#include <sstream>
+
+namespace r600 {
+
+using std::string;
+
+/* Convert the low four bits of writemask into a swizzle string: channel i
+ * is written as 'x', 'y', 'z' or 'w' if bit i is set and as '_' otherwise.
+ *
+ * buf must provide room for at least five characters. The result is
+ * NUL-terminated so that it can be used as a C string. Returns buf. */
+static char *writemask_to_swizzle(int writemask, char *buf)
+{
+   const char *swz = "xyzw";
+   for (int i = 0; i < 4; ++i) {
+      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
+   }
+   buf[4] = '\0';
+   return buf;
+}
+
+/* Store the value to be written, register this instruction as a user of
+ * it, and flag the instruction as one that must always be kept. */
+WriteOutInstr::WriteOutInstr(const RegisterVec4& value):
+   m_value(value)
+{
+   Instr *user = this;
+   m_value.add_use(user);
+   set_always_keep();
+}
+
+/* Replace component i of the value by a new register that keeps sel and
+ * pin of the old one but uses the channel chan. */
+void WriteOutInstr::override_chan(int i, int chan)
+{
+   auto old_reg = m_value[i];
+   auto new_reg = new Register(old_reg->sel(), chan, old_reg->pin());
+   m_value.set_value(i, new_reg);
+}
+
+/* Create an export of `value` with the given type to location `loc`;
+ * the export is initially not marked as the last one. */
+ExportInstr::ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value):
+   WriteOutInstr(value),
+   m_type(type),
+   m_loc(loc),
+   m_is_last(false)
+{}
+
+/* Dispatch to the ExportInstr overload of the read-only visitor. */
+void ExportInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const ExportInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* Dispatch to the ExportInstr overload of the modifying visitor. */
+void ExportInstr::accept(InstrVisitor& visitor)
+{
+   ExportInstr *self = this;
+   visitor.visit(self);
+}
+
+
+/* Exports are equal if type, location, value and the "last export" mark
+ * all agree. */
+bool ExportInstr::is_equal_to(const ExportInstr& lhs) const
+{
+   if (m_type != lhs.m_type || m_loc != lhs.m_loc)
+      return false;
+
+   if (!(value() == lhs.value()))
+      return false;
+
+   return m_is_last == lhs.m_is_last;
+}
+
+/* Translate the textual export type as used by do_print() and
+ * from_string_impl() ("PARAM", "POS", "PIXEL") into the ExportType.
+ * Any other string yields `param`, which is what this function returned
+ * for every input before. */
+ExportInstr::ExportType ExportInstr::type_from_string(const std::string& s)
+{
+   if (s == "POS")
+      return pos;
+
+   if (s == "PIXEL")
+      return pixel;
+
+   return param;
+}
+
+/* Print the export as "EXPORT[_DONE] <type> <location> <value>"; the
+ * _DONE suffix marks the last export. */
+void ExportInstr::do_print(std::ostream& os) const
+{
+   os << (m_is_last ? "EXPORT_DONE" : "EXPORT");
+
+   if (m_type == param)
+      os << " PARAM ";
+   else if (m_type == pos)
+      os << " POS ";
+   else if (m_type == pixel)
+      os << " PIXEL ";
+
+   os << m_loc << " ";
+   value().print(os);
+}
+
+/* The export is ready when the exported value is ready at the position
+ * of this instruction. */
+bool ExportInstr::do_ready() const
+{
+   const auto& exported = value();
+   return exported.ready(block_id(), index());
+}
+
+/* Parse an export that is not the last one. */
+Instr::Pointer ExportInstr::from_string(std::istream& is, ValueFactory& vf)
+{
+   auto parsed = from_string_impl(is, vf);
+   return parsed;
+}
+
+/* Parse an export and mark it as the last one. */
+Instr::Pointer ExportInstr::last_from_string(std::istream& is, ValueFactory &vf)
+{
+   auto parsed = from_string_impl(is, vf);
+   parsed->set_is_last_export(true);
+   return parsed;
+}
+
+/* Parse "<type> <location> <value>" and create the export instruction.
+ * The type has to be PARAM, POS or PIXEL, anything else is treated as
+ * unreachable. */
+ExportInstr::Pointer ExportInstr::from_string_impl(std::istream& is, ValueFactory &vf)
+{
+   string type_name;
+   int location;
+   string value_name;
+
+   is >> type_name;
+   is >> location;
+   is >> value_name;
+
+   ExportInstr::ExportType export_type;
+   if (type_name == "PARAM") {
+      export_type = ExportInstr::param;
+   } else if (type_name == "POS") {
+      export_type = ExportInstr::pos;
+   } else if (type_name == "PIXEL") {
+      export_type = ExportInstr::pixel;
+   } else {
+      unreachable("Unknown export type");
+   }
+
+   RegisterVec4 exported = vf.src_vec4_from_string(value_name);
+   return new ExportInstr(export_type, location, exported);
+}
+
+/* Scratch write with an address taken from the register addr. The
+ * instruction is registered as a user of addr, and the array size is
+ * stored reduced by one. */
+WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value, PRegister addr,
+                                     int align, int align_offset, int writemask, int array_size):
+   WriteOutInstr(value),
+   m_address(addr),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask),
+   m_array_size(array_size - 1)
+{
+   Instr *user = this;
+   addr->add_use(user);
+}
+
+/* Scratch write to the fixed location loc. */
+WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value, int loc,
+                                     int align, int align_offset,int writemask):
+   WriteOutInstr(value),
+   m_loc(loc),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask)
+{}
+
+/* Dispatch to the WriteScratchInstr overload of the read-only visitor. */
+void WriteScratchInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const WriteScratchInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* Dispatch to the WriteScratchInstr overload of the modifying visitor. */
+void WriteScratchInstr::accept(InstrVisitor& visitor)
+{
+   WriteScratchInstr *self = this;
+   visitor.visit(self);
+}
+
+/* Scratch writes are equal if both have no address register or equal
+ * address registers, and if location, alignment, write mask, array size
+ * and the sel of the written value agree. */
+bool WriteScratchInstr::is_equal_to(const WriteScratchInstr& lhs) const
+{
+   const bool has_addr = m_address != nullptr;
+   const bool other_has_addr = lhs.m_address != nullptr;
+
+   if (has_addr != other_has_addr)
+      return false;
+
+   if (has_addr && !m_address->equal_to(*lhs.m_address))
+      return false;
+
+   if (m_loc != lhs.m_loc)
+      return false;
+   if (m_align != lhs.m_align || m_align_offset != lhs.m_align_offset)
+      return false;
+   if (m_writemask != lhs.m_writemask || m_array_size != lhs.m_array_size)
+      return false;
+
+   return value().sel() == lhs.value().sel();
+}
+
+/* Ready when the written value and, if one is used, the address register
+ * are ready at the position of this instruction. */
+bool WriteScratchInstr::do_ready() const
+{
+   if (!value().ready(block_id(), index()))
+      return false;
+
+   if (!m_address)
+      return true;
+
+   return m_address->ready(block_id(), index());
+}
+
+/* Print the scratch write: the target is either "@<address>[<size>]" or
+ * the fixed location, followed by the written register with the write
+ * mask as swizzle, the alignment (AL:) and the alignment offset (ALO:). */
+void WriteScratchInstr::do_print(std::ostream& os) const
+{
+   /* Zero-initialised so that the swizzle written into the first four
+    * characters is a terminated string when it is streamed below */
+   char buf[6] = {0};
+
+   os << "WRITE_SCRATCH ";
+   if (m_address)
+      os << "@" << *m_address << "[" << m_array_size + 1<<"]";
+   else
+      os << m_loc;
+
+   os << (value()[0]->is_ssa() ? " S" : " R")
+      << value().sel() << "." << writemask_to_swizzle(m_writemask, buf)
+      << " " << "AL:" << m_align << " ALO:" << m_align_offset;
+}
+
+/* Parse a scratch write from "<target> <value> AL:<align> ALO:<offset>".
+ *
+ * The target is either a plain location or "@<address register>[<size>]".
+ * The write mask is derived from the value: bit i is set if component i
+ * of the value uses channel i. */
+auto WriteScratchInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
+{
+   string loc_str;
+   string value_str;
+   string align_str;
+   string align_offset_str;
+
+   is >> loc_str >>  value_str >> align_str >> align_offset_str;
+
+   std::istringstream loc_ss(loc_str);
+
+   auto align = int_from_string_with_prefix(align_str, "AL:");
+   auto align_offset = int_from_string_with_prefix(align_offset_str, "ALO:");
+   auto value = vf.src_vec4_from_string(value_str);
+
+   int writemask = 0;
+   for (int i = 0; i < 4; ++i) {
+      if (value[i]->chan() == i)
+         writemask |= 1 << i;
+   }
+
+   if (loc_str[0] != '@') {
+      /* Initialised so that a failed read doesn't leave the location
+       * indeterminate */
+      int offset = 0;
+      loc_ss >> offset;
+      return new WriteScratchInstr(value, offset, align, align_offset, writemask);
+   }
+
+   string addr_str;
+   char c = '\0';
+
+   /* skip the '@' */
+   loc_ss >> c;
+
+   /* the address register is everything up to the opening bracket */
+   while (loc_ss >> c && c != '[')
+      addr_str.append(1, c);
+
+   PVirtualValue addr_reg = vf.src_from_string(addr_str);
+   assert(addr_reg && addr_reg->as_register());
+
+   int array_size = 0;
+   loc_ss >> array_size;
+
+   /* reset so that a failed read can't be mistaken for the bracket */
+   c = '\0';
+   loc_ss >> c;
+   assert(c == ']');
+
+   return new WriteScratchInstr(value, addr_reg->as_register(), align, align_offset, writemask, array_size);
+}
+
+/* Create a stream output write. The element size is stored as
+ * num_components - 1, except for three components, where 3 is stored. */
+StreamOutInstr::StreamOutInstr(const RegisterVec4& value, int num_components,
+                               int array_base, int comp_mask, int out_buffer,
+                               int stream):
+   WriteOutInstr(value),
+   m_element_size(num_components == 3 ? 3 : num_components - 1),
+   m_array_base(array_base),
+   m_writemask(comp_mask),
+   m_output_buffer(out_buffer),
+   m_stream(stream)
+{}
+
+/* Return the CF opcode for this write: the stream 0 opcode of the output
+ * buffer (0 if the buffer is not in the range 0..3) plus four times the
+ * stream number. */
+unsigned StreamOutInstr::op() const
+{
+   int buffer_op = 0;
+
+   switch (m_output_buffer) {
+   case 0:
+      buffer_op = CF_OP_MEM_STREAM0_BUF0;
+      break;
+   case 1:
+      buffer_op = CF_OP_MEM_STREAM0_BUF1;
+      break;
+   case 2:
+      buffer_op = CF_OP_MEM_STREAM0_BUF2;
+      break;
+   case 3:
+      buffer_op = CF_OP_MEM_STREAM0_BUF3;
+      break;
+   }
+
+   return 4 * m_stream + buffer_op;
+}
+
+/* Stream output writes are equal if the value and all parameters of the
+ * write agree. */
+bool StreamOutInstr::is_equal_to(const StreamOutInstr& oth) const
+{
+   if (!(value() == oth.value()))
+      return false;
+
+   if (m_element_size != oth.m_element_size ||
+       m_burst_count != oth.m_burst_count)
+      return false;
+
+   if (m_array_base != oth.m_array_base ||
+       m_array_size != oth.m_array_size)
+      return false;
+
+   if (m_writemask != oth.m_writemask ||
+       m_output_buffer != oth.m_output_buffer)
+      return false;
+
+   return m_stream == oth.m_stream;
+}
+
+/* Print stream, value, element size (ES), burst count (BC), output buffer
+ * (BUF) and array base (ARRAY); the array size is only appended if it
+ * differs from 0xfff. */
+void StreamOutInstr::do_print(std::ostream& os) const
+{
+   os << "WRITE STREAM(" << m_stream << ") ";
+   os << value();
+   os << " ES:" << m_element_size;
+   os << " BC:" << m_burst_count;
+   os << " BUF:" << m_output_buffer;
+   os << " ARRAY:" << m_array_base;
+
+   const bool print_array_size = m_array_size != 0xfff;
+   if (print_array_size)
+      os << "+" << m_array_size;
+}
+
+/* The write is ready as soon as the written vector reports ready for this
+ * instruction's block and index. */
+bool StreamOutInstr::do_ready() const
+{
+   const RegisterVec4& written = value();
+   return written.ready(block_id(), index());
+}
+
+/* Visitor dispatch: the const visitor receives a reference to this
+ * instruction ... */
+void StreamOutInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const StreamOutInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* ... while the mutating visitor receives a pointer to it. */
+void StreamOutInstr::accept(InstrVisitor& visitor)
+{
+   StreamOutInstr *self = this;
+   visitor.visit(self);
+}
+
+
+/* Create a ring-buffer write of `value`.  `ring` must be one of the four
+ * cf_mem_ring* opcodes and `ncomp` at most 4.  If an index register is
+ * passed, this instruction is registered as one of its uses. */
+MemRingOutInstr::MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
+                                 const RegisterVec4& value,
+                                 unsigned base_addr, unsigned ncomp,
+                                 PRegister index):
+   WriteOutInstr(value),
+   m_ring_op(ring),
+   m_type(type),
+   m_base_address(base_addr),
+   m_num_comp(ncomp),
+   m_export_index(index)
+{
+   assert(m_ring_op == cf_mem_ring ||
+          m_ring_op == cf_mem_ring1 ||
+          m_ring_op == cf_mem_ring2 ||
+          m_ring_op == cf_mem_ring3);
+   assert(m_num_comp <= 4);
+
+   if (m_export_index != nullptr)
+      m_export_index->add_use(this);
+}
+
+/* Translate the stored component count into the returned code:
+ * 1 -> 0, 2 -> 1, 3 and 4 -> 3. */
+unsigned MemRingOutInstr::ncomp() const
+{
+   if (m_num_comp == 1)
+      return 0;
+
+   if (m_num_comp == 2)
+      return 1;
+
+   /* Any other count than 3 or 4 is a programming error; with asserts
+    * compiled out it is treated like a full vector. */
+   assert(m_num_comp == 3 || m_num_comp == 4);
+   return 3;
+}
+
+/* Compare two ring writes.  The value, ring opcode, write type, component
+ * count and base address must match; for the indexed write types the index
+ * registers must compare equal as well.
+ *
+ * The index registers are only dereferenced after the other fields
+ * (including the write type) are known to match, so comparing an indexed
+ * write against a non-indexed one with a null index no longer dereferences
+ * a null pointer. */
+bool MemRingOutInstr::is_equal_to(const MemRingOutInstr& oth) const
+{
+   const bool same_fields = value() == oth.value() &&
+                            m_ring_op == oth.m_ring_op &&
+                            m_type == oth.m_type &&
+                            m_num_comp == oth.m_num_comp &&
+                            m_base_address == oth.m_base_address;
+   if (!same_fields)
+      return false;
+
+   const bool indexed = m_type == mem_write_ind || m_type == mem_write_ind_ack;
+   if (!indexed)
+      return true;
+
+   /* Both sides are indexed writes here; guard against a missing index. */
+   if (!m_export_index || !oth.m_export_index)
+      return m_export_index == oth.m_export_index;
+
+   return *m_export_index == *oth.m_export_index;
+}
+
+/* Printed names of the write types, indexed by the EMemWriteType value. */
+static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
+
+/* Print the instruction as
+ *   MEM_RING <ring> <type> <base address> <value> [@<index>] ES:<ncomp>
+ * where the index register is only printed for the indexed write types. */
+void MemRingOutInstr::do_print(std::ostream& os) const
+{
+   int ring_nr = 0;
+   if (m_ring_op != cf_mem_ring)
+      ring_nr = m_ring_op - cf_mem_ring1 + 1;
+
+   const bool indexed = (m_type == mem_write_ind) || (m_type == mem_write_ind_ack);
+
+   os << "MEM_RING " << ring_nr
+      << " " << write_type_str[m_type]
+      << " " << m_base_address
+      << " " << value();
+
+   if (indexed)
+      os << " @" << *m_export_index;
+
+   os << " ES:" << m_num_comp;
+}
+
+/* Redirect this write to the ring that belongs to `stream` (0..3) and
+ * replace the export index register.
+ *
+ * `stream` is a signed int, so the range check has to cover negative
+ * values too; otherwise the opcode table below is indexed out of bounds.
+ *
+ * NOTE(review): unlike the constructor this does not call add_use() on the
+ * new index register - confirm whether callers rely on that. */
+void MemRingOutInstr::patch_ring(int stream, PRegister index)
+{
+   const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
+
+   assert(stream >= 0 && stream < 4);
+   m_ring_op = ring_op[stream];
+   m_export_index = index;
+}
+
+/* Ready when the export index register (if there is one) and the written
+ * vector are both ready for this instruction's block and index. */
+bool MemRingOutInstr::do_ready() const
+{
+   const bool index_pending =
+         m_export_index && !m_export_index->ready(block_id(), index());
+
+   if (index_pending)
+      return false;
+
+   return value().ready(block_id(), index());
+}
+
+/* Visitor dispatch: the const visitor receives a reference to this
+ * instruction ... */
+void MemRingOutInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const MemRingOutInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* ... while the mutating visitor receives a pointer to it. */
+void MemRingOutInstr::accept(InstrVisitor& visitor)
+{
+   MemRingOutInstr *self = this;
+   visitor.visit(self);
+}
+
+/* Maps the textual write type names to the EMemWriteType values; used by
+ * MemRingOutInstr::from_string to parse the type token.  The names are the
+ * same strings as in write_type_str. */
+static const std::map<string,  MemRingOutInstr::EMemWriteType> type_lookop =
+{
+   {"WRITE", MemRingOutInstr::mem_write},
+   {"WRITE_IDX", MemRingOutInstr::mem_write_ind},
+   {"WRITE_ACK", MemRingOutInstr::mem_write_ack},
+   {"WRITE_IDX_ACK", MemRingOutInstr::mem_write_ind_ack}
+};
+
+/* Parse a ring write from the text that follows the "MEM_RING" keyword:
+ *
+ *    <ring> <type> <base address> <value> [@<index>] ES:<ncomp>
+ *
+ * <ring> must be 0..3 and <type> one of the names in type_lookop.  The
+ * "@<index>" token is only expected for the indexed write types.
+ *
+ * For the non-indexed types no index register is parsed, so a null
+ * register is handed to the constructor (which accepts it) instead of
+ * calling as_register() on a null pointer. */
+auto MemRingOutInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
+{
+   int ring = 0;
+   int base_address = 0;
+   string type_str;
+   string value_str;
+
+   is >> ring >> type_str >> base_address >> value_str;
+   /* ring indexes the opcode table below, so reject negative values too. */
+   assert(ring >= 0 && ring < 4);
+
+   auto itype = type_lookop.find(type_str);
+   assert(itype != type_lookop.end());
+
+   auto type = itype->second;
+
+   PRegister index{nullptr};
+   if (type == mem_write_ind || type == mem_write_ind_ack) {
+      char c = '\0';
+      string index_str;
+      is >> c >> index_str;
+      assert('@' == c );
+
+      auto index_value = vf.src_from_string(index_str);
+      assert(index_value && index_value->as_register());
+      if (index_value)
+         index = index_value->as_register();
+   }
+
+   string elm_size_str;
+   is >> elm_size_str;
+
+   int num_comp = int_from_string_with_prefix(elm_size_str, "ES:");
+
+   auto value = vf.src_vec4_from_string(value_str);
+
+   const ECFOpCode opcodes[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
+
+   return new MemRingOutInstr(opcodes[ring], type, value, base_address, num_comp, index);
+}
+
+/* Record the stream number and whether this is the "cut" variant. */
+EmitVertexInstr::EmitVertexInstr(int stream, bool cut):
+   m_stream(stream),
+   m_cut(cut)
+{
+}
+
+
+/* Equal when both the stream and the cut flag match. */
+bool EmitVertexInstr::is_equal_to(const EmitVertexInstr& oth) const
+{
+   if (oth.m_stream != m_stream)
+      return false;
+
+   return oth.m_cut == m_cut;
+}
+
+/* Visitor dispatch: the const visitor receives a reference to this
+ * instruction ... */
+void EmitVertexInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const EmitVertexInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* ... while the mutating visitor receives a pointer to it. */
+void EmitVertexInstr::accept(InstrVisitor& visitor)
+{
+   EmitVertexInstr *self = this;
+   visitor.visit(self);
+}
+
+/* This instruction reads no registers, so it is unconditionally ready. */
+bool EmitVertexInstr::do_ready() const
+{
+   const bool always_ready = true;
+   return always_ready;
+}
+
+/* Print "EMIT_VERTEX @<stream>" or, for the cut variant,
+ * "EMIT_CUT_VERTEX @<stream>". */
+void EmitVertexInstr::do_print(std::ostream& os) const
+{
+   const char *mnemonic = "EMIT_VERTEX @";
+   if (m_cut)
+      mnemonic = "EMIT_CUT_VERTEX @";
+
+   os << mnemonic << m_stream;
+}
+
+/* Parse "@<stream>" from `is` and create the emit-vertex instruction; the
+ * caller decides via `cut` which variant is created.
+ *
+ * The locals are initialised so that a failed extraction (e.g. an empty
+ * stream) trips the assert on a defined value instead of reading an
+ * indeterminate one. */
+auto EmitVertexInstr::from_string(std::istream& is, bool cut) -> Pointer
+{
+   char c = '\0';
+   is >> c;
+   assert(c == '@');
+
+   int stream = 0;
+   is >> stream;
+
+   return new EmitVertexInstr(stream, cut);
+}
+
+/* Visitor dispatch: the const visitor receives a reference to this
+ * instruction ... */
+void WriteTFInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const WriteTFInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* ... while the mutating visitor receives a pointer to it. */
+void WriteTFInstr::accept(InstrVisitor& visitor)
+{
+   WriteTFInstr *self = this;
+   visitor.visit(self);
+}
+
+/* Two WRITE_TF instructions are equal when they write the same value. */
+bool WriteTFInstr::is_equal_to(const WriteTFInstr& rhs) const
+{
+   const bool same_value = (value() == rhs.value());
+   return same_value;
+}
+
+/* Parse the vec4 operand that follows the "WRITE_TF" keyword and create
+ * the instruction from it. */
+auto WriteTFInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
+{
+   string operand;
+   is >> operand;
+
+   auto written = vf.src_vec4_from_string(operand);
+   return new WriteTFInstr(written);
+}
+
+/* Ready once the written vector reports ready for this instruction's
+ * block and index. */
+bool WriteTFInstr::do_ready() const
+{
+   const RegisterVec4& written = value();
+   return written.ready(block_id(), index());
+}
+
+/* Print "WRITE_TF <value>". */
+void WriteTFInstr::do_print(std::ostream& os) const
+{
+   os << "WRITE_TF ";
+   os << value();
+}
+
+}
+
--- a/src/gallium/drivers/r600/sfn/sfn_instr_export.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_export.h
@ -0,0 +1,213 @@
+#ifndef INSTR_EXPORT_H
+#define INSTR_EXPORT_H
+
+#include "sfn_instr.h"
+
+namespace r600 {
+
+class ValueFactory;
+
+
+/* Common base of the instructions in this header that write out a
+ * RegisterVec4 value.  It only stores the value; copying is disabled. */
+class WriteOutInstr: public Instr {
+public:
+   WriteOutInstr(const RegisterVec4& value);
+   WriteOutInstr(const WriteOutInstr& orig) = delete;
+
+   /* Defined in the implementation file (not visible here). */
+   void override_chan(int i, int chan);
+
+   /* Read-only and mutable access to the written value. */
+   const RegisterVec4& value() const {return m_value;};
+   RegisterVec4& value() {return m_value;};
+private:
+
+   RegisterVec4 m_value;
+};
+
+/* Export of a vec4 value to a location of one of three export types
+ * (pixel, pos, param).  An export can additionally be flagged as the last
+ * export. */
+class ExportInstr: public WriteOutInstr {
+public:
+   enum ExportType {
+      pixel,
+      pos,
+      param
+   };
+
+   using Pointer = R600_POINTER_TYPE(ExportInstr);
+
+   ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value);
+   ExportInstr(const ExportInstr& orig) = delete;
+
+   /* Visitor dispatch. */
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const ExportInstr& lhs) const;
+
+
+   /* Convert the textual export type name to the enum value. */
+   static ExportType type_from_string(const std::string& s);
+
+   ExportType export_type() const {return m_type;}
+
+   unsigned location() const {return m_loc;}
+
+   void set_is_last_export(bool value) {m_is_last = value;}
+   bool is_last_export()  const {return m_is_last;}
+
+   /* Create an export from its textual form.
+    * NOTE(review): last_from_string presumably also marks the result as
+    * the last export - confirm in the implementation. */
+   static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
+   static Instr::Pointer last_from_string(std::istream& is, ValueFactory &vf);
+
+private:
+   static ExportInstr::Pointer from_string_impl(std::istream& is, ValueFactory &vf);
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ExportType m_type;
+   unsigned m_loc;
+   /* No default member initializer here; the constructor has to set it. */
+   bool m_is_last;
+};
+
+/* Write of a vec4 value to scratch memory.  The target is given either by
+ * an address register plus array size (first constructor) or by an integer
+ * address (second constructor). */
+class WriteScratchInstr : public WriteOutInstr {
+public:
+   WriteScratchInstr(const RegisterVec4& value, PRegister addr,
+                     int align, int align_offset, int writemask, int array_size);
+   WriteScratchInstr(const RegisterVec4& value, int addr,  int align, int align_offset,
+                     int writemask);
+
+   /* Visitor dispatch. */
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const WriteScratchInstr& lhs) const;
+
+   unsigned location() const { return m_loc;};
+   int write_mask() const { return m_writemask;}
+   auto address() const { return m_address;}
+   /* The write is indirect exactly when an address register is set. */
+   bool indirect() const { return !!m_address;}
+   int array_size() const { return m_array_size;}
+
+   /* Create a scratch write from its textual form. */
+   static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   unsigned m_loc{0};
+   PRegister m_address {nullptr};
+   /* The following three have no default initializer; the constructors
+    * have to set them. */
+   unsigned m_align;
+   unsigned m_align_offset;
+   unsigned m_writemask;
+   int m_array_size{0};
+};
+
+/* Write of a vec4 value to a stream-out buffer of a given stream. */
+class StreamOutInstr: public WriteOutInstr {
+public:
+   StreamOutInstr(const RegisterVec4& value, int num_components,
+                       int array_base, int comp_mask, int out_buffer,
+                       int stream);
+   int element_size() const { return m_element_size;}
+   int burst_count() const { return m_burst_count;}
+   int array_base() const { return m_array_base;}
+   int array_size() const { return m_array_size;}
+   int comp_mask() const { return m_writemask;}
+   /* CF opcode derived from the output buffer and the stream. */
+   unsigned op() const;
+
+   bool is_equal_to(const StreamOutInstr& lhs) const;
+
+   /* Visitor dispatch. */
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   int m_element_size{0};
+   /* Burst count and array size keep these defaults; no setter is
+    * declared in this class. */
+   int m_burst_count{1};
+   int m_array_base{0};
+   int m_array_size{0xfff};
+   int m_writemask{0};
+   int m_output_buffer{0};
+   int m_stream{0};
+};
+
+/* Write of a vec4 value to one of the four memory rings, optionally
+ * addressed through an index register. */
+class MemRingOutInstr: public WriteOutInstr {
+public:
+
+   /* Write types; the numeric values are also used as index into the
+    * table of printed names in the implementation file. */
+   enum EMemWriteType {
+      mem_write = 0,
+      mem_write_ind = 1,
+      mem_write_ack = 2,
+      mem_write_ind_ack = 3,
+   };
+
+   MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
+                        const RegisterVec4& value, unsigned base_addr,
+                        unsigned ncomp, PRegister m_index);
+
+   unsigned op() const{return m_ring_op;}
+   /* Encoded component count (not the raw count passed to the ctor). */
+   unsigned ncomp() const;
+   unsigned addr() const {return m_base_address;}
+   EMemWriteType type() const {return m_type;}
+   /* Dereferences the index register without a null check, so it must
+    * only be called when an index register is set. */
+   unsigned index_reg() const {assert(m_export_index->sel() >= 0); return m_export_index->sel();}
+   /* Returns the same value as addr(). */
+   unsigned array_base() const {return m_base_address; }
+   PVirtualValue export_index()  const {return m_export_index;}
+
+   /* Switch to the ring of the given stream and replace the index
+    * register. */
+   void patch_ring(int stream, PRegister index);
+
+   /* Visitor dispatch. */
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const MemRingOutInstr& lhs) const;
+
+   /* Create a ring write from its textual form. */
+   static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
+
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ECFOpCode m_ring_op;
+   EMemWriteType m_type;
+   unsigned m_base_address;
+   unsigned m_num_comp;
+   PRegister m_export_index;
+};
+
+/* Emit-vertex instruction for a given stream; the cut flag selects
+ * between the cf_emit_vertex and cf_cut_vertex opcodes. */
+class EmitVertexInstr : public Instr {
+public:
+   EmitVertexInstr(int stream, bool cut);
+   ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
+   int stream() const { return m_stream;}
+
+   /* Visitor dispatch. */
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const EmitVertexInstr& lhs) const;
+
+   /* Create the instruction from its textual form; `cut` is supplied by
+    * the caller and not parsed from the stream. */
+   static auto from_string(std::istream& is, bool cut) -> Pointer;
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   int m_stream;
+   bool m_cut;
+};
+
+/* "WRITE_TF" instruction: writes a vec4 value and carries no state beyond
+ * what WriteOutInstr stores.  The base class constructors are inherited. */
+class WriteTFInstr : public WriteOutInstr {
+public:
+   using WriteOutInstr::WriteOutInstr;
+
+   /* Visitor dispatch. */
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const WriteTFInstr& rhs) const;
+
+   /* Create the instruction from its textual form. */
+   static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+};
+
+
+}
+
+#endif // INSTR_EXPORT_H
--- a/src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp
@ -0,0 +1,659 @@
+#include "sfn_instr_fetch.h"
+#include "sfn_valuefactory.h"
+#include "sfn_defines.h"
+
+#include <sstream>
+
+namespace r600 {
+
+using std::string;
+using std::istringstream;
+
+/* Create a fetch instruction.  Besides storing the parameters this
+ *  - selects the printed mnemonic from the opcode (and for
+ *    vc_get_buf_resinfo suppresses printing of MFC, format and fetch type),
+ *  - registers the instruction as a use of the source register and of the
+ *    resource offset register when these are given.
+ * Mega fetch count, array base, array size and element size start at 0. */
+FetchInstr::FetchInstr(EVFetchInstr opcode,
+                       const RegisterVec4& dst,
+                       const RegisterVec4::Swizzle& dest_swizzle,
+                       PRegister src,
+                       uint32_t src_offset,
+                       EVFetchType fetch_type,
+                       EVTXDataFormat data_format,
+                       EVFetchNumFormat num_format,
+                       EVFetchEndianSwap endian_swap,
+                       uint32_t resource_id,
+                       PRegister resource_offset):
+   InstrWithVectorResult(dst, dest_swizzle),
+   m_opcode(opcode),
+   m_src(src),
+   m_src_offset(src_offset),
+   m_fetch_type(fetch_type),
+   m_data_format(data_format),
+   m_num_format(num_format),
+   m_endian_swap(endian_swap),
+   m_resource_id(resource_id),
+   m_resource_offset(resource_offset),
+   m_mega_fetch_count(0),
+   m_array_base(0),
+   m_array_size(0),
+   m_elm_size(0)
+{
+   switch (m_opcode) {
+   case vc_fetch:
+      m_opname = "VFETCH";
+      break;
+   case vc_semantic:
+      m_opname = "FETCH_SEMANTIC";
+      break;
+   case vc_get_buf_resinfo:
+      set_print_skip(mfc);
+      set_print_skip(fmt);
+      set_print_skip(ftype);
+      m_opname = "GET_BUF_RESINFO";
+      break;
+   case vc_read_scratch:
+      m_opname = "READ_SCRATCH";
+      break;
+   default:
+      unreachable("Unknwon fetch instruction");
+   }
+
+   if (m_src != nullptr)
+      m_src->add_use(this);
+
+   if (m_resource_offset != nullptr) {
+      auto offset_reg = m_resource_offset->as_register();
+      if (offset_reg)
+         offset_reg->add_use(this);
+   }
+}
+
+/* Visitor dispatch: the const visitor receives a reference to this
+ * instruction ... */
+void FetchInstr::accept(ConstInstrVisitor& visitor) const
+{
+   const FetchInstr& self = *this;
+   visitor.visit(self);
+}
+
+/* ... while the mutating visitor receives a pointer to it. */
+void FetchInstr::accept(InstrVisitor& visitor)
+{
+   FetchInstr *self = this;
+   visitor.visit(self);
+}
+
+/* Compare two fetch instructions: source, destination (incl. swizzle),
+ * flags, resource offset and all scalar parameters have to match. */
+bool FetchInstr::is_equal_to(const FetchInstr& rhs) const
+{
+   /* The sources must either both be missing or both be set and equal. */
+   if ((m_src == nullptr) != (rhs.m_src == nullptr))
+      return false;
+   if (m_src && !m_src->equal_to(*rhs.m_src))
+      return false;
+
+   if (!comp_dest(rhs.dst(), rhs.all_dest_swizzle()))
+      return false;
+
+   if (m_tex_flags != rhs.m_tex_flags)
+      return false;
+
+   /* Same rule for the resource offset. */
+   if ((m_resource_offset == nullptr) != (rhs.m_resource_offset == nullptr))
+      return false;
+   if (m_resource_offset && !m_resource_offset->equal_to(*rhs.m_resource_offset))
+      return false;
+
+   const bool same_fetch = m_opcode == rhs.m_opcode &&
+                           m_src_offset == rhs.m_src_offset &&
+                           m_fetch_type == rhs.m_fetch_type &&
+                           m_data_format == rhs.m_data_format &&
+                           m_num_format == rhs.m_num_format &&
+                           m_endian_swap == rhs.m_endian_swap &&
+                           m_resource_id == rhs.m_resource_id;
+
+   const bool same_layout = m_mega_fetch_count == rhs.m_mega_fetch_count &&
+                            m_array_base == rhs.m_array_base &&
+                            m_array_size == rhs.m_array_size &&
+                            m_elm_size == rhs.m_elm_size;
+
+   return same_fetch && same_layout;
+}
+
+/* Called when this instruction is removed: drop it from the use list of
+ * its source register.  Always returns true.
+ *
+ * The source is checked for null first, like do_ready(), do_print() and
+ * is_equal_to() do, instead of being dereferenced unconditionally.
+ *
+ * NOTE(review): the constructor also registers a use on the resource
+ * offset register, which is not removed here - confirm whether that is
+ * intended. */
+bool FetchInstr::propagate_death()
+{
+   if (m_src) {
+      auto reg = m_src->as_register();
+      if (reg)
+         reg->del_use(this);
+   }
+   return true;
+}
+
+/* Replace every occurrence of `old_src` (as source and/or as resource
+ * offset) by `new_src` and keep the use lists of the registers in sync.
+ * Only registers are accepted as replacement; returns true when at least
+ * one operand was replaced.
+ *
+ * The source is checked for null before it is compared, like the other
+ * methods of this class do, instead of being dereferenced
+ * unconditionally. */
+bool FetchInstr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   auto new_reg = new_src->as_register();
+   if (!new_reg)
+      return false;
+
+   bool success = false;
+
+   if (m_src && old_src->equal_to(*m_src)) {
+      m_src->del_use(this);
+      m_src = new_reg;
+      new_reg->add_use(this);
+      success = true;
+   }
+
+   if (m_resource_offset && old_src->equal_to(*m_resource_offset)) {
+      m_resource_offset->del_use(this);
+      m_resource_offset = new_reg;
+      new_reg->add_use(this);
+      success = true;
+   }
+
+   return success;
+}
+
+/* The fetch is ready when all required instructions are scheduled, a
+ * source register is set and ready, and the resource offset register (if
+ * there is one) is ready as well. */
+bool FetchInstr::do_ready() const
+{
+   for (auto required: required_instr())
+      if (!required->is_scheduled())
+         return false;
+
+   const bool src_ready = m_src && m_src->ready(block_id(), index());
+
+   if (!m_resource_offset)
+      return src_ready;
+
+   auto offset_reg = m_resource_offset->as_register();
+   if (!offset_reg)
+      return src_ready;
+
+   const bool offset_ready = offset_reg->ready(block_id(), index());
+   return src_ready && offset_ready;
+}
+
+/* Print the fetch instruction in its textual form:
+ *
+ *   <opname> <dest> : [<src> [+ <offset>b]] [RID:<id>] [+ <res offset>]
+ *   [<fetch type>] [FMT(<data format>,<S|U><num format>)]
+ *   [BASE:<n> | L[0x<n>]] [SIZE:<n>] [MFC:<n>] [ES:<n>] [<flags>...]
+ *
+ * Which of the optional parts appear depends on the opcode, the print
+ * skip mask and on whether the respective value is set. */
+void FetchInstr::do_print(std::ostream& os) const
+{
+   const bool is_scratch_read = (m_opcode == vc_read_scratch);
+
+   os << m_opname << ' ';
+   print_dest(os);
+   os << " :";
+
+   /* The source is not printed for resource queries and when its channel
+    * is 7 or above. */
+   if (m_opcode != vc_get_buf_resinfo && m_src && m_src->chan() < 7) {
+      os << " " << *m_src;
+      if (m_src_offset)
+         os << " + " << m_src_offset << "b";
+   }
+
+   if (!is_scratch_read)
+      os << " RID:" << m_resource_id;
+
+   if (m_resource_offset) {
+      os << " + ";
+      m_resource_offset->print(os);
+   }
+
+   if (!m_skip_print.test(ftype)) {
+      switch (m_fetch_type) {
+      case vertex_data:
+         os << " VERTEX";
+         break;
+      case instance_data:
+         os << " INSTANCE_DATA";
+         break;
+      case no_index_offset:
+         os << " NO_IDX_OFFSET";
+         break;
+      default:
+         unreachable("Unknwon fetch instruction type");
+      }
+   }
+
+   if (!m_skip_print.test(fmt)) {
+      os << " FMT(";
+
+      auto format_entry = s_data_format_map.find(m_data_format);
+      if (format_entry == s_data_format_map.end())
+         unreachable("unknwon data format");
+      else
+         os << format_entry->second << ",";
+
+      os << (m_tex_flags.test(format_comp_signed) ? "S" : "U");
+
+      switch (m_num_format) {
+      case vtx_nf_norm:
+         os << "NORM";
+         break;
+      case vtx_nf_int:
+         os << "INT";
+         break;
+      case vtx_nf_scaled:
+         os << "SCALED";
+         break;
+      default:
+         unreachable("Unknwon number format");
+      }
+
+      os << ")";
+   }
+
+   if (m_array_base) {
+      if (is_scratch_read)
+         os << " L[0x" << std::uppercase << std::hex << m_array_base << std::dec << "]";
+      else
+         os << " BASE:" << m_array_base;
+   }
+
+   if (m_array_size)
+      os << " SIZE:" << m_array_size + 1;
+
+   if (m_tex_flags.test(is_mega_fetch) && !m_skip_print.test(mfc))
+      os << " MFC:" << m_mega_fetch_count;
+
+   if (m_elm_size)
+      os << " ES:" << m_elm_size;
+
+   /* Flags that are printed whenever they are set, in this order. */
+   static const struct {
+      EFlags flag;
+      const char *text;
+   } plain_flags[] = {
+      {fetch_whole_quad, " WQ"},
+      {use_const_field, " UCF"},
+      {srf_mode, " SRF"},
+      {buf_no_stride, " BNS"},
+      {alt_const, " AC"},
+      {use_tc, " TC"},
+      {vpm, " VPM"},
+   };
+
+   for (const auto& entry: plain_flags) {
+      if (m_tex_flags.test(entry.flag))
+         os << entry.text;
+   }
+
+   /* These two are not printed for scratch reads. */
+   if (!is_scratch_read) {
+      if (m_tex_flags.test(uncached))
+         os << " UNCACHED";
+      if (m_tex_flags.test(indexed))
+         os << " INDEXED";
+   }
+}
+
+/* Parse a vertex fetch (opcode vc_fetch) from its textual form. */
+Instr::Pointer FetchInstr::from_string(std::istream& is, ValueFactory& vf)
+{
+   const EVFetchInstr opcode = vc_fetch;
+   return from_string_impl(is, opcode, vf);
+}
+
+/* Parse a fetch instruction with the given opcode from the text that
+ * follows the mnemonic:
+ *
+ *   <dest> : <src> [+ <offset>b] RID:<id> <fetch type>
+ *   FMT(<data format>,<S|U><num format>) [<param>:<value> | <flag>]...
+ *
+ * <fetch type> is one of the names do_print() emits (VERTEX,
+ * INSTANCE_DATA, NO_IDX_OFFSET).  Trailing tokens containing a ':' are
+ * handed to set_param_from_string(), all others to
+ * set_flag_from_string(). */
+Instr::Pointer FetchInstr::from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory& vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dest_reg = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   /* Initialised so that a failed extraction is caught by the assert
+    * instead of comparing an indeterminate value. */
+   char help = '\0';
+   is >> help;
+   assert(help == ':');
+
+   string srcstr;
+   is >> srcstr;
+
+   /* Check the factory result before asking it for the register. */
+   auto src_value = vf.src_from_string(srcstr);
+   auto src_reg = src_value ? src_value->as_register() : nullptr;
+   assert(src_reg);
+
+   string res_id_str;
+   string next;
+   is >> next;
+
+   int src_offset_val = 0;
+
+   if (next == "+") {
+      is >> src_offset_val;
+      help = '\0';
+      is >> help;
+      assert(help == 'b');
+      is >> res_id_str;
+   } else {
+      res_id_str = next;
+   }
+
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   string fetch_type_str;
+   is >> fetch_type_str;
+
+   EVFetchType fetch_type = vertex_data;
+   if (fetch_type_str == "VERTEX") {
+      fetch_type = vertex_data;
+   } else if (fetch_type_str == "INSTANCE_DATA") {
+      fetch_type = instance_data;
+   } else if (fetch_type_str == "NO_IDX_OFFSET") {
+      fetch_type = no_index_offset;
+   } else {
+      /* A bare string literal is always true, so it has to be combined
+       * with 0 for the assert to fire. */
+      assert(0 && "Fetch type not yet implemented");
+   }
+
+   string format_str;
+   is >> format_str;
+
+   assert(format_str.compare(0, 4, "FMT(") == 0);
+   string data_format;
+   string num_format_str;
+
+   istringstream fmt_stream(format_str.substr(4));
+   bool is_num_fmt = false;
+   assert(!fmt_stream.eof());
+
+   /* Everything up to the ',' is the data format name, the rest is the
+    * sign character, the number format and the closing ')'.  Only
+    * successfully extracted characters are appended. */
+   char c = '\0';
+   while (fmt_stream >> c) {
+      if (c == ',') {
+         is_num_fmt = true;
+         continue;
+      }
+
+      if (!is_num_fmt)
+         data_format.append(1, c);
+      else
+         num_format_str.append(1, c);
+   }
+
+   EVTXDataFormat fmt = fmt_invalid;
+
+   for (auto& [f, name] :  s_data_format_map) {
+      if (data_format == name) {
+         fmt = f;
+         break;
+      }
+   }
+
+   assert(fmt != fmt_invalid);
+
+   const bool have_num_format = !num_format_str.empty();
+   bool fmt_signed = have_num_format && num_format_str[0] == 'S';
+   assert(fmt_signed || (have_num_format && num_format_str[0] == 'U'));
+
+   /* Strip the sign character and the closing parenthesis. */
+   if (have_num_format) {
+      size_t num_format_end = num_format_str.find(')');
+      num_format_str = num_format_str.substr(1, num_format_end - 1);
+   }
+
+   EVFetchNumFormat num_fmt = vtx_nf_norm;
+   if (num_format_str == "NORM")
+      num_fmt = vtx_nf_norm;
+   else if (num_format_str == "INT")
+      num_fmt = vtx_nf_int;
+   else if (num_format_str == "SCALED")
+      num_fmt = vtx_nf_scaled;
+   else {
+      std::cerr << "Number format: '" << num_format_str << "' : ";
+      unreachable("Unknown number format");
+   }
+
+   auto fetch = new FetchInstr(opcode, dest_reg, dst_swz,
+                               src_reg, src_offset_val, fetch_type, fmt, num_fmt,
+                               vtx_es_none, res_id, nullptr);
+   if (fmt_signed)
+      fetch->set_fetch_flag(format_comp_signed);
+
+   /* Remaining tokens are either "NAME:value" parameters or flags. */
+   std::string next_token;
+   while (is >> next_token) {
+      if (next_token.find(':') != string::npos) {
+         fetch->set_param_from_string(next_token);
+      } else {
+         fetch->set_flag_from_string(next_token);
+      }
+   }
+
+   return fetch;
+}
+
+/* Apply a "NAME:value" token to this instruction.  Known names are MFC
+ * (mega fetch count), ARRB (array base), ARRS (array size) and ES
+ * (element size); anything else is reported and treated as unreachable. */
+void FetchInstr::set_param_from_string(const std::string& token)
+{
+   if (token.compare(0, 4, "MFC:") == 0) {
+      set_mfc(int_from_string_with_prefix(token, "MFC:"));
+      return;
+   }
+
+   if (token.compare(0, 5, "ARRB:") == 0) {
+      set_array_base(int_from_string_with_prefix(token, "ARRB:"));
+      return;
+   }
+
+   if (token.compare(0, 5, "ARRS:") == 0) {
+      set_array_size(int_from_string_with_prefix(token, "ARRS:"));
+      return;
+   }
+
+   if (token.compare(0, 3, "ES:") == 0) {
+      set_element_size(int_from_string_with_prefix(token, "ES:"));
+      return;
+   }
+
+   std::cerr << "Token '" << token << "': ";
+   unreachable("Unknown token in fetch param list");
+}
+
+/* Set the fetch flag whose name equals `token`; an unknown name is
+ * reported and treated as unreachable.
+ *
+ * s_flag_map is keyed on `const char *`, so its find() orders and compares
+ * the keys by pointer value and never matches the buffer of `token`.  The
+ * entries are therefore searched linearly and compared by content. */
+void FetchInstr::set_flag_from_string(const std::string& token)
+{
+   for (const auto& entry : s_flag_map) {
+      if (token == entry.first) {
+         set_fetch_flag(entry.second);
+         return;
+      }
+   }
+
+   std::cerr << "Token: " << token << " : ";
+   unreachable("Unknown token in fetch flag list");
+}
+
+
+/* Names of the fetch flags as they appear in the textual form of a fetch
+ * instruction.  The names are the ones do_print() emits (without the
+ * leading blank).
+ *
+ * The key type is `const char *`, so the map orders and looks up its keys
+ * by pointer value, not by string content. */
+const std::map<const char *, FetchInstr::EFlags> FetchInstr::s_flag_map = {
+   {"WQ", fetch_whole_quad},
+   {"UCF", use_const_field},
+   {"SRF", srf_mode},
+   {"BNS", buf_no_stride},
+   {"AC", alt_const},
+   {"TC", use_tc},
+   {"VPM", vpm},
+   {"UNCACHED", uncached},
+   {"INDEXED", indexed}
+};
+
+/* Printed names of the vertex data formats.  do_print() looks up the name
+ * by format, from_string_impl() searches the format by name.
+ *
+ * Every format must appear exactly once: a duplicated key in the
+ * initializer list is dropped on insertion.  The "11_11_10F" entry is
+ * therefore keyed on fmt_11_11_10_float; it used to repeat
+ * fmt_10_11_11_float, which left that name out of the map. */
+const std::map<EVTXDataFormat, const char *> FetchInstr::s_data_format_map = {
+   {fmt_invalid, "INVALID"},
+   {fmt_8, "8"},
+   {fmt_4_4, "4_4"},
+   {fmt_3_3_2, "3_3_2"},
+   {fmt_reserved_4, "RESERVED_4"},
+   {fmt_16, "16"},
+   {fmt_16_float, "16F"},
+   {fmt_8_8, "8_8"},
+   {fmt_5_6_5, "5_6_5"},
+   {fmt_6_5_5, "6_5_5"},
+   {fmt_1_5_5_5, "1_5_5_5"},
+   {fmt_4_4_4_4, "4_4_4_4"},
+   {fmt_5_5_5_1, "5_5_5_1"},
+   {fmt_32, "32"},
+   {fmt_32_float, "32F"},
+   {fmt_16_16,  "16_16"},
+   {fmt_16_16_float, "16_16F"},
+   {fmt_8_24, "8_24"},
+   {fmt_8_24_float, "8_24F"},
+   {fmt_24_8, "24_8"},
+   {fmt_24_8_float, "24_8F"},
+   {fmt_10_11_11, "10_11_11"},
+   {fmt_10_11_11_float, "10_11_11F"},
+   {fmt_11_11_10, "11_11_10"},
+   {fmt_11_11_10_float, "11_11_10F"},
+   {fmt_2_10_10_10, "2_10_10_10"},
+   {fmt_8_8_8_8, "8_8_8_8"},
+   {fmt_10_10_10_2, "10_10_10_2"},
+   {fmt_x24_8_32_float, "X24_8_32F"},
+   {fmt_32_32, "32_32"},
+   {fmt_32_32_float, "32_32F"},
+   {fmt_16_16_16_16, "16_16_16_16"},
+   {fmt_16_16_16_16_float, "16_16_16_16F"},
+   {fmt_reserved_33, "RESERVED_33"},
+   {fmt_32_32_32_32, "32_32_32_32"},
+   {fmt_32_32_32_32_float, "32_32_32_32F"},
+   {fmt_reserved_36, "RESERVED_36"},
+   {fmt_1, "1"},
+   {fmt_1_reversed, "1_REVERSED"},
+   {fmt_gb_gr, "GB_GR"},
+   {fmt_bg_rg, "BG_RG"},
+   {fmt_32_as_8, "32_AS_8"},
+   {fmt_32_as_8_8, "32_AS_8_8"},
+   {fmt_5_9_9_9_sharedexp, "5_9_9_9_SHAREDEXP"},
+   {fmt_8_8_8, "8_8_8"},
+   {fmt_16_16_16, "16_16_16"},
+   {fmt_16_16_16_float, "16_16_16F"},
+   {fmt_32_32_32, "32_32_32"},
+   {fmt_32_32_32_float, "32_32_32F"},
+   {fmt_bc1, "BC1"},
+   {fmt_bc2, "BC2"},
+   {fmt_bc3, "BC3"},
+   {fmt_bc4, "BC4"},
+   {fmt_bc5, "BC5"},
+   {fmt_apc0, "APC0"},
+   {fmt_apc1, "APC1"},
+   {fmt_apc2, "APC2"},
+   {fmt_apc3, "APC3"},
+   {fmt_apc4, "APC4"},
+   {fmt_apc5, "APC5"},
+   {fmt_apc6, "APC6"},
+   {fmt_apc7, "APC7"},
+   {fmt_ctx1, "CTX1"},
+   {fmt_reserved_63, "RESERVED_63"}
+};
+
+
+/* Buffer size query: a vc_get_buf_resinfo fetch on resource `resid` with
+ * a freshly created placeholder source register (sel 0, channel 7), no
+ * resource offset and a signed 32_32_32_32 format.  MFC, format and fetch
+ * type are not printed. */
+QueryBufferSizeInstr::QueryBufferSizeInstr(const RegisterVec4& dst,
+                                           const RegisterVec4::Swizzle& dst_swz,
+                                           uint32_t resid):
+   FetchInstr(vc_get_buf_resinfo, dst, dst_swz,
+              new Register( 0, 7, pin_fully), 0,
+              no_index_offset, fmt_32_32_32_32, vtx_nf_norm, vtx_es_none,
+              resid, nullptr)
+{
+   set_fetch_flag(format_comp_signed);
+
+   set_print_skip(mfc);
+   set_print_skip(fmt);
+   set_print_skip(ftype);
+}
+
+/* Parse a buffer size query from the text that follows the mnemonic:
+ *
+ *    <dest> : RID:<resource id>
+ *
+ * The separator character is initialised so that a failed extraction is
+ * caught by the assert instead of comparing an indeterminate value. */
+Instr::Pointer QueryBufferSizeInstr::from_string(std::istream& is, ValueFactory& vf)
+{
+   std::string deststr, res_id_str;
+   is >> deststr;
+
+   char help = '\0';
+   is >> help;
+   assert(help == ':');
+
+   is >> res_id_str;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dst = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   return new QueryBufferSizeInstr( dst, dst_swz, res_id);
+}
+
+/* Buffer load: a vc_fetch without index offset that uses the scaled
+ * number format, the signed-format flag and a mega fetch count of 16.  It
+ * is printed as "LOAD_BUF" without MFC, format and fetch type. */
+LoadFromBuffer::LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& dst_swizzle,
+                               PRegister addr, uint32_t addr_offset,
+                               uint32_t resid, PRegister res_offset, EVTXDataFormat data_format):
+   FetchInstr(vc_fetch,
+              dst, dst_swizzle,
+              addr, addr_offset,
+              no_index_offset,
+              data_format,
+              vtx_nf_scaled,
+              vtx_es_none,
+              resid, res_offset)
+{
+   set_fetch_flag(format_comp_signed);
+   set_mfc(16);
+
+   override_opname("LOAD_BUF");
+
+   set_print_skip(mfc);
+   set_print_skip(fmt);
+   set_print_skip(ftype);
+}
+
+/* Parse a buffer load from the text that follows the mnemonic:
+ *
+ *    <dest> : <addr> [+ <offset>b] RID:<id> [+ <resource offset>] [SRF]
+ *
+ * The load is always created with the 32_32_32_32F data format.
+ *
+ * Results of the value factory are checked before as_register() is called
+ * on them.  The SRF flag is looked for in all remaining tokens; the
+ * previous code only found it because a failed extraction leaves the
+ * target string unchanged. */
+Instr::Pointer LoadFromBuffer::from_string(std::istream& is, ValueFactory& vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dst = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   char help = '\0';
+   is >> help;
+   assert(help == ':');
+
+   string addrstr;
+   is >> addrstr;
+   auto addr_value = vf.src_from_string(addrstr);
+   PRegister addr_reg = addr_value ? addr_value->as_register() : nullptr;
+
+   string res_id_str;
+   string next;
+   is >> next;
+
+   int addr_offset_val = 0;
+
+   if (next == "+") {
+      is >> addr_offset_val;
+      help = '\0';
+      is >> help;
+      assert(help == 'b');
+      is >> res_id_str;
+   } else {
+      res_id_str = next;
+   }
+
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   next.clear();
+   is >> next;
+   PRegister res_offset = nullptr;
+   if (next == "+") {
+      string res_offset_str;
+      is >> res_offset_str;
+      auto res_offset_value = vf.src_from_string(res_offset_str);
+      if (res_offset_value)
+         res_offset = res_offset_value->as_register();
+
+      /* The "+" has been consumed; move on to the token after the
+       * resource offset (if any). */
+      next.clear();
+      is >> next;
+   }
+
+   auto fetch = new LoadFromBuffer( dst, dst_swz,
+                                    addr_reg, addr_offset_val,
+                                    res_id, res_offset, fmt_32_32_32_32_float);
+
+   /* `next` holds the first unconsumed token (or is empty); SRF is the
+    * only trailing token that is evaluated. */
+   do {
+      if (next == "SRF")
+         fetch->set_fetch_flag(srf_mode);
+      next.clear();
+   } while (is >> next);
+
+   return fetch;
+}
+
+/* Visitor that configures a LoadFromScratch according to the kind of its
+ * address operand.  Only registers and literal constants are supported;
+ * every other value kind triggers an assert. */
+class AddrResolver: public RegisterVisitor {
+public:
+   AddrResolver(LoadFromScratch *lfs) : m_lfs(lfs) {}
+
+   /* Register address: mark the load as indexed, use the register as
+    * source and record the load as one of its uses. */
+   void visit(Register& value)
+   {
+      m_lfs->set_fetch_flag(FetchInstr::indexed);
+      m_lfs->set_src(&value);
+      value.add_use(m_lfs);
+   }
+
+   /* Literal address: it becomes the array base and a newly created
+    * register (sel 0, channel 7) is set as source. */
+   void visit(LiteralConstant& value)
+   {
+      m_lfs->set_array_base(value.value());
+      m_lfs->set_src(new Register( 0, 7, pin_none));
+   }
+
+   /* Unsupported address kinds. */
+   void visit(LocalArray& value)
+   {
+      assert(0);
+      (void)value;
+   }
+
+   void visit(LocalArrayValue& value)
+   {
+      assert(0);
+      (void)value;
+   }
+
+   void visit(UniformValue& value)
+   {
+      assert(0);
+      (void)value;
+   }
+
+   void visit(InlineConstant& value)
+   {
+      assert(0);
+      (void)value;
+   }
+
+   LoadFromScratch *m_lfs;
+};
+
+
+
+/* Scratch read: a vc_read_scratch fetch with the uncached and wait_ack
+ * flags set.  The array size is stored as scratch_size - 1, the element
+ * size is 3, and the source register / array base are derived from `addr`
+ * by an AddrResolver.  MFC, format and fetch type are not printed. */
+LoadFromScratch::LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& dst_swz, PVirtualValue addr, uint32_t scratch_size):
+   FetchInstr(vc_read_scratch, dst, dst_swz,
+              nullptr, 0,
+              no_index_offset, fmt_32_32_32_32, vtx_nf_int, vtx_es_none,
+              0, nullptr)
+{
+   set_fetch_flag(uncached);
+   set_fetch_flag(wait_ack);
+
+   assert(scratch_size >= 1);
+   set_array_size(scratch_size - 1);
+
+   /* Start with base 0; the resolver overrides it for a literal address
+    * and always sets the source register. */
+   set_array_base(0);
+   AddrResolver resolver(this);
+   addr->accept(resolver);
+
+   set_print_skip(mfc);
+   set_print_skip(fmt);
+   set_print_skip(ftype);
+
+   set_element_size(3);
+}
+
+/* Parse a scratch read from the text that follows the mnemonic:
+ *
+ *    <dest> : <addr> SIZE:<scratch size>
+ *
+ * The separator character is initialised so that a failed extraction is
+ * caught by the assert, and the parsed address is checked because the
+ * constructor dereferences it unconditionally. */
+Instr::Pointer LoadFromScratch::from_string(std::istream& is, ValueFactory &vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dest = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   char help = '\0';
+   is >> help;
+   assert(help == ':');
+
+   string addrstr;
+   is >> addrstr;
+   auto addr_reg = vf.src_from_string(addrstr);
+   assert(addr_reg);
+
+   string offsetstr;
+   is >> offsetstr;
+   int size = int_from_string_with_prefix(offsetstr, "SIZE:");
+   assert(size >= 1);
+
+   return new LoadFromScratch( dest, dst_swz, addr_reg, size);
+}
+
+}
+
--- a/src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
@ -0,0 +1,152 @@
+#ifndef INSTR_FETCH_H
+#define INSTR_FETCH_H
+
+#include "sfn_instr.h"
+
+namespace r600 {
+
+class ValueFactory;
+
+/* Fetch instruction with a vector result. It stores the fetch opcode, a
+ * source register plus offset, the format descriptors, a resource id with
+ * an optional offset register, and a set of EFlags. */
+class FetchInstr : public InstrWithVectorResult {
+public:
+
+   /* Flags kept in m_tex_flags. `unknown` sizes the bitset, so it has to
+    * stay the last enumerator. */
+   enum EFlags {
+      fetch_whole_quad,
+      use_const_field,
+      format_comp_signed,
+      srf_mode,
+      buf_no_stride,
+      alt_const,
+      use_tc,
+      vpm,
+      is_mega_fetch,
+      uncached,
+      indexed,
+      wait_ack,
+      unknown
+   };
+
+   /* Items that can be marked through set_print_skip(); presumably they
+    * are omitted by do_print() - confirm in the implementation. `count`
+    * sizes the bitset and has to stay last. */
+   enum EPrintSkip {
+      fmt,
+      ftype,
+      mfc,
+      count
+   };
+
+   FetchInstr(EVFetchInstr opcode,
+              const RegisterVec4& dst,
+              const RegisterVec4::Swizzle& dest_swizzle,
+              PRegister src,
+              uint32_t src_offset,
+              EVFetchType fetch_type,
+              EVTXDataFormat data_format,
+              EVFetchNumFormat num_format,
+              EVFetchEndianSwap endian_swap,
+              uint32_t resource_id,
+              PRegister resource_offset);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   /* set_src() only stores the pointer; src() asserts that it is set. */
+   void set_src(PRegister src) { m_src = src; }
+   const auto& src() const {assert(m_src); return *m_src;}
+   uint32_t src_offset() const {return m_src_offset;}
+
+   uint32_t resource_id() const {return m_resource_id;}
+   auto resource_offset() const {return m_resource_offset;}
+
+   EVFetchType fetch_type() const {return m_fetch_type;}
+   EVTXDataFormat data_format() const {return m_data_format;}
+   void  set_num_format(EVFetchNumFormat nf) {m_num_format = nf;}
+   EVFetchNumFormat num_format() const {return m_num_format;}
+   EVFetchEndianSwap endian_swap() const {return m_endian_swap;}
+
+   uint32_t mega_fetch_count() const {return m_mega_fetch_count;}
+   uint32_t array_base() const {return m_array_base;}
+   uint32_t array_size() const {return m_array_size;}
+   uint32_t elm_size() const {return m_elm_size;}
+
+   void reset_fetch_flag(EFlags flag) {m_tex_flags.reset(flag);}
+   void set_fetch_flag(EFlags flag) {m_tex_flags.set(flag);}
+   bool has_fetch_flag(EFlags flag) const { return m_tex_flags.test(flag);}
+
+   EVFetchInstr opcode() const {return m_opcode;}
+
+   bool is_equal_to(const FetchInstr& rhs) const;
+
+   static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
+
+   /* Stores the mega fetch count and also sets the is_mega_fetch flag.
+    * Note that the parameter name shadows the EPrintSkip enumerator `mfc`
+    * inside this function. */
+   void set_mfc(int mfc) {m_tex_flags.set(is_mega_fetch); m_mega_fetch_count = mfc;}
+   void set_array_base(int arrb) {m_array_base = arrb;}
+   void set_array_size(int arrs) {m_array_size = arrs;}
+
+   void set_element_size(int size) { m_elm_size = size;}
+   void set_print_skip(EPrintSkip skip) {m_skip_print.set(skip);}
+   /* A fetch instruction always reports one slot. */
+   uint32_t slots() const override {return 1;};
+
+   bool replace_source(PRegister old_src, PVirtualValue new_src) override;
+
+protected:
+   static Instr::Pointer from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory &vf);
+
+   /* Stores an alternative opcode name in m_opname. */
+   void override_opname(const char *opname) { m_opname = opname;}
+
+private:
+   bool do_ready() const override;
+
+   void do_print(std::ostream& os) const override;
+
+   void set_param_from_string(const std::string& next_token);
+   void set_flag_from_string(const std::string& next_token);
+
+   static const std::map<EVTXDataFormat, const char *> s_data_format_map;
+   /* NOTE(review): a std::map keyed by `const char *` orders and compares
+    * keys by pointer value, not by string content - confirm that lookups
+    * do not rely on find() with a differently stored string. */
+   static const std::map<const char *, EFlags> s_flag_map;
+
+   bool propagate_death() override;
+
+   EVFetchInstr m_opcode;
+
+   PRegister m_src;
+   uint32_t m_src_offset;
+
+   EVFetchType m_fetch_type;
+   EVTXDataFormat m_data_format;
+   EVFetchNumFormat m_num_format;
+   EVFetchEndianSwap m_endian_swap;
+
+   uint32_t m_resource_id;
+   PRegister m_resource_offset;
+
+   std::bitset<EFlags::unknown> m_tex_flags;
+   std::bitset<EPrintSkip::count> m_skip_print;
+
+   uint32_t m_mega_fetch_count;
+   uint32_t m_array_base;
+   uint32_t m_array_size;
+   uint32_t m_elm_size;
+
+   std::string m_opname;
+};
+
+/* FetchInstr specialization constructed from a destination vector, its
+ * swizzle and a resource id; presumably it queries the size of the buffer
+ * bound at that resource id - confirm in the implementation. */
+class QueryBufferSizeInstr : public FetchInstr {
+public:
+   QueryBufferSizeInstr(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, uint32_t resid);
+   static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
+};
+
+/* FetchInstr specialization constructed from an address register with
+ * offset, a resource id with an optional offset register, and a data
+ * format. */
+class LoadFromBuffer : public FetchInstr {
+public:
+   LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle,
+                  PRegister addr, uint32_t addr_offset,
+                  uint32_t resid, PRegister res_offset, EVTXDataFormat data_format);
+   static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
+};
+
+/* FetchInstr specialization for reads from scratch memory.
+ *
+ * The last constructor argument is the scratch size (the definition
+ * asserts it is >= 1), not an offset; the parameter is named accordingly
+ * to match the definition. */
+class LoadFromScratch : public FetchInstr {
+public:
+   LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, PVirtualValue addr, uint32_t scratch_size);
+   static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
+};
+
+}
+#endif // INSTR_FETCH_H
--- a/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp
@ -0,0 +1,411 @@
+#include "sfn_instr_lds.h"
+#include "sfn_instr_alu.h"
+#include "sfn_debug.h"
+
+namespace r600 {
+
+using std::istream;
+
+/* Create an LDS read of address.size() values. value[i] receives the
+ * data read from address[i]; both containers must have the same size.
+ * The instruction registers itself as parent of every destination and as
+ * user of every address that is a register. */
+LDSReadInstr::LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
+                           AluInstr::SrcValues& address):
+   m_address(address),
+   m_dest_value(value)
+{
+   assert(m_address.size() == m_dest_value.size());
+
+   for (auto& dest : m_dest_value) {
+      dest->add_parent(this);
+   }
+
+   for (auto& addr : m_address) {
+      auto addr_reg = addr->as_register();
+      if (addr_reg) {
+         addr_reg->add_use(this);
+      }
+   }
+}
+
+/* Visitor dispatch (read-only): passes this instruction by const
+ * reference to the visitor. */
+void LDSReadInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+/* Visitor dispatch (mutable): passes a pointer to this instruction to
+ * the visitor. */
+void LDSReadInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+/* Drop every (address, destination) pair whose destination has no users.
+ * For dropped pairs the instruction is removed from the address
+ * register's uses and from the destination's parents.
+ *
+ * Returns false when nothing was unused, otherwise whether the number of
+ * addresses changed. The mask is 8 bits wide, so only the first eight
+ * components can be marked as unused. */
+bool LDSReadInstr::remove_unused_components()
+{
+   /* First pass: collect the unused components before touching any
+    * use/parent bookkeeping. */
+   uint8_t unused = 0;
+   for (size_t c = 0; c < m_dest_value.size(); ++c) {
+      if (m_dest_value[c]->uses().empty())
+         unused |= 1 << c;
+   }
+
+   if (unused == 0)
+      return false;
+
+   const size_t addr_count_before = m_address.size();
+
+   AluInstr::SrcValues kept_addr;
+   std::vector<PRegister, Allocator<PRegister>> kept_dest;
+
+   /* Second pass: keep the live pairs, unlink the dead ones. */
+   for (size_t c = 0; c < m_dest_value.size(); ++c) {
+      const bool is_live = ((1 << c) & unused) == 0;
+      if (is_live) {
+         kept_dest.push_back(m_dest_value[c]);
+         kept_addr.push_back(m_address[c]);
+         continue;
+      }
+
+      auto addr_reg = m_address[c]->as_register();
+      if (addr_reg)
+         addr_reg->del_use(this);
+      m_dest_value[c]->del_parent(this);
+   }
+
+   m_dest_value.swap(kept_dest);
+   m_address.swap(kept_addr);
+
+   return m_address.size() != addr_count_before;
+}
+
+/* Visitor that sets the alu_lds_address flag on an AluInstr it visits.
+ * The using-declaration keeps the other AluInstrVisitor::visit overloads
+ * visible next to the override. */
+class SetLDSAddrProperty : public AluInstrVisitor {
+   using AluInstrVisitor::visit;
+   void visit(AluInstr *instr) override {
+      instr->set_alu_flag(alu_lds_address);
+   }
+};
+
+/* Expand this LDS read into ALU instructions appended to out_block.
+ *
+ * For every address one DS_OP_READ_RET instruction is emitted, followed
+ * by one MOV per destination that reads ALU_SRC_LDS_OQ_A_POP. The emitted
+ * instructions are chained with add_required_instr(), starting from
+ * last_lds_instr when it is not null. The first emitted instruction gets
+ * alu_lds_group_start and the last one alu_lds_group_end.
+ *
+ * Returns the last instruction of the chain (last_lds_instr unchanged if
+ * nothing was emitted). */
+AluInstr *LDSReadInstr::split(std::vector<AluInstr*>& out_block, AluInstr *last_lds_instr)
+{
+   AluInstr* first_instr = nullptr;
+   SetLDSAddrProperty prop;
+   for (auto& addr: m_address) {
+      auto reg = addr->as_register();
+      if (reg) {
+         /* This instruction no longer uses the address register; if the
+          * register has exactly one parent, flag that parent as producing
+          * an LDS address. */
+         reg->del_use(this);
+         if (reg->parents().size() == 1) {
+            for (auto& p: reg->parents()) {
+               p->accept(prop);
+            }
+         }
+      }
+
+      auto instr = new AluInstr(DS_OP_READ_RET, nullptr, nullptr, addr);
+      instr->set_blockid(block_id(), index());
+
+      if (last_lds_instr)
+         instr->add_required_instr(last_lds_instr);
+      out_block.push_back(instr);
+      last_lds_instr = instr;
+      if (!first_instr) {
+         first_instr = instr;
+         first_instr->set_alu_flag(alu_lds_group_start);
+      } else {
+         /* In order to make it possible that the scheduler
+          * keeps the loads of a group close together, we
+          * require that the addresses are all already available
+          * when the first read instruction is emitted.
+          * Otherwise it might happen that the loads and reads from the
+          * queue are split across ALU cf clauses, and this is not allowed */
+         first_instr->add_extra_dependency(addr);
+      }
+   }
+
+   /* One MOV per destination takes the read value from the LDS output
+    * queue; each MOV requires the previously emitted instruction. */
+   for (auto& dest: m_dest_value) {
+      dest->del_parent(this);
+      auto instr = new AluInstr(op1_mov, dest,
+                                new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
+                                AluInstr::last_write);
+      instr->add_required_instr(last_lds_instr);
+      instr->set_blockid(block_id(), index());
+      out_block.push_back(instr);
+      last_lds_instr = instr;
+   }
+   if (last_lds_instr)
+      last_lds_instr->set_alu_flag(alu_lds_group_end);
+
+   return last_lds_instr;
+}
+
+/* Must never be called: the instruction is expanded by split() and is
+ * not expected to reach the scheduler. The return statement only exists
+ * to satisfy the signature. */
+bool LDSReadInstr::do_ready() const
+{
+   unreachable("This instruction is not handled by the scheduler");
+   return false;
+}
+
+/* Print as "LDS_READ [ d0 d1 ... ] : [ a0 a1 ... ]"; every value is
+ * followed by a single space. */
+void LDSReadInstr::do_print(std::ostream& os) const
+{
+   os << "LDS_READ " << "[ ";
+   for (auto dest : m_dest_value)
+      os << *dest << " ";
+
+   os << "] : [ ";
+   for (auto addr : m_address)
+      os << *addr << " ";
+
+   os << "]";
+}
+
+/* Two LDS reads are equal when they have the same number of addresses
+ * and all addresses and destinations compare equal pairwise. */
+bool LDSReadInstr::is_equal_to(const LDSReadInstr& rhs) const
+{
+   if (m_address.size() != rhs.m_address.size())
+      return false;
+
+   for (unsigned c = 0; c < num_values(); ++c) {
+      const bool same_pair = m_address[c]->equal_to(*rhs.m_address[c]) &&
+                             m_dest_value[c]->equal_to(*rhs.m_dest_value[c]);
+      if (!same_pair)
+         return false;
+   }
+
+   return true;
+}
+
+/* Parse the operands of an LDS read. The stream must be positioned at
+ * the opening bracket of the destination list; all tokens are separated
+ * by whitespace:
+ *
+ *    [ d1 d2 d3 ... ] : [ a1 a2 a3 ... ]
+ *
+ * This is the same layout that do_print() writes after "LDS_READ".
+ * Both lists must be non-empty and of equal length.
+ *
+ * The token loops also stop when the stream fails. Otherwise a truncated
+ * input would never deliver the closing "]" and the loop would not
+ * terminate; the asserts after the loops then report the malformed input.
+ */
+auto LDSReadInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
+{
+   std::string temp_str;
+
+   is >> temp_str;
+   assert(temp_str == "[");
+
+   std::vector<PRegister, Allocator<PRegister> > dests;
+   AluInstr::SrcValues srcs;
+
+   is >> temp_str;
+   while (is && temp_str != "]") {
+      auto dst = value_factory.dest_from_string(temp_str);
+      assert(dst);
+      dests.push_back(dst);
+      is >> temp_str;
+   }
+   assert(temp_str == "]");
+
+   is >> temp_str;
+   assert(temp_str == ":");
+   is >> temp_str;
+   assert(temp_str == "[");
+
+   is >> temp_str;
+   while (is && temp_str != "]") {
+      auto src = value_factory.src_from_string(temp_str);
+      assert(src);
+      srcs.push_back(src);
+      is >> temp_str;
+   }
+   assert(temp_str == "]");
+   assert(srcs.size() == dests.size() && !dests.empty());
+
+   return new LDSReadInstr(dests, srcs);
+}
+
+/* Create an LDS atomic/write operation.
+ *
+ * op       LDS opcode
+ * dest     result register, may be null when no result is read back
+ * address  LDS address, must not be null
+ * srcs     data operands
+ *
+ * The instruction registers itself as parent of the destination (if any)
+ * and as user of the address and of every source that is a register. */
+LDSAtomicInstr::LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address,
+                               const SrcValues& srcs):
+   m_opcode(op),
+   m_address(address),
+   m_dest(dest),
+   m_srcs(srcs)
+{
+   if (m_dest != nullptr) {
+      m_dest->add_parent(this);
+   }
+
+   auto addr_reg = m_address->as_register();
+   if (addr_reg) {
+      addr_reg->add_use(this);
+   }
+
+   for (auto& src : m_srcs) {
+      auto src_reg = src->as_register();
+      if (src_reg) {
+         src_reg->add_use(this);
+      }
+   }
+}
+
+
+
+/* Visitor dispatch (read-only): passes this instruction by const
+ * reference to the visitor. */
+void LDSAtomicInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+/* Visitor dispatch (mutable): passes a pointer to this instruction to
+ * the visitor. */
+void LDSAtomicInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+/* Expand this LDS operation into ALU instructions appended to out_block.
+ *
+ * One ALU instruction with the LDS opcode is emitted; its sources are
+ * the address followed by the data operands. When a destination exists,
+ * a MOV reading ALU_SRC_LDS_OQ_A_POP follows and the two instructions
+ * are flagged as LDS group start and end.
+ *
+ * Returns the MOV when a destination exists, otherwise the unmodified
+ * last_lds_instr argument.
+ * NOTE(review): without a destination the emitted op is not returned, so
+ * a following LDS instruction will not depend on it - confirm this is
+ * intended. */
+AluInstr *LDSAtomicInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr)
+{
+   /* Source list of the ALU op: address first, then the operands. */
+   AluInstr::SrcValues srcs = {m_address};
+
+   for(auto& s : m_srcs)
+      srcs.push_back(s);
+
+   /* This instruction stops being a user of its source registers. */
+   for(auto& s :srcs) {
+      if (s->as_register())
+         s->as_register()->del_use(this);
+   }
+
+   SetLDSAddrProperty prop;
+   auto reg = srcs[0]->as_register();
+   if (reg) {
+      /* NOTE(review): the loop above already called del_use() for the
+       * address register; this second call looks redundant - confirm
+       * that del_use() is idempotent. */
+      reg->del_use(this);
+      /* If the address register has exactly one parent, flag that
+       * parent as producing an LDS address. */
+      if (reg->parents().size() == 1) {
+         for (auto& p: reg->parents()) {
+            p->accept(prop);
+         }
+      }
+   }
+
+   auto op_instr = new AluInstr(m_opcode, srcs, {});
+   op_instr->set_blockid(block_id(), index());
+
+   if (last_lds_instr) {
+      op_instr->add_required_instr(last_lds_instr);
+   }
+
+   out_block.push_back(op_instr);
+   if (m_dest) {
+      /* Read the result back from the LDS output queue. */
+      op_instr->set_alu_flag(alu_lds_group_start);
+      m_dest->del_parent(this);
+      auto read_instr = new AluInstr(op1_mov, m_dest,
+                                     new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
+                                     AluInstr::last_write);
+      read_instr->add_required_instr(op_instr);
+      read_instr->set_blockid(block_id(), index());
+      read_instr->set_alu_flag(alu_lds_group_end);
+      out_block.push_back(read_instr);
+      last_lds_instr = read_instr;
+   }
+   return last_lds_instr;
+}
+
+/* Replace every occurrence of old_src in the data operands (m_srcs) by
+ * new_src and update the use bookkeeping. The address (m_address) is not
+ * touched by this function.
+ *
+ * Returns true if at least one operand was replaced, false if the
+ * replacement was rejected or old_src is not among the operands. */
+bool LDSAtomicInstr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   bool process = false;
+
+
+   if (new_src->as_uniform() && m_srcs.size() > 2) {
+      /* Count the uniform operands other than the one to be replaced. */
+      int nconst = 0;
+      for (auto& s : m_srcs) {
+         if (s->as_uniform() && !s->equal_to(*old_src))
+            ++nconst;
+      }
+      /* Conservative check: we can always live with two kcache values,
+       * three might be a problem; don't bother for now, just reject.
+       */
+      if (nconst > 2)
+         return false;
+   }
+
+   /* If the old source is an array element, we assume that there
+    * might have been an (untracked) indirect access, so don't replace
+    * this source */
+   if (old_src->pin() == pin_array)
+      return false;
+
+   if (new_src->get_addr()) {
+      for (auto& s : m_srcs) {
+         auto addr = s->get_addr();
+         /* can't have two different indirect addresses in the same instr */
+         if (addr && !addr->equal_to(*new_src->get_addr()))
+            return false;
+      }
+   }
+
+   for (unsigned i = 0; i < m_srcs.size(); ++i) {
+      if (old_src->equal_to(*m_srcs[i])) {
+         m_srcs[i] = new_src;
+         process = true;
+      }
+   }
+
+   /* Only after a successful replacement: the new source (if it is a
+    * register) gains this instruction as user, the old one loses it. */
+   if (process) {
+      auto r = new_src->as_register();
+      if (r)
+         r->add_use(this);
+      old_src->del_use(this);
+   }
+   return process;
+}
+
+/* Must never be called: the instruction is expanded by split() and is
+ * not expected to reach the scheduler. The return statement only exists
+ * to satisfy the signature. */
+bool LDSAtomicInstr::do_ready() const
+{
+   unreachable("This instruction is not handled by the scheduler");
+   return false;
+}
+
+/* Print as "LDS <op> <dest> [ <address> ] : <src0> [<src1>]". A missing
+ * destination is written as "__.x". The first operand is printed
+ * unconditionally, so m_srcs must not be empty. */
+void LDSAtomicInstr::do_print(std::ostream& os) const
+{
+   auto op_entry = lds_ops.find(m_opcode);
+   assert(op_entry != lds_ops.end());
+
+   os << "LDS " << op_entry->second.name << " ";
+
+   if (!m_dest)
+      os << "__.x";
+   else
+      os << *m_dest;
+
+   os << " [ " << *m_address << " ] : " << *m_srcs[0];
+
+   const bool has_second_src = m_srcs.size() > 1;
+   if (has_second_src)
+      os << " " << *m_srcs[1];
+}
+
+/* Two LDS atomic instructions are equal when all operands compare equal
+ * pairwise and opcode, address and destination match. */
+bool LDSAtomicInstr::is_equal_to(const LDSAtomicInstr& rhs) const
+{
+   const auto nsrc = m_srcs.size();
+   if (nsrc != rhs.m_srcs.size())
+      return false;
+
+   for (unsigned s = 0; s < nsrc; ++s) {
+      if (!m_srcs[s]->equal_to(*rhs.m_srcs[s]))
+         return false;
+   }
+
+   if (m_opcode != rhs.m_opcode)
+      return false;
+
+   if (!sfn_value_equal(m_address, rhs.m_address))
+      return false;
+
+   return sfn_value_equal(m_dest, rhs.m_dest);
+}
+
+
+
+/* Parse an LDS atomic/write instruction. The first token read here is
+ * the opcode name, so the leading "LDS" keyword must already have been
+ * consumed by the caller. All tokens are separated by whitespace. */
+auto LDSAtomicInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
+{
+   /* General form: LDS <OP> <dest | __.x> [ <address> ] : <src> [<src>] */
+   /* LDS WRITE2 __.x [ R1.x ] : R2.y R3.z */
+   /* LDS WRITE __.x [ R1.x ] : R2.y  */
+   /* A destination token is always expected; a token starting with '_'
+    * means that no result is read back. */
+
+   std::string temp_str;
+
+   is >> temp_str;
+
+   ESDOp opcode = DS_OP_INVALID;
+   int nsrc = 0;
+
+   /* Look the opcode up by its name in the lds_ops table. */
+   for (auto& [op, opinfo] : lds_ops) {
+      if (temp_str == opinfo.name) {
+         opcode = op;
+         nsrc = opinfo.nsrc;
+         break;
+      }
+   }
+
+   assert(opcode != DS_OP_INVALID);
+
+   is >> temp_str;
+
+   PRegister dest = nullptr;
+   if (temp_str[0] != '_')
+      dest = value_factory.dest_from_string(temp_str);
+
+   is >> temp_str;
+   assert(temp_str == "[");
+   is >> temp_str;
+   auto addr = value_factory.src_from_string(temp_str);
+
+   is >> temp_str;
+   assert(temp_str == "]");
+
+   is >> temp_str;
+   assert(temp_str == ":");
+
+   /* The table's source count includes the address, hence nsrc - 1 data
+    * operands are read here. */
+   AluInstr::SrcValues srcs;
+   for (int i = 0; i < nsrc - 1; ++i) {
+      is >> temp_str;
+      auto src = value_factory.src_from_string(temp_str);
+      assert(src);
+      srcs.push_back(src);
+   }
+
+   return new LDSAtomicInstr(opcode, dest, addr, srcs);
+}
+
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_instr_lds.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_lds.h
@ -0,0 +1,80 @@
+#ifndef LDSINSTR_H
+#define LDSINSTR_H
+
+#include "sfn_instr_alu.h"
+#include "sfn_valuefactory.h"
+
+namespace r600 {
+
+/* Reads a group of values from LDS: value i is read from address i into
+ * destination i. The instruction is expanded into ALU instructions by
+ * split(). */
+class LDSReadInstr : public Instr {
+public:
+   /* Both containers are copied; they must have the same size. */
+   LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
+                AluInstr::SrcValues& address);
+
+   /* Number of values read (size of the destination list). */
+   unsigned num_values() const { return m_dest_value.size();}
+   /* Unchecked element access; i must be smaller than num_values(). */
+   auto address(unsigned i) const { return m_address[i];}
+   auto dest(unsigned i) const { return m_dest_value[i];}
+
+   auto address(unsigned i){ return m_address[i];}
+   auto dest(unsigned i)  { return m_dest_value[i];}
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   /* Appends the replacing ALU instructions to out_block and returns the
+    * last instruction of the resulting dependency chain. */
+   AluInstr *split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr);
+   bool is_equal_to(const LDSReadInstr& lhs) const;
+
+   static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
+
+   /* Drops the address/destination pairs whose destination has no users. */
+   bool remove_unused_components();
+
+private:
+
+   /* Not supported: the implementation is marked unreachable. */
+   bool do_ready() const override;
+
+   void do_print(std::ostream& os) const override;
+
+   AluInstr::SrcValues m_address;
+   std::vector<PRegister, Allocator<PRegister>> m_dest_value;
+};
+
+/* LDS operation with one address, one or two data operands and an
+ * optional result register. The instruction is expanded into ALU
+ * instructions by split(). */
+class LDSAtomicInstr : public Instr {
+public:
+   using SrcValues = AluInstr::SrcValues;
+
+   /* dest may be null when no result is read back. */
+   LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address, const SrcValues& src);
+
+   auto address() const { return m_address;}
+   auto dest() const { return m_dest;}
+   /* src0() indexes m_srcs unchecked; src1() returns nullptr when there
+    * is no second operand. */
+   auto src0() const { return m_srcs[0];}
+   auto src1() const { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;}
+
+   PVirtualValue address() { return m_address;}
+   PRegister dest()  { return m_dest;}
+   PVirtualValue src0() { return m_srcs[0];}
+   PVirtualValue src1() { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;}
+
+   unsigned op() const {return m_opcode;}
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   /* Appends the replacing ALU instructions to out_block. */
+   AluInstr *split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr);
+   bool is_equal_to(const LDSAtomicInstr& lhs) const;
+
+   static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
+   /* Replaces old_src within the data operands only. */
+   bool replace_source(PRegister old_src, PVirtualValue new_src) override;
+
+private:
+   /* Not supported: the implementation is marked unreachable. */
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ESDOp m_opcode;
+   PVirtualValue m_address{nullptr};
+   PRegister m_dest{nullptr};
+   SrcValues m_srcs;
+};
+
+}
+
+#endif // LDSINSTR_H
--- a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
@ -0,0 +1,844 @@
+#include "sfn_instr_mem.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_tex.h"
+#include "sfn_shader.h"
+
+namespace r600 {
+
+
+/* Create a GDS operation.
+ *
+ * op        GDS opcode
+ * dest      result register; it is dereferenced here, so it must not be
+ *           null
+ * src       source vector
+ * uav_base  constant UAV base
+ * uav_id    optional register holding a UAV index, may be null
+ *
+ * The instruction is flagged through set_always_keep() and registers
+ * itself as user of the sources (and of uav_id, if given) and as parent
+ * of the destination. */
+GDSInstr::GDSInstr(ESDOp op, Register *dest,
+                   const RegisterVec4& src, int uav_base,
+                   PRegister uav_id):
+   m_op(op),
+   m_dest(dest),
+   m_src(src),
+   m_uav_base(uav_base),
+   m_uav_id(uav_id)
+{
+   set_always_keep();
+
+   m_src.add_use(this);
+   m_dest->add_parent(this);
+
+   if (m_uav_id != nullptr) {
+      m_uav_id->add_use(this);
+   }
+}
+
+/* Two GDS instructions are equal when opcode, source vector, UAV base,
+ * destination and UAV id register all match.
+ *
+ * The destination comparison has to be checked explicitly: calling
+ * sfn_value_equal() without using its result would let instructions that
+ * differ only in their destination compare as equal. */
+bool GDSInstr::is_equal_to(const GDSInstr& rhs) const
+{
+#define NE(X) (X != rhs. X)
+
+   if (NE(m_op) ||
+       NE(m_src) ||
+       NE(m_uav_base))
+      return false;
+
+   if (!sfn_value_equal(m_dest, rhs.m_dest))
+      return false;
+
+   return sfn_value_equal(m_uav_id, rhs.m_uav_id);
+}
+
+/* Visitor dispatch (read-only): passes this instruction by const
+ * reference to the visitor. */
+void GDSInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+/* Visitor dispatch (mutable): passes a pointer to this instruction to
+ * the visitor. */
+void GDSInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+/* The instruction is ready when its source vector is ready and, if a UAV
+ * id register is used, that register is ready as well. */
+bool GDSInstr::do_ready() const
+{
+   if (!m_src.ready(block_id(), index()))
+      return false;
+
+   if (!m_uav_id)
+      return true;
+
+   return m_uav_id->ready(block_id(), index());
+}
+
+/* Print as "GDS <op><dest> <src> BASE:<base>[ UAV:<reg>]". Note that no
+ * separator is written between the opcode name and the destination. */
+void GDSInstr::do_print(std::ostream& os) const
+{
+   os << "GDS " << lds_ops.at(m_op).name << *m_dest;
+   os << " " << m_src << " BASE:" << m_uav_base;
+
+   if (m_uav_id != nullptr) {
+      os << " UAV:" << *m_uav_id;
+   }
+}
+
+/* Dispatch an atomic counter intrinsic to the matching emitter. Returns
+ * false for intrinsics that are not handled here, otherwise the result
+ * of the emitter. */
+bool GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
+{
+   switch (intr->intrinsic) {
+   /* Increment and pre-decrement have dedicated emitters. */
+   case nir_intrinsic_atomic_counter_inc:
+      return emit_atomic_inc(intr, shader);
+
+   case nir_intrinsic_atomic_counter_pre_dec:
+      return emit_atomic_pre_dec(intr, shader);
+
+   /* Plain read and post-decrement share the read emitter. */
+   case nir_intrinsic_atomic_counter_read:
+   case nir_intrinsic_atomic_counter_post_dec:
+      return emit_atomic_read(intr, shader);
+
+   /* Everything that takes a data operand. */
+   case nir_intrinsic_atomic_counter_add:
+   case nir_intrinsic_atomic_counter_and:
+   case nir_intrinsic_atomic_counter_or:
+   case nir_intrinsic_atomic_counter_xor:
+   case nir_intrinsic_atomic_counter_min:
+   case nir_intrinsic_atomic_counter_max:
+   case nir_intrinsic_atomic_counter_exchange:
+   case nir_intrinsic_atomic_counter_comp_swap:
+      return emit_atomic_op2(intr, shader);
+
+   default:
+      return false;
+   }
+}
+
+/* Map an atomic counter intrinsic to the GDS opcode that returns a
+ * value. Intrinsics without a mapping (including pre_dec) yield
+ * DS_OP_INVALID. */
+static ESDOp get_opcode(const nir_intrinsic_op opcode)
+{
+   ESDOp result = DS_OP_INVALID;
+
+   switch (opcode) {
+   case nir_intrinsic_atomic_counter_add:
+      result = DS_OP_ADD_RET;
+      break;
+   case nir_intrinsic_atomic_counter_and:
+      result = DS_OP_AND_RET;
+      break;
+   case nir_intrinsic_atomic_counter_or:
+      result = DS_OP_OR_RET;
+      break;
+   case nir_intrinsic_atomic_counter_xor:
+      result = DS_OP_XOR_RET;
+      break;
+   case nir_intrinsic_atomic_counter_min:
+      result = DS_OP_MIN_UINT_RET;
+      break;
+   case nir_intrinsic_atomic_counter_max:
+      result = DS_OP_MAX_UINT_RET;
+      break;
+   case nir_intrinsic_atomic_counter_inc:
+      result = DS_OP_INC_RET;
+      break;
+   case nir_intrinsic_atomic_counter_post_dec:
+      result = DS_OP_DEC_RET;
+      break;
+   case nir_intrinsic_atomic_counter_read:
+      result = DS_OP_READ_RET;
+      break;
+   case nir_intrinsic_atomic_counter_exchange:
+      result = DS_OP_XCHG_RET;
+      break;
+   case nir_intrinsic_atomic_counter_comp_swap:
+      result = DS_OP_CMP_XCHG_RET;
+      break;
+   case nir_intrinsic_atomic_counter_pre_dec:
+   default:
+      break;
+   }
+
+   return result;
+}
+
+/* Map an atomic counter intrinsic to the GDS opcode used when the result
+ * is not read. Exchange and compare-swap map to the returning opcodes
+ * here as well. Intrinsics without a mapping (including pre_dec and
+ * read) yield DS_OP_INVALID. */
+static ESDOp get_opcode_wo(const nir_intrinsic_op opcode)
+{
+   ESDOp result = DS_OP_INVALID;
+
+   switch (opcode) {
+   case nir_intrinsic_atomic_counter_add:
+      result = DS_OP_ADD;
+      break;
+   case nir_intrinsic_atomic_counter_and:
+      result = DS_OP_AND;
+      break;
+   case nir_intrinsic_atomic_counter_or:
+      result = DS_OP_OR;
+      break;
+   case nir_intrinsic_atomic_counter_xor:
+      result = DS_OP_XOR;
+      break;
+   case nir_intrinsic_atomic_counter_min:
+      result = DS_OP_MIN_UINT;
+      break;
+   case nir_intrinsic_atomic_counter_max:
+      result = DS_OP_MAX_UINT;
+      break;
+   case nir_intrinsic_atomic_counter_inc:
+      result = DS_OP_INC;
+      break;
+   case nir_intrinsic_atomic_counter_post_dec:
+      result = DS_OP_DEC;
+      break;
+   case nir_intrinsic_atomic_counter_exchange:
+      result = DS_OP_XCHG_RET;
+      break;
+   case nir_intrinsic_atomic_counter_comp_swap:
+      result = DS_OP_CMP_XCHG_RET;
+      break;
+   case nir_intrinsic_atomic_counter_pre_dec:
+   default:
+      break;
+   }
+
+   return result;
+}
+
+
+
+/* Emit an atomic counter operation that takes a data operand (add, and,
+ * or, xor, min, max, exchange, compare-swap).
+ *
+ * The returning opcode is selected when the NIR destination is not SSA
+ * or has uses, otherwise the write-only mapping is used. Returns false
+ * when the intrinsic has no GDS opcode, true after emitting.
+ *
+ * For chip classes below ISA_CC_CAYMAN the counter is addressed through
+ * the instruction's UAV base/id. Otherwise the byte address
+ * (4 * offset, plus 4 * uav_id for indirect access) is computed into
+ * component 0 of a temporary vector and the operand is moved into
+ * component 1. */
+bool GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
+
+   ESDOp op = read_result ? get_opcode(instr->intrinsic) :
+                            get_opcode_wo(instr->intrinsic);
+
+   if (DS_OP_INVALID == op)
+      return false;
+
+   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
+   /* The intrinsic base has to go through the shader's atomic base
+    * remapping, exactly as in emit_atomic_read/inc/pre_dec; adding the
+    * raw base would make this path address a different counter slot than
+    * the other atomic counter operations. */
+   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
+
+   auto dest = vf.dest(instr->dest, 0, pin_free);
+
+   /* The pre-Cayman path needs the operand in a register; copy
+    * non-register values into a temporary first. */
+   PRegister src_as_register = nullptr;
+   auto src_val = vf.src(instr->src[1], 0);
+   if (!src_val->as_register()) {
+      auto temp_src_val = vf.temp_register();
+      shader.emit_instruction(new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
+      src_as_register = temp_src_val;
+   } else
+      src_as_register = src_val->as_register();
+
+   if (uav_id != nullptr)
+      shader.set_flag(Shader::sh_indirect_atomic);
+
+   GDSInstr *ir = nullptr;
+   if (shader.chip_class() < ISA_CC_CAYMAN) {
+      RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
+      ir = new GDSInstr(op, dest, src, offset, uav_id);
+
+   } else {
+      /* NOTE(review): this shadows the outer `dest` and requests the
+       * destination from the value factory a second time - kept as is,
+       * confirm whether the second request is needed. */
+      auto dest = vf.dest(instr->dest, 0, pin_free);
+      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
+      if (uav_id)
+         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
+                                 AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
+      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
+      ir = new GDSInstr(op, dest, tmp, 0, nullptr);
+   }
+   shader.emit_instruction(ir);
+   return true;
+}
+
+/* Emit a DS_OP_READ_RET for an atomic counter; always returns true.
+ *
+ * For chip classes below ISA_CC_CAYMAN the counter is addressed through
+ * the instruction's UAV base/id. Otherwise the byte address
+ * (4 * offset, plus 4 * uav_id for indirect access) is computed into
+ * component 0 of a temporary vector that serves as source, and the
+ * instruction is created with base 0 and no UAV id register. */
+bool GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
+   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
+
+   auto dest = vf.dest(instr->dest, 0, pin_free);
+
+   GDSInstr *ir = nullptr;
+
+   if (shader.chip_class() < ISA_CC_CAYMAN) {
+      /* All source swizzles are 7 here, i.e. no component is selected. */
+      RegisterVec4 src = RegisterVec4(0, true, {7,7,7,7});
+      ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
+   } else {
+      auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
+      if (uav_id)
+         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
+                                 AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
+
+      ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
+   }
+
+   shader.emit_instruction(ir);
+   return true;
+}
+
+/* Emit an atomic counter increment as an add of shader.atomic_update();
+ * always returns true. DS_OP_ADD_RET is used when the NIR destination is
+ * not SSA or has uses, DS_OP_ADD otherwise.
+ *
+ * For chip classes below ISA_CC_CAYMAN the counter is addressed through
+ * the instruction's UAV base/id. Otherwise the byte address
+ * (4 * offset, plus 4 * uav_id for indirect access) goes into component 0
+ * of a temporary vector and the update value into component 1. */
+bool GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
+
+   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
+   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
+
+   GDSInstr *ir = nullptr;
+
+   if (shader.chip_class() < ISA_CC_CAYMAN) {
+      auto dest = vf.dest(instr->dest, 0, pin_free);
+      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
+      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
+                             dest, src, offset, uav_id);
+   } else {
+      auto dest = vf.dest(instr->dest, 0, pin_free);
+      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
+
+      if (uav_id)
+         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
+                                 AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
+
+      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
+      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
+                        dest, tmp, 0, nullptr);
+   }
+   shader.emit_instruction(ir);
+   return true;
+}
+
+/* Emit an atomic counter pre-decrement; always returns true.
+ *
+ * A DS_OP_SUB_RET with shader.atomic_update() as operand writes its
+ * result to a temporary register; the NIR destination is then computed
+ * as that temporary minus one.
+ *
+ * For chip classes below ISA_CC_CAYMAN the counter is addressed through
+ * the instruction's UAV base/id. Otherwise the byte address
+ * (4 * offset, plus 4 * uav_id for indirect access) goes into component 0
+ * of a temporary vector and the update value into component 1. */
+bool GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
+   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
+
+   auto *tmp_dest = vf.temp_register();
+
+   GDSInstr *ir = nullptr;
+
+   if (shader.chip_class() < ISA_CC_CAYMAN) {
+      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
+      ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, src, offset, uav_id);
+   } else {
+      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
+      if (uav_id)
+         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
+                                 AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
+
+      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
+      ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, tmp, 0, nullptr);
+   }
+
+   shader.emit_instruction(ir);
+   /* Subtract one from the value returned by the GDS operation to form
+    * the intrinsic's result. */
+   shader.emit_instruction(new AluInstr(op2_sub_int,  vf.dest(instr->dest, 0, pin_free),
+                                        tmp_dest, vf.one_i(), AluInstr::last_write));
+   return true;
+}
+
+
+/* Create a RAT memory instruction.
+ *
+ * cf_opcode      control flow opcode
+ * rat_op         RAT operation
+ * data, index    data and index vectors
+ * rat_id         constant RAT id
+ * rat_id_offset  optional register added to the RAT id, may be null
+ * burst_count, comp_mask, element_size  stored as given
+ *
+ * The instruction is flagged through set_always_keep() and registers
+ * itself as user of data, index and (if given) rat_id_offset. */
+RatInstr::RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
+                   const RegisterVec4& data, const RegisterVec4& index,
+                   int rat_id, PRegister rat_id_offset,
+                   int burst_count, int comp_mask, int element_size):
+   m_cf_opcode(cf_opcode),
+   m_rat_op(rat_op),
+   m_data(data),
+   m_index(index),
+   m_rat_id_offset(rat_id_offset),
+   m_rat_id(rat_id),
+   m_burst_count(burst_count),
+   m_comp_mask(comp_mask),
+   m_element_size(element_size)
+{
+   set_always_keep();
+
+   m_data.add_use(this);
+   m_index.add_use(this);
+
+   if (m_rat_id_offset != nullptr) {
+      m_rat_id_offset->add_use(this);
+   }
+}
+
+
+
+/* Visitor dispatch (read-only): passes this instruction by const
+ * reference to the visitor. */
+void RatInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+/* Visitor dispatch (mutable): passes a pointer to this instruction to
+ * the visitor. */
+void RatInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+/* Not implemented: asserts in debug builds and reports "not equal" when
+ * asserts are compiled out. */
+bool RatInstr::is_equal_to(const RatInstr& lhs) const
+{
+   (void)lhs;
+   assert(0);
+   return false;
+}
+
+/* The instruction is ready when its data and index vectors are ready.
+ * For every operation except STORE_TYPED all required instructions must
+ * additionally have been scheduled already. */
+bool RatInstr::do_ready() const
+{
+   const bool check_required = m_rat_op != STORE_TYPED;
+
+   if (check_required) {
+      for (auto req: required_instr()) {
+         if (!req->is_scheduled())
+            return false;
+      }
+   }
+
+   if (!m_data.ready(block_id(), index()))
+      return false;
+
+   return m_index.ready(block_id(), index());
+}
+
+/* Print as
+ * "MEM_RAT RAT <id>[+<offset reg>] @<index> OP:<op> <data> BC:<n> MASK:<m> ES:<e>[ ACK]". */
+void RatInstr::do_print(std::ostream& os) const
+{
+   os << "MEM_RAT RAT " << m_rat_id;
+
+   if (m_rat_id_offset != nullptr) {
+      os << "+" << *m_rat_id_offset;
+   }
+
+   os << " @" << m_index
+      << " OP:" << m_rat_op << " " << m_data
+      << " BC:" << m_burst_count
+      << " MASK:" << m_comp_mask
+      << " ES:" << m_element_size;
+
+   if (m_need_ack) {
+      os << " ACK";
+   }
+}
+
+/* Map an SSBO/image intrinsic to the RAT operation that returns a value.
+ * For compare-swap the float or integer variant is chosen from `format`.
+ * Any other intrinsic is a programming error and hits unreachable(). */
+static RatInstr::ERatOp
+get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format)
+{
+   switch (opcode) {
+   case nir_intrinsic_image_load:
+      return RatInstr::NOP_RTN;
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_image_atomic_add:
+      return RatInstr::ADD_RTN;
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_image_atomic_and:
+      return RatInstr::AND_RTN;
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_image_atomic_or:
+      return RatInstr::OR_RTN;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_image_atomic_imin:
+      return RatInstr::MIN_INT_RTN;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_image_atomic_imax:
+      return RatInstr::MAX_INT_RTN;
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_image_atomic_umin:
+      return RatInstr::MIN_UINT_RTN;
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_image_atomic_umax:
+      return RatInstr::MAX_UINT_RTN;
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_image_atomic_xor:
+      return RatInstr::XOR_RTN;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_comp_swap:
+      if (util_format_is_float(format))
+         return RatInstr::CMPXCHG_FLT_RTN;
+      else
+         return RatInstr::CMPXCHG_INT_RTN;
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_image_atomic_exchange:
+      return RatInstr::XCHG_RTN;
+   default:
+      /* This is the returning variant, so the message must not claim a
+       * write-only (WO) lookup. */
+      unreachable("Unsupported RAT instruction");
+   }
+}
+
+/* Map an SSBO/image atomic intrinsic to the RAT operation used when the
+ * result is not read. Exchange maps to XCHG_RTN here as well. For
+ * compare-swap the float or integer variant is chosen from `format`.
+ * Any other intrinsic hits unreachable(). */
+static RatInstr::ERatOp
+get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format)
+{
+   switch (opcode) {
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_image_atomic_add:
+      return RatInstr::ADD;
+
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_image_atomic_and:
+      return RatInstr::AND;
+
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_image_atomic_or:
+      return RatInstr::OR;
+
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_image_atomic_xor:
+      return RatInstr::XOR;
+
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_image_atomic_imin:
+      return RatInstr::MIN_INT;
+
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_image_atomic_imax:
+      return RatInstr::MAX_INT;
+
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_image_atomic_umin:
+      return RatInstr::MIN_UINT;
+
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_image_atomic_umax:
+      return RatInstr::MAX_UINT;
+
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_image_atomic_exchange:
+      return RatInstr::XCHG_RTN;
+
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_comp_swap:
+      return util_format_is_float(format) ? RatInstr::CMPXCHG_FLT
+                                          : RatInstr::CMPXCHG_INT;
+
+   default:
+      unreachable("Unsupported WO RAT instruction");
+   }
+}
+
+/* Dispatcher for the SSBO and image intrinsics handled by RatInstr: the
+ * intrinsic is forwarded to the matching emit_* helper and the helper's
+ * result is returned. Intrinsics not listed here yield false. */
+bool RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
+{
+   switch (intr->intrinsic) {
+   /* shader storage buffers */
+   case nir_intrinsic_load_ssbo:
+      return emit_ssbo_load(intr, shader);
+   case nir_intrinsic_store_ssbo:
+      return emit_ssbo_store(intr, shader);
+   case nir_intrinsic_get_ssbo_size:
+      return emit_ssbo_size(intr, shader);
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      return emit_ssbo_atomic_op(intr, shader);
+
+   /* images */
+   case nir_intrinsic_image_store:
+      return emit_image_store(intr, shader);
+   case nir_intrinsic_image_size:
+      return emit_image_size(intr, shader);
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_imin:
+   case nir_intrinsic_image_atomic_imax:
+   case nir_intrinsic_image_atomic_umin:
+   case nir_intrinsic_image_atomic_umax:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap:
+      return emit_image_load_or_atomic(intr, shader);
+
+   default:
+      return false;
+   }
+}
+
+/* Emit a load_ssbo intrinsic as a LoadFromBuffer fetch.
+ *
+ * The address taken from src[1] is shifted right by two bits into a
+ * temporary, and the fetch format and destination swizzle are chosen from
+ * the number of destination components. Always returns true. */
+bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
+{
+   auto& values = shader.value_factory();
+   auto result = values.dest_vec4(intr->dest, pin_group);
+
+   /* src[0] is not read here, only src[1] provides the address */
+   auto addr_in = values.src(intr->src[1], 0);
+   auto addr_shifted = values.temp_register();
+
+   /* This shift should really be lowered in NIR */
+   auto shift_instr = new AluInstr(op2_lshr_int, addr_shifted, addr_in,
+                                   values.literal(2),
+                                   {alu_write, alu_last_instr});
+   shader.emit_instruction(shift_instr);
+
+   /* Both tables are indexed by "number of components - 1" */
+   const EVTXDataFormat fetch_format[4] = {
+      fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32
+   };
+
+   RegisterVec4::Swizzle result_swizzle[4] = {
+      {0, 7, 7, 7},
+      {0, 1, 7, 7},
+      {0, 1, 2, 7},
+      {0, 1, 2, 3}
+   };
+
+   const int last_comp = nir_dest_num_components(intr->dest) - 1;
+
+   auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0);
+
+   auto resource = R600_IMAGE_REAL_RESOURCE_OFFSET + offset +
+                   shader.ssbo_image_offset();
+
+   auto load = new LoadFromBuffer(result, result_swizzle[last_comp],
+                                  addr_shifted, 0, resource, res_offset,
+                                  fetch_format[last_comp]);
+   load->set_fetch_flag(FetchInstr::use_tc);
+   load->set_num_format(vtx_nf_int);
+   shader.emit_instruction(load);
+
+   return true;
+}
+
+/* Emit a store_ssbo intrinsic as one typed RAT store per component.
+ *
+ * The address from src[2] is shifted right by two bits once; every
+ * component i of src[0] is then written with its own STORE_TYPED
+ * instruction at that base plus i. Always returns true. */
+bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
+{
+   const unsigned num_comps = nir_src_num_components(instr->src[0]);
+
+   /* Force the scheduler to not move the preparation too far away, by
+    * starting a new block (TODO: better priority handling in the scheduler) */
+   if (num_comps > 2)
+      shader.start_new_block(0);
+
+   auto& values = shader.value_factory();
+   auto addr_in = values.src(instr->src[2], 0);
+   auto base = values.temp_register();
+
+   auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);
+
+   shader.emit_instruction(new AluInstr(op2_lshr_int, base, addr_in,
+                                        values.literal(2), AluInstr::write));
+
+   for (unsigned comp = 0; comp < num_comps; ++comp) {
+      auto address = values.temp_vec4(pin_group, {0, 1, 2, 7});
+
+      /* The first component uses the base as is, the others add their index */
+      AluInstr *addr_calc = (comp == 0) ?
+               new AluInstr(op1_mov, address[0], base, AluInstr::last_write) :
+               new AluInstr(op2_add_int, address[0], base,
+                            values.literal(comp), AluInstr::last_write);
+      shader.emit_instruction(addr_calc);
+
+      auto component = values.src(instr->src[0], comp);
+      PRegister data_reg = values.temp_register(0);
+      shader.emit_instruction(new AluInstr(op1_mov, data_reg, component,
+                                           AluInstr::last_write));
+
+      auto data = RegisterVec4(data_reg, nullptr, nullptr, nullptr, pin_chan);
+      shader.emit_instruction(new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED,
+                                           data, address,
+                                           offset + shader.ssbo_image_offset(),
+                                           rat_id, 1, 1, 0));
+   }
+
+   return true;
+}
+
+/* Emit an SSBO atomic intrinsic as a RAT instruction and, when the result
+ * of the atomic is used, a follow-up fetch that reads it back.
+ *
+ * intr:   one of the ssbo_atomic_* intrinsics routed here by RatInstr::emit
+ * shader: receives the emitted instructions
+ *
+ * Always returns true. */
+bool RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0); {}
+
+   /* The result counts as read if the destination is not SSA or has uses;
+    * this selects between the returning and the write-only opcode. The
+    * lookup is always done with PIPE_FORMAT_R32_UINT. */
+   bool read_result = !intr->dest.is_ssa || !list_is_empty(&intr->dest.ssa.uses);
+   auto opcode = read_result ? get_rat_opcode(intr->intrinsic, PIPE_FORMAT_R32_UINT) :
+                               get_rat_opcode_wo(intr->intrinsic, PIPE_FORMAT_R32_UINT);
+
+   auto coord_orig =  vf.src(intr->src[1], 0);
+   auto coord = vf.temp_register(0);
+
+   auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});
+
+   /* coord = src[1] >> 2 */
+   shader.emit_instruction(new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));
+
+   /* Channel 1 of the data vector carries the RAT return address */
+   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
+
+
+   if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+      /* Compare-and-swap: src[3] goes to channel 0, src[2] goes to channel 2
+       * on Cayman and to channel 3 on the other chip classes */
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
+                                           vf.src(intr->src[2], 0), {alu_last_instr, alu_write}));
+   } else {
+      /* All other atomics only take the operand in src[2] on channel 0 */
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
+   }
+
+
+   /* The shifted address is used for all four channels of the index vector */
+   RegisterVec4 out_vec(coord, coord, coord, coord, pin_group);
+
+   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, out_vec, imageid + shader.ssbo_image_offset(),
+                              image_offset, 1, 0xf, 0);
+   shader.emit_instruction(atomic);
+
+   /* Note: the ack flag is set after the instruction was handed to the shader */
+   atomic->set_ack(); 
+   if (read_result) {
+      atomic->set_instr_flag(ack_rat_return_write);
+      auto dest = vf.dest_vec4(intr->dest, pin_group);
+
+      /* Read the result back from the RAT return address with a 32 bit
+       * integer fetch */
+      auto fetch = new FetchInstr(vc_fetch,
+                                  dest, {0, 1, 2, 3},
+                                  shader.rat_return_address(),
+                                  0,
+                                  no_index_offset,
+                                  fmt_32,
+                                  vtx_nf_int,
+                                  vtx_es_none,
+                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
+                                  image_offset);
+      fetch->set_mfc(15);
+      fetch->set_fetch_flag(FetchInstr::srf_mode);
+      fetch->set_fetch_flag(FetchInstr::use_tc);
+      fetch->set_fetch_flag(FetchInstr::vpm);
+      fetch->set_fetch_flag(FetchInstr::wait_ack);
+      /* The fetch is registered as depending on the atomic */
+      fetch->add_required_instr(atomic);
+      shader.chain_ssbo_read(fetch);
+      shader.emit_instruction(fetch);
+   }
+
+   return true;
+
+}
+
+/* Emit get_ssbo_size as a buffer size query.
+ *
+ * Only a constant buffer index in src[0] is supported; a dynamic index
+ * triggers an assertion and, if assertions are compiled out, the query is
+ * issued for the base resource id. Always returns true. */
+bool RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
+{
+   auto& values = shader.value_factory();
+   auto size_dest = values.dest_vec4(intr->dest, pin_group);
+
+   int resource = R600_IMAGE_REAL_RESOURCE_OFFSET;
+
+   auto buffer_index = nir_src_as_const_value(intr->src[0]);
+   if (!buffer_index)
+      assert(0 && "dynamic buffer offset not supported in buffer_size");
+   else
+      resource += buffer_index[0].u32;
+
+   auto query = new QueryBufferSizeInstr(size_dest, {0, 1, 2, 3}, resource);
+   shader.emit_instruction(query);
+   return true;
+}
+
+/* Emit an image_store intrinsic as a typed RAT store.
+ *
+ * The coordinate (src[1]) and the value (src[3]) are each copied into a
+ * freshly allocated register group; for 1D array images channels 1 and 2 of
+ * the coordinate are swapped while copying. The store is acknowledged only
+ * when the intrinsic's access qualifier contains ACCESS_COHERENT.
+ *
+ * Always returns true. */
+bool RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);
+
+   auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
+   auto coord =  vf.temp_vec4(pin_group);
+
+   auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
+   auto value =  vf.temp_vec4(pin_group);
+
+   RegisterVec4::Swizzle swizzle = {0,1,2,3};
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
+       nir_intrinsic_image_array(intrin))
+      swizzle  = {0,2,1,3};
+
+   /* Only the last move of each group of four closes the ALU group */
+   for (int i = 0; i < 4; ++i) {
+      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
+      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
+   }
+   for (int i = 0; i < 4; ++i) {
+      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
+      shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
+   }
+
+   auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
+   auto store = new RatInstr(op, RatInstr::STORE_TYPED, value, coord, imageid,
+                             image_offset, 1, 0xf, 0);
+
+   /* Test the access qualifier itself. nir_intrinsic_has_access() only tells
+    * whether the intrinsic carries an access index at all, so masking its
+    * result with ACCESS_COHERENT does not check for coherent access. */
+   if (nir_intrinsic_access(intrin) & ACCESS_COHERENT)
+      store->set_ack();
+   shader.emit_instruction(store);
+   return true;
+}
+
+/* Emit image_load and the image_atomic_* intrinsics as a RAT instruction
+ * and, when the result is used, a follow-up fetch that reads it back.
+ *
+ * intrin: the image intrinsic routed here by RatInstr::emit
+ * shader: receives the emitted instructions
+ *
+ * Always returns true. */
+bool RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {}
+
+   /* The result counts as read if the destination is not SSA or has uses.
+    * The opcode lookup is always done with PIPE_FORMAT_R32_UINT, not with
+    * the format of the image. */
+   bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
+   auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
+                               get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
+
+   auto coord_orig =  vf.src_vec4(intrin->src[1], pin_chan);
+   auto coord = vf.temp_vec4(pin_group);
+
+   auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});
+
+   /* For 1D array images channels 1 and 2 of the coordinate are swapped */
+   RegisterVec4::Swizzle swizzle = {0,1,2,3};
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
+       nir_intrinsic_image_array(intrin))
+      swizzle  = {0,2,1,3};
+
+   /* Copy the coordinate into its own register group; only the last move
+    * closes the ALU group */
+   for (int i = 0; i < 4; ++i) {
+      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
+      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
+   }
+
+   /* Channel 1 of the data vector carries the RAT return address */
+   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
+
+   if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
+      /* Compare-and-swap: src[4] goes to channel 0, src[3] goes to channel 2
+       * on Cayman and to channel 3 on the other chip classes */
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
+                                          vf.src(intrin->src[3], 0), AluInstr::last_write));
+   } else {
+      /* Everything else: src[3] on channel 0 and zero on channel 2 */
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
+                                          vf.src(intrin->src[3], 0), AluInstr::write));
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
+   }
+
+   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid,
+                              image_offset, 1, 0xf, 0);
+   shader.emit_instruction(atomic);
+
+   /* Note: the ack flag is set after the instruction was handed to the shader */
+   atomic->set_ack();
+   if (read_result) {
+      atomic->set_instr_flag(ack_rat_return_write);
+      auto dest = vf.dest_vec4(intrin->dest, pin_group);
+
+      /* Derive the fetch format parameters from the format of the image;
+       * fmt starts out as fmt_32 and the others as 0 before
+       * r600_vertex_data_type() is asked to fill them in */
+      pipe_format format = nir_intrinsic_format(intrin);
+      unsigned fmt = fmt_32;
+      unsigned num_format = 0;
+      unsigned format_comp = 0;
+      unsigned endian = 0;
+      r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
+
+      /* Read the result back from the RAT return address */
+      auto fetch = new FetchInstr(vc_fetch,
+                                  dest, {0, 1, 2, 3},
+                                  shader.rat_return_address(),
+                                  0,
+                                  no_index_offset,
+                                  (EVTXDataFormat)fmt,
+                                  (EVFetchNumFormat)num_format,
+                                  (EVFetchEndianSwap)endian,
+                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
+                                  image_offset);
+      fetch->set_mfc(3);
+      fetch->set_fetch_flag(FetchInstr::srf_mode);
+      fetch->set_fetch_flag(FetchInstr::use_tc);
+      fetch->set_fetch_flag(FetchInstr::vpm);
+      fetch->set_fetch_flag(FetchInstr::wait_ack);
+      if (format_comp)
+         fetch->set_fetch_flag(FetchInstr::format_comp_signed);
+
+      /* NOTE(review): emit_ssbo_atomic_op additionally calls
+       * fetch->add_required_instr(atomic) at this point; confirm that
+       * leaving it out here is intentional */
+      shader.chain_ssbo_read(fetch);
+      shader.emit_instruction(fetch);
+   }
+
+   return true;
+}
+
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
+
+/* Emit an image_size intrinsic.
+ *
+ * Buffer images are handled with a buffer size query, all other images with
+ * a get_resinfo texture instruction. For cube arrays with more than two
+ * destination components the third component is not taken from get_resinfo
+ * but read from the buffer info constant buffer.
+ *
+ * src[1] is asserted to be the constant 0. Always returns true. */
+bool RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   auto src = RegisterVec4(0, true, {4,4,4,4});
+
+   assert(nir_src_as_uint(intrin->src[1]) == 0);
+
+   auto const_offset = nir_src_as_const_value(intrin->src[0]);
+   PRegister dyn_offset = nullptr;
+
+   /* A constant image index is folded into the resource id, a dynamic one
+    * is loaded into a register */
+   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
+   if (const_offset)
+      res_id += const_offset[0].u32;
+   else
+      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));
+
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
+      /* Note: dyn_offset is not passed to the buffer size query */
+      auto dest = vf.dest_vec4(intrin->dest, pin_group);
+      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3}, res_id));
+      return true;
+   } else {
+
+      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
+          nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
+         /* Need to load the layers from a const buffer */
+
+         /* Swizzle value 7 in the third slot leaves dest[2] to the code below */
+         auto dest = vf.dest_vec4(intrin->dest, pin_group);
+         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,7,3},
+                                              src, 0/* ?? */, res_id, dyn_offset));
+
+         shader.set_flag(Shader::sh_txs_cube_array_comp);
+
+         if (const_offset) {
+            /* Constant index: address the value directly, four values per
+             * constant buffer entry */
+            unsigned lookup_resid = const_offset[0].u32;
+            shader.emit_instruction(new AluInstr(op1_mov, dest[2],
+                                    vf.uniform(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL,  lookup_resid % 4,
+                                               R600_BUFFER_INFO_CONST_BUFFER),
+                                    AluInstr::last_write));
+         } else {
+            /* If the addressing is indirect we have to get the z-value by using a binary search */
+            auto addr = vf.temp_register();
+            auto comp1 = vf.temp_register();
+            auto comp2 = vf.temp_register();
+            auto low_bit = vf.temp_register();
+            auto high_bit = vf.temp_register();
+
+            auto trgt = vf.temp_vec4(pin_group);
+
+            /* addr = index >> 2, low_bit = index & 1, high_bit = index & 2 */
+            shader.emit_instruction(new AluInstr(op2_lshr_int, addr, vf.src(intrin->src[0], 0),
+                             vf.literal(2), AluInstr::write));
+            shader.emit_instruction(new AluInstr(op2_and_int, low_bit, vf.src(intrin->src[0], 0),
+                             vf.one_i(), AluInstr::write));
+            shader.emit_instruction(new AluInstr(op2_and_int, high_bit, vf.src(intrin->src[0], 0),
+                             vf.literal(2), AluInstr::last_write));
+
+            /* Load all four values of the addressed constant buffer entry */
+            shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, addr, R600_SHADER_BUFFER_INFO_SEL,
+                                                  R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float));
+
+            /* Narrow the four loaded values down to one in two steps: first
+             * by high_bit (0/2 and 1/3), then by low_bit */
+            // this may be wrong
+            shader.emit_instruction(new AluInstr(op3_cnde_int, comp1, high_bit, trgt[0], trgt[2],
+                                                AluInstr::write));
+            shader.emit_instruction(new AluInstr(op3_cnde_int, comp2, high_bit, trgt[1], trgt[3],
+                                                 AluInstr::last_write));
+            shader.emit_instruction(new AluInstr(op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
+         }
+      } else {
+         /* All other images: get_resinfo delivers all components */
+         auto dest = vf.dest_vec4(intrin->dest, pin_group);
+         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,2,3},
+                                              src, 0/* ?? */, res_id, dyn_offset));
+
+      }
+   }
+   return true;
+}
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_instr_mem.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.h
@ -0,0 +1,177 @@
+#ifndef GDSINSTR_H
+#define GDSINSTR_H
+
+#include "sfn_instr.h"
+#include "sfn_valuefactory.h"
+
+namespace r600 {
+
+class Shader;
+
+/* Instruction with a DS opcode (ESDOp), one destination register, a vec4
+ * source and a UAV given as a base index plus an optional offset register.
+ * The static emit_atomic_* helpers create these instructions from NIR
+ * intrinsics. */
+class GDSInstr : public Instr {
+public:
+
+   GDSInstr(ESDOp op, Register *dest,
+            const RegisterVec4& src, int uav_base,
+            PRegister uav_id);
+
+   bool is_equal_to(const GDSInstr& lhs) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool do_ready() const override;
+
+   /* Plain accessors; note that src() returns the source vector by value */
+   auto opcode() const {return m_op;}
+   auto src() const { return m_src;}
+
+   const auto& dest() const { return m_dest;}
+   auto& dest() { return m_dest;}
+
+   auto uav_id() const {return m_uav_id;}
+   auto uav_base() const {return m_uav_base;}
+
+   /* Create an instruction from its textual form */
+   static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
+
+   /* Entry point for translating a NIR intrinsic; returns a success flag */
+   static bool emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader);
+   uint32_t slots() const override {return 1;};
+
+private:
+
+   static bool emit_atomic_read(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_atomic_op2(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_atomic_inc(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_atomic_pre_dec(nir_intrinsic_instr *intr, Shader& shader);
+
+   void do_print(std::ostream& os) const override;
+
+   ESDOp m_op{DS_OP_INVALID};
+   /* No default initializer here; presumably always set by the
+    * constructor - TODO confirm */
+   Register *m_dest;
+
+   RegisterVec4 m_src;
+
+   int m_uav_base{0};
+   PRegister m_uav_id{nullptr};
+   /* Not accessed by anything declared in this class */
+   std::bitset<8> m_tex_flags;
+};
+
+
+/* Instruction that accesses a RAT resource: typed/raw stores and atomic
+ * operations on SSBOs and images. The static emit() function translates the
+ * corresponding NIR intrinsics into these instructions. */
+class RatInstr : public Instr {
+
+public:
+   /* RAT operation codes. The values are significant: the entries up to
+    * DEC_UINT are numbered consecutively from 0 and the *_RTN entries start
+    * at 32 (there is no entry with value 33). */
+   enum ERatOp {
+      NOP,
+      STORE_TYPED,
+      STORE_RAW,
+      STORE_RAW_FDENORM,
+      CMPXCHG_INT,
+      CMPXCHG_FLT,
+      CMPXCHG_FDENORM,
+      ADD,
+      SUB,
+      RSUB,
+      MIN_INT,
+      MIN_UINT,
+      MAX_INT,
+      MAX_UINT,
+      AND,
+      OR,
+      XOR,
+      MSKOR,
+      INC_UINT,
+      DEC_UINT,
+      NOP_RTN = 32,
+      XCHG_RTN = 34,
+      XCHG_FDENORM_RTN,
+      CMPXCHG_INT_RTN,
+      CMPXCHG_FLT_RTN,
+      CMPXCHG_FDENORM_RTN,
+      ADD_RTN,
+      SUB_RTN,
+      RSUB_RTN,
+      MIN_INT_RTN,
+      MIN_UINT_RTN,
+      MAX_INT_RTN,
+      MAX_UINT_RTN,
+      AND_RTN,
+      OR_RTN,
+      XOR_RTN,
+      MSKOR_RTN,
+      UINT_RTN,
+      UNSUPPORTED
+   };
+
+   /* cf_opcode:     control flow opcode used for the instruction
+    * rat_op:        operation to execute
+    * data:          value vector
+    * index:         address vector
+    * rat_id:        constant part of the RAT id
+    * rat_id_offset: optional register holding a dynamic RAT id offset
+    * burst_count, comp_mask, element_size: stored as given and exposed
+    *                through the accessors of the same name */
+   RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
+            const RegisterVec4& data, const RegisterVec4& index,
+            int rat_id, PRegister rat_id_offset,
+            int burst_count, int comp_mask, int element_size);
+
+   auto rat_id_offset() const { return m_rat_id_offset;}
+   int  rat_id() const { return m_rat_id;}
+
+   ERatOp rat_op() const {return m_rat_op;}
+
+   const auto& value() const { return m_data;}
+   auto& value() { return m_data;}
+
+   const auto& addr() const { return m_index;}
+   auto& addr() { return m_index;}
+
+   int data_gpr() const {return m_data.sel();}
+   int index_gpr() const {return m_index.sel();}
+   int elm_size() const {return m_element_size;}
+
+   int comp_mask() const {return m_comp_mask;}
+
+   bool need_ack() const {return m_need_ack;}
+   int burst_count() const {return m_burst_count;}
+
+   /* Channel of the register in slot chan of the value vector */
+   int data_swz(int chan) const {return m_data[chan]->chan();}
+
+   ECFOpCode cf_opcode() const { return m_cf_opcode;}
+
+   /* Requesting an ack also requests the mark */
+   void set_ack() {m_need_ack = true; set_mark(); }
+   void set_mark() {m_need_mark = true; }
+   /* Read-only query, const-qualified like the other accessors so that it
+    * can be used through const references as well */
+   bool mark() const {return m_need_mark;}
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const RatInstr& lhs) const;
+
+   /* Translate a NIR SSBO or image intrinsic; returns false if the
+    * intrinsic is not handled */
+   static bool emit(nir_intrinsic_instr *intr, Shader& shader);
+
+private:
+
+   static bool emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_ssbo_store(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader);
+
+   static bool emit_image_store(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_image_load_or_atomic(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_image_size(nir_intrinsic_instr *intr, Shader& shader);
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ECFOpCode m_cf_opcode;
+   ERatOp m_rat_op;
+
+   RegisterVec4 m_data;
+   RegisterVec4 m_index;
+   PRegister m_rat_id_offset{nullptr};
+
+   int m_rat_id{0};
+   int m_burst_count{0};
+   int m_comp_mask{15};
+   int m_element_size{3};
+   bool m_need_ack{false};
+   bool m_need_mark{false};
+
+};
+
+
+}
+
+#endif // GDSINSTR_H
--- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h
@ -0,0 +1,166 @@
+#ifndef INSTR_TEX_H
+#define INSTR_TEX_H
+
+#include "sfn_instr.h"
+#include "sfn_valuefactory.h"
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+/* Texture instruction with a vec4 result. Besides the instruction state
+ * this class carries the static helpers that translate a nir_tex_instr
+ * (from_nir and the emit_tex_* functions). */
+class TexInstr : public InstrWithVectorResult {
+public:
+   /* Opcodes, mapped one to one onto the FETCH_OP_* values; "unknown" is
+    * the only entry with an explicit number */
+   enum Opcode {
+      ld = FETCH_OP_LD,
+      get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
+      get_nsamples = FETCH_OP_GET_NUMBER_OF_SAMPLES,
+      get_tex_lod = FETCH_OP_GET_LOD,
+      get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
+      get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
+      set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
+      keep_gradients = FETCH_OP_KEEP_GRADIENTS,
+      set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
+      set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
+      sample = FETCH_OP_SAMPLE,
+      sample_l = FETCH_OP_SAMPLE_L,
+      sample_lb = FETCH_OP_SAMPLE_LB,
+      sample_lz = FETCH_OP_SAMPLE_LZ,
+      sample_g = FETCH_OP_SAMPLE_G,
+      /* note: named *_lb but mapped to the *_G_L fetch op */
+      sample_g_lb = FETCH_OP_SAMPLE_G_L,
+      gather4 = FETCH_OP_GATHER4,
+      gather4_o =  FETCH_OP_GATHER4_O,
+
+      sample_c = FETCH_OP_SAMPLE_C,
+      sample_c_l = FETCH_OP_SAMPLE_C_L,
+      sample_c_lb = FETCH_OP_SAMPLE_C_LB,
+      sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
+      sample_c_g = FETCH_OP_SAMPLE_C_G,
+      /* note: named *_lb but mapped to the *_C_G_L fetch op */
+      sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
+      gather4_c = FETCH_OP_GATHER4_C,
+      gather4_c_o =  FETCH_OP_GATHER4_C_O,
+      unknown = 255
+   };
+
+   /* Bit positions in m_tex_flags; num_tex_flag is the number of flags
+    * and sizes the bitset */
+   enum Flags {
+      x_unnormalized,
+      y_unnormalized,
+      z_unnormalized,
+      w_unnormalized,
+      grad_fine,
+      num_tex_flag
+   };
+
+   /* Collection of the inputs of a nir_tex_instr, filled in by the
+    * constructor and passed to the emit_tex_* helpers */
+   struct Inputs {
+      Inputs(const nir_tex_instr& instr, ValueFactory &vf);
+      const nir_variable *sampler_deref;
+      const nir_variable *texture_deref;
+      RegisterVec4 coord;
+      PVirtualValue bias;
+      PVirtualValue comperator;
+      PVirtualValue lod;
+      RegisterVec4 ddx;
+      RegisterVec4 ddy;
+      nir_src *offset;
+      PVirtualValue gather_comp;
+      PVirtualValue ms_index;
+      PVirtualValue sampler_offset;
+      PVirtualValue texture_offset;
+
+      RegisterVec4::Swizzle swizzle_from_ncomps(int comps) const;
+
+      Opcode opcode;
+   private:
+      auto get_opcode(const nir_tex_instr& instr) -> Opcode;
+   };
+
+   /* op:           texture opcode
+    * dest:         result vector
+    * dest_swizzle: swizzle applied to the result
+    * src:          source vector
+    * sid, rid:     sampler id and resource id
+    * sampler_offs: optional sampler offset value */
+   TexInstr(Opcode op, const RegisterVec4& dest,
+            const RegisterVec4::Swizzle& dest_swizzle,
+            const RegisterVec4& src, unsigned sid, unsigned rid,
+            PVirtualValue sampler_offs = nullptr);
+
+   /* Instances can neither be copied nor moved */
+   TexInstr(const TexInstr& orig) = delete;
+   TexInstr(const TexInstr&& orig) = delete;
+   TexInstr& operator =(const TexInstr& orig) = delete;
+   TexInstr& operator =(const TexInstr&& orig) = delete;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   const auto& src() const {return m_src;}
+   auto& src() {return m_src;}
+
+   /* The opcode is handed out as plain unsigned, not as Opcode */
+   unsigned opcode() const {return m_opcode;}
+   unsigned sampler_id() const {return m_sampler_id;}
+   unsigned resource_id() const {return m_resource_id;}
+
+   void set_offset(unsigned index, int32_t val);
+   int get_offset(unsigned index) const;
+
+   void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
+   int inst_mode() const { return m_inst_mode;}
+
+   void set_tex_flag(Flags flag) {m_tex_flags.set(flag);}
+   bool has_tex_flag(Flags flag) const {return m_tex_flags.test(flag);}
+
+   void set_sampler_offset(PVirtualValue ofs) {m_sampler_offset = ofs;}
+   auto* sampler_offset() const {return m_sampler_offset;}
+
+   void set_gather_comp(int cmp);
+   bool is_equal_to(const TexInstr& lhs) const;
+
+   /* Construction from the textual representation */
+   static Opcode op_from_string(const std::string& s);
+   static Instr::Pointer from_string(std::istream& is, ValueFactory& value_fctory);
+
+   /* Translate a NIR texture instruction; returns a success flag */
+   static bool from_nir(nir_tex_instr *tex, Shader& shader);
+
+   uint32_t slots() const override {return 1;};
+
+   /* Returns a copy of the list filled by add_prepare_instr() */
+   auto prepare_instr() const { return m_prepare_instr;}
+
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+   bool propagate_death() override;
+
+   static const char *opname(Opcode code);
+   static bool is_gather(Opcode op);
+
+   void read_tex_coord_normalitazion(const std::string& next_token);
+   void set_tex_param(const std::string& next_token);
+
+   static auto prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader &shader) -> RegisterVec4;
+
+   /* One helper per kind of texture operation, used when translating
+    * from NIR */
+   static bool emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_txs(nir_tex_instr *tex, Inputs& src,
+                            RegisterVec4::Swizzle dest_swz, Shader& shader);
+   static bool emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader);
+   static bool emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_tg4(nir_tex_instr* instr, Inputs& src , Shader& shader);
+   static bool emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader);
+
+   void set_coord_offsets(nir_src *offset);
+   void set_rect_coordinate_flags(nir_tex_instr* instr);
+   void add_prepare_instr(TexInstr *ir) {m_prepare_instr.push_back(ir);};
+
+   Opcode m_opcode;
+
+   RegisterVec4 m_src;
+   PVirtualValue m_sampler_offset;
+   std::bitset<num_tex_flag> m_tex_flags;
+   /* The following members have no default initializers; presumably they
+    * are set by the constructor - TODO confirm */
+   int m_offset[3];
+   int m_inst_mode;
+   unsigned m_sampler_id;
+   unsigned m_resource_id;
+
+   static const std::map<Opcode, std::string> s_opcode_map;
+   std::list<TexInstr *> m_prepare_instr;
+};
+
+}
+
+#endif // INSTR_TEX_H
--- a/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp
@ -0,0 +1,188 @@
+#include "sfn_instrfactory.h"
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_debug.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_lds.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_tex.h"
+
+#include "sfn_alu_defines.h"
+
+#include "sfn_shader.h"
+
+#include <string>
+#include <sstream>
+#include <vector>
+
+namespace r600 {
+
+using std::string;
+using std::vector;
+
+/* A new factory has no ALU group under construction */
+InstrFactory::InstrFactory() : group(nullptr) {}
+
+/* Create an instruction from one line of its textual representation.
+ *
+ * The first whitespace separated token of s selects the instruction type,
+ * the rest of the line is parsed by that type's from_string function.
+ *
+ * "ALU_GROUP_BEGIN" opens a new ALU group with the given nesting depth and
+ * returns nullptr; "ALU_GROUP_END" returns the group that is currently open
+ * (nullptr if there is none) and closes it. Tokens that are not known here
+ * are passed to ControlFlowInstr::from_string.
+ *
+ * If no instruction could be created and no ALU group is open, an error is
+ * written to std::cerr. The return value is the created instruction, or
+ * nullptr. */
+PInst InstrFactory::from_string(const std::string& s, int nesting_depth)
+{
+   std::istringstream is(s);
+
+   /* Read the first non-empty token */
+   string keyword;
+   is >> keyword;
+   while (keyword.empty() && is.good())
+      is >> keyword;
+
+   /* Group markers don't create an instruction on their own */
+   if (keyword == "ALU_GROUP_BEGIN") {
+      group = new AluGroup();
+      group->set_nesting_depth(nesting_depth);
+      return nullptr;
+   }
+
+   if (keyword == "ALU_GROUP_END") {
+      AluGroup *finished = group;
+      group = nullptr;
+      return finished;
+   }
+
+   PInst instr = nullptr;
+
+   if (keyword == "ALU")
+      instr = AluInstr::from_string(is, m_value_factory, group);
+   else if (keyword == "TEX")
+      instr = TexInstr::from_string(is, m_value_factory);
+   else if (keyword == "VFETCH")
+      instr = FetchInstr::from_string(is, m_value_factory);
+   else if (keyword == "LOAD_BUF")
+      instr = LoadFromBuffer::from_string(is, m_value_factory);
+   else if (keyword == "GET_BUF_RESINFO")
+      instr = QueryBufferSizeInstr::from_string(is, m_value_factory);
+   else if (keyword == "EXPORT")
+      instr = ExportInstr::from_string(is, m_value_factory);
+   else if (keyword == "EXPORT_DONE")
+      instr = ExportInstr::last_from_string(is, m_value_factory);
+   else if (keyword == "READ_SCRATCH")
+      instr = LoadFromScratch::from_string(is, m_value_factory);
+   else if (keyword == "WRITE_SCRATCH")
+      instr = WriteScratchInstr::from_string(is, m_value_factory);
+   else if (keyword == "MEM_RING")
+      instr = MemRingOutInstr::from_string(is, m_value_factory);
+   else if (keyword == "EMIT_VERTEX")
+      instr = EmitVertexInstr::from_string(is, false);
+   else if (keyword == "EMIT_CUT_VERTEX")
+      instr = EmitVertexInstr::from_string(is, true);
+   else if (keyword == "LDS_READ")
+      instr = LDSReadInstr::from_string(is, m_value_factory);
+   else if (keyword == "LDS")
+      instr = LDSAtomicInstr::from_string(is, m_value_factory);
+   else if (keyword == "WRITE_TF")
+      instr = WriteTFInstr::from_string(is, m_value_factory);
+   else if (keyword == "IF")
+      instr = IfInstr::from_string(is, m_value_factory);
+   else
+      instr = ControlFlowInstr::from_string(keyword);
+
+   /* With an open group a null result is not reported as an error */
+   const bool failed = !instr && !group;
+   if (failed)
+      std::cerr << "Error translating '" << s << "'\n";
+
+   return instr;
+}
+
+/* Translate one NIR instruction by forwarding it to the handler for its
+ * instruction type. Returns the handler's result; an instruction type
+ * without handler is reported on stderr and yields false. */
+bool InstrFactory::from_nir(nir_instr *instr, Shader& shader)
+{
+   const auto kind = instr->type;
+
+   if (kind == nir_instr_type_alu)
+      return AluInstr::from_nir(nir_instr_as_alu(instr), shader);
+
+   if (kind == nir_instr_type_intrinsic)
+      return shader.process_intrinsic(nir_instr_as_intrinsic(instr));
+
+   if (kind == nir_instr_type_load_const)
+      return load_const(nir_instr_as_load_const(instr), shader);
+
+   if (kind == nir_instr_type_tex)
+      return TexInstr::from_nir(nir_instr_as_tex(instr), shader);
+
+   if (kind == nir_instr_type_jump)
+      return process_jump(nir_instr_as_jump(instr), shader);
+
+   if (kind == nir_instr_type_ssa_undef)
+      return process_undef(nir_instr_as_ssa_undef(instr), shader);
+
+   fprintf(stderr, "Instruction type %d not supported\n", instr->type);
+   return false;
+}
+
+/* Emit the moves that materialize a NIR constant.
+ *
+ * 64 bit constants are written as two 32 bit moves per component (low
+ * half first). For all other bit sizes one move per component is emitted;
+ * the values 0, 1, 0xffffffff, 0x3f800000 and 0x3f000000 use the
+ * corresponding inline constants instead of a literal, and only the move of
+ * the last component is flagged as last instruction of the ALU group.
+ *
+ * Always returns true. */
+bool InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader)
+{
+   const int ncomp = literal->def.num_components;
+
+   if (literal->def.bit_size == 64) {
+      for (int c = 0; c < ncomp; ++c) {
+         const auto bits = literal->value[c].u64;
+
+         auto lo_dest = m_value_factory.dest(literal->def, 2 * c, pin_none);
+         auto lo_src = m_value_factory.literal(bits & 0xffffffff);
+         shader.emit_instruction(new AluInstr(op1_mov, lo_dest, lo_src, {alu_write}));
+
+         auto hi_dest = m_value_factory.dest(literal->def, 2 * c + 1, pin_none);
+         auto hi_src = m_value_factory.literal((bits >> 32) & 0xffffffff);
+         shader.emit_instruction(new AluInstr(op1_mov, hi_dest, hi_src, AluInstr::last_write));
+      }
+      return true;
+   }
+
+   /* A single component may be placed freely */
+   const Pin pin = ncomp == 1 ? pin_free : pin_none;
+
+   AluInstr *last_mov = nullptr;
+   for (int c = 0; c < ncomp; ++c) {
+      auto dest = m_value_factory.dest(literal->def, c, pin);
+      const uint32_t bits = literal->value[c].i32;
+
+      PVirtualValue src = nullptr;
+      switch (bits) {
+      case 0:
+         src = m_value_factory.zero();
+         break;
+      case 1:
+         src = m_value_factory.one_i();
+         break;
+      case 0xffffffff:
+         src = m_value_factory.inline_const(ALU_SRC_M_1_INT, 0);
+         break;
+      case 0x3f800000:
+         src = m_value_factory.inline_const(ALU_SRC_1, 0);
+         break;
+      case 0x3f000000:
+         src = m_value_factory.inline_const(ALU_SRC_0_5, 0);
+         break;
+      default:
+         src = m_value_factory.literal(bits);
+         break;
+      }
+
+      last_mov = new AluInstr(op1_mov, dest, src, {alu_write});
+      shader.emit_instruction(last_mov);
+   }
+
+   if (last_mov)
+      last_mov->set_alu_flag(alu_last_instr);
+
+   return true;
+}
+
+/* Translate a NIR jump: break and continue become the matching loop
+ * control flow instruction, followed by the start of a new block. Any other
+ * jump type is logged as error and false is returned. */
+bool InstrFactory::process_jump(nir_jump_instr *instr, Shader& shader)
+{
+   const bool is_break = instr->type == nir_jump_break;
+   const bool is_continue = instr->type == nir_jump_continue;
+
+   if (!is_break && !is_continue) {
+      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
+      sfn_log << SfnLog::err << "Jump instrunction " << *i <<  " not supported\n";
+      return false;
+   }
+
+   const ControlFlowInstr::CFType cf_type = is_break ?
+            ControlFlowInstr::cf_loop_break :
+            ControlFlowInstr::cf_loop_continue;
+
+   shader.emit_instruction(new ControlFlowInstr(cf_type));
+   shader.start_new_block(0);
+
+   return true;
+}
+
+/* Give every component of a NIR undef value a defined content by emitting
+ * a move of zero into it. Always returns true. */
+bool InstrFactory::process_undef(nir_ssa_undef_instr *undef, Shader& shader)
+{
+   const int ncomp = undef->def.num_components;
+
+   for (int chan = 0; chan < ncomp; ++chan) {
+      auto target = shader.value_factory().undef(undef->def.index, chan);
+      auto zero = value_factory().zero();
+      shader.emit_instruction(new AluInstr(op1_mov, target, zero,
+                                           AluInstr::last_write));
+   }
+
+   return true;
+}
+
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_instrfactory.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instrfactory.h
@ -0,0 +1,34 @@
+#ifndef INSTRFACTORY_H
+#define INSTRFACTORY_H
+
+#include "sfn_instr.h"
+#include "sfn_valuefactory.h"
+
+
+#include <iosfwd>
+
+namespace r600 {
+
+class Shader;
+/**
+ * Factory for r600 IR instructions. It owns the ValueFactory that creates
+ * the instruction operands; process_jump() and process_undef() translate
+ * the corresponding NIR instructions and emit the result into a Shader.
+ */
+class InstrFactory : public Allocate {
+public:
+   InstrFactory();
+
+   PInst from_string(const std::string &s, int nesting_depth);
+   bool from_nir(nir_instr *instr, Shader& shader);
+
+   /** Access the value factory owned by this instruction factory. */
+   auto& value_factory() { return m_value_factory;}
+
+private:
+   bool load_const(nir_load_const_instr *lc, Shader& shader);
+   bool process_jump(nir_jump_instr *instr, Shader& shader);
+   bool process_undef(nir_ssa_undef_instr *undef, Shader& shader);
+
+   Instr::Pointer export_from_string(std::istream& is, bool is_last);
+
+   ValueFactory m_value_factory;
+   /* Default-initialised so the pointer is never indeterminate, whatever
+    * the constructor does. */
+   AluGroup *group = nullptr;
+};
+
+}
+
+#endif // INSTRFACTORY_H
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp
@ -1,183 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_alu.h"
-#include "sfn_valuepool.h"
-
-namespace r600  {
-
-/* Modifier flags per source operand, indexed by the operand number. The
- * AluModifiers enum has no abs flag for the third source, hence the abs
- * table has only two entries. */
-const AluModifiers AluInstruction::src_abs_flags[2] =
-   {alu_src0_abs, alu_src1_abs};
-const AluModifiers AluInstruction::src_neg_flags[3] =
-   {alu_src0_neg, alu_src1_neg, alu_src2_neg};
-const AluModifiers AluInstruction::src_rel_flags[3] =
-   {alu_src0_rel, alu_src1_rel, alu_src2_rel};
-
-/* Create an ALU instruction without a destination. The source vector is
- * sized to the number of sources the opcode takes; its entries are unset. */
-AluInstruction::AluInstruction(EAluOp opcode):
-   Instruction (Instruction::alu),
-   m_opcode(opcode),
-   m_src(alu_ops.at(opcode).nsrc),
-   m_bank_swizzle(alu_vec_unknown),
-   m_cf_type(cf_alu)
-{
-   const bool three_sources = alu_ops.at(opcode).nsrc == 3;
-   if (three_sources)
-      m_flags.set(alu_op3);
-}
-
-/* Create an ALU instruction with destination, sources and modifier flags.
- * The destination must be set. Sources and destination are registered for
- * register remapping. */
-AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
-                               std::vector<PValue> src,
-                               const std::set<AluModifiers>& flags):
-   Instruction (Instruction::alu),
-   m_opcode(opcode),
-   m_dest(dest),
-   m_bank_swizzle(alu_vec_unknown),
-   m_cf_type(cf_alu)
-{
-   assert(dest);
-
-   /* Take over the sources first: the addresses of the elements of m_src
-    * are recorded below. */
-   m_src.swap(src);
-
-   for (auto flag : flags)
-      m_flags.set(flag);
-
-   if (alu_ops.at(opcode).nsrc == 3)
-      m_flags.set(alu_op3);
-
-   for (unsigned i = 0; i < m_src.size(); ++i)
-      add_remappable_src_value(&m_src[i]);
-
-   add_remappable_dst_value(&m_dest);
-}
-
-/* Convenience constructor for an operation with one source. */
-AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
-                               const std::set<AluModifiers>& flags):
-   AluInstruction(opcode, dest, std::vector<PValue>(1, src0), flags)
-{
-}
-
-/* Convenience constructor for an operation with two sources. */
-AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
-                               PValue src0, PValue src1,
-                               const std::set<AluModifiers> &flags):
-   AluInstruction(opcode, dest, std::vector<PValue>{src0, src1}, flags)
-{
-}
-
-/* Convenience constructor for an operation with three sources. */
-AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
-                               PValue src1, PValue src2,
-                               const std::set<AluModifiers> &flags):
-   AluInstruction(opcode, dest, std::vector<PValue>{src0, src1, src2}, flags)
-{
-}
-
-/* Compare with another ALU instruction: opcode, destination, all sources,
- * the modifier flags and the CF type have to match. operator== only calls
- * this after checking that both instructions have the same type. */
-bool AluInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == alu);
-   const auto& oth = static_cast<const AluInstruction&>(lhs);
-
-   /* An instruction created from the opcode alone has an unset destination
-    * and unset sources, so operands must not be dereferenced blindly: two
-    * unset operands are equal, an unset one never equals a set one. */
-   auto same_value = [](const PValue& a, const PValue& b) -> bool {
-      if (!a || !b)
-         return !a && !b;
-      return !(*a != *b);
-   };
-
-   if (m_opcode != oth.m_opcode)
-      return false;
-
-   if (!same_value(m_dest, oth.m_dest))
-      return false;
-
-   if (m_src.size() != oth.m_src.size())
-      return false;
-
-   for (unsigned i = 0; i < m_src.size(); ++i) {
-      if (!same_value(m_src[i], oth.m_src[i]))
-         return false;
-   }
-   return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type);
-}
-
-/* Replace the destination and every source that compares equal to one of
- * the candidates by new_value. Unset operands (possible for instructions
- * created from the opcode alone) are skipped instead of dereferenced. */
-void AluInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   /* iterate by reference: no need to copy the shared pointer each round */
-   for (const auto& c: candidates) {
-      if (m_dest && *c == *m_dest)
-         m_dest = new_value;
-
-      for (auto& s: m_src) {
-         if (s && *c == *s)
-            s = new_value;
-      }
-   }
-}
-
-/* Look up reg in the rename map; if a valid replacement is recorded, fetch
- * the replacement register (same channel) from values. The map entry of the
- * register that is finally returned is marked as used. */
-PValue AluInstruction::remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
-                                           ValueMap &values)
-{
-   const rename_reg_pair renamed = map[reg->sel()];
-   if (renamed.valid) {
-      reg = values.get_or_inject(renamed.new_reg, reg->chan());
-   }
-
-   /* reg may have changed above, so index the map again */
-   map[reg->sel()].used = true;
-   return reg;
-}
-
-
-/* Set the given modifier flag on this instruction. */
-void AluInstruction::set_flag(AluModifiers flag)
-{
-   m_flags.set(flag);
-}
-
-/* Store the bank swizzle; the constructors initialise it to alu_vec_unknown. */
-void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz)
-{
-   m_bank_swizzle = bswz;
-}
-
-/* Number of source operand slots of this instruction. */
-unsigned AluInstruction::n_sources() const
-{
-   const unsigned count = m_src.size();
-   return count;
-}
-
-/* Print the instruction as
- *   ALU <name>[_CLAMP] [<dest> : ]<sources> {WLEP} BS:<swizzle> CF:<type>
- * where each letter in the braces is replaced by a blank when the
- * corresponding flag is not set. */
-void AluInstruction::do_print(std::ostream& os) const
-{
-   os << "ALU " << alu_ops.at(m_opcode).name;
-   if (m_flags.test(alu_dst_clamp))
-      os << "_CLAMP";
-   if (m_dest)
-      os << ' ' << *m_dest << " : ";
-
-   const unsigned nsrc = m_src.size();
-   for (unsigned idx = 0; idx < nsrc; ++idx) {
-      if (idx > 0)
-         os << ' ';
-
-      int print_flags = 0;
-      if (m_flags.test(src_neg_flags[idx]))
-         print_flags |= Value::PrintFlags::has_neg;
-      if (m_flags.test(src_rel_flags[idx]))
-         print_flags |= Value::PrintFlags::is_rel;
-      /* the abs flag table has entries for the first two sources only */
-      if (idx < 2 && m_flags.test(src_abs_flags[idx]))
-         print_flags |= Value::PrintFlags::has_abs;
-
-      m_src[idx]->print(os, Value::PrintFlags(0, print_flags));
-   }
-
-   os << " {"
-      << (m_flags.test(alu_write) ? 'W' : ' ')
-      << (m_flags.test(alu_last_instr) ? 'L' : ' ')
-      << (m_flags.test(alu_update_exec) ? 'E' : ' ')
-      << (m_flags.test(alu_update_pred) ? 'P' : ' ')
-      << "}";
-
-   os <<  " BS:" << m_bank_swizzle;
-   os <<  " CF:" << m_cf_type;
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h
@ -1,142 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_r600_instruction_alu_h
-#define sfn_r600_instruction_alu_h
-
-#include "sfn_instruction_base.h"
-#include "sfn_alu_defines.h"
-
-namespace r600 {
-
-/* Modifier flags of an ALU instruction. AluInstruction stores them in its
- * m_flags member and accesses them with set()/test(). Note that there is no
- * abs modifier for the third source. */
-enum AluModifiers {
-   alu_src0_neg,
-   alu_src0_abs,
-   alu_src0_rel,
-   alu_src1_neg,
-   alu_src1_abs,
-   alu_src1_rel,
-   alu_src2_neg,
-   alu_src2_rel,
-   alu_dst_clamp,
-   alu_dst_rel,
-   alu_last_instr,
-   alu_update_exec,
-   alu_update_pred,
-   alu_write,
-   alu_op3
-};
-
-/* Destination (output) modifier selection.
- * NOTE(review): the explicit values presumably mirror the hardware
- * encoding - confirm against the ISA documentation. */
-enum AluDstModifiers {
-   omod_off = 0,
-   omod_mul2 = 1,
-   omod_mul4 = 2,
-   omod_divl2 = 3
-};
-
-/* Predicate selection; the value 1 is deliberately not assigned here.
- * NOTE(review): presumably the hardware encoding - confirm. */
-enum AluPredSel {
-   pred_off = 0,
-   pred_zero = 2,
-   pred_one = 3
-};
-
-/* Bank swizzle selection. The alu_vec_* and sq_alu_scl_* enumerators share
- * the numeric values 0..3. alu_vec_unknown is the value the AluInstruction
- * constructors start with. */
-enum AluBankSwizzle {
-   alu_vec_012 = 0,
-   sq_alu_scl_201 = 0,
-   alu_vec_021 = 1,
-   sq_alu_scl_122 = 1,
-   alu_vec_120 = 2,
-   sq_alu_scl_212 = 2,
-   alu_vec_102 = 3,
-   sq_alu_scl_221 = 3,
-   alu_vec_201 = 4,
-   alu_vec_210 = 5,
-   alu_vec_unknown = 6
-};
-
-/* ALU instruction of the IR: an opcode, a destination, up to three sources,
- * a set of AluModifiers flags, a bank swizzle and the CF type it is
- * emitted with. */
-class AluInstruction : public Instruction {
-public:
-
-   /* Modifier flag per source operand, indexed by the source number */
-   static const AluModifiers src_abs_flags[2];
-   static const AluModifiers src_neg_flags[3];
-   static const AluModifiers src_rel_flags[3];
-
-   /* Instruction without destination; sources are allocated but unset */
-   AluInstruction(EAluOp opcode);
-   AluInstruction(EAluOp opcode, PValue dest,
-                  std::vector<PValue> src0,
-                  const std::set<AluModifiers>& m_flags);
-
-   AluInstruction(EAluOp opcode, PValue dest, PValue src0,
-                  const std::set<AluModifiers>& m_flags);
-
-   AluInstruction(EAluOp opcode, PValue dest,
-                  PValue src0, PValue src1,
-                  const std::set<AluModifiers>& m_flags);
-
-   AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1,
-                  PValue src2,
-                  const std::set<AluModifiers>& m_flags);
-
-   void set_flag(AluModifiers flag);
-   unsigned n_sources() const;
-
-   PValue dest() {return m_dest;}
-   EAluOp opcode() const {return m_opcode;}
-   const Value *dest() const {return m_dest.get();}
-   /* Source i by reference; asserts that the index is valid and the source is set */
-   Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
-   /* Address of the stored source pointer i; asserts that the index is valid */
-   PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];}
-   bool is_last() const {return m_flags.test(alu_last_instr);}
-   bool write() const {return m_flags.test(alu_write);}
-   bool flag(AluModifiers f) const {return m_flags.test(f);}
-   void set_bank_swizzle(AluBankSwizzle swz);
-   int bank_swizzle() const {return m_bank_swizzle;}
-   ECFAluOpCode cf_type() const {return m_cf_type;}
-   void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
-
-   /* Replace destination/sources that equal one of the candidates */
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   PValue remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
-                              ValueMap &values);
-
-
-   EAluOp m_opcode;
-   PValue m_dest;
-   std::vector<PValue> m_src;
-   AluOpFlags m_flags;
-   AluBankSwizzle m_bank_swizzle;
-   ECFAluOpCode m_cf_type;
-};
-
-}
-
-#endif
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp
@ -1,187 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include <algorithm>
-#include <cassert>
-
-#include "sfn_instruction_base.h"
-#include "sfn_liverange.h"
-#include "sfn_valuepool.h"
-
-namespace r600  {
-
-/* Keep references to the rename map and the value map; both are modified
- * by the remap() calls. */
-ValueRemapper::ValueRemapper(std::vector<rename_reg_pair>& m,
-                             ValueMap& values):
-   m_map(m),
-   m_values(values)
-{
-}
-
-/* Remap a single value in place. Unset values and value types other than
- * GPR, GPR array element and kconst are left untouched. */
-void ValueRemapper::remap(PValue& v)
-{
-   if (!v)
-      return;
-
-   switch (v->type()) {
-   case Value::gpr:
-      v = remap_one_registers(v);
-      break;
-
-   case Value::gpr_array_value: {
-      GPRArrayValue& array_elm = static_cast<GPRArrayValue&>(*v);
-      auto value = array_elm.value();
-      auto addr = array_elm.indirect();
-      array_elm.reset_value(remap_one_registers(value));
-      /* only an address held in a GPR can be renamed */
-      if (addr && addr->type() == Value::gpr)
-         array_elm.reset_addr(remap_one_registers(addr));
-
-      /* mark all registers in [sel, sel + array_size) as used */
-      size_t first = array_elm.sel();
-      size_t last = first + array_elm.array_size();
-      for (size_t r = first; r < last; ++r)
-         m_map[r].used = true;
-      break;
-   }
-
-   case Value::kconst: {
-      auto& uniform = static_cast<UniformValue&>(*v);
-      auto addr = uniform.addr();
-      if (addr && addr->type() == Value::gpr)
-         uniform.reset_addr(remap_one_registers(addr));
-      break;
-   }
-
-   default:
-      break;
-   }
-}
-
-/* Remap the four components of a GPR vector; unset components are skipped. */
-void ValueRemapper::remap(GPRVector& v)
-{
-   for (int comp = 0; comp < 4; ++comp) {
-      if (!v.reg_i(comp))
-         continue;
-
-      auto& entry = m_map[v.reg_i(comp)->sel()];
-      if (entry.valid)
-         v.set_reg_i(comp, m_values.get_or_inject(entry.new_reg, v.reg_i(comp)->chan()));
-
-      /* looked up again: the component may just have been replaced */
-      m_map[v.reg_i(comp)->sel()].used = true;
-   }
-}
-
-/* Replace reg (in place) by its renamed register if the map holds a valid
- * entry for it, mark the map entry of the resulting register as used and
- * return the result. */
-PValue ValueRemapper::remap_one_registers(PValue& reg)
-{
-   const rename_reg_pair renamed = m_map[reg->sel()];
-   if (renamed.valid) {
-      reg = m_values.get_or_inject(renamed.new_reg, reg->chan());
-   }
-
-   m_map[reg->sel()].used = true;
-   return reg;
-}
-
-
-/* Store the concrete instruction type; it is used by operator== and type(). */
-Instruction::Instruction(instr_type t):
-   m_type(t)
-{
-}
-
-/* Out-of-line definition of the virtual destructor; nothing to release. */
-Instruction::~Instruction()
-{
-}
-
-/* Write "OP:" followed by the type specific text produced by do_print(). */
-void Instruction::print(std::ostream& os) const
-{
-   os << "OP:";
-   do_print(os);
-}
-
-
-/* Apply the remapper to all registered operands: source registers, source
- * vectors, destination registers and destination vectors, in that order.
- * The instruction is logged before and after on the merge log channel. */
-void Instruction::remap_registers(ValueRemapper& map)
-{
-   sfn_log << SfnLog::merge << "REMAP " << *this << "\n";
-
-   for (auto& src: m_mappable_src_registers)
-      map.remap(*src);
-   for (auto& src_vec: m_mappable_src_vectors)
-      map.remap(*src_vec);
-   for (auto& dst: m_mappable_dst_registers)
-      map.remap(*dst);
-   for (auto& dst_vec: m_mappable_dst_vectors)
-      map.remap(*dst_vec);
-
-   sfn_log << SfnLog::merge << "TO    " << *this << "\n\n";
-}
-
-/* Register the address of a source operand for remapping and liveness
- * evaluation; operands that are unset at this point are not registered. */
-void Instruction::add_remappable_src_value(PValue *v)
-{
-   if (*v)
-      m_mappable_src_registers.push_back(v);
-}
-
-/* Register the address of a source vector for remapping and liveness evaluation. */
-void Instruction::add_remappable_src_value(GPRVector *v)
-{
-   m_mappable_src_vectors.push_back(v);
-}
-
-/* Register the address of a destination operand. Unlike the source variant
- * this tests the passed address, not the value it points to.
- * NOTE(review): may have been meant as `*v` - confirm; evalue_liveness()
- * and ValueRemapper::remap() both skip unset values on their own. */
-void Instruction::add_remappable_dst_value(PValue *v)
-{
-   if (v)
-      m_mappable_dst_registers.push_back(v);
-}
-
-/* Register the address of a destination vector for remapping and liveness evaluation. */
-void Instruction::add_remappable_dst_value(GPRVector *v)
-{
-   m_mappable_dst_vectors.push_back(v);
-}
-
-/* Default implementation: does nothing; overridden by instruction types
- * that hold replaceable values (AluInstruction does). */
-void Instruction::replace_values(UNUSED const ValueSet& candidates, UNUSED PValue new_value)
-{
-
-}
-
-/* Report all registered operands to the live-range evaluator - sources as
- * reads, destinations as writes - and then run the type specific hook
- * do_evalue_liveness(). Unset single registers are skipped. */
-void Instruction::evalue_liveness(LiverangeEvaluator& eval) const
-{
-   sfn_log << SfnLog::merge << "Scan " << *this << "\n";
-
-   for (const auto& src: m_mappable_src_registers) {
-      if (*src)
-         eval.record_read(**src);
-   }
-
-   for (const auto& src_vec: m_mappable_src_vectors) {
-      eval.record_read(*src_vec);
-   }
-
-   for (const auto& dst: m_mappable_dst_registers) {
-      if (*dst)
-         eval.record_write(**dst);
-   }
-
-   for (const auto& dst_vec: m_mappable_dst_vectors) {
-      eval.record_write(*dst_vec);
-   }
-
-   do_evalue_liveness(eval);
-}
-
-/* Default liveness hook: nothing to do beyond what evalue_liveness() records. */
-void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const
-{
-
-}
-
-/* Two instructions are equal if they have the same type and the type
- * specific comparison is_equal_to() succeeds; the latter is only called
- * when the types match. */
-bool operator == (const Instruction& lhs, const Instruction& rhs)
-{
-   return rhs.m_type == lhs.m_type && lhs.is_equal_to(rhs);
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
@ -1,155 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_r600_instr_h
-#define sfn_r600_instr_h
-
-#include "sfn_instructionvisitor.h"
-#include "sfn_value_gpr.h"
-#include "sfn_defines.h"
-
-#include "gallium/drivers/r600/r600_isa.h"
-#include <iostream>
-#include <memory>
-#include <vector>
-#include <set>
-
-namespace r600 {
-
-/* Entry of the register rename map, indexed by the register's sel(). */
-struct rename_reg_pair {
-   bool valid;    /* new_reg holds a replacement for this register */
-   bool used;     /* set by the remapper for every register it ends up with */
-   int new_reg;   /* index of the replacement register */
-};
-
-class LiverangeEvaluator;
-class ValueMap;
-
-
-/* Applies a register rename map to values. The map and the value map are
- * held by reference and updated by the remap() calls. */
-class ValueRemapper {
-public:
-   ValueRemapper(std::vector<rename_reg_pair>& m,
-                 ValueMap& values);
-
-   /* Remap a single value in place (an unset value is ignored) */
-   void remap(PValue& v);
-   /* Remap each set component of a GPR vector */
-   void remap(GPRVector& v);
-private:
-   PValue remap_one_registers(PValue& reg);
-
-   std::vector<rename_reg_pair>& m_map;
-   ValueMap& m_values;
-};
-
-
-using OutputRegisterMap = std::map<unsigned, const GPRVector *>;
-
-/* Base class of all IR instructions. It stores the concrete instruction
- * type and the addresses of the operands that derived classes register for
- * register remapping and liveness evaluation. */
-class Instruction {
-public:
-   /* Discriminator for the concrete instruction class */
-   enum instr_type {
-      alu,
-      exprt,
-      tex,
-      vtx,
-      wait_ack,
-      cond_if,
-      cond_else,
-      cond_endif,
-      lds_atomic,
-      lds_read,
-      lds_write,
-      loop_begin,
-      loop_end,
-      loop_break,
-      loop_continue,
-      phi,
-      streamout,
-      ring,
-      emit_vtx,
-      mem_wr_scratch,
-      gds,
-      rat,
-      tf_write,
-      block,
-      unknown
-   };
-
-   typedef std::shared_ptr<Instruction> Pointer;
-
-   friend bool operator == (const Instruction& lhs, const Instruction& rhs);
-
-   Instruction(instr_type t);
-
-   virtual ~Instruction();
-
-   instr_type type() const { return m_type;}
-
-   /* Writes "OP:" followed by the output of do_print() */
-   void print(std::ostream& os) const;
-
-   /* The base implementation does nothing */
-   virtual void replace_values(const ValueSet& candidates, PValue new_value);
-
-   /* Records the registered operands as reads/writes, then calls do_evalue_liveness() */
-   void evalue_liveness(LiverangeEvaluator& eval) const;
-
-   /* Applies the remapper to all registered operands */
-   void remap_registers(ValueRemapper& map);
-
-   virtual bool accept(InstructionVisitor& visitor) = 0;
-   virtual bool accept(ConstInstructionVisitor& visitor) const = 0;
-
-protected:
-
-   /* Register the address of an operand; the pointer is stored, not the value */
-   void add_remappable_src_value(PValue *v);
-   void add_remappable_src_value(GPRVector *v);
-   void add_remappable_dst_value(PValue *v);
-   void add_remappable_dst_value(GPRVector *v);
-
-private:
-
-   virtual void do_evalue_liveness(LiverangeEvaluator& eval) const;
-
-   /* Only called by operator== after the types were found to be equal */
-   virtual bool is_equal_to(const Instruction& lhs) const = 0;
-
-   instr_type m_type;
-
-   virtual void do_print(std::ostream& os) const = 0;
-
-   /* Addresses of operands registered through add_remappable_*() */
-   std::vector<PValue*> m_mappable_src_registers;
-   std::vector<GPRVector*> m_mappable_src_vectors;
-   std::vector<PValue*> m_mappable_dst_registers;
-   std::vector<GPRVector*> m_mappable_dst_vectors;
-};
-
-using PInstruction=Instruction::Pointer;
-
-/* Stream an instruction by calling its print() method. */
-inline std::ostream& operator << (std::ostream& os, const Instruction& instr)
-{
-   instr.print(os);
-   return os;
-}
-
-bool operator == (const Instruction& lhs, const Instruction& rhs);
-
-}
-
-#endif
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp
@ -1,57 +0,0 @@
-#include "sfn_instruction_block.h"
-
-namespace r600 {
-
-
-/* Create an empty block; the nesting depth is only used for the indentation
- * in do_print(), the block number takes part in the equality comparison. */
-InstructionBlock::InstructionBlock(unsigned nesting_depth, unsigned block_number):
-   Instruction(block),
-   m_block_number(block_number),
-   m_nesting_depth(nesting_depth)
-{
-}
-
-/* Append an instruction to the end of the block. */
-void InstructionBlock::emit(PInstruction instr)
-{
-   m_block.push_back(instr);
-}
-
-/* Remap the registers of every instruction in the block, in block order. */
-void InstructionBlock::remap_registers(ValueRemapper& map)
-{
-   for (auto& instr: m_block) {
-      instr->remap_registers(map);
-   }
-}
-
-/* Run the liveness evaluation for every instruction in the block, in order. */
-void InstructionBlock::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   for (auto& instr: m_block) {
-      instr->evalue_liveness(eval);
-   }
-}
-
-/* Blocks are equal if they have the same number of instructions, the same
- * block number and pairwise equal instructions. */
-bool InstructionBlock::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == block);
-   auto& other = static_cast<const InstructionBlock&>(lhs);
-
-   if (m_block.size() != other.m_block.size() ||
-       m_block_number != other.m_block_number)
-      return false;
-
-   auto same_instr = [](PInstruction ri, PInstruction li) {return *ri == *li;};
-   return std::equal(m_block.begin(), m_block.end(), other.m_block.begin(),
-                     same_instr);
-}
-
-/* Return the most recently emitted instruction, or a null pointer if the
- * block is empty. */
-PInstruction InstructionBlock::last_instruction()
-{
-   if (m_block.empty())
-      return nullptr;
-   return m_block.back();
-}
-
-/* Print every instruction of the block on its own line, indented by two
- * blanks per nesting level. */
-void InstructionBlock::do_print(std::ostream& os) const
-{
-   /* Use the (count, char) fill constructor. The (const char *, count) form
-    * copies `count` characters from the literal " ", which holds only a
-    * blank and the terminator, so any nesting depth above one read out of
-    * bounds. */
-   const std::string space(2 * m_nesting_depth, ' ');
-   for(auto& i: m_block)
-      os << space << *i << "\n";
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
@ -1,82 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef sfn_instruction_block_h
-#define sfn_instruction_block_h
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-/* A sequence of instructions that is itself an instruction (type `block`).
- * It carries a block number and a nesting depth; the latter is used for the
- * indentation of the printed output. */
-class InstructionBlock : public Instruction
-{
-public:
-	InstructionBlock(unsigned nesting_depth, unsigned block_number);
-
-        /* Append an instruction to the block */
-        void emit(PInstruction instr);
-
-
-        /* Read-only iteration over the instructions in emission order */
-        std::vector<PInstruction>::const_iterator begin() const  {
-           return m_block.begin();
-        }
-        std::vector<PInstruction>::const_iterator end() const {
-           return m_block.end();
-        }
-
-        /* Remap the registers of all contained instructions */
-        void remap_registers(ValueRemapper& map);
-
-        size_t size() const {
-           return m_block.size();
-        }
-
-        /* Unchecked access to the i-th instruction */
-        const PInstruction& operator [] (int i) const {
-           return m_block[i];
-        }
-
-        unsigned number() const  {
-           return m_block_number;
-        }
-
-        /* Last emitted instruction, or null if the block is empty */
-        PInstruction last_instruction();
-
-        bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-        bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-        void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-        bool is_equal_to(const Instruction& lhs) const override;
-        void do_print(std::ostream& os) const override;
-
-        std::vector<PInstruction> m_block;
-
-        unsigned m_block_number;
-        unsigned m_nesting_depth;
-};
-
-}
-
-#endif // INSTRUCTIONBLOCK_H
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp
@ -1,195 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_cf.h"
-#include "sfn_liverange.h"
-
-namespace  r600 {
-
-/* Pass the concrete control-flow instruction type on to the base class. */
-CFInstruction::CFInstruction(instr_type type):Instruction(type)
-{
-
-}
-
-/* Pass the concrete if/else/endif instruction type on to CFInstruction. */
-IfElseInstruction::IfElseInstruction(instr_type type):
-   CFInstruction (type)
-{
-
-}
-
-/* Create an IF that depends on the given predicate instruction. The first
- * source of the predicate is registered as a remappable source of the IF,
- * and the predicate's CF type is switched to cf_alu_push_before. */
-IfInstruction::IfInstruction(AluInstruction *pred):
-   IfElseInstruction(cond_if),
-   m_pred(pred)
-{
-   add_remappable_src_value(m_pred->psrc(0));
-   pred->set_cf_type(cf_alu_push_before);
-}
-
-/* Liveness hook: forwards to LiverangeEvaluator::scope_if(). */
-void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_if();
-}
-
-/* Two IF instructions are equal if their predicate instructions are equal. */
-bool IfInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == cond_if);
-   const auto& other = static_cast<const IfInstruction&>(lhs);
-   return *other.m_pred == *m_pred;
-}
-
-/* Print the predicate instruction on one line and "IF (PRED)" on the next. */
-void IfInstruction::do_print(std::ostream& os) const
-{
-   os << "PRED = " << *m_pred << "\n";
-   os << "IF (PRED)";
-}
-
-/* Create an ELSE and remember the IF instruction it belongs to. */
-ElseInstruction::ElseInstruction(IfInstruction *jump_src):
-   IfElseInstruction(cond_else),
-   m_jump_src(jump_src)
-{
-}
-
-/* Liveness hook: forwards to LiverangeEvaluator::scope_else(). */
-void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_else();
-}
-
-
-/* Two ELSE instructions are equal if the IF instructions they refer to
- * are equal; anything that is not an ELSE compares unequal. */
-bool ElseInstruction::is_equal_to(const Instruction& lhs) const
-{
-   if (lhs.type() != cond_else) {
-      return false;
-   }
-
-   const auto& other = static_cast<const ElseInstruction&>(lhs);
-   return *m_jump_src == *other.m_jump_src;
-}
-
-/* Print the mnemonic of this instruction. */
-void ElseInstruction::do_print(std::ostream& os) const
-{
-   os << "ELSE";
-}
-
-/* Create the instruction that closes an if/else construct. */
-IfElseEndInstruction::IfElseEndInstruction():
-   IfElseInstruction(cond_endif)
-{
-}
-
-/* Liveness hook: forwards to LiverangeEvaluator::scope_endif(). */
-void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_endif();
-}
-
-/* An ENDIF carries no state: it equals every other ENDIF and nothing else. */
-bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const
-{
-   return lhs.type() == cond_endif;
-}
-
-/* Print the mnemonic of this instruction. */
-void IfElseEndInstruction::do_print(std::ostream& os) const
-{
-   os << "ENDIF";
-}
-
-/* Create the instruction that opens a loop. */
-LoopBeginInstruction::LoopBeginInstruction():
-   CFInstruction(loop_begin)
-{
-}
-
-/* Liveness hook: forwards to LiverangeEvaluator::scope_loop_begin(). */
-void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_loop_begin();
-}
-
-/* A loop begin carries no state, so any two of them are equal; the type
- * match is only asserted. */
-bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == loop_begin);
-   return true;
-}
-
-/* Print the mnemonic of this instruction. */
-void LoopBeginInstruction::do_print(std::ostream& os) const
-{
-   os << "BGNLOOP";
-}
-
-/* Create the instruction that closes a loop and remember the loop begin
- * it belongs to. */
-LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start):
-   CFInstruction (loop_end),
-   m_start(start)
-{
-}
-
-/* Liveness hook: forwards to LiverangeEvaluator::scope_loop_end(). */
-void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_loop_end();
-}
-
-/* Two loop ends are equal if the loop begins they refer to are equal. */
-bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == loop_end);
-   const auto& rhs_end = static_cast<const LoopEndInstruction&>(lhs);
-   return (*m_start == *rhs_end.m_start);
-}
-
-/* Print the mnemonic of this instruction. */
-void LoopEndInstruction::do_print(std::ostream& os) const
-{
-   os << "ENDLOOP";
-}
-
-/* Create a loop break instruction. */
-LoopBreakInstruction::LoopBreakInstruction():
-   CFInstruction (loop_break)
-{
-}
-
-/* Liveness hook: forwards to LiverangeEvaluator::scope_loop_break(). */
-void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_loop_break();
-}
-
-/* A break carries no state; operator== has already matched the types, so
- * any two breaks are equal. */
-bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const
-{
-   return true;
-}
-
-/* Print the mnemonic of this instruction. */
-void LoopBreakInstruction::do_print(std::ostream& os) const
-{
-   os << "BREAK";
-}
-
-/* Create a loop continue instruction. */
-LoopContInstruction::LoopContInstruction():
-   CFInstruction (loop_continue)
-{
-}
-
-/* A continue carries no state; operator== has already matched the types,
- * so any two continues are equal. */
-bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const
-{
-   return true;
-}
-/* Print the mnemonic of this instruction. */
-void LoopContInstruction::do_print(std::ostream& os) const
-{
-   os << "CONTINUE";
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h
@ -1,142 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_IFELSEINSTRUCTION_H
-#define SFN_IFELSEINSTRUCTION_H
-
-#include "sfn_instruction_alu.h"
-
-namespace r600  {
-
-class CFInstruction : public Instruction {
-protected:
-   CFInstruction(instr_type type);
-};
-
-class IfElseInstruction : public CFInstruction {
-public:
-   IfElseInstruction(instr_type type);
-
-};
-
-class IfInstruction : public IfElseInstruction {
-public:
-   IfInstruction(AluInstruction *pred);
-   const AluInstruction& pred() const {return *m_pred;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   std::shared_ptr<AluInstruction> m_pred;
-};
-
-class ElseInstruction : public IfElseInstruction {
-public:
-   ElseInstruction(IfInstruction *jump_src);
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   IfElseInstruction *m_jump_src;
-};
-
-class IfElseEndInstruction : public IfElseInstruction {
-public:
-   IfElseEndInstruction();
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-};
-
-class LoopBeginInstruction: public CFInstruction {
-public:
-   LoopBeginInstruction();
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-};
-
-class LoopEndInstruction: public CFInstruction {
-public:
-   LoopEndInstruction(LoopBeginInstruction *start);
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   LoopBeginInstruction *m_start;
-};
-
-class LoopBreakInstruction: public CFInstruction {
-public:
-   LoopBreakInstruction();
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-};
-
-class LoopContInstruction: public CFInstruction {
-public:
-   LoopContInstruction();
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-};
-
-}
-
-#endif // SFN_IFELSEINSTRUCTION_H
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp
@ -1,341 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "sfn_instruction_export.h"
-#include "sfn_liverange.h"
-#include "sfn_valuepool.h"
-
-namespace r600 {
-
-WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value):
-   Instruction(t),
-   m_value(value)
-{
-   add_remappable_src_value(&m_value);
-}
-
-void WriteoutInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   // I wonder whether we can actually end up here ...
-   for (auto c: candidates) {
-      if (*c == *m_value.reg_i(c->chan()))
-         m_value.set_reg_i(c->chan(), new_value);
-   }
-
-   replace_values_child(candidates, new_value);
-}
-
-void WriteoutInstruction::replace_values_child(UNUSED const ValueSet& candidates,
-                                               UNUSED PValue new_value)
-{
-}
-
-void WriteoutInstruction::remap_registers_child(UNUSED std::vector<rename_reg_pair>& map,
-                                                UNUSED ValueMap& values)
-{
-}
-
-ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type):
-   WriteoutInstruction(Instruction::exprt, value),
-   m_type(type),
-   m_loc(loc),
-   m_is_last(false)
-{
-}
-
-
-bool ExportInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == exprt);
-   const auto& oth = static_cast<const ExportInstruction&>(lhs);
-
-   return (gpr() == oth.gpr()) &&
-         (m_type == oth.m_type) &&
-         (m_loc == oth.m_loc) &&
-         (m_is_last == oth.m_is_last);
-}
-
-void ExportInstruction::do_print(std::ostream& os) const
-{
-   os << (m_is_last ? "EXPORT_DONE ":"EXPORT ");
-   switch (m_type) {
-   case et_pixel: os << "PIXEL "; break;
-   case et_pos: os << "POS "; break;
-   case et_param: os << "PARAM "; break;
-   }
-   os << m_loc << " " << gpr();
-}
-
-void ExportInstruction::update_output_map(OutputRegisterMap& map) const
-{
-   map[m_loc] = gpr_ptr();
-}
-
-void ExportInstruction::set_last()
-{
-   m_is_last = true;
-}
-
-WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value,
-                                                 int align, int align_offset, int writemask):
-   WriteoutInstruction (Instruction::mem_wr_scratch, value),
-   m_loc(loc),
-   m_align(align),
-   m_align_offset(align_offset),
-   m_writemask(writemask),
-   m_array_size(0)
-{
-}
-
-WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value,
-                                                 int align, int align_offset, int writemask, int array_size):
-   WriteoutInstruction (Instruction::mem_wr_scratch, value),
-   m_loc(0),
-   m_address(address),
-   m_align(align),
-   m_align_offset(align_offset),
-   m_writemask(writemask),
-   m_array_size(array_size - 1)
-{
-   add_remappable_src_value(&m_address);
-}
-
-bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const
-{
-   if (lhs.type() != Instruction::mem_wr_scratch)
-      return false;
-   const auto& other = static_cast<const WriteScratchInstruction&>(lhs);
-
-   if (m_address) {
-      if (!other.m_address)
-         return false;
-      if (*m_address != *other.m_address)
-         return false;
-   } else {
-      if (other.m_address)
-         return false;
-   }
-
-   return gpr() == other.gpr() &&
-         m_loc == other.m_loc &&
-         m_align == other.m_align &&
-         m_align_offset == other.m_align_offset &&
-         m_writemask == other.m_writemask;
-}
-
-static char *writemask_to_swizzle(int writemask, char *buf)
-{
-   const char *swz = "xyzw";
-   for (int i = 0; i < 4; ++i) {
-      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
-   }
-   return buf;
-}
-
-void WriteScratchInstruction::do_print(std::ostream& os) const
-{
-   char buf[5];
-
-   os << "MEM_SCRATCH_WRITE ";
-   if (m_address)
-      os << "@" << *m_address << "+";
-
-   os << m_loc  << "." << writemask_to_swizzle(m_writemask, buf)
-      << " " <<  gpr()  << " AL:" << m_align << " ALO:" << m_align_offset;
-}
-
-void WriteScratchInstruction::replace_values_child(const ValueSet& candidates, PValue new_value)
-{
-   if (!m_address)
-      return;
-
-   for (auto c: candidates) {
-      if (*c == *m_address)
-         m_address = new_value;
-   }
-}
-
-void WriteScratchInstruction::remap_registers_child(std::vector<rename_reg_pair>& map,
-                           ValueMap& values)
-{
-   if (!m_address)
-      return;
-   sfn_log << SfnLog::merge << "Remap " << *m_address <<  " of type " << m_address->type() << "\n";
-   assert(m_address->type() == Value::gpr);
-   auto new_index = map[m_address->sel()];
-   if (new_index.valid)
-      m_address = values.get_or_inject(new_index.new_reg, m_address->chan());
-   map[m_address->sel()].used = true;
-}
-
-StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
-                                         int array_base, int comp_mask, int out_buffer,
-                                         int stream):
-   WriteoutInstruction(Instruction::streamout, value),
-   m_element_size(num_components == 3 ? 3 : num_components - 1),
-   m_burst_count(1),
-   m_array_base(array_base),
-   m_array_size(0xfff),
-   m_writemask(comp_mask),
-   m_output_buffer(out_buffer),
-   m_stream(stream)
-{
-}
-
-unsigned StreamOutIntruction::op() const
-{
-   int op = 0;
-   switch (m_output_buffer) {
-   case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
-   case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
-   case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
-   case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
-   }
-   return 4 * m_stream + op;
-}
-
-bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == streamout);
-   const auto& oth = static_cast<const StreamOutIntruction&>(lhs);
-
-   return gpr() == oth.gpr() &&
-         m_element_size == oth.m_element_size &&
-         m_burst_count == oth.m_burst_count &&
-         m_array_base == oth.m_array_base &&
-         m_array_size == oth.m_array_size &&
-         m_writemask == oth.m_writemask &&
-         m_output_buffer == oth.m_output_buffer &&
-         m_stream == oth.m_stream;
-}
-
-void StreamOutIntruction::do_print(std::ostream& os) const
-{
-   os << "WRITE STREAM(" << m_stream << ") "  << gpr()
-      << " ES:" << m_element_size
-      << " BC:" << m_burst_count
-      << " BUF:" << m_output_buffer
-      << " ARRAY:" <<  m_array_base;
-   if (m_array_size != 0xfff)
-      os << "+" << m_array_size;
-}
-
-MemRingOutIntruction::MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
-                                           const GPRVector& value,
-                                           unsigned base_addr, unsigned ncomp,
-                                           PValue index):
-   WriteoutInstruction(Instruction::ring, value),
-   m_ring_op(ring),
-   m_type(type),
-   m_base_address(base_addr),
-   m_num_comp(ncomp),
-   m_index(index)
-{
-   add_remappable_src_value(&m_index);
-
-   assert(m_ring_op  == cf_mem_ring || m_ring_op  == cf_mem_ring1||
-          m_ring_op  == cf_mem_ring2 || m_ring_op  == cf_mem_ring3);
-   assert(m_num_comp <= 4);
-}
-
-unsigned MemRingOutIntruction::ncomp() const
-{
-   switch (m_num_comp) {
-   case 1: return 0;
-   case 2: return 1;
-   case 3:
-   case 4: return 3;
-   default:
-      assert(0);
-   }
-   return 3;
-}
-
-bool MemRingOutIntruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == streamout);
-   const auto& oth = static_cast<const MemRingOutIntruction&>(lhs);
-
-   bool equal = gpr() == oth.gpr() &&
-                m_ring_op == oth.m_ring_op &&
-                m_type == oth.m_type &&
-                m_num_comp == oth.m_num_comp &&
-                m_base_address == oth.m_base_address;
-
-   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
-      equal &= (*m_index == *oth.m_index);
-   return equal;
-
-}
-
-static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
-void MemRingOutIntruction::do_print(std::ostream& os) const
-{
-   os << "MEM_RING " << m_ring_op;
-   os << " " << write_type_str[m_type] << " " << m_base_address;
-   os << " " << gpr();
-   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
-      os << " @" << *m_index;
-   os << " ES:" << m_num_comp;
-}
-
-
-void MemRingOutIntruction::replace_values_child(const ValueSet& candidates,
-                                                PValue new_value)
-{
-   if (!m_index)
-      return;
-
-   for (auto c: candidates) {
-      if (*c == *m_index)
-         m_index = new_value;
-   }
-}
-
-void MemRingOutIntruction::remap_registers_child(std::vector<rename_reg_pair>& map,
-                                                 ValueMap& values)
-{
-   if (!m_index)
-      return;
-
-   assert(m_index->type() == Value::gpr);
-   auto new_index = map[m_index->sel()];
-   if (new_index.valid)
-      m_index = values.get_or_inject(new_index.new_reg, m_index->chan());
-   map[m_index->sel()].used = true;
-}
-
-void MemRingOutIntruction::patch_ring(int stream, PValue index)
-{
-   const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
-
-   assert(stream < 4);
-   m_ring_op = ring_op[stream];
-   m_index = index;
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_export.h
@ -1,185 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_EXPORTINSTRUCTION_H
-#define SFN_EXPORTINSTRUCTION_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class WriteoutInstruction: public Instruction {
-public:
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-   const GPRVector&  gpr() const {return m_value;}
-   const GPRVector  *gpr_ptr() const {return &m_value;}
-protected:
-   WriteoutInstruction(instr_type t, const GPRVector& value);
-private:
-   virtual void replace_values_child(const ValueSet& candidates, PValue new_value);
-   virtual void remap_registers_child(std::vector<rename_reg_pair>& map,
-                        ValueMap& values);
-
-   GPRVector m_value;
-};
-
-class ExportInstruction : public WriteoutInstruction {
-public:
-   enum ExportType {
-      et_pixel,
-      et_pos,
-      et_param
-   };
-
-   ExportInstruction(unsigned loc, const GPRVector& value, ExportType type);
-   void set_last();
-
-   ExportType export_type() const {return m_type;}
-
-   unsigned location() const {return m_loc;}
-   bool is_last_export() const {return m_is_last;}
-
-   void update_output_map(OutputRegisterMap& map) const;
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   ExportType m_type;
-   unsigned m_loc;
-   bool m_is_last;
-};
-
-class WriteScratchInstruction : public WriteoutInstruction {
-public:
-
-   WriteScratchInstruction(unsigned loc, const GPRVector& value, int align,
-                           int align_offset, int writemask);
-   WriteScratchInstruction(const PValue& address, const GPRVector& value,
-                           int align, int align_offset, int writemask, int array_size);
-   unsigned location() const {return m_loc;}
-
-   int write_mask() const { return m_writemask;}
-   int address() const { assert(m_address); return m_address->sel();}
-   bool indirect() const { return !!m_address;}
-   int array_size() const { return m_array_size;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   void replace_values_child(const ValueSet& candidates, PValue new_value) override;
-   void remap_registers_child(std::vector<rename_reg_pair>& map,
-                              ValueMap& values)override;
-
-   unsigned m_loc;
-   PValue m_address;
-   unsigned m_align;
-   unsigned m_align_offset;
-   unsigned m_writemask;
-   int m_array_size;
-};
-
-
-class StreamOutIntruction: public WriteoutInstruction {
-public:
-   StreamOutIntruction(const GPRVector& value, int num_components,
-                       int array_base, int comp_mask, int out_buffer,
-                       int stream);
-   int element_size() const { return m_element_size;}
-   int burst_count() const { return m_burst_count;}
-   int array_base() const { return m_array_base;}
-   int array_size() const { return m_array_size;}
-   int comp_mask() const { return m_writemask;}
-   unsigned op() const;
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   int m_element_size;
-   int m_burst_count;
-   int m_array_base;
-   int m_array_size;
-   int m_writemask;
-   int m_output_buffer;
-   int m_stream;
-};
-
-enum EMemWriteType {
-   mem_write = 0,
-   mem_write_ind = 1,
-   mem_write_ack = 2,
-   mem_write_ind_ack = 3,
-};
-
-class MemRingOutIntruction: public WriteoutInstruction {
-public:
-
-   MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
-                        const GPRVector& value, unsigned base_addr,
-                        unsigned ncomp, PValue m_index);
-
-   unsigned op() const{return m_ring_op;}
-   unsigned ncomp() const;
-   unsigned addr() const {return m_base_address;}
-   EMemWriteType type() const {return m_type;}
-   unsigned index_reg() const {return m_index->sel();}
-   unsigned array_base() const {return m_base_address; }
-   void replace_values_child(const ValueSet& candidates, PValue new_value) override;
-   void remap_registers_child(std::vector<rename_reg_pair>& map,
-                        ValueMap& values) override;
-   void patch_ring(int stream, PValue index);
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   ECFOpCode m_ring_op;
-   EMemWriteType m_type;
-   unsigned m_base_address;
-   unsigned m_num_comp;
-   PValue m_index;
-
-};
-
-}
-
-
-#endif // SFN_EXPORTINSTRUCTION_H
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp
@ -1,480 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_fetch.h"
-
-#include "gallium/drivers/r600/r600_pipe.h"
-
-namespace r600 {
-
-/* refactor this to add status create methods for specific tasks */
-FetchInstruction::FetchInstruction(EVFetchInstr op,
-                                   EVFetchType type,
-                                   GPRVector dst,
-                                   PValue src, int offset,
-                                   int buffer_id, PValue buffer_offset,
-                                   EBufferIndexMode cp_rel,
-                                   bool use_const_field):
-   Instruction(vtx),
-   m_vc_opcode(op),
-   m_fetch_type(type),
-   m_endian_swap(vtx_es_none),
-   m_src(src),
-   m_dst(dst),
-   m_offset(offset),
-   m_is_mega_fetch(1),
-   m_mega_fetch_count(16),
-   m_buffer_id(buffer_id),
-   m_semantic_id(0),
-   m_buffer_index_mode(cp_rel),
-   m_flags(0),
-   m_uncached(false),
-   m_indexed(false),
-   m_array_base(0),
-   m_array_size(0),
-   m_elm_size(0),
-   m_buffer_offset(buffer_offset),
-   m_dest_swizzle({0,1,2,3})
-{
-   if (use_const_field) {
-      m_flags.set(vtx_use_const_field);
-      m_data_format = fmt_invalid;
-      m_num_format = vtx_nf_norm;
-   } else {
-      m_flags.set(vtx_format_comp_signed);
-      m_data_format = fmt_32_32_32_32_float;
-      m_num_format = vtx_nf_scaled;
-   }
-
-   add_remappable_src_value(&m_src);
-   add_remappable_src_value(&m_buffer_offset);
-
-   add_remappable_dst_value(&m_dst);
-}
-
-/* Resource query */
-FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode,
-                                   EVFetchType fetch_type,
-                                   EVTXDataFormat data_format,
-                                   EVFetchNumFormat num_format,
-                                   EVFetchEndianSwap endian_swap,
-                                   const PValue src,
-                                   const GPRVector dst,
-                                   uint32_t offset,
-                                   bool is_mega_fetch,
-                                   uint32_t mega_fetch_count,
-                                   uint32_t buffer_id,
-                                   uint32_t semantic_id,
-
-                                   EBufferIndexMode buffer_index_mode,
-                                   bool uncached,
-                                   bool indexed,
-                                   int array_base,
-                                   int array_size,
-                                   int elm_size,
-                                   PValue buffer_offset,
-                                   const std::array<int, 4>& dest_swizzle):
-   Instruction(vtx),
-   m_vc_opcode(vc_opcode),
-   m_fetch_type(fetch_type),
-   m_data_format(data_format),
-   m_num_format(num_format),
-   m_endian_swap(endian_swap),
-   m_src(src),
-   m_dst(dst),
-   m_offset(offset),
-   m_is_mega_fetch(is_mega_fetch),
-   m_mega_fetch_count(mega_fetch_count),
-   m_buffer_id(buffer_id),
-   m_semantic_id(semantic_id),
-   m_buffer_index_mode(buffer_index_mode),
-   m_uncached(uncached),
-   m_indexed(indexed),
-   m_array_base(array_base),
-   m_array_size(array_size),
-   m_elm_size(elm_size),
-   m_buffer_offset(buffer_offset),
-   m_dest_swizzle(dest_swizzle)
-{
-   add_remappable_src_value(&m_src);
-   add_remappable_dst_value(&m_dst);
-   add_remappable_src_value(&m_buffer_offset);
-}
-
-FetchInstruction::FetchInstruction(GPRVector dst,
-                                   PValue src,
-                                   int buffer_id, PValue buffer_offset,
-                                   EVTXDataFormat format,
-                                   EVFetchNumFormat num_format):
-   Instruction(vtx),
-   m_vc_opcode(vc_fetch),
-   m_fetch_type(no_index_offset),
-   m_data_format(format),
-   m_num_format(num_format),
-   m_endian_swap(vtx_es_none),
-   m_src(src),
-   m_dst(dst),
-   m_offset(0),
-   m_is_mega_fetch(0),
-   m_mega_fetch_count(0),
-   m_buffer_id(buffer_id),
-   m_semantic_id(0),
-   m_buffer_index_mode(bim_none),
-   m_flags(0),
-   m_uncached(false),
-   m_indexed(false),
-   m_array_base(0),
-   m_array_size(0),
-   m_elm_size(1),
-   m_buffer_offset(buffer_offset),
-   m_dest_swizzle({0,1,2,3})
-{
-   m_flags.set(vtx_format_comp_signed);
-
-   add_remappable_src_value(&m_src);
-   add_remappable_dst_value(&m_dst);
-   add_remappable_src_value(&m_buffer_offset);
-}
-
-
-/* Resource query */
-FetchInstruction::FetchInstruction(GPRVector dst,
-                                   PValue src,
-                                   int buffer_id,
-                                   EBufferIndexMode cp_rel):
-   Instruction(vtx),
-   m_vc_opcode(vc_get_buf_resinfo),
-   m_fetch_type(no_index_offset),
-   m_data_format(fmt_32_32_32_32),
-   m_num_format(vtx_nf_norm),
-   m_endian_swap(vtx_es_none),
-   m_src(src),
-   m_dst(dst),
-   m_offset(0),
-   m_is_mega_fetch(0),
-   m_mega_fetch_count(16),
-   m_buffer_id(buffer_id),
-   m_semantic_id(0),
-   m_buffer_index_mode(cp_rel),
-   m_flags(0),
-   m_uncached(false),
-   m_indexed(false),
-   m_array_base(0),
-   m_array_size(0),
-   m_elm_size(0),
-   m_dest_swizzle({0,1,2,3})
-{
-   m_flags.set(vtx_format_comp_signed);
-   add_remappable_src_value(&m_src);
-   add_remappable_dst_value(&m_dst);
-   add_remappable_src_value(&m_buffer_offset);
-}
-
-FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size):
-   Instruction(vtx),
-   m_vc_opcode(vc_read_scratch),
-   m_fetch_type(vertex_data),
-   m_data_format(fmt_32_32_32_32),
-   m_num_format(vtx_nf_int),
-   m_endian_swap(vtx_es_none),
-   m_dst(dst),
-   m_offset(0),
-   m_is_mega_fetch(0),
-   m_mega_fetch_count(16),
-   m_buffer_id(0),
-   m_semantic_id(0),
-   m_buffer_index_mode(bim_none),
-   m_flags(0),
-   m_uncached(true),
-   m_array_base(0),
-   m_array_size(0),
-   m_elm_size(3),
-   m_dest_swizzle({0,1,2,3})
-{
-   if (src->type() == Value::literal) {
-      const auto& lv = static_cast<const LiteralValue&>(*src);
-      m_array_base = lv.value();
-      m_indexed = false;
-      m_src.reset(new GPRValue(0,0));
-      m_array_size = 0;
-   } else {
-      m_array_base = 0;
-      m_src = src;
-      m_indexed = true;
-      m_array_size = scratch_size - 1;
-   }
-   add_remappable_src_value(&m_src);
-   add_remappable_dst_value(&m_dst);
-   add_remappable_src_value(&m_buffer_offset);
-}
-
-void FetchInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   if (!m_src)
-      return;
-   for (auto c: candidates) {
-      for (int i = 0; i < 4; ++i) {
-         if (*c == *m_dst.reg_i(i))
-            m_dst.set_reg_i(i, new_value);
-      }
-      if (*m_src == *c)
-         m_src = new_value;
-   }
-}
-
-
-bool FetchInstruction::is_equal_to(const Instruction& lhs) const
-{
-   auto& l = static_cast<const FetchInstruction&>(lhs);
-   if (m_src) {
-      if (!l.m_src)
-         return false;
-      if (*m_src != *l.m_src)
-         return false;
-   } else {
-      if (l.m_src)
-         return false;
-   }
-
-   return m_vc_opcode == l.m_vc_opcode &&
-         m_fetch_type == l.m_fetch_type &&
-         m_data_format == l.m_data_format &&
-         m_num_format == l.m_num_format &&
-         m_endian_swap == l.m_endian_swap &&
-         m_dst == l.m_dst &&
-         m_offset == l.m_offset &&
-         m_buffer_id == l.m_buffer_id &&
-         m_semantic_id == l.m_semantic_id &&
-         m_buffer_index_mode == l.m_buffer_index_mode &&
-         m_flags == l.m_flags &&
-         m_indexed == l.m_indexed &&
-         m_uncached == l.m_uncached;
-}
-
-void FetchInstruction::set_format(EVTXDataFormat fmt)
-{
-   m_data_format = fmt;
-}
-
-
-void FetchInstruction::set_dest_swizzle(const std::array<int,4>& swz)
-{
-   m_dest_swizzle = swz;
-}
-
-void FetchInstruction::prelude_append(Instruction *instr)
-{
-   assert(instr);
-   m_prelude.push_back(PInstruction(instr));
-}
-
-const std::vector<PInstruction>& FetchInstruction::prelude() const
-{
-   return m_prelude;
-}
-
-/* Forwards all arguments to the scratch-read constructor of FetchInstruction. */
-LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size)
-   : FetchInstruction(dst, src, scratch_size)
-{
-}
-
-/* Fetch of a single 32 bit integer component from the resource
- * R600_IMAGE_IMMED_RESOURCE_OFFSET; sets the vtx_srf_mode and vtx_vpm flags. */
-FetchGDSOpResult::FetchGDSOpResult(const GPRVector dst, const PValue src):
-   FetchInstruction(vc_fetch, no_index_offset,           /* opcode, fetch type */
-                    fmt_32, vtx_nf_int, vtx_es_none,      /* data format, number format, endian swap */
-                    src, dst, 0,                          /* source, destination, offset */
-                    false, 0xf,                           /* mega fetch flag, mega fetch count */
-                    R600_IMAGE_IMMED_RESOURCE_OFFSET, 0,  /* buffer id, semantic id */
-                    bim_none, false, false,               /* buffer index mode, uncached, indexed */
-                    0, 0, 0,                              /* array base, array size, element size */
-                    PValue(), {0,7,7,7})                  /* buffer offset, destination swizzle */
-{
-   set_flag(vtx_srf_mode);
-   set_flag(vtx_vpm);
-}
-
-/* Fetch of four 32 bit components at the given offset from the buffer
- * R600_LDS_INFO_CONST_BUFFER; sets the vtx_srf_mode and
- * vtx_format_comp_signed flags. */
-FetchTCSIOParam::FetchTCSIOParam(GPRVector dst, PValue src, int offset):
-   FetchInstruction(vc_fetch, no_index_offset,               /* opcode, fetch type */
-                    fmt_32_32_32_32, vtx_nf_scaled, vtx_es_none, /* data format, number format, endian swap */
-                    src, dst, offset,                          /* source, destination, offset */
-                    false, 16,                                 /* mega fetch flag, mega fetch count */
-                    R600_LDS_INFO_CONST_BUFFER, 0,             /* buffer id, semantic id */
-                    bim_none, false, false,                    /* buffer index mode, uncached, indexed */
-                    0, 0, 0,                                   /* array base, array size, element size */
-                    PValue(), {0,1,2,3})                       /* buffer offset, destination swizzle */
-{
-   set_flag(vtx_srf_mode);
-   set_flag(vtx_format_comp_signed);
-}
-
-
-/* Printable names of the data formats. FetchInstruction::do_print indexes
- * this table with the numeric value of the data format, so the position of
- * every entry matters; RESERVED_<n> marks slots without a named format. */
-static const char *fmt_descr[64] = {
-   "INVALID",
-   "8",
-   "4_4",
-   "3_3_2",
-   "RESERVED_4",
-   "16",
-   "16F",
-   "8_8",
-   "5_6_5",
-   "6_5_5",
-   "1_5_5_5",
-   "4_4_4_4",
-   "5_5_5_1",
-   "32",
-   "32F",
-   "16_16",
-   "16_16F",
-   "8_24",
-   "8_24F",
-   "24_8",
-   "24_8F",
-   "10_11_11",
-   "10_11_11F",
-   "11_11_10",
-   "11_11_10F",
-   "2_10_10_10",
-   "8_8_8_8",
-   "10_10_10_2",
-   "X24_8_32F",
-   "32_32",
-   "32_32F",
-   "16_16_16_16",
-   "16_16_16_16F",
-   "RESERVED_33",
-   "32_32_32_32",
-   "32_32_32_32F",
-   "RESERVED_36",
-   "1",
-   "1_REVERSED",
-   "GB_GR",
-   "BG_RG",
-   "32_AS_8",
-   "32_AS_8_8",
-   "5_9_9_9_SHAREDEXP",
-   "8_8_8",
-   "16_16_16",
-   "16_16_16F",
-   "32_32_32",
-   "32_32_32F",
-   "BC1",
-   "BC2",
-   "BC3",
-   "BC4",
-   "BC5",
-   "APC0",
-   "APC1",
-   "APC2",
-   "APC3",
-   "APC4",
-   "APC5",
-   "APC6",
-   "APC7",
-   "CTX1",
-   "RESERVED_63"
-};
-
-
-/* Write a human readable description of the fetch instruction to os.
- *
- * The output starts with an opcode specific prefix, followed by the source
- * (if there is one), the offset (if non-zero), the buffer id, the format
- * triple, the optional index mode, the mega fetch count and the names of all
- * flags that are set. An unknown opcode is reported as "Fetch ERROR" and
- * nothing else is printed. */
-void FetchInstruction::do_print(std::ostream& os) const
-{
-   static const std::string num_format_char[] = {"norm", "int", "scaled"};
-   static const std::string endian_swap_code[] = {
-      "noswap", "8in16", "8in32"
-   };
-   static const char buffer_index_mode_char[] = "_01E";
-   static const char *flag_string[] = {"WQM",  "CF", "signed", "no_zero",
-                                       "nostride", "AC", "TC", "VPM"};
-   static const int num_flag_strings = sizeof(flag_string) / sizeof(flag_string[0]);
-
-   switch (m_vc_opcode) {
-   case vc_fetch:
-      os << "Fetch " << m_dst;
-      break;
-   case vc_semantic:
-      os << "Fetch Semantic ID:" << m_semantic_id;
-      break;
-   case vc_get_buf_resinfo:
-      os << "Fetch BufResinfo:" << m_dst;
-      break;
-   case vc_read_scratch:
-      os << "MEM_READ_SCRATCH:" << m_dst;
-      break;
-   default:
-      os << "Fetch ERROR";
-      return;
-   }
-
-   /* replace_values() and is_equal_to() treat the source as optional, so it
-    * must not be dereferenced unconditionally here */
-   if (m_src)
-      os << ", " << *m_src;
-
-   if (m_offset)
-      os << "+" << m_offset;
-
-   os << " BUFID:" << m_buffer_id
-      << " FMT:(" << fmt_descr[m_data_format]
-      << " " << num_format_char[m_num_format]
-      << " " << endian_swap_code[m_endian_swap]
-      << ")";
-   if (m_buffer_index_mode > 0)
-      os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode];
-
-
-   if (m_is_mega_fetch)
-      os << " MFC:" << m_mega_fetch_count;
-   else
-      os << " mfc*:" << m_mega_fetch_count;
-
-   if (m_flags.any()) {
-      os << " Flags:";
-      /* Never read past the end of flag_string, whatever vtx_unknown is */
-      for (int i = 0; i < vtx_unknown && i < num_flag_strings; ++i) {
-         if (m_flags.test(i))
-            os << ' ' << flag_string[i];
-      }
-   }
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h
@ -1,187 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_INSTRUCTION_FETCH_H
-#define SFN_INSTRUCTION_FETCH_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-/* Instruction describing a fetch: an opcode and fetch type, the data format,
- * a source value, a destination register vector, the buffer it reads from
- * and a set of flags. An optional list of prelude instructions can be
- * attached with prelude_append(). */
-class FetchInstruction : public Instruction {
-public:
-
-   /* Constructor taking a value for every field */
-   FetchInstruction(EVFetchInstr vc_opcode,
-                    EVFetchType fetch_type,
-                    EVTXDataFormat data_format,
-                    EVFetchNumFormat num_format,
-                    EVFetchEndianSwap endian_swap,
-                    const PValue src,
-                    const GPRVector dst,
-                    uint32_t offset,
-                    bool is_mega_fetch,
-                    uint32_t mega_fetch_count,
-                    uint32_t buffer_id,
-                    uint32_t semantic_id,
-
-                    EBufferIndexMode buffer_index_mode,
-                    bool uncached,
-                    bool indexed,
-                    int array_base,
-                    int array_size,
-                    int elm_size,
-                    PValue buffer_offset,
-                    const std::array<int, 4>& dest_swizzle);
-
-   FetchInstruction(EVFetchInstr op,
-                    EVFetchType type,
-                    GPRVector dst,
-                    PValue src, int offset,
-                    int buffer_id, PValue buffer_offset,
-                    EBufferIndexMode cp_rel,
-                    bool use_const_field = false);
-
-   FetchInstruction(GPRVector dst,
-                    PValue src,
-                    int buffer_id,
-                    PValue buffer_offset,
-                    EVTXDataFormat format,
-                    EVFetchNumFormat num_format);
-
-   FetchInstruction(GPRVector dst,
-                    PValue src,
-                    int buffer_id,
-                    EBufferIndexMode cp_rel);
-
-   /* Constructor used by LoadFromScratch */
-   FetchInstruction(GPRVector dst, PValue src, int scratch_size);
-
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-   EVFetchInstr vc_opcode() const { return m_vc_opcode;}
-   EVFetchType fetch_type() const { return m_fetch_type;}
-
-   EVTXDataFormat data_format() const { return m_data_format;}
-   EVFetchNumFormat num_format() const { return m_num_format;}
-   EVFetchEndianSwap endian_swap() const { return m_endian_swap;}
-
-   /* Dereferences the source handle, so it must only be called when a
-    * source value is set */
-   const Value& src() const { return *m_src;}
-   const GPRVector& dst() const { return m_dst;}
-   uint32_t offset() const { return m_offset;}
-
-   bool is_mega_fetch() const { return m_is_mega_fetch;}
-   /* Misspelled predecessor of is_mega_fetch(), kept so that existing
-    * callers still compile; it is now usable on const objects as well */
-   bool is_mega_fetchconst() const { return m_is_mega_fetch;}
-   uint32_t mega_fetch_count() const { return m_mega_fetch_count;}
-
-   uint32_t buffer_id() const { return m_buffer_id;}
-   uint32_t semantic_id() const { return m_semantic_id;}
-   EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;}
-
-   /* Queries for individual bits of the flag set */
-   bool is_signed() const { return m_flags.test(vtx_format_comp_signed);}
-   bool use_const_fields() const { return m_flags.test(vtx_use_const_field);}
-
-   bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);}
-
-   void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);}
-
-   bool uncached() const {return m_uncached; }
-   bool indexed() const {return m_indexed; }
-   int array_base()const {return m_array_base; }
-   int array_size() const {return m_array_size; }
-   int elm_size() const {return m_elm_size; }
-
-   /* NOTE(review): every call registers m_buffer_offset as remappable source
-    * again - confirm that repeated registration is harmless */
-   void set_buffer_offset(PValue buffer_offset) {
-      m_buffer_offset = buffer_offset;
-      add_remappable_src_value(&m_buffer_offset);
-   }
-   PValue buffer_offset() const { return m_buffer_offset; }
-
-   void set_dest_swizzle(const std::array<int,4>& swz);
-   void set_format(EVTXDataFormat fmt);
-
-   /* Destination swizzle of component idx; idx is not range checked */
-   int swz(int idx) const { return m_dest_swizzle[idx];}
-
-   bool use_tc() const {return m_flags.test(vtx_use_tc);}
-
-   bool use_vpm() const {return m_flags.test(vtx_vpm);}
-
-   void prelude_append(Instruction *instr);
-
-   const std::vector<PInstruction>& prelude() const;
-
-   bool has_prelude() const {return !m_prelude.empty();}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   EVFetchInstr m_vc_opcode;
-   EVFetchType m_fetch_type;
-
-   EVTXDataFormat m_data_format;
-   EVFetchNumFormat m_num_format;
-   EVFetchEndianSwap m_endian_swap;
-
-   PValue m_src;
-   GPRVector m_dst;
-   uint32_t m_offset;
-
-   bool m_is_mega_fetch;
-   uint32_t m_mega_fetch_count;
-
-   uint32_t m_buffer_id;
-   uint32_t m_semantic_id;
-
-   EBufferIndexMode m_buffer_index_mode;
-   std::bitset<16> m_flags;
-   bool m_uncached;
-   bool m_indexed;
-   int m_array_base;
-   int m_array_size;
-   int m_elm_size;
-   PValue m_buffer_offset;
-   std::array<int, 4> m_dest_swizzle;
-   std::vector<PInstruction> m_prelude;
-};
-
-/* Fetch instruction created through the (dst, src, scratch_size) constructor
- * of FetchInstruction. */
-class LoadFromScratch : public FetchInstruction
-{
-public:
-   LoadFromScratch(GPRVector dst, PValue src, int scratch_size);
-};
-
-/* Fetch instruction with a fixed parameter set; only destination and source
- * are chosen by the caller. */
-class FetchGDSOpResult : public FetchInstruction
-{
-public:
-   FetchGDSOpResult(GPRVector dst, PValue src);
-};
-
-/* Fetch instruction with a fixed parameter set; destination, source and
- * offset are chosen by the caller. */
-class FetchTCSIOParam : public FetchInstruction
-{
-public:
-   FetchTCSIOParam(GPRVector dst, PValue src, int offset);
-};
-
-}
-
-#endif // SFN_INSTRUCTION_FETCH_H
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp
@ -1,180 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_gds.h"
-#include "sfn_liverange.h"
-
-namespace  r600 {
-
-/* Main constructor, the other two constructors delegate to it.
- * Stores the operands, registers both sources and the UAV id as remappable
- * source values and the destination as remappable destination value. */
-GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,  const PValue& value,
-                   const PValue& value2, const PValue& uav_id, int uav_base):
-   Instruction(gds),
-   m_op(op),
-   m_src(value),
-   m_src2(value2),
-   m_dest(dest),
-   m_dest_swizzle({PIPE_SWIZZLE_X,7,7,7}),
-   m_src_swizzle({PIPE_SWIZZLE_0, PIPE_SWIZZLE_X, PIPE_SWIZZLE_0}),
-   m_buffer_index_mode(bim_none),
-   m_uav_id(uav_id),
-   m_uav_base(uav_base),
-   m_flags(0)
-{
-   add_remappable_src_value(&m_src);
-   add_remappable_src_value(&m_src2);
-   add_remappable_src_value(&m_uav_id);
-   add_remappable_dst_value(&m_dest);
-   /* The first swizzle entry set in the initializer list is replaced by the
-    * channel of the first destination component */
-   m_dest_swizzle[0] = m_dest.chan_i(0);
-}
-
-/* Variant with one source value: delegates with an empty second source and
- * takes the second source swizzle entry from the channel of value, which
- * must not be empty. */
-GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,  const PValue& value,
-                   const PValue& uav_id, int uav_base)
-   : GDSInstr(op, dest, value, PValue(), uav_id, uav_base)
-{
-   assert(value);
-
-   m_src_swizzle[1] = value->chan();
-   m_src_swizzle[2] = PIPE_SWIZZLE_0;
-}
-
-/* Variant without source values: delegates with two empty sources and sets
- * the second source swizzle entry to PIPE_SWIZZLE_0. */
-GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,
-                   const PValue& uav_id, int uav_base)
-   : GDSInstr(op, dest, PValue(), PValue(), uav_id, uav_base)
-{
-   m_src_swizzle[1] = PIPE_SWIZZLE_0;
-}
-
-/* A GDS instruction never compares equal to another instruction. */
-bool GDSInstr::is_equal_to(UNUSED const Instruction& lhs) const
-{
-   const bool equal = false;
-   return equal;
-}
-
-/* Print the operation name, the destination register with its swizzle,
- * the first source (if set) and the UAV id. */
-void GDSInstr::do_print(std::ostream& os) const
-{
-   const char *swizzle_char = "xyzw01?_";
-
-   os << lds_ops.at(m_op).name << " R" << m_dest.sel() << ".";
-   for (int swizzle: m_dest_swizzle)
-      os << swizzle_char[swizzle];
-
-   if (m_src)
-      os << " " << *m_src;
-
-   os << " UAV:" << *m_uav_id;
-}
-
-/* Store all parameters and register the data vector, the RAT id offset and
- * the index vector as remappable source values. m_flags is not listed in the
- * initializer list and is therefore default constructed. */
-RatInstruction::RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
-                               const GPRVector& data, const GPRVector& index,
-                               int rat_id, const PValue& rat_id_offset,
-                               int burst_count, int comp_mask, int element_size, bool ack):
-   Instruction(rat),
-   m_cf_opcode(cf_opcode),
-   m_rat_op(rat_op),
-   m_data(data),
-   m_index(index),
-   m_rat_id(rat_id),
-   m_rat_id_offset(rat_id_offset),
-   m_burst_count(burst_count),
-   m_comp_mask(comp_mask),
-   m_element_size(element_size),
-   m_need_ack(ack)
-{
-   add_remappable_src_value(&m_data);
-   add_remappable_src_value(&m_rat_id_offset);
-   add_remappable_src_value(&m_index);
-}
-
-/* A RAT instruction never compares equal to another instruction. */
-bool RatInstruction::is_equal_to(UNUSED const Instruction& lhs) const
-{
-   const bool equal = false;
-   return equal;
-}
-
-/* Print RAT id (plus offset if set), index, operation, data, burst count,
- * component mask and element size; "ACK" is appended if an acknowledge is
- * requested. */
-void RatInstruction::do_print(std::ostream& os) const
-{
-   os << "MEM_RAT RAT(" << m_rat_id;
-   if (m_rat_id_offset)
-      os << "+" << *m_rat_id_offset;
-
-   os << ") @" << m_index
-      << " OP:" << m_rat_op << " " << m_data
-      << " BC:" << m_burst_count
-      << " MASK:" << m_comp_mask
-      << " ES:" << m_element_size;
-
-   if (m_need_ack)
-      os << " ACK";
-}
-
-/* Translate a NIR SSBO atomic intrinsic into the corresponding returning RAT
- * operation; every other intrinsic yields UNSUPPORTED.
- * NOTE(review): there is no case for an atomic OR intrinsic although the
- * enum provides OR_RTN - confirm whether this is intentional. */
-RatInstruction::ERatOp RatInstruction::opcode(nir_intrinsic_op opcode)
-{
-   switch (opcode) {
-   case nir_intrinsic_ssbo_atomic_add:      return ADD_RTN;
-   case nir_intrinsic_ssbo_atomic_and:      return AND_RTN;
-   case nir_intrinsic_ssbo_atomic_exchange: return XCHG_RTN;
-   case nir_intrinsic_ssbo_atomic_umax:     return MAX_UINT_RTN;
-   case nir_intrinsic_ssbo_atomic_umin:     return MIN_UINT_RTN;
-   case nir_intrinsic_ssbo_atomic_imax:     return MAX_INT_RTN;
-   case nir_intrinsic_ssbo_atomic_imin:     return MIN_INT_RTN;
-   case nir_intrinsic_ssbo_atomic_xor:      return XOR_RTN;
-   default:
-      break;
-   }
-   return UNSUPPORTED;
-}
-
-/* Copy the passed register vector and register the copy as remappable
- * source value. */
-GDSStoreTessFactor::GDSStoreTessFactor(GPRVector& value):
-   Instruction(tf_write),
-   m_value(value)
-{
-   add_remappable_src_value(&m_value);
-}
-
-/* Replace each of the four components that compares equal to one of the
- * candidates with new_value. */
-void GDSStoreTessFactor::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   for (auto& candidate: candidates) {
-      for (int chan = 0; chan < 4; ++chan) {
-         if (!(*candidate == *m_value[chan]))
-            continue;
-         m_value[chan] = new_value;
-      }
-   }
-}
-
-
-/* Equal if the register vectors compare equal; lhs is cast to
- * GDSStoreTessFactor without a type check. */
-bool GDSStoreTessFactor::is_equal_to(const Instruction& lhs) const
-{
-   const auto& rhs_value = static_cast<const GDSStoreTessFactor&>(lhs).m_value;
-   return m_value == rhs_value;
-}
-
-/* Print the instruction as "TF_WRITE" followed by the register vector. */
-void GDSStoreTessFactor::do_print(std::ostream& os) const
-{
-   os << "TF_WRITE ";
-   os << m_value;
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
@ -1,225 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_GDSINSTR_H
-#define SFN_GDSINSTR_H
-
-#include "sfn_instruction_base.h"
-
-#include <bitset>
-
-namespace r600 {
-
-/* Instruction with an operation code, up to two source values, a destination
- * register vector and a UAV id plus base. */
-class GDSInstr : public Instruction
-{
-public:
-   /* One source value */
-   GDSInstr(ESDOp op, const GPRVector& dest,  const PValue& value,
-            const PValue &uav_id, int uav_base);
-   /* Two source values */
-   GDSInstr(ESDOp op, const GPRVector& dest,  const PValue& value,
-            const PValue& value2, const PValue &uav_id, int uav_base);
-   /* No source value */
-   GDSInstr(ESDOp op, const GPRVector& dest,  const PValue &uav_id, int uav_base);
-
-   ESDOp op() const {return m_op;}
-
-   /* Register index of the first source, 0 if there is no first source */
-   int src_sel() const {
-      if (!m_src)
-         return 0;
-
-      assert(m_src->type() == Value::gpr);
-      return m_src->sel();
-   }
-
-   /* Channel of the second source, 0 if there is no second source.
-    * This used to check m_src2 but then read m_src, which returned the
-    * channel of the wrong operand and dereferenced an empty handle when only
-    * the second source was set. */
-   int src2_chan() const {
-      if (!m_src2)
-         return 0;
-
-      assert(m_src2->type() == Value::gpr);
-      return m_src2->chan();
-   }
-
-   int src_swizzle(int idx) const {assert(idx < 3); return m_src_swizzle[idx];}
-
-   int dest_sel() const {
-      return m_dest.sel();
-   }
-
-   /* Swizzle of destination component i; indices of 4 and above yield 7 */
-   int dest_swizzle(int i) const {
-      if (i < 4)
-         return m_dest_swizzle[i];
-      return 7;
-   }
-
-   void set_dest_swizzle(const std::array<int,4>& swz) {
-      m_dest_swizzle = swz;
-   }
-
-   PValue uav_id() const {return m_uav_id;}
-   int uav_base() const {return m_uav_base;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   ESDOp m_op;
-
-   PValue m_src;
-   PValue m_src2;
-   GPRVector m_dest;
-   std::array <int, 4> m_dest_swizzle;
-   std::array <int, 3> m_src_swizzle;
-
-   EBufferIndexMode m_buffer_index_mode;
-   PValue m_uav_id;
-   int m_uav_base;
-   std::bitset<8> m_flags;
-
-};
-
-/* Instruction with a RAT operation, a data and an index register vector,
- * a RAT id with optional offset value and a few integer parameters. */
-class RatInstruction : public Instruction
-{
-public:
-   /* RAT operations. The first group starts at 0, the "_RTN" group starts
-    * at 32 with a gap at 33; UNSUPPORTED is what opcode() returns for
-    * intrinsics it does not translate. */
-   enum ERatOp {
-      NOP,
-      STORE_TYPED,
-      STORE_RAW,
-      STORE_RAW_FDENORM,
-      CMPXCHG_INT,
-      CMPXCHG_FLT,
-      CMPXCHG_FDENORM,
-      ADD,
-      SUB,
-      RSUB,
-      MIN_INT,
-      MIN_UINT,
-      MAX_INT,
-      MAX_UINT,
-      AND,
-      OR,
-      XOR,
-      MSKOR,
-      INC_UINT,
-      DEC_UINT,
-      NOP_RTN = 32,
-      XCHG_RTN = 34,
-      XCHG_FDENORM_RTN,
-      CMPXCHG_INT_RTN,
-      CMPXCHG_FLT_RTN,
-      CMPXCHG_FDENORM_RTN,
-      ADD_RTN,
-      SUB_RTN,
-      RSUB_RTN,
-      MIN_INT_RTN,
-      MIN_UINT_RTN,
-      MAX_INT_RTN,
-      MAX_UINT_RTN,
-      AND_RTN,
-      OR_RTN,
-      XOR_RTN,
-      MSKOR_RTN,
-      UINT_RTN,
-      UNSUPPORTED
-   };
-
-   RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
-                  const GPRVector& data, const GPRVector& index,
-                  int rat_id, const PValue& rat_id_offset,
-                  int burst_count, int comp_mask, int element_size,
-                  bool ack);
-
-   /* Translate a NIR intrinsic into a RAT operation */
-   static ERatOp opcode(nir_intrinsic_op opcode);
-
-   ECFOpCode cf_opcode() const { return m_cf_opcode; }
-   ERatOp rat_op() const { return m_rat_op; }
-
-   int rat_id() const { return m_rat_id; }
-   PValue rat_id_offset() const { return m_rat_id_offset; }
-
-   /* Register indices of the data and the index vector */
-   int data_gpr() const { return m_data.sel(); }
-   int index_gpr() const { return m_index.sel(); }
-
-   /* Channel of component chan of the data vector */
-   int data_swz(int chan) const { return m_data.chan_i(chan); }
-
-   int elm_size() const { return m_element_size; }
-   int comp_mask() const { return m_comp_mask; }
-   int burst_count() const { return m_burst_count; }
-
-   bool need_ack() const { return m_need_ack; }
-   void set_ack() { m_need_ack = true; }
-
-   bool accept(InstructionVisitor& visitor) override { return visitor.visit(*this); }
-   bool accept(ConstInstructionVisitor& visitor) const override { return visitor.visit(*this); }
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   ECFOpCode m_cf_opcode;
-   ERatOp m_rat_op;
-
-   GPRVector m_data;
-   GPRVector m_index;
-
-   int m_rat_id;
-   PValue m_rat_id_offset;
-   int m_burst_count;
-   int m_comp_mask;
-   int m_element_size;
-
-   std::bitset<8> m_flags;
-
-   bool m_need_ack;
-};
-
-/* Instruction holding one register vector that is printed as "TF_WRITE". */
-class GDSStoreTessFactor : public Instruction {
-public:
-   GDSStoreTessFactor(GPRVector& value);
-
-   /* Register index of the vector and channel of its component i */
-   int sel() const {return m_value.sel();}
-   int chan(int i ) const {return m_value.chan_i(i);}
-
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   GPRVector m_value;
-};
-
-}
-
-#endif // SFN_GDSINSTR_H
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
@ -1,151 +0,0 @@
-#include "sfn_instruction_lds.h"
-
-namespace r600 {
-
-/* Print all destination values in brackets followed by all addresses. */
-void LDSReadInstruction::do_print(std::ostream& os) const
-{
-   os << "LDS Read  [";
-   for (const auto& dest: m_dest_value)
-      os << *dest << " ";
-
-   os << "], ";
-   for (const auto& addr: m_address)
-      os << *addr << " ";
-}
-
-/* Copy the address and destination lists, which must have the same length,
- * and register every address as remappable source and every destination as
- * remappable destination value. */
-LDSReadInstruction::LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value):
-   Instruction(lds_read),
-   m_address(address),
-   m_dest_value(value)
-{
-   assert(address.size() == value.size());
-
-   for (unsigned i = 0; i < address.size(); ++i) {
-      add_remappable_src_value(&m_address[i]);
-      add_remappable_dst_value(&m_dest_value[i]);
-   }
-}
-
-/* Replace every destination value and every address that compares equal to
- * one of the candidates with new_value. */
-void LDSReadInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   for (auto& candidate : candidates) {
-      for (auto& dest: m_dest_value) {
-         if (*candidate == *dest)
-            dest = new_value;
-      }
-
-      for (auto& addr: m_address) {
-         if (*candidate == *addr)
-            addr = new_value;
-      }
-   }
-}
-
-/* Two LDS reads are equal if they read the same addresses into the same
- * destinations. lhs is cast to LDSReadInstruction without a type check.
- *
- * The values are compared through the handles, like the other is_equal_to
- * implementations do; comparing the vectors directly would compare the
- * PValue handles themselves. All entries are assumed to be non-empty, which
- * do_print() relies on as well. */
-bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
-{
-   auto& other = static_cast<const LDSReadInstruction&>(lhs);
-
-   if (m_address.size() != other.m_address.size() ||
-       m_dest_value.size() != other.m_dest_value.size())
-      return false;
-
-   for (unsigned i = 0; i < m_address.size(); ++i) {
-      if (!(*m_address[i] == *other.m_address[i]))
-         return false;
-   }
-
-   for (unsigned i = 0; i < m_dest_value.size(); ++i) {
-      if (!(*m_dest_value[i] == *other.m_dest_value[i]))
-         return false;
-   }
-
-   return true;
-}
-
-/* Store the operands and register both sources and the address as
- * remappable source values and dest as remappable destination value.
- * The second source is registered even when the four parameter constructor
- * passed an empty handle for it. */
-LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op):
-   Instruction(lds_atomic),
-   m_address(address),
-   m_dest_value(dest),
-   m_src0_value(src0),
-   m_src1_value(src1),
-   m_opcode(op)
-{
-   add_remappable_src_value(&m_src0_value);
-   add_remappable_src_value(&m_src1_value);
-   add_remappable_src_value(&m_address);
-   add_remappable_dst_value(&m_dest_value);
-}
-
-/* Variant with one source: delegates with an empty second source. */
-LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op)
-   : LDSAtomicInstruction(dest, src0, PValue(), address, op)
-{
-}
-
-
-/* Print opcode, destination, address in brackets, the first source and, if
- * set, the second source. */
-void LDSAtomicInstruction::do_print(std::ostream& os) const
-{
-   os << "LDS " << m_opcode << " " << *m_dest_value << " "
-      << "[" << *m_address << "] " << *m_src0_value;
-
-   if (!m_src1_value)
-      return;
-
-   os << ", " << *m_src1_value;
-}
-
-/* Equal if opcode, destination, first source and address compare equal and
- * the second sources are either both empty or both set and equal. lhs is
- * cast to LDSAtomicInstruction without a type check. */
-bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
-{
-   const auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
-
-   if (!(m_opcode == other.m_opcode))
-      return false;
-   if (!(*m_dest_value == *other.m_dest_value))
-      return false;
-   if (!(*m_src0_value == *other.m_src0_value))
-      return false;
-   if (!(*m_address == *other.m_address))
-      return false;
-
-   if (m_src1_value && other.m_src1_value)
-      return *m_src1_value == *other.m_src1_value;
-
-   return !m_src1_value && !other.m_src1_value;
-}
-
-/* Variant writing one value: delegates with an empty second value. */
-LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0)
-   : LDSWriteInstruction(address, idx_offset, value0, PValue())
-{
-}
-
-/* Store the operands and register the address and the first value as
- * remappable source values; the second value is only registered when it is
- * set. */
-LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1):
-   Instruction(lds_write),
-   m_address(address),
-   m_value0(value0),
-   m_value1(value1),
-   m_idx_offset(idx_offset)
-{
-   add_remappable_src_value(&m_address);
-   add_remappable_src_value(&m_value0);
-   if (m_value1)
-      add_remappable_src_value(&m_value1);
-}
-
-
-/* Print the number of written components, the address and the first value;
- * the second value is printed for two component writes only. */
-void LDSWriteInstruction::do_print(std::ostream& os) const
-{
-   os << "LDS Write" << num_components() << " " << address() << ", " << value0();
-
-   if (num_components() <= 1)
-      return;
-
-   os << ", " << value1();
-}
-
-/* Replace the address and the written values with new_value wherever they
- * compare equal to one of the candidates. */
-void LDSWriteInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   for (auto c: candidates) {
-      if (*c == *m_address)
-         m_address = new_value;
-
-      if (*c == *m_value0)
-         m_value0 = new_value;
-
-      /* The second value is empty for one component writes and must not
-       * be dereferenced in that case */
-      if (m_value1 && *c == *m_value1)
-         m_value1 = new_value;
-   }
-}
-
-/* Two LDS writes are equal if they write the same values with the same
- * index offset to the same address. lhs is cast to LDSWriteInstruction
- * without a type check.
- *
- * The final comparison used to be inverted: it returned true only when both
- * the first value handle and the address differed, so an instruction did not
- * even compare equal to itself. */
-bool LDSWriteInstruction::is_equal_to(const Instruction& lhs) const
-{
-   auto& other = static_cast<const LDSWriteInstruction&>(lhs);
-
-   /* Either both writes have a second value or neither has one */
-   if (!m_value1 != !other.m_value1)
-      return false;
-
-   if (m_value1 && *m_value1 != *other.m_value1)
-      return false;
-
-   return m_idx_offset == other.m_idx_offset &&
-          *m_value0 == *other.m_value0 &&
-          *m_address == *other.m_address;
-}
-
-} // namespace r600
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
@ -1,82 +0,0 @@
-#ifndef LDSINSTRUCTION_H
-#define LDSINSTRUCTION_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-/* Instruction that reads a list of values from a list of addresses; entry i
- * of the address list belongs to entry i of the destination list. */
-class LDSReadInstruction : public Instruction {
-public:
-   /* The parameter names follow the definition, which stores the first
-    * argument as address list and the second as destination list; the
-    * declaration used to name them the other way round. Both lists must have
-    * the same length. */
-   LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value);
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   unsigned num_values() const { return m_dest_value.size();}
-   /* Neither accessor checks i against the number of values */
-   const Value& address(unsigned i) const { return *m_address[i];}
-   const Value& dest(unsigned i) const { return *m_dest_value[i];}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-
-   std::vector<PValue> m_address;
-   std::vector<PValue> m_dest_value;
-};
-
-/* Instruction with an opcode, an address, a destination and one or two
- * source values. */
-class LDSAtomicInstruction : public Instruction
-{
-public:
-   /* Two sources */
-   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
-   /* One source */
-   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
-
-   unsigned op() const { return m_opcode; }
-   const Value& address() const { return *m_address; }
-   const Value& dest() const { return *m_dest_value; }
-   const Value& src0() const { return *m_src0_value; }
-   /* Returned as handle because the second source may be empty */
-   const PValue& src1() const { return m_src1_value; }
-
-   bool accept(InstructionVisitor& visitor) override { return visitor.visit(*this); }
-   bool accept(ConstInstructionVisitor& visitor) const override { return visitor.visit(*this); }
-
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-
-   PValue m_address;
-   PValue m_dest_value;
-   PValue m_src0_value;
-   PValue m_src1_value;
-   unsigned m_opcode;
-};
-
-/* Instruction that writes one or two values to an address with an index
- * offset. */
-class LDSWriteInstruction : public Instruction
-{
-public:
-   /* One value */
-   LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0);
-   /* Two values */
-   LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1);
-
-   const Value& address() const { return *m_address; }
-   const Value& value0() const { return *m_value0; }
-   /* Dereferences the second value, so it must only be called when
-    * num_components() is 2 */
-   const Value& value1() const { return *m_value1; }
-   unsigned num_components() const { return m_value1 ? 2 : 1; }
-   unsigned idx_offset() const { return m_idx_offset; }
-
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   bool accept(InstructionVisitor& visitor) override { return visitor.visit(*this); }
-   bool accept(ConstInstructionVisitor& visitor) const override { return visitor.visit(*this); }
-
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-
-   PValue m_address;
-   PValue m_value0;
-   PValue m_value1;
-   unsigned m_idx_offset;
-};
-
-}
-
-#endif // LDSINSTRUCTION_H
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp
@ -1,68 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_misc.h"
-
-namespace r600 {
-/* Store the stream and whether the instruction is a cut. */
-EmitVertex::EmitVertex(int stream, bool cut)
-   : Instruction(emit_vtx),
-     m_stream(stream),
-     m_cut(cut)
-{
-}
-
-/* Equal if stream and cut flag match; lhs is cast to EmitVertex without a
- * type check. */
-bool EmitVertex::is_equal_to(const Instruction& lhs) const
-{
-   const auto& other = static_cast<const EmitVertex&>(lhs);
-
-   if (other.m_stream != m_stream)
-      return false;
-
-   return other.m_cut == m_cut;
-}
-
-/* Print "EMIT_CUT_VERTEX @" or "EMIT_VERTEX @" followed by the stream. */
-void EmitVertex::do_print(std::ostream& os) const
-{
-   const char *prefix = "EMIT_VERTEX @";
-   if (m_cut)
-      prefix = "EMIT_CUT_VERTEX @";
-
-   os << prefix << m_stream;
-}
-
-/* Store the count passed as nack. */
-WaitAck::WaitAck(int nack)
-   : Instruction(wait_ack),
-     m_nack(nack)
-{
-}
-
-/* Equal if the stored counts match; lhs is cast to WaitAck without a type
- * check. */
-bool WaitAck::is_equal_to(const Instruction& lhs) const
-{
-   const int other_nack = static_cast<const WaitAck&>(lhs).m_nack;
-   return m_nack == other_nack;
-}
-
-/* Print "WAIT_ACK @" followed by the stored count. */
-void WaitAck::do_print(std::ostream& os) const
-{
-   os << "WAIT_ACK @";
-   os << m_nack;
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h
@ -1,69 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_INSTRUCTION_MISC_H
-#define SFN_INSTRUCTION_MISC_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class EmitVertex : public Instruction {
-public:
-   EmitVertex(int stream, bool cut);
-   ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
-   int stream() const { return m_stream;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   int m_stream;
-   bool m_cut;
-};
-
-class WaitAck : public Instruction {
-public:
-   WaitAck(int nack);
-   ECFOpCode op() const {return cf_wait_ack;}
-   int n_ack() const {return m_nack;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   int m_nack;
-};
-
-}
-
-#endif // SFN_INSTRUCTION_MISC_H
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h
@ -1,143 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef INSTRUCTION_TEX_H
-#define INSTRUCTION_TEX_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class TexInstruction : public Instruction {
-public:
-   enum Opcode {
-      ld = FETCH_OP_LD,
-      get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
-      get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES,
-      get_tex_lod = FETCH_OP_GET_LOD,
-      get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
-      get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
-      set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
-      keep_gradients = FETCH_OP_KEEP_GRADIENTS,
-      set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
-      set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
-      sample = FETCH_OP_SAMPLE,
-      sample_l = FETCH_OP_SAMPLE_L,
-      sample_lb = FETCH_OP_SAMPLE_LB,
-      sample_lz = FETCH_OP_SAMPLE_LZ,
-      sample_g = FETCH_OP_SAMPLE_G,
-      sample_g_lb = FETCH_OP_SAMPLE_G_L,
-      gather4 = FETCH_OP_GATHER4,
-      gather4_o =  FETCH_OP_GATHER4_O,
-
-      sample_c = FETCH_OP_SAMPLE_C,
-      sample_c_l = FETCH_OP_SAMPLE_C_L,
-      sample_c_lb = FETCH_OP_SAMPLE_C_LB,
-      sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
-      sample_c_g = FETCH_OP_SAMPLE_C_G,
-      sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
-      gather4_c = FETCH_OP_GATHER4_C,
-      gather4_c_o =  FETCH_OP_GATHER4_C_O,
-
-   };
-
-   enum Flags {
-      x_unnormalized,
-      y_unnormalized,
-      z_unnormalized,
-      w_unnormalized,
-      grad_fine
-   };
-
-   TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid,
-                  unsigned rid, PValue sampler_offset);
-
-   const GPRVector& src() const {return m_src;}
-   const GPRVector& dst() const {return m_dst;}
-   unsigned opcode() const {return m_opcode;}
-   unsigned sampler_id() const {return m_sampler_id;}
-   unsigned resource_id() const {return m_resource_id;}
-
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   void set_offset(unsigned index, int32_t val);
-   int get_offset(unsigned index) const;
-
-   void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
-
-   int inst_mode() const { return m_inst_mode;}
-
-   void set_flag(Flags flag) {
-      m_flags.set(flag);
-   }
-
-   PValue sampler_offset() const {
-      return m_sampler_offset;
-   }
-
-   bool has_flag(Flags flag) const {
-      return m_flags.test(flag);
-   }
-
-   int dest_swizzle(int i) const {
-      assert(i < 4);
-      return m_dest_swizzle[i];
-   }
-
-   void set_dest_swizzle(const std::array<int,4>& swz) {
-      m_dest_swizzle = swz;
-   }
-
-   void set_gather_comp(int cmp);
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   static const char *opname(Opcode code);
-
-   Opcode m_opcode;
-   GPRVector m_dst;
-   GPRVector m_src;
-   unsigned m_sampler_id;
-   unsigned m_resource_id;
-   std::bitset<8> m_flags;
-   int m_offset[3];
-   int m_inst_mode;
-   std::array<int,4> m_dest_swizzle;
-   PValue m_sampler_offset;
-};
-
-bool r600_nir_lower_int_tg4(nir_shader *nir);
-bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
-bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
-
-}
-
-#endif // INSTRUCTION_TEX_H
--- a/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h
@ -1,91 +0,0 @@
-#ifndef INSTRUCTIONVISITOR_H
-#define INSTRUCTIONVISITOR_H
-
-namespace r600 {
-
-
-class AluInstruction;
-class ExportInstruction;
-class TexInstruction;
-class FetchInstruction;
-class IfInstruction;
-class ElseInstruction;
-class IfElseEndInstruction;
-class LoopBeginInstruction;
-class LoopEndInstruction;
-class LoopBreakInstruction;
-class LoopContInstruction;
-class StreamOutIntruction;
-class MemRingOutIntruction;
-class EmitVertex;
-class WaitAck;
-class WriteScratchInstruction;
-class GDSInstr;
-class RatInstruction;
-class LDSWriteInstruction;
-class LDSReadInstruction;
-class LDSAtomicInstruction;
-class GDSStoreTessFactor;
-class InstructionBlock;
-
-class InstructionVisitor
-{
-public:
-   virtual ~InstructionVisitor() {};
-   virtual bool visit(AluInstruction& i) = 0;
-   virtual bool visit(ExportInstruction& i) = 0;
-   virtual bool visit(TexInstruction& i) = 0;
-   virtual bool visit(FetchInstruction& i) = 0;
-   virtual bool visit(IfInstruction& i) = 0;
-   virtual bool visit(ElseInstruction& i) = 0;
-   virtual bool visit(IfElseEndInstruction& i) = 0;
-   virtual bool visit(LoopBeginInstruction& i) = 0;
-   virtual bool visit(LoopEndInstruction& i) = 0;
-   virtual bool visit(LoopBreakInstruction& i) = 0;
-   virtual bool visit(LoopContInstruction& i) = 0;
-   virtual bool visit(StreamOutIntruction& i) = 0;
-   virtual bool visit(MemRingOutIntruction& i) = 0;
-   virtual bool visit(EmitVertex& i) = 0;
-   virtual bool visit(WaitAck& i) = 0;
-   virtual bool visit(WriteScratchInstruction& i) = 0;
-   virtual bool visit(GDSInstr& i) = 0;
-   virtual bool visit(RatInstruction& i) = 0;
-   virtual bool visit(LDSWriteInstruction& i) = 0;
-   virtual bool visit(LDSReadInstruction& i) = 0;
-   virtual bool visit(LDSAtomicInstruction& i) = 0;
-   virtual bool visit(GDSStoreTessFactor& i) = 0;
-   virtual bool visit(InstructionBlock& i) = 0;
-};
-
-class ConstInstructionVisitor
-{
-public:
-   virtual ~ConstInstructionVisitor() {};
-   virtual bool visit(const AluInstruction& i) = 0;
-   virtual bool visit(const ExportInstruction& i) = 0;
-   virtual bool visit(const TexInstruction& i) = 0;
-   virtual bool visit(const FetchInstruction& i) = 0;
-   virtual bool visit(const IfInstruction& i) = 0;
-   virtual bool visit(const ElseInstruction& i) = 0;
-   virtual bool visit(const IfElseEndInstruction& i) = 0;
-   virtual bool visit(const LoopBeginInstruction& i) = 0;
-   virtual bool visit(const LoopEndInstruction& i) = 0;
-   virtual bool visit(const LoopBreakInstruction& i) = 0;
-   virtual bool visit(const LoopContInstruction& i) = 0;
-   virtual bool visit(const StreamOutIntruction& i) = 0;
-   virtual bool visit(const MemRingOutIntruction& i) = 0;
-   virtual bool visit(const EmitVertex& i) = 0;
-   virtual bool visit(const WaitAck& i) = 0;
-   virtual bool visit(const WriteScratchInstruction& i) = 0;
-   virtual bool visit(const GDSInstr& i) = 0;
-   virtual bool visit(const RatInstruction& i) = 0;
-   virtual bool visit(const LDSWriteInstruction& i) = 0;
-   virtual bool visit(const LDSReadInstruction& i) = 0;
-   virtual bool visit(const LDSAtomicInstruction& i) = 0;
-   virtual bool visit(const GDSStoreTessFactor& i) = 0;
-   virtual bool visit(const InstructionBlock& i) = 0;
-};
-
-}
-
-#endif // INSTRUCTIONVISITOR_H
--- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h
+++ b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h
@ -1,45 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "sfn_nir.h"
-
-struct r600_shader;
-union r600_shader_key;
-
-namespace r600 {
-
-class AssemblyFromShaderLegacy : public AssemblyFromShader {
-public:
-   AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key);
-   ~AssemblyFromShaderLegacy() override;
-private:
-   bool do_lower(const std::vector<InstructionBlock> &ir)  override ;
-
-   struct AssemblyFromShaderLegacyImpl *impl;
-};
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_liverange.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_liverange.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_liverange.h
+++ b/src/gallium/drivers/r600/sfn/sfn_liverange.h
@ -1,314 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_LIVERANGE_H
-#define SFN_LIVERANGE_H
-
-#include <cstdint>
-#include <ostream>
-#include <vector>
-#include <limits>
-
-#include "sfn_instruction_base.h"
-#include "sfn_nir.h"
-
-namespace r600 {
-
-/** Storage to record the required live range of a temporary register
- * begin == end == -1 indicates that the register can be reused without
- * limitations. Otherwise, "begin" indicates the first instruction in which
- * a write operation may target this temporary, and end indicates the
- * last instruction in which a value can be read from this temporary.
- * Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin.
- */
-struct register_live_range {
-   int begin;
-   int end;
-   bool is_array_elm;
-};
-
-enum prog_scope_type {
-   outer_scope,           /* Outer program scope */
-   loop_body,             /* Inside a loop */
-   if_branch,             /* Inside if branch */
-   else_branch,           /* Inside else branch */
-   switch_body,           /* Inside switch statement */
-   switch_case_branch,    /* Inside switch case statement */
-   switch_default_branch, /* Inside switch default statement */
-   undefined_scope
-};
-
-class prog_scope {
-public:
-   prog_scope();
-   prog_scope(prog_scope *parent, prog_scope_type type, int id,
-              int depth, int begin);
-
-   prog_scope_type type() const;
-   prog_scope *parent() const;
-   int nesting_depth() const;
-   int id() const;
-   int end() const;
-   int begin() const;
-   int loop_break_line() const;
-
-   const prog_scope *in_else_scope() const;
-   const prog_scope *in_ifelse_scope() const;
-   const prog_scope *in_parent_ifelse_scope() const;
-   const prog_scope *innermost_loop() const;
-   const prog_scope *outermost_loop() const;
-   const prog_scope *enclosing_conditional() const;
-
-   bool is_loop() const;
-   bool is_in_loop() const;
-   bool is_switchcase_scope_in_loop() const;
-   bool is_conditional() const;
-   bool is_child_of(const prog_scope *scope) const;
-   bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const;
-
-   bool break_is_for_switchcase() const;
-   bool contains_range_of(const prog_scope& other) const;
-
-   void set_end(int end);
-   void set_loop_break_line(int line);
-
-private:
-   prog_scope_type scope_type;
-   int scope_id;
-   int scope_nesting_depth;
-   int scope_begin;
-   int scope_end;
-   int break_loop_line;
-   prog_scope *parent_scope;
-};
-
-/* Some storage class to encapsulate the prog_scope (de-)allocations */
-class prog_scope_storage {
-public:
-   prog_scope_storage(int n);
-   ~prog_scope_storage();
-   prog_scope * create(prog_scope *p, prog_scope_type type, int id,
-                       int lvl, int s_begin);
-private:
-   int current_slot;
-   std::vector<prog_scope> storage;
-};
-
-/* Class to track the access to a component of a temporary register. */
-
-class temp_comp_access {
-public:
-   temp_comp_access();
-
-   void record_read(int line, prog_scope *scope);
-   void record_write(int line, prog_scope *scope);
-   register_live_range get_required_live_range();
-private:
-   void propagate_live_range_to_dominant_write_scope();
-   bool conditional_ifelse_write_in_loop() const;
-
-   void record_ifelse_write(const prog_scope& scope);
-   void record_if_write(const prog_scope& scope);
-   void record_else_write(const prog_scope& scope);
-
-   prog_scope *last_read_scope;
-   prog_scope *first_read_scope;
-   prog_scope *first_write_scope;
-
-   int first_write;
-   int last_read;
-   int last_write;
-   int first_read;
-
-   /* This member variable tracks the current resolution of conditional writing
-    * to this temporary in IF/ELSE clauses.
-    *
-    * The initial value "conditionality_untouched" indicates that this
-    * temporary has not yet been written to within an if clause.
-    *
-    * A positive (other than "conditionality_untouched") number refers to the
-    * last loop id for which the write was resolved as unconditional. With each
-    * new loop this value will be overwitten by "conditionality_unresolved"
-    * on entering the first IF clause writing this temporary.
-    *
-    * The value "conditionality_unresolved" indicates that no resolution has
-    * been achieved so far. If the variable is set to this value at the end of
-    * the processing of the whole shader it also indicates a conditional write.
-    *
-    * The value "write_is_conditional" marks that the variable is written
-    * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
-    * least one loop.
-    */
-   int conditionality_in_loop_id;
-
-   /* Helper constants to make the tracking code more readable. */
-   static const int write_is_conditional = -1;
-   static const int conditionality_unresolved = 0;
-   static const int conditionality_untouched;
-   static const int write_is_unconditional;
-
-   /* A bit field tracking the nexting levels of if-else clauses where the
-    * temporary has (so far) been written to in the if branch, but not in the
-    * else branch.
-    */
-   unsigned int if_scope_write_flags;
-
-   int next_ifelse_nesting_depth;
-   static const int supported_ifelse_nesting_depth = 32;
-
-   /* Tracks the last if scope in which the temporary was written to
-    * without a write in the corresponding else branch. Is also used
-    * to track read-before-write in the according scope.
-    */
-   const prog_scope *current_unpaired_if_write_scope;
-
-   /* Flag to resolve read-before-write in the else scope. */
-   bool was_written_in_current_else_scope;
-};
-
-/* Class to track the access to all components of a temporary register. */
-class temp_access {
-public:
-   temp_access();
-   void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm);
-   void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm);
-   register_live_range get_required_live_range();
-private:
-   void update_access_mask(int mask);
-
-   temp_comp_access comp[4];
-   int access_mask;
-   bool needs_component_tracking;
-   bool is_array_element;
-};
-
-/* Helper class to merge the live ranges of an arrays.
- *
- * For arrays the array length, live range, and component access needs to
- * be kept, because when live ranges are merged or arrays are interleaved
- * one can only merge or interleave an array into another with equal or more
- * elements. For interleaving it is also required that the sum of used swizzles
- * is at most four.
- */
-
-class array_live_range {
-public:
-   array_live_range();
-   array_live_range(unsigned aid, unsigned alength);
-   array_live_range(unsigned aid, unsigned alength, int first_access,
-		  int last_access, int mask);
-
-   void set_live_range(int first_access, int last_access);
-   void set_begin(int _begin){first_access = _begin;}
-   void set_end(int _end){last_access = _end;}
-   void set_access_mask(int s);
-
-   static void merge(array_live_range *a, array_live_range *b);
-   static void interleave(array_live_range *a, array_live_range *b);
-
-   int array_id() const {return id;}
-   int target_array_id() const {return target_array ? target_array->id : 0;}
-   const array_live_range *final_target() const {return target_array ?
-	       target_array->final_target() : this;}
-   unsigned array_length() const { return length;}
-   int begin() const { return first_access;}
-   int end() const { return last_access;}
-   int access_mask() const { return component_access_mask;}
-   int used_components() const {return used_component_count;}
-
-   bool time_doesnt_overlap(const array_live_range& other) const;
-
-   void print(std::ostream& os) const;
-
-   bool is_mapped() const { return target_array != nullptr;}
-
-   int8_t remap_one_swizzle(int8_t idx) const;
-
-private:
-   void init_swizzles();
-   void set_target(array_live_range  *target);
-   void merge_live_range_from(array_live_range *other);
-   void interleave_into(array_live_range *other);
-
-   unsigned id;
-   unsigned length;
-   int first_access;
-   int last_access;
-   uint8_t component_access_mask;
-   uint8_t used_component_count;
-   array_live_range *target_array;
-   int8_t swizzle_map[4];
-};
-
-
-
-class LiverangeEvaluator {
-public:
-   LiverangeEvaluator();
-
-   void run(const Shader& shader,
-            std::vector<register_live_range> &register_live_ranges);
-
-   void scope_if();
-   void scope_else();
-   void scope_endif();
-   void scope_loop_begin();
-   void scope_loop_end();
-   void scope_loop_break();
-
-   void record_read(const Value& src, bool is_array_elm = false);
-   void record_write(const Value& dst, bool is_array_elm = false);
-
-   void record_read(const GPRVector& src);
-   void record_write(const GPRVector& dst);
-
-private:
-
-   prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id,
-                            int lvl, int s_begin);
-
-
-   void get_required_live_ranges(std::vector<register_live_range>& register_live_ranges);
-
-   int line;
-   int loop_id;
-   int if_id;
-   int switch_id;
-   bool is_at_end;
-   int n_scopes;
-   std::unique_ptr<prog_scope_storage> scopes;
-   prog_scope *cur_scope;
-
-   std::vector<temp_access> temp_acc;
-
-};
-
-std::vector<rename_reg_pair>
-get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges);
-
-} // end namespace r600
-
-#endif
--- a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp
@ -0,0 +1,438 @@
+#include "sfn_liverangeevaluator.h"
+#include "sfn_liverangeevaluator_helpers.h"
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_tex.h"
+#include "sfn_shader.h"
+#include "sfn_debug.h"
+
+#include <algorithm>
+#include <map>
+
+namespace r600 {
+
+/* Instruction visitor that records where registers are written and read
+ * while keeping track of the control flow scope (if/else branch, loop) the
+ * access happens in. finalize() turns the recorded accesses into the start,
+ * end, and use type of the entries of the LiveRangeMap passed on construction.
+ */
+class LiveRangeInstrVisitor : public InstrVisitor {
+public:
+   /* explicit: a LiveRangeMap must not convert implicitly into a visitor. */
+   explicit LiveRangeInstrVisitor(LiveRangeMap& live_range_map);
+
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr *instr) override;
+   void visit(ExportInstr *instr) override;
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override;
+   void visit(IfInstr *instr) override;
+   void visit(WriteScratchInstr *instr) override;
+   void visit(StreamOutInstr *instr) override;
+   void visit(MemRingOutInstr *instr) override;
+   /* Nothing is recorded for this instruction type. */
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override;
+   void visit(WriteTFInstr *instr) override;
+   void visit(LDSAtomicInstr *instr) override;
+   void visit(LDSReadInstr *instr) override;
+   void visit(RatInstr *instr) override;
+
+   /* Must be called once after all blocks were visited. */
+   void finalize();
+private:
+
+   /* Single register access. */
+   void record_write(const Register *reg);
+   void record_read(const Register *reg, LiveRangeEntry::EUse use);
+
+   /* Register vector access, forwarded element by element. */
+   void record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle& swizzle);
+   void record_read(const RegisterVec4 &reg, LiveRangeEntry::EUse use);
+
+   /* Scope bookkeeping driven by the control flow instructions. */
+   void scope_if();
+   void scope_else();
+   void scope_endif();
+   void scope_loop_begin();
+   void scope_loop_end();
+   void scope_loop_break();
+   ProgramScope *create_scope(ProgramScope *parent, ProgramScopeType type,
+                              int id, int nesting_depth, int line);
+
+   /* Owns every scope created during the walk; raw pointers into this
+    * vector are handed out by create_scope(). */
+   std::vector<std::unique_ptr<ProgramScope>> m_scopes;
+   /* Scope the visitor is currently in; set to the outer scope by the
+    * constructor. */
+   ProgramScope *m_current_scope{nullptr};
+   LiveRangeMap& m_live_range_map;
+   RegisterAccess m_register_access;
+
+   /* Line counter; visit(Block *) advances it after each instruction that
+    * ends a group. */
+   int m_line{0};
+   /* Id handed to the next if scope / loop scope that gets created. */
+   int m_if_id{1};
+   int m_loop_id{1};
+};
+
+/* The evaluator needs no set-up of its own. */
+LiveRangeEvaluator::LiveRangeEvaluator() = default;
+
+
+/* Evaluate the live ranges of the registers of a shader.
+ *
+ * The shader provides the initial map; every block of the shader is then
+ * visited to record the register accesses, and finalize() fills the
+ * evaluated ranges into the map that is returned.
+ */
+LiveRangeMap LiveRangeEvaluator::run(Shader& sh)
+{
+   LiveRangeMap live_ranges = sh.prepare_live_range_map();
+   LiveRangeInstrVisitor visitor(live_ranges);
+
+   for (auto& block : sh.func()) {
+      block->accept(visitor);
+   }
+
+   visitor.finalize();
+   return live_ranges;
+}
+
+/* End the current scope at the current line and convert the recorded
+ * register accesses into live ranges.
+ *
+ * For each of the four components: registers whose live range end is pinned
+ * get an additional read recorded, then the required live range of every
+ * tracked register is updated and its start, end, and use type are copied
+ * into the corresponding entry of the live range map.
+ */
+void LiveRangeInstrVisitor::finalize()
+{
+   m_current_scope->set_end(m_line);
+
+   for (int comp = 0; comp < 4; ++comp) {
+
+      auto& live_ranges = m_live_range_map.component(comp);
+      for (const auto& r : live_ranges) {
+         if (r.m_register->live_end_pinned())
+            record_read(r.m_register, LiveRangeEntry::use_unspecified);
+      }
+
+      auto& comp_access = m_register_access.component(comp);
+
+      /* Distinct index names keep the register loop from shadowing the
+       * component loop counter. */
+      for (size_t reg = 0; reg < comp_access.size(); ++reg) {
+         sfn_log << SfnLog::merge << "Evaluate access for " << *live_ranges[reg].m_register << "\n";
+
+         auto& rca = comp_access[reg];
+         rca.update_required_live_range();
+         live_ranges[reg].m_start = rca.range().start;
+         live_ranges[reg].m_end = rca.range().end;
+         live_ranges[reg].m_use = rca.use_type();
+      }
+   }
+}
+
+/* Prepare the visitor for the registers described by live_range_map.
+ *
+ * Creates the outer program scope and records a write for every register
+ * whose live range start is pinned. These writes are recorded while m_line
+ * still holds its initial value 0; afterwards the line counter is set to 1.
+ */
+LiveRangeInstrVisitor::LiveRangeInstrVisitor(LiveRangeMap& live_range_map):
+   m_live_range_map(live_range_map),
+   m_register_access(live_range_map.sizes())
+{
+   if (sfn_log.has_debug_flag(SfnLog::merge)) {
+      sfn_log << SfnLog::merge << "Have component register numbers: ";
+      for (auto count : live_range_map.sizes())
+         sfn_log <<  count << " ";
+      sfn_log << "\n";
+   }
+
+   /* The scope list is empty up to here, so the new scope is the first
+    * (and last) element. */
+   m_scopes.emplace_back(std::make_unique<ProgramScope>(nullptr, outer_scope, 0, 0, 0));
+   m_current_scope = m_scopes.back().get();
+
+   for (int chan = 0; chan < 4; ++chan) {
+      const auto& entries = live_range_map.component(chan);
+      for (const auto& entry : entries) {
+         if (!entry.m_register->live_start_pinned())
+            continue;
+         record_write(entry.m_register);
+      }
+   }
+   m_line = 1;
+}
+
+/* Record a write for every element of a register vector that is written.
+ *
+ * An element is skipped when its swizzle value is 6 or larger, or when the
+ * channel of its register is 4 or larger.
+ * NOTE(review): presumably swizzle values >= 6 mark a masked destination and
+ * channels >= 4 an unused element - confirm against the swizzle encoding.
+ */
+void LiveRangeInstrVisitor::record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle &swizzle)
+{
+   for (int slot = 0; slot < 4; ++slot) {
+      if (!(swizzle[slot] < 6))
+         continue;
+      if (!(reg[slot]->chan() < 4))
+         continue;
+      record_write(reg[slot]);
+   }
+}
+
+/* Record a read with the given use type for every element of a register
+ * vector whose register channel is below 4; other elements are ignored.
+ */
+void LiveRangeInstrVisitor::record_read(const RegisterVec4& reg, LiveRangeEntry::EUse use)
+{
+   for (int slot = 0; slot < 4; ++slot) {
+      if (!(reg[slot]->chan() < 4))
+         continue;
+      record_read(reg[slot], use);
+   }
+}
+
+/* Enter a new if-branch scope: child of the current scope, one nesting level
+ * deeper, starting at the line after the current one. Consumes one if id.
+ */
+void LiveRangeInstrVisitor::scope_if()
+{
+   ProgramScope *enclosing = m_current_scope;
+   const int branch_id = m_if_id++;
+
+   m_current_scope = create_scope(enclosing, if_branch, branch_id,
+                                  enclosing->nesting_depth() + 1, m_line + 1);
+}
+
+/* Switch from the if branch to its else branch.
+ *
+ * The if scope is ended at the line before the current one. The else scope
+ * gets the same parent and the same id as the if scope and starts at the
+ * line after the current one.
+ *
+ * NOTE(review): the else scope is a sibling of the if scope (same parent),
+ * yet it is created with the if scope's nesting depth plus one, i.e. one
+ * level deeper than its sibling - confirm that this is intended.
+ */
+void LiveRangeInstrVisitor::scope_else()
+{
+   assert(m_current_scope->type() == if_branch);
+
+   ProgramScope *if_scope = m_current_scope;
+   if_scope->set_end(m_line - 1);
+
+   m_current_scope = create_scope(if_scope->parent(), else_branch, if_scope->id(),
+                                  if_scope->nesting_depth() + 1, m_line + 1);
+}
+
+/* Leave the current if or else scope: it ends at the line before the current
+ * one, and its parent becomes the current scope again.
+ */
+void LiveRangeInstrVisitor::scope_endif()
+{
+   ProgramScope *closing = m_current_scope;
+   closing->set_end(m_line - 1);
+
+   m_current_scope = closing->parent();
+   assert(m_current_scope);
+}
+
+/* Enter a new loop-body scope: child of the current scope, one nesting level
+ * deeper, starting at the current line. Consumes one loop id.
+ */
+void LiveRangeInstrVisitor::scope_loop_begin()
+{
+   ProgramScope *enclosing = m_current_scope;
+   const int body_id = m_loop_id++;
+
+   m_current_scope = create_scope(enclosing, loop_body, body_id,
+                                  enclosing->nesting_depth() + 1, m_line);
+}
+
+/* Leave the current scope at the end of a loop: it ends at the current line,
+ * and its parent becomes the current scope again.
+ */
+void LiveRangeInstrVisitor::scope_loop_end()
+{
+   ProgramScope *closing = m_current_scope;
+   closing->set_end(m_line);
+
+   m_current_scope = closing->parent();
+   assert(m_current_scope);
+}
+
+/* Note the current line as loop break line on whatever scope is current. */
+void LiveRangeInstrVisitor::scope_loop_break()
+{
+   const int break_line = m_line;
+   m_current_scope->set_loop_break_line(break_line);
+}
+
+/* Create a scope from the given values, store it in m_scopes, which keeps
+ * ownership, and return a non-owning pointer to it.
+ */
+ProgramScope *LiveRangeInstrVisitor::create_scope(ProgramScope *parent, ProgramScopeType type,
+                                                  int id, int nesting_depth, int line)
+{
+   m_scopes.push_back(std::make_unique<ProgramScope>(parent, type, id, nesting_depth, line));
+   return m_scopes.back().get();
+}
+
+/* Record the register accesses of an ALU instruction.
+ *
+ * The destination is recorded as written only if the instruction carries the
+ * alu_write flag. Every source is recorded as read; if a source is a uniform
+ * with a buffer address, that address is recorded as read as well.
+ * NOTE(review): as_register() results are passed on without a null check
+ * here, so record_read() presumably copes with a null register - confirm.
+ */
+void LiveRangeInstrVisitor::visit(AluInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+
+   if (instr->has_alu_flag(alu_write))
+      record_write(instr->dest());
+
+   for (unsigned s = 0; s < instr->n_sources(); ++s) {
+      auto&& source = instr->src(s);
+      record_read(source.as_register(), LiveRangeEntry::use_unspecified);
+
+      auto uniform = source.as_uniform();
+      if (!uniform)
+         continue;
+      if (uniform->buf_addr())
+         record_read(uniform->buf_addr()->as_register(), LiveRangeEntry::use_unspecified);
+   }
+}
+
+/* Visit every instruction of an ALU group; empty slots are skipped. */
+void LiveRangeInstrVisitor::visit(AluGroup *group)
+{
+   for (auto slot_instr : *group) {
+      if (!slot_instr)
+         continue;
+      slot_instr->accept(*this);
+   }
+}
+
+/* Record the register accesses of a texture instruction: the destination
+ * vector is written according to its destination swizzle, the source vector
+ * is read, and a sampler offset is read if it is present and is a register.
+ */
+void LiveRangeInstrVisitor::visit(TexInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   record_write(instr->dst(), instr->all_dest_swizzle());
+
+   auto source = instr->src();
+   record_read(source, LiveRangeEntry::use_unspecified);
+
+   auto offset = instr->sampler_offset();
+   if (!offset)
+      return;
+
+   auto offset_reg = offset->as_register();
+   if (offset_reg)
+      record_read(offset_reg, LiveRangeEntry::use_unspecified);
+}
+
+/* Record the value of an export instruction as read with use type
+ * use_export.
+ */
+void LiveRangeInstrVisitor::visit(ExportInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+
+   auto exported = instr->value();
+   record_read(exported, LiveRangeEntry::use_export);
+}
+
+/* Record the register accesses of a fetch instruction: the destination
+ * vector is written according to its destination swizzle, and the source
+ * register is read unless it is disabled.
+ */
+void LiveRangeInstrVisitor::visit(FetchInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   record_write(instr->dst(), instr->all_dest_swizzle());
+
+   auto& source = instr->src();
+   /* Channel can be 7 to disable source */
+   if (!(source.chan() < 4))
+      return;
+
+   record_read(&source, LiveRangeEntry::use_unspecified);
+}
+
+/* Visit all instructions of a block in order. The line counter is advanced
+ * by one after each instruction that ends a group.
+ */
+void LiveRangeInstrVisitor::visit(Block *instr)
+{
+   sfn_log << SfnLog::merge << "Visit block\n";
+
+   for (auto child : *instr) {
+      child->accept(*this);
+      m_line += child->end_group() ? 1 : 0;
+   }
+
+   sfn_log << SfnLog::merge << "End block\n";
+}
+
+/* Record the register accesses of a scratch write: every element of the
+ * value whose bit is set in the write mask is read, and so is the address
+ * if the instruction has one.
+ */
+void LiveRangeInstrVisitor::visit(WriteScratchInstr *instr)
+{
+   auto& value = instr->value();
+   for (int chan = 0; chan < 4; ++chan) {
+      if (!((1 << chan) & instr->write_mask()))
+         continue;
+      record_read(value[chan], LiveRangeEntry::use_unspecified);
+   }
+
+   auto address = instr->address();
+   if (address)
+      record_read(address, LiveRangeEntry::use_unspecified);
+}
+
+/* Record the value of a stream-out instruction as read with use type
+ * use_export.
+ */
+void LiveRangeInstrVisitor::visit(StreamOutInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+
+   auto streamed = instr->value();
+   record_read(streamed, LiveRangeEntry::use_export);
+}
+
+/* Ring write: the value is read as an export use; an export index that is
+ * held in a register is recorded as an ordinary read.
+ */
+void LiveRangeInstrVisitor::visit(MemRingOutInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+
+   auto ring_value = instr->value();
+   record_read(ring_value, LiveRangeEntry::use_export);
+
+   auto index = instr->export_index();
+   if (!index)
+      return;
+
+   auto index_reg = index->as_register();
+   if (index_reg)
+      record_read(index_reg, LiveRangeEntry::use_unspecified);
+}
+
+/* Translate control flow instructions into the matching scope_* call.
+ * Loop-continue and wait-ack do not touch the scope bookkeeping.
+ */
+void LiveRangeInstrVisitor::visit(ControlFlowInstr *instr)
+{
+   switch (instr->cf_type()) {
+   case ControlFlowInstr::cf_else:
+      scope_else();
+      break;
+   case ControlFlowInstr::cf_endif:
+      scope_endif();
+      break;
+   case ControlFlowInstr::cf_loop_begin:
+      scope_loop_begin();
+      break;
+   case ControlFlowInstr::cf_loop_end:
+      scope_loop_end();
+      break;
+   case ControlFlowInstr::cf_loop_break:
+      scope_loop_break();
+      break;
+   case ControlFlowInstr::cf_loop_continue:
+   case ControlFlowInstr::cf_wait_ack:
+      break;
+   default:
+      unreachable("Flow control unreachanble");
+   }
+}
+
+/* IF instruction: the predicate is visited first, i.e. while the current
+ * scope is still the one surrounding the IF; only then scope_if() is called.
+ */
+void LiveRangeInstrVisitor::visit(IfInstr *instr)
+{
+   instr->predicate()->accept(*this);
+   scope_if();
+}
+
+/* GDS instruction: record the source read, the read of the UAV id when
+ * one is set, and finally the destination write.
+ */
+void LiveRangeInstrVisitor::visit(GDSInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+
+   record_read(instr->src(), LiveRangeEntry::use_unspecified);
+
+   const bool has_uav_id = instr->uav_id();
+   if (has_uav_id)
+      record_read(instr->uav_id(), LiveRangeEntry::use_unspecified);
+
+   record_write(instr->dest());
+}
+
+/* RAT instruction: value and address are read; the RAT id offset is read
+ * as well when it is set.
+ */
+void LiveRangeInstrVisitor::visit(RatInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+
+   record_read(instr->value(), LiveRangeEntry::use_unspecified);
+   record_read(instr->addr(), LiveRangeEntry::use_unspecified);
+
+   auto id_offset = instr->rat_id_offset();
+   if (!id_offset)
+      return;
+
+   record_read(id_offset, LiveRangeEntry::use_unspecified);
+}
+
+
+/* The value written by a WriteTFInstr is recorded as an export-type read. */
+void LiveRangeInstrVisitor::visit(WriteTFInstr *instr)
+{
+   record_read(instr->value(), LiveRangeEntry::use_export);
+}
+
+/* LDS atomic instructions are not expected at this stage; reaching this
+ * visitor is treated as a programming error.
+ */
+void LiveRangeInstrVisitor::visit(UNUSED LDSAtomicInstr *instr)
+{   
+   unreachable("LDSAtomicInstr must be lowered before scheduling and live range evaluation");
+}
+
+/* LDS read instructions are not expected at this stage; reaching this
+ * visitor is treated as a programming error.
+ */
+void LiveRangeInstrVisitor::visit(UNUSED LDSReadInstr *instr)
+{
+   unreachable("LDSReadInstr must be lowered before scheduling and live range evaluation");
+}
+
+/* Record a write to a register at the current line and scope.
+ *
+ * A null register is ignored, matching record_read(). If the register is
+ * accessed through an address that lives in a register, the address
+ * register is recorded as read and the write is recorded for every element
+ * of the backing array in the channel of the written register.
+ */
+void LiveRangeInstrVisitor::record_write(const Register *reg)
+{
+   /* Callers pass instruction destinations straight through; tolerate an
+    * absent destination instead of dereferencing a null pointer. */
+   if (!reg)
+      return;
+
+   auto addr = reg->get_addr();
+   if (addr && addr->as_register()) {
+      record_read(addr->as_register(), LiveRangeEntry::use_unspecified);
+
+      const auto av = static_cast<const LocalArrayValue *>(reg);
+      auto& array = av->array();
+
+      sfn_log << SfnLog::merge << array << " write:" << m_line << "\n";
+
+      for (auto i = 0u; i < array.size(); ++i) {
+         auto& rav = m_register_access(array(i, reg->chan()));
+         rav.record_write(m_line, m_current_scope);
+      }
+   } else {
+      auto& ra = m_register_access(*reg);
+      sfn_log << SfnLog::merge << *reg  << " write:" << m_line << "\n";
+      ra.record_write(m_line, m_current_scope);
+   }
+}
+
+/* Record a read of a register at the current line and scope; a null
+ * register is ignored.
+ *
+ * For a register addressed through another register, the address register
+ * is recorded as read, followed by every element of the backing array in
+ * the channel of the register that was read.
+ */
+void LiveRangeInstrVisitor::record_read(const Register *reg, LiveRangeEntry::EUse use)
+{
+   if (!reg)
+      return;
+
+   auto addr = reg->get_addr();
+   const bool indirect = addr && addr->as_register();
+
+   if (!indirect) {
+      sfn_log << SfnLog::merge << *reg << " read:" << m_line << "\n";
+      auto& access = m_register_access(*reg);
+      access.record_read(m_line, m_current_scope, use);
+      return;
+   }
+
+   sfn_log << SfnLog::merge << "Record reading address register " << *addr  << "\n";
+
+   auto& addr_access = m_register_access(*addr->as_register());
+   addr_access.record_read(m_line, m_current_scope, use);
+
+   const auto array_value = static_cast<const LocalArrayValue *>(reg);
+   auto& array = array_value->array();
+   sfn_log << SfnLog::merge << array << " read:" << m_line << "\n";
+
+   for (auto elm = 0u; elm < array.size(); ++elm) {
+      auto& elm_access = m_register_access(array(elm, reg->chan()));
+      elm_access.record_read(m_line, m_current_scope, use);
+   }
+}
+
+/* Print a heading followed by one indented line per entry, iterating the
+ * four components in order.
+ */
+std::ostream& operator <<  (std::ostream& os, const LiveRangeMap& lrm)
+{
+   os << "Live ranges\n";
+
+   int chan = 0;
+   while (chan < 4) {
+      const auto& entries = lrm.component(chan);
+      for (auto& entry : entries)
+         os << "  " << entry << "\n";
+      ++chan;
+   }
+
+   return os;
+}
+
+/* Two maps are equal when, for each of the four components, they hold the
+ * same number of entries and the entries agree pairwise in start, end,
+ * color and register (compared with equal_to).
+ */
+bool operator == (const LiveRangeMap& lhs, const LiveRangeMap& rhs)
+{
+   for (int chan = 0; chan < 4; ++chan) {
+      const auto& left = lhs.component(chan);
+      const auto& right = rhs.component(chan);
+
+      if (left.size() != right.size())
+         return false;
+
+      for (auto idx = 0u; idx < left.size(); ++idx) {
+         const auto& l = left[idx];
+         const auto& r = right[idx];
+
+         const bool same = l.m_start == r.m_start &&
+                           l.m_end == r.m_end &&
+                           l.m_color == r.m_color &&
+                           l.m_register->equal_to(*r.m_register);
+         if (!same)
+            return false;
+      }
+   }
+
+   return true;
+}
+
+
+}
+
--- a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h
+++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h
@ -0,0 +1,23 @@
+#ifndef LIFERANGEEVALUATOR_H
+#define LIFERANGEEVALUATOR_H
+
+#include "sfn_valuefactory.h"
+
+#include <map>
+#include <cassert>
+
+namespace r600 {
+
+class Shader;
+
+/* Front end of the live range evaluation. Only the interface is declared
+ * here; the implementation lives outside this header.
+ */
+class LiveRangeEvaluator  {
+public:
+
+   LiveRangeEvaluator();
+
+   /* Takes a shader and returns a LiveRangeMap by value.
+    * NOTE(review): presumably the map holds the evaluated live ranges of
+    * the shader's registers - confirm against the implementation.
+    */
+   LiveRangeMap run(Shader &sh);
+};
+
+}
+
+#endif // LIFERANGEEVALUATOR_H
--- a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp
@ -0,0 +1,623 @@
+#include "sfn_liverangeevaluator_helpers.h"
+
+#include "sfn_virtualvalues.h"
+
+#include "util/u_math.h"
+
+#include <limits>
+#include <cassert>
+#include <iostream>
+
+namespace r600 {
+
+/* Create a scope of the given type below `parent`.
+ *
+ * The end line starts out as -1, which set_end() treats as "not yet set",
+ * and the loop break line starts at INT_MAX so that the MIN2 in
+ * set_loop_break_line() keeps the smallest line reported.
+ */
+ProgramScope::ProgramScope(ProgramScope *parent, ProgramScopeType type, int id,
+                           int depth, int scope_begin):
+   scope_type(type),
+   scope_id(id),
+   scope_nesting_depth(depth),
+   scope_begin(scope_begin),
+   scope_end(-1),
+   break_loop_line(std::numeric_limits<int>::max()),
+   parent_scope(parent)
+{
+}
+
+/* Default scope: no parent, type undefined_scope, and -1 for id, nesting
+ * depth and begin line.
+ */
+ProgramScope::ProgramScope():
+   ProgramScope(nullptr, undefined_scope, -1, -1, -1)
+{
+}
+
+/* Returns the type this scope was created with. */
+ProgramScopeType ProgramScope::type() const
+{
+   return scope_type;
+}
+
+/* Returns the parent scope, or nullptr if this scope has none. */
+ProgramScope *ProgramScope::parent() const
+{
+   return parent_scope;
+}
+
+/* Returns the nesting depth passed at construction. */
+int ProgramScope::nesting_depth() const
+{
+   return scope_nesting_depth;
+}
+
+/* True only if this scope itself is a loop body (parents are not checked). */
+bool ProgramScope::is_loop() const
+{
+   return (scope_type == loop_body);
+}
+
+/* True if this scope or any of its ancestors is a loop body. */
+bool ProgramScope::is_in_loop() const
+{
+   for (const ProgramScope *s = this; s; s = s->parent_scope) {
+      if (s->scope_type == loop_body)
+         return true;
+   }
+   return false;
+}
+
+/* Returns the closest loop body found when walking from this scope up
+ * through its parents, or nullptr if there is none.
+ */
+const ProgramScope *ProgramScope::innermost_loop() const
+{
+   for (const ProgramScope *s = this; s; s = s->parent_scope) {
+      if (s->scope_type == loop_body)
+         return s;
+   }
+   return nullptr;
+}
+
+/* Returns the last loop body found when walking from this scope up to the
+ * root, i.e. the one farthest from this scope, or nullptr if there is none.
+ */
+const ProgramScope *ProgramScope::outermost_loop() const
+{
+   const ProgramScope *found = nullptr;
+
+   for (const ProgramScope *s = this; s; s = s->parent()) {
+      if (s->type() == loop_body)
+         found = s;
+   }
+
+   return found;
+}
+
+/* Walk the enclosing IF/ELSE scopes (starting at the parent). Returns
+ * false as soon as `scope` itself is met (direct child), true as soon as
+ * a different scope object with the same id is met (child of the sibling
+ * branch), and false if neither is found.
+ */
+bool ProgramScope::is_child_of_ifelse_id_sibling(const ProgramScope *scope) const
+{
+   for (const ProgramScope *ancestor = in_parent_ifelse_scope(); ancestor;
+        ancestor = ancestor->in_parent_ifelse_scope()) {
+      /* is a direct child? */
+      if (ancestor == scope)
+         return false;
+      /* is a child of the conditions sibling? */
+      if (ancestor->id() == scope->id())
+         return true;
+   }
+   return false;
+}
+
+/* True if `scope` is a proper ancestor of this scope (a scope is not
+ * considered a child of itself).
+ */
+bool ProgramScope::is_child_of(const ProgramScope *scope) const
+{
+   for (const ProgramScope *ancestor = parent(); ancestor;
+        ancestor = ancestor->parent()) {
+      if (ancestor == scope)
+         return true;
+   }
+   return false;
+}
+
+/* Returns the closest conditional scope (see is_conditional()), starting
+ * with this scope itself, or nullptr if there is none.
+ */
+const ProgramScope *ProgramScope::enclosing_conditional() const
+{
+   for (const ProgramScope *s = this; s; s = s->parent_scope) {
+      if (s->is_conditional())
+         return s;
+   }
+   return nullptr;
+}
+
+/* True if the [begin, end] line range of `other` lies within the range of
+ * this scope (bounds inclusive).
+ */
+bool ProgramScope::contains_range_of(const ProgramScope& other) const
+{
+   if (begin() > other.begin())
+      return false;
+
+   return end() >= other.end();
+}
+
+/* True for IF, ELSE, switch-case and switch-default branches. */
+bool ProgramScope::is_conditional() const
+{
+   switch (scope_type) {
+   case if_branch:
+   case else_branch:
+   case switch_case_branch:
+   case switch_default_branch:
+      return true;
+   default:
+      return false;
+   }
+}
+
+/* Returns the closest ELSE branch, starting with this scope itself, or
+ * nullptr if there is none.
+ */
+const ProgramScope *ProgramScope::in_else_scope() const
+{
+   for (const ProgramScope *s = this; s; s = s->parent_scope) {
+      if (s->scope_type == else_branch)
+         return s;
+   }
+   return nullptr;
+}
+
+/* Like in_ifelse_scope(), but the search starts at the parent scope.
+ * Returns nullptr when there is no parent.
+ */
+const ProgramScope *ProgramScope::in_parent_ifelse_scope() const
+{
+   return parent_scope ? parent_scope->in_ifelse_scope() : nullptr;
+}
+
+/* Returns the closest IF or ELSE branch, starting with this scope itself,
+ * or nullptr if there is none.
+ */
+const ProgramScope *ProgramScope::in_ifelse_scope() const
+{
+   for (const ProgramScope *s = this; s; s = s->parent_scope) {
+      const bool is_ifelse = s->scope_type == if_branch ||
+                             s->scope_type == else_branch;
+      if (is_ifelse)
+         return s;
+   }
+   return nullptr;
+}
+
+/* True if this scope is a switch case/default branch that is nested
+ * (directly or indirectly) in a loop.
+ */
+bool ProgramScope::is_switchcase_scope_in_loop() const
+{
+   const bool is_case_branch = scope_type == switch_case_branch ||
+                               scope_type == switch_default_branch;
+   if (!is_case_branch)
+      return false;
+
+   return is_in_loop();
+}
+
+/* Walk from this scope towards the root: meeting a loop body first yields
+ * false, meeting a switch body or switch case/default branch first yields
+ * true; false if neither is found.
+ */
+bool ProgramScope::break_is_for_switchcase() const
+{
+   for (const ProgramScope *s = this; s; s = s->parent_scope) {
+      switch (s->scope_type) {
+      case loop_body:
+         return false;
+      case switch_case_branch:
+      case switch_default_branch:
+      case switch_body:
+         return true;
+      default:
+         break;
+      }
+   }
+   return false;
+}
+
+/* Returns the id passed at construction. */
+int ProgramScope::id() const
+{
+   return scope_id;
+}
+
+/* Returns the begin line passed at construction. */
+int ProgramScope::begin() const
+{
+   return scope_begin;
+}
+
+/* Returns the end line, or -1 as long as set_end() has not stored one. */
+int ProgramScope::end() const
+{
+   return scope_end;
+}
+
+/* Store the end line. Only the first call has an effect: once the end is
+ * different from -1 later calls are ignored.
+ */
+void ProgramScope::set_end(int end)
+{
+   if (scope_end != -1)
+      return;
+
+   scope_end = end;
+}
+
+/* Report a break at `line` to the closest loop body (this scope or an
+ * ancestor); the loop keeps the smallest line reported. Nothing happens
+ * if no loop body is found.
+ */
+void ProgramScope::set_loop_break_line(int line)
+{
+   ProgramScope *target = this;
+   while (target && target->scope_type != loop_body)
+      target = target->parent_scope;
+
+   if (target)
+      target->break_loop_line = MIN2(target->break_loop_line, line);
+}
+
+/* Returns the smallest break line recorded for this scope; INT_MAX if
+ * set_loop_break_line() never updated it.
+ */
+int ProgramScope::loop_break_line() const
+{
+   return break_loop_line;
+}
+
+/* Start tracking with a pre-set live range: first and last write are
+ * seeded with range.start and the last read with range.end. No scope is
+ * known yet, first_read is INT_MAX so that any recorded read is earlier,
+ * and the conditionality tracking starts as "untouched".
+ */
+RegisterCompAccess::RegisterCompAccess(LiveRange range):
+   last_read_scope(nullptr),
+   first_read_scope(nullptr),
+   first_write_scope(nullptr),
+   first_write(range.start),
+   last_read(range.end),
+   last_write(range.start),
+   first_read(std::numeric_limits<int>::max()),
+   conditionality_in_loop_id(conditionality_untouched),
+   if_scope_write_flags(0),
+   next_ifelse_nesting_depth(0),
+   current_unpaired_if_write_scope(nullptr),
+   was_written_in_current_else_scope(false),
+   m_range(range)
+{
+
+}
+
+/* Default: start from the range (-1, -1), i.e. with first/last write and
+ * last read all set to -1.
+ */
+RegisterCompAccess::RegisterCompAccess():
+   RegisterCompAccess(LiveRange(-1,-1))
+{
+}
+
+
+/* Record a read of this component at `line` in `scope`.
+ *
+ * Updates the last read scope, the first/last read lines and the first
+ * read scope, and sets the use-type bit for any `use` other than
+ * use_unspecified. While the conditionality of writes is still
+ * unresolved, a read inside an IF/ELSE within a loop that is not preceded
+ * by a matching write marks the component as conditionally written, so
+ * that its value is kept across the loop.
+ */
+void RegisterCompAccess::record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use)
+{
+   last_read_scope = scope;
+   if (use != LiveRangeEntry::use_unspecified)
+      m_use_type.set(use);
+   if (last_read < line)
+      last_read = line;
+
+   if (first_read > line) {
+      first_read = line;
+      first_read_scope = scope;
+   }
+
+   /* If the conditionality of the first write is already resolved then
+    * no further checks are required.
+    */
+   if (conditionality_in_loop_id == write_is_unconditional ||
+       conditionality_in_loop_id == write_is_conditional)
+      return;
+
+   /* Check whether we are in a condition within a loop */
+   const ProgramScope *ifelse_scope = scope->in_ifelse_scope();
+   const ProgramScope *enclosing_loop;
+   if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) {
+
+      /* If we have neither written to this register yet nor resolved the
+       * writes as unconditional in the enclosing loop, then check whether
+       * we read before write in an IF/ELSE branch.
+       * (The write_is_conditional test below is always true here because
+       * of the early return above.)
+       */
+      if ((conditionality_in_loop_id != write_is_conditional) &&
+          (conditionality_in_loop_id != enclosing_loop->id())) {
+
+         if (current_unpaired_if_write_scope)  {
+
+            /* Has been written in this or a parent scope? - this makes the temporary
+             * unconditionally set at this point.
+             */
+            if (scope->is_child_of(current_unpaired_if_write_scope))
+               return;
+
+            /* Has been written in the same scope before it was read? */
+            if (ifelse_scope->type() == if_branch) {
+               if (current_unpaired_if_write_scope->id() == scope->id())
+                  return;
+            } else {
+               if (was_written_in_current_else_scope)
+                  return;
+            }
+         }
+
+         /* The temporary was read (conditionally) before it is written, hence
+          * it should survive a loop. This can be signaled as if it were
+          * conditionally written.
+          */
+         conditionality_in_loop_id = write_is_conditional;
+      }
+   }
+}
+
+/* Record a write of this component at `line` in `scope`.
+ *
+ * Always updates last_write. The first write seen (first_write still
+ * negative) also fixes first_write and first_write_scope. As long as the
+ * conditionality is unresolved, writes inside IF/ELSE branches within a
+ * loop are forwarded to record_ifelse_write().
+ */
+void RegisterCompAccess::record_write(int line, ProgramScope *scope)
+{
+   last_write = line;
+
+   if (first_write < 0) {
+      first_write = line;
+      first_write_scope = scope;
+
+      /* If the first write we encounter is not in a conditional branch, or
+       * the conditional write is not within a loop, then this is to be
+       * considered an unconditional dominant write.
+       */
+      const ProgramScope *conditional = scope->enclosing_conditional();
+      if (!conditional || !conditional->innermost_loop()) {
+         conditionality_in_loop_id = write_is_unconditional;
+      }
+   }
+
+   /* The conditionality of the first write is already resolved. */
+   if (conditionality_in_loop_id == write_is_unconditional ||
+       conditionality_in_loop_id == write_is_conditional)
+      return;
+
+   /* If the nesting depth is larger than the supported level,
+    * then we assume conditional writes.
+    */
+   if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) {
+      conditionality_in_loop_id = write_is_conditional;
+      return;
+   }
+
+   /* If we are in an IF/ELSE scope within a loop and the loop has not
+    * been resolved already, then record this write.
+    */
+   const ProgramScope *ifelse_scope = scope->in_ifelse_scope();
+   if (ifelse_scope && ifelse_scope->innermost_loop() &&
+       ifelse_scope->innermost_loop()->id()  != conditionality_in_loop_id)
+      record_ifelse_write(*ifelse_scope);
+}
+
+/* Dispatch a write inside an IF/ELSE scope: any scope type other than
+ * if_branch is handled as an ELSE write.
+ */
+void RegisterCompAccess::record_ifelse_write(const ProgramScope& scope)
+{
+   if (scope.type() != if_branch) {
+      was_written_in_current_else_scope = true;
+      record_else_write(scope);
+      return;
+   }
+
+   /* The first write in an IF branch within a loop implies unresolved
+    * conditionality (if it was untouched or unconditional before).
+    */
+   conditionality_in_loop_id = conditionality_unresolved;
+   was_written_in_current_else_scope = false;
+   record_if_write(scope);
+}
+
+/* Record a write in an IF branch, unless it is a secondary write.
+ *
+ * The write is recorded if no unpaired IF write is pending, or if this IF
+ * branch has a different id than the pending one and is nested in the
+ * branch that is the sibling (same id) of the pending scope. In the
+ * latter case the record is used to establish whether the write is
+ * (un-)conditional in the scope enclosing that outer IF/ELSE pair.
+ *
+ * Not recorded are repeated writes in the pending IF scope and writes in
+ * IF scopes that do not satisfy the condition above; they don't
+ * contribute to resolving conditionality.
+ */
+void RegisterCompAccess::record_if_write(const ProgramScope& scope)
+{
+   if (current_unpaired_if_write_scope) {
+      if (current_unpaired_if_write_scope->id() == scope.id())
+         return;
+
+      if (!scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))
+         return;
+   }
+
+   if_scope_write_flags |= 1 << next_ifelse_nesting_depth;
+   current_unpaired_if_write_scope = &scope;
+   next_ifelse_nesting_depth++;
+}
+
+/* Record a write in an ELSE branch.
+ *
+ * If the sibling IF branch holds the pending unpaired write, the IF/ELSE
+ * pair is complete and the write is unconditional in the enclosing scope;
+ * the result is then propagated outwards. Otherwise the write is marked
+ * as conditional.
+ */
+void RegisterCompAccess::record_else_write(const ProgramScope& scope)
+{
+   /* Flag bit of the innermost IF level with a pending write. With no
+    * pending level (depth 0) the mask is empty; computing it as
+    * 1 << (depth - 1) would shift by a negative count, which is undefined.
+    */
+   const unsigned mask = next_ifelse_nesting_depth > 0 ?
+                            1u << (next_ifelse_nesting_depth - 1) : 0u;
+
+   /* If the temporary was written in an IF branch on the same scope level
+    * and this branch is the sibling of this ELSE branch, then we have a
+    * pair of writes that makes write access to this temporary unconditional
+    * in the enclosing scope.
+    */
+   const bool paired = (if_scope_write_flags & mask) &&
+                       current_unpaired_if_write_scope &&
+                       scope.id() == current_unpaired_if_write_scope->id();
+
+   if (!paired) {
+      /* The temporary was not written in the IF branch corresponding
+       * to this ELSE branch, hence the write is conditional.
+       */
+      conditionality_in_loop_id = write_is_conditional;
+      return;
+   }
+
+   --next_ifelse_nesting_depth;
+   if_scope_write_flags &= ~mask;
+
+   /* The following code deals with propagating unconditionality from
+    * inner levels of nested IF/ELSE to the outer levels like in
+    *
+    * 1: var t;
+    * 2: if (a) {        <- start scope A
+    * 3:    if (b)
+    * 4:         t = ...
+    * 5:    else
+    * 6:         t = ...
+    * 7: } else {        <- start scope B
+    * 8:    if (c)
+    * 9:         t = ...
+    * A:    else         <- start scope C
+    * B:         t = ...
+    * C: }
+    *
+    */
+
+   const ProgramScope *parent_ifelse = scope.parent()->in_ifelse_scope();
+
+   /* Same guard as above for the next outer level. */
+   const unsigned outer_mask = next_ifelse_nesting_depth > 0 ?
+                                  1u << (next_ifelse_nesting_depth - 1) : 0u;
+
+   if (outer_mask & if_scope_write_flags) {
+      /* We are at the end of scope C and already recorded a write
+       * within an IF scope (A), the sibling of the parent ELSE scope B,
+       * and it is not yet resolved. Mark that as the last relevant
+       * IF scope. Below the write will be resolved for the A/B
+       * scope pair.
+       */
+      current_unpaired_if_write_scope = parent_ifelse;
+   } else {
+      current_unpaired_if_write_scope = nullptr;
+   }
+
+   /* Promote the first write scope to the enclosing scope because
+    * the current IF/ELSE pair is now irrelevant for the analysis.
+    * This is also required to evaluate the minimum life time for t in
+    * {
+    *    var t;
+    *    if (a)
+    *      t = ...
+    *    else
+    *      t = ...
+    *    x = t;
+    *    ...
+    * }
+    */
+   first_write_scope = scope.parent();
+
+   /* If some parent is IF/ELSE and in a loop then propagate the
+    * write to that scope. Otherwise the write is unconditional
+    * because it happens in both corresponding IF/ELSE branches
+    * in this loop, and hence, record the loop id to signal the
+    * resolution.
+    */
+   if (parent_ifelse && parent_ifelse->is_in_loop()) {
+      record_ifelse_write(*parent_ifelse);
+   } else {
+      conditionality_in_loop_id = scope.innermost_loop()->id();
+   }
+}
+
+/* True if the tracking state is write_is_conditional (-1) or
+ * conditionality_unresolved (0); all other states are larger values.
+ */
+bool RegisterCompAccess::conditional_ifelse_write_in_loop() const
+{
+   return conditionality_in_loop_id <= conditionality_unresolved;
+}
+
+/* Widen the tracked range to the first write scope: the first write moves
+ * to the begin of that scope and the last read is pushed out to its end
+ * if it currently lies before it.
+ */
+void RegisterCompAccess::propagate_live_range_to_dominant_write_scope()
+{
+   first_write = first_write_scope->begin();
+
+   const int write_scope_end = first_write_scope->end();
+   if (write_scope_end > last_read)
+      last_read = write_scope_end;
+}
+
+/* Compute the final live range (m_range) of this component from the
+ * recorded reads and writes and the scopes they happened in.
+ *
+ * Components that were never written get the range (-1, -1); components
+ * that were only written get [first_write, last_write + 1]. Otherwise the
+ * range is widened as required by loops, conditional writes and breaks,
+ * and finally set to [first_write, last_read].
+ */
+void RegisterCompAccess::update_required_live_range()
+{
+   bool keep_for_full_loop = false;
+
+   /* This register component is not used at all, or only read,
+    * mark it as unused and ignore it when renaming.
+    * NOTE(review): the original comment said that
+    * glsl_to_tgsi_visitor::renumber_registers eliminates registers that
+    * are not written to; that name does not appear in this backend -
+    * confirm what takes this role here.
+    */
+   if (last_write < 0) {
+      m_range.start = -1;
+      m_range.end = -1;
+      return;
+   }
+
+   /* Only written to, just make sure the register component is not
+    * reused in the range it is used to write to
+    */
+   if (!last_read_scope) {
+      m_range.start = first_write;
+      m_range.end = last_write + 1;
+      return;
+   }
+
+   assert(first_write_scope || m_range.start >= 0);
+
+   /* The register was pre-defined, so the first write scope is the
+    * outermost scope */
+   if (!first_write_scope) {
+      first_write_scope = first_read_scope;
+      while (first_write_scope->parent())
+         first_write_scope = first_write_scope->parent();
+   }
+
+   const ProgramScope *enclosing_scope_first_read = first_read_scope;
+   const ProgramScope *enclosing_scope_first_write = first_write_scope;
+
+   /* We read before writing in a loop
+    * hence the value must survive the loops
+    */
+   if ((first_read <= first_write) &&
+       first_read_scope->is_in_loop()) {
+      keep_for_full_loop = true;
+      enclosing_scope_first_read = first_read_scope->outermost_loop();
+   }
+
+   /* A conditional write within a (nested) loop must survive the outermost
+    * loop if the last read was not within the same scope.
+    */
+   const ProgramScope *conditional = enclosing_scope_first_write->enclosing_conditional();
+   if (conditional && !conditional->contains_range_of(*last_read_scope) &&
+       (conditional->is_switchcase_scope_in_loop() ||
+        conditional_ifelse_write_in_loop())) {
+      keep_for_full_loop = true;
+      enclosing_scope_first_write = conditional->outermost_loop();
+   }
+
+   /* Evaluate the scope that is shared by all: required first write scope,
+    * required first read before write scope, and last read scope.
+    */
+   const ProgramScope *enclosing_scope = enclosing_scope_first_read;
+   if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
+      enclosing_scope = enclosing_scope_first_write;
+
+   if (last_read_scope->contains_range_of(*enclosing_scope))
+      enclosing_scope = last_read_scope;
+
+   while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
+          !enclosing_scope->contains_range_of(*last_read_scope)) {
+      enclosing_scope = enclosing_scope->parent();
+      assert(enclosing_scope);
+   }
+
+   /* Propagate the last read scope to the target scope */
+   while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
+      /* If the read is in a loop and we have to move up the scope we need to
+       * extend the live range to the end of this current loop because at this
+       * point we don't know whether the component was written before
+       * un-conditionally in the same loop.
+       */
+      if (last_read_scope->is_loop())
+         last_read = last_read_scope->end();
+
+      last_read_scope = last_read_scope->parent();
+   }
+
+   /* If the variable has to be kept for the whole loop, and we
+    * are currently in a loop, then propagate the live range.
+    */
+   if (keep_for_full_loop && first_write_scope->is_loop())
+      propagate_live_range_to_dominant_write_scope();
+
+   /* Propagate the first_dominant_write scope to the target scope */
+   while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
+      /* Propagate live_range if there was a break in a loop and the write was
+       * after the break inside that loop. Note, that this is only needed if
+       * we move up in the scopes.
+       */
+      if (first_write_scope->loop_break_line() < first_write) {
+         keep_for_full_loop = true;
+         propagate_live_range_to_dominant_write_scope();
+      }
+
+      first_write_scope = first_write_scope->parent();
+
+      /* Propagate live_range if we are now in a loop */
+      if (keep_for_full_loop && first_write_scope->is_loop())
+         propagate_live_range_to_dominant_write_scope();
+   }
+
+   /* The last write past the last read is dead code, but we have to
+    * ensure that the component is not reused too early, hence extend the
+    * live_range past the last write.
+    */
+   if (last_write >= last_read)
+      last_read = last_write + 1;
+
+   /* Here we are at the same scope, all is resolved */
+   m_range.start = first_write;
+   m_range.end = last_read;
+}
+
+/* Out-of-class definitions of the two tracking states that use the top of
+ * the int range: "untouched" is INT_MAX and "unconditional" is INT_MAX - 1.
+ */
+const int
+RegisterCompAccess::conditionality_untouched = std::numeric_limits<int>::max();
+
+const int
+RegisterCompAccess::write_is_unconditional = std::numeric_limits<int>::max() - 1;
+
+
+/* Size the access record of each of the four channels to sizes[channel]. */
+RegisterAccess::RegisterAccess(const std::array<size_t, 4>& sizes)
+{
+   int chan = 0;
+   for (const auto count : sizes) {
+      m_access_record[chan].resize(count);
+      ++chan;
+   }
+}
+
+/* Returns the access record of `reg`, looked up by channel and index.
+ * Both are range-checked by assertions only.
+ */
+RegisterCompAccess& RegisterAccess::operator() (const Register& reg)
+{
+   assert(reg.chan() < 4);
+
+   auto& chan_records = m_access_record[reg.chan()];
+   assert(chan_records.size() > (size_t)reg.index());
+
+   return chan_records[reg.index()];
+}
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h
+++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h
@ -0,0 +1,162 @@
+#ifndef SFN_LIFERANGEEVALUATOR_HELPERS_H
+#define SFN_LIFERANGEEVALUATOR_HELPERS_H
+
+#include "sfn_valuefactory.h"
+
+namespace r600 {
+
+/* Kinds of program scopes distinguished by the live range tracking. */
+enum ProgramScopeType {
+   outer_scope,           /* Outer program scope */
+   loop_body,             /* Inside a loop */
+   if_branch,             /* Inside if branch */
+   else_branch,           /* Inside else branch */
+   switch_body,           /* Inside switch statement */
+   switch_case_branch,    /* Inside switch case statement */
+   switch_default_branch, /* Inside switch default statement */
+   undefined_scope        /* Not one of the scope kinds above */
+};
+
+/* A node in the tree of program scopes. Each scope stores its type, id,
+ * nesting depth, begin/end line, a loop break line and a pointer to its
+ * parent scope.
+ */
+class ProgramScope {
+public:
+   ProgramScope();
+   ProgramScope(ProgramScope *parent, ProgramScopeType type, int id,
+                int depth, int begin);
+
+   /* Plain accessors for the stored values. */
+   ProgramScopeType type() const;
+   ProgramScope *parent() const;
+   int nesting_depth() const;
+   int id() const;
+   int end() const;
+   int begin() const;
+   int loop_break_line() const;
+
+   /* Queries returning a scope, or a null pointer if no scope matches. */
+   const ProgramScope *in_else_scope() const;
+   const ProgramScope *in_ifelse_scope() const;
+   const ProgramScope *in_parent_ifelse_scope() const;
+   const ProgramScope *innermost_loop() const;
+   const ProgramScope *outermost_loop() const;
+   const ProgramScope *enclosing_conditional() const;
+
+   /* Predicates on this scope and its relation to other scopes. */
+   bool is_loop() const;
+   bool is_in_loop() const;
+   bool is_switchcase_scope_in_loop() const;
+   bool is_conditional() const;
+   bool is_child_of(const ProgramScope *scope) const;
+   bool is_child_of_ifelse_id_sibling(const ProgramScope *scope) const;
+
+   bool break_is_for_switchcase() const;
+   bool contains_range_of(const ProgramScope& other) const;
+
+   void set_end(int end);
+   void set_loop_break_line(int line);
+
+private:
+   ProgramScopeType scope_type;
+   int scope_id;
+   int scope_nesting_depth;
+   int scope_begin;
+   int scope_end;
+   int break_loop_line;
+   ProgramScope *parent_scope;
+};
+
+/* Class to track the access to a component of a temporary register. */
+
+struct LiveRange;
+
+/* Collects the reads and writes of one register component, together with
+ * the scopes they occur in, and derives the required live range from them.
+ */
+class RegisterCompAccess {
+public:
+   RegisterCompAccess();
+   RegisterCompAccess(LiveRange range);
+
+   void record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use);
+   void record_write(int line, ProgramScope *scope);
+
+   /* Evaluates the recorded accesses and stores the result in the range
+    * returned by range(). */
+   void update_required_live_range();
+
+   const auto& range() { return m_range;}
+
+   const auto& use_type() { return m_use_type; }
+private:
+   void propagate_live_range_to_dominant_write_scope();
+   bool conditional_ifelse_write_in_loop() const;
+
+   void record_ifelse_write(const ProgramScope& scope);
+   void record_if_write(const ProgramScope& scope);
+   void record_else_write(const ProgramScope& scope);
+
+   ProgramScope *last_read_scope;
+   ProgramScope *first_read_scope;
+   ProgramScope *first_write_scope;
+
+   int first_write;
+   int last_read;
+   int last_write;
+   int first_read;
+
+   /* This member variable tracks the current resolution of conditional writing
+    * to this temporary in IF/ELSE clauses.
+    *
+    * The initial value "conditionality_untouched" indicates that this
+    * temporary has not yet been written to within an if clause.
+    *
+    * A positive (other than "conditionality_untouched") number refers to the
+    * last loop id for which the write was resolved as unconditional. With each
+    * new loop this value will be overwritten by "conditionality_unresolved"
+    * on entering the first IF clause writing this temporary.
+    *
+    * The value "conditionality_unresolved" indicates that no resolution has
+    * been achieved so far. If the variable is set to this value at the end of
+    * the processing of the whole shader it also indicates a conditional write.
+    *
+    * The value "write_is_conditional" marks that the variable is written
+    * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
+    * least one loop.
+    */
+   int conditionality_in_loop_id;
+
+   /* Helper constants to make the tracking code more readable. */
+   static const int write_is_conditional = -1;
+   static const int conditionality_unresolved = 0;
+   static const int conditionality_untouched;
+   static const int write_is_unconditional;
+
+   /* A bit field tracking the nesting levels of if-else clauses where the
+    * temporary has (so far) been written to in the if branch, but not in the
+    * else branch.
+    */
+   unsigned int if_scope_write_flags;
+
+   int next_ifelse_nesting_depth;
+   static const int supported_ifelse_nesting_depth = 32;
+
+   /* Tracks the last if scope in which the temporary was written to
+    * without a write in the corresponding else branch. Is also used
+    * to track read-before-write in the according scope.
+    */
+   const ProgramScope *current_unpaired_if_write_scope;
+
+   /* Flag to resolve read-before-write in the else scope. */
+   bool was_written_in_current_else_scope;
+
+   LiveRange m_range;
+
+   /* One bit per use kind below LiveRangeEntry::use_unspecified. */
+   std::bitset<LiveRangeEntry::use_unspecified> m_use_type;
+};
+
+/* Holds one vector of RegisterCompAccess records for each of the four
+ * channels.
+ */
+class RegisterAccess {
+public:
+   using RegisterCompAccessVector = std::vector<RegisterCompAccess>;
+
+   /* sizes[i] is the number of records for channel i. */
+   RegisterAccess(const std::array<size_t, 4>& sizes);
+
+   /* Look up the record of a register by its channel and index. */
+   RegisterCompAccess& operator() (const Register& reg);
+
+   /* All records of channel i. */
+   auto& component(int i) { return m_access_record[i]; }
+
+private:
+   std::array<RegisterCompAccessVector, 4> m_access_record;
+};
+
+}
+#endif // SFN_LIFERANGEEVALUATOR_HELPERS_H
--- a/src/gallium/drivers/r600/sfn/sfn_memorypool.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_memorypool.cpp
@ -0,0 +1,86 @@
+#include "sfn_memorypool.h"
+
+#include <cassert>
+#include <iostream>
+
+namespace r600 {
+
+/* Private implementation of MemoryPool: owns a heap-allocated
+ * std::pmr::monotonic_buffer_resource that is created in the constructor
+ * and deleted in the destructor.
+ */
+struct MemoryPoolImpl {
+public:
+   MemoryPoolImpl();
+   ~MemoryPoolImpl();
+
+   using MemoryBacking = ::std::pmr::monotonic_buffer_resource;
+
+   MemoryBacking *pool;
+};
+
+/* A new pool has no backing implementation until initialize() is called. */
+MemoryPool::MemoryPool() noexcept : impl(nullptr)
+{
+}
+
+/* Returns the calling thread's pool (the object is thread_local). Every
+ * call runs initialize(), so the backing implementation is (re-)created
+ * on demand, also after free().
+ */
+MemoryPool& MemoryPool::instance()
+{
+    static thread_local MemoryPool me;
+    me.initialize();
+    return me;
+}
+
+/* Delete the backing implementation (and with it the buffer resource)
+ * and reset the pointer so that initialize() can create a new one.
+ */
+void MemoryPool::free()
+{
+   delete impl;
+   impl = nullptr;
+}
+
+/* Create the backing implementation unless it already exists. */
+void MemoryPool::initialize()
+{
+   if (impl)
+      return;
+
+   impl = new MemoryPoolImpl();
+}
+
+/* Allocate `size` bytes from the buffer resource using its default
+ * alignment. Dereferences impl without a check, so the pool must be
+ * initialized (instance() takes care of that).
+ */
+void *MemoryPool::allocate(size_t size)
+{
+   return impl->pool->allocate(size);
+}
+
+/* Allocate `size` bytes with alignment `align` from the buffer resource.
+ * Dereferences impl without a check, so the pool must be initialized
+ * (instance() takes care of that).
+ */
+void *MemoryPool::allocate(size_t size, size_t align)
+{
+   return impl->pool->allocate(size, align);
+}
+
+/* Free the calling thread's pool. Note that instance() initializes the
+ * pool first if it does not exist, right before it is freed again.
+ */
+void MemoryPool::release_all()
+{
+   instance().free();
+}
+
+/* Make sure the calling thread's pool exists; the returned reference is
+ * not needed.
+ */
+void init_pool()
+{
+    MemoryPool::instance();
+}
+
+/* Release everything allocated from the calling thread's pool. */
+void release_pool()
+{
+    MemoryPool::release_all();
+}
+
+/* Objects of classes derived from Allocate are placed in the calling
+ * thread's memory pool.
+ */
+void *Allocate::operator new(size_t size)
+{
+    return MemoryPool::instance().allocate(size);
+}
+
+/* Deliberately a no-op: individual objects are not handed back to the
+ * pool; the memory goes away when the pool itself is freed.
+ */
+void Allocate::operator delete (void *p, size_t size)
+{
+    (void)p;
+    (void)size;
+}
+
+/* Create the default-constructed monotonic buffer resource. */
+MemoryPoolImpl::MemoryPoolImpl()
+{
+   pool = new MemoryBacking();
+}
+
+/* Destroy the buffer resource created in the constructor. */
+MemoryPoolImpl::~MemoryPoolImpl()
+{   
+   delete pool;
+}
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_memorypool.h
+++ b/src/gallium/drivers/r600/sfn/sfn_memorypool.h
@ -0,0 +1,69 @@
+#ifndef MEMORYPOOL_H
+#define MEMORYPOOL_H
+
+#include <cstdlib>
+#include <memory>
+#include <stack>
+
+/* <memory_resource> requires C++17, i.e. __cplusplus >= 201703L */
+#if __cplusplus >= 201703L
+#include <memory_resource>
+#define R600_POINTER_TYPE(X) X *
+#else
+#error Need C++17
+#endif
+
+namespace r600  {
+
+void init_pool();
+void release_pool();
+
+/* Base class that replaces operator new/delete for derived classes; the
+ * operators are defined in sfn_memorypool.cpp. */
+class Allocate
+{
+public:
+    void * operator new(size_t size);
+    void operator delete (void *p, size_t size);
+};
+
+/* Memory pool that is only reachable through instance(); the constructor
+ * is private. The storage itself is hidden behind MemoryPoolImpl. */
+class MemoryPool  {
+public:
+    /* Access to the pool object. */
+    static MemoryPool& instance();
+    /* Static convenience entry point for releasing the pool storage. */
+    static void release_all();
+
+    void free();
+    void initialize();
+
+    /* Allocate size bytes, optionally with an explicit alignment. */
+    void *allocate(size_t size);
+    void *allocate(size_t size, size_t align);
+
+private:
+    MemoryPool() noexcept;
+
+    struct MemoryPoolImpl* impl;
+};
+
+/* Stateless standard-library style allocator that takes its memory from
+ * MemoryPool::instance(). Single allocations are never handed back, so
+ * deallocate() does nothing. */
+template <typename T>
+struct Allocator {
+   using value_type = T;
+
+   Allocator() = default;
+   Allocator(const Allocator& other) = default;
+
+   /* Converting constructor used when a container rebinds the allocator;
+    * there is no state to copy. */
+   template <typename U>
+   Allocator(const Allocator<U>& other) {(void)other;}
+
+   /* Storage for n objects of type T, aligned for T. */
+   T *allocate(size_t n) {
+      return (T *)MemoryPool::instance().allocate(n * sizeof(T), alignof(T));
+   }
+
+   /* No-op: the pool does not release single allocations. */
+   void deallocate(void *p, size_t n) {
+      (void)p; (void)n;
+   }
+
+   /* The allocator is stateless, hence all instances compare equal. */
+   friend bool operator == (const Allocator<T>& lhs, const Allocator<T>& rhs) {
+      (void)lhs; (void)rhs; return true;}
+
+   /* The C++17 allocator requirements ask for != in addition to ==. */
+   friend bool operator != (const Allocator<T>& lhs, const Allocator<T>& rhs) {
+      (void)lhs; (void)rhs; return false;}
+};
+
+}
+
+#endif // MEMORYPOOL_H
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@ -30,19 +30,19 @@
 #include "../r600_pipe.h"
 #include "../r600_shader.h"

+
 #include "util/u_prim.h"

-#include "sfn_instruction_tex.h"
-
-#include "sfn_shader_vertex.h"
-#include "sfn_shader_fragment.h"
-#include "sfn_shader_geometry.h"
-#include "sfn_shader_compute.h"
-#include "sfn_shader_tcs.h"
-#include "sfn_shader_tess_eval.h"
+#include "sfn_shader.h"
+#include "sfn_assembler.h"
+#include "sfn_debug.h"
+#include "sfn_liverangeevaluator.h"
 #include "sfn_nir_lower_fs_out_to_vector.h"
-#include "sfn_ir_to_assembly.h"
 #include "sfn_nir_lower_alu.h"
+#include "sfn_nir_lower_tex.h"
+#include "sfn_optimizer.h"
+#include "sfn_ra.h"
+#include "sfn_scheduler.h"

 #include <vector>

@ -78,264 +78,11 @@ bool NirLowerInstruction::run(nir_shader *shader)
                                        (void *)this);
 }

-
-ShaderFromNir::ShaderFromNir():sh(nullptr),
-   gfx_level(CLASS_UNKNOWN),
-   m_current_if_id(0),
-   m_current_loop_id(0),
-   scratch_size(0)
-{
-}
-
-bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
-                          r600_pipe_shader_selector *sel, r600_shader_key& key,
-                          struct r600_shader* gs_shader, enum amd_gfx_level _chip_class)
-{
-   sh = shader;
-   gfx_level = _chip_class;
-   assert(sh);
-
-   switch (shader->info.stage) {
-   case MESA_SHADER_VERTEX:
-      impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level));
-      break;
-   case MESA_SHADER_TESS_CTRL:
-      sfn_log << SfnLog::trans << "Start TCS\n";
-      impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, gfx_level));
-      break;
-   case MESA_SHADER_TESS_EVAL:
-      sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
-      impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level));
-      break;
-   case MESA_SHADER_GEOMETRY:
-      sfn_log << SfnLog::trans << "Start GS\n";
-      impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, gfx_level));
-      break;
-   case MESA_SHADER_FRAGMENT:
-      sfn_log << SfnLog::trans << "Start FS\n";
-      impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, gfx_level));
-      break;
-   case MESA_SHADER_COMPUTE:
-      sfn_log << SfnLog::trans << "Start CS\n";
-      impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, gfx_level));
-      break;
-   default:
-      return false;
-   }
-
-   sfn_log << SfnLog::trans << "Process declarations\n";
-   if (!process_declaration())
-      return false;
-
-   // at this point all functions should be inlined
-   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
-
-   sfn_log << SfnLog::trans << "Scan shader\n";
-
-   if (sfn_log.has_debug_flag(SfnLog::instr))
-      nir_print_shader(const_cast<nir_shader *>(shader), stderr);
-
-   nir_foreach_block(block, func->impl) {
-      nir_foreach_instr(instr, block) {
-         if (!impl->scan_instruction(instr)) {
-            fprintf(stderr, "Unhandled sysvalue access ");
-            nir_print_instr(instr, stderr);
-            fprintf(stderr, "\n");
-            return false;
-         }
-      }
-   }
-
-   sfn_log << SfnLog::trans << "Reserve registers\n";
-   if (!impl->allocate_reserved_registers()) {
-      return false;
-   }
-
-   ValuePool::array_list arrays;
-   sfn_log << SfnLog::trans << "Allocate local registers\n";
-   foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
-      impl->allocate_local_register(*reg, arrays);
-   }
-
-   sfn_log << SfnLog::trans << "Emit shader start\n";
-   impl->allocate_arrays(arrays);
-
-   impl->emit_shader_start();
-
-   sfn_log << SfnLog::trans << "Process shader \n";
-   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
-      if (!process_cf_node(node))
-         return false;
-   }
-
-   // Add optimizations here
-   sfn_log << SfnLog::trans << "Finalize\n";
-   impl->finalize();
-
-   impl->get_array_info(pipe_shader->shader);
-
-   if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
-      sfn_log << SfnLog::trans << "Merge registers\n";
-      impl->remap_registers();
-   }
-
-   sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
-   return true;
-}
-
-Shader ShaderFromNir::shader() const
-{
-   return Shader{impl->m_output, impl->get_temp_registers()};
-}
-
-
-bool ShaderFromNir::process_cf_node(nir_cf_node *node)
-{
-   SFN_TRACE_FUNC(SfnLog::flow, "CF");
-   switch (node->type) {
-   case nir_cf_node_block:
-      return process_block(nir_cf_node_as_block(node));
-   case nir_cf_node_if:
-      return process_if(nir_cf_node_as_if(node));
-   case nir_cf_node_loop:
-      return process_loop(nir_cf_node_as_loop(node));
-   default:
-      return false;
-   }
-}
-
-bool ShaderFromNir::process_if(nir_if *if_stmt)
-{
-   SFN_TRACE_FUNC(SfnLog::flow, "IF");
-
-   if (!impl->emit_if_start(m_current_if_id, if_stmt))
-      return false;
-
-   int if_id = m_current_if_id++;
-   m_if_stack.push(if_id);
-
-   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
-         if (!process_cf_node(n)) return false;
-
-   if (!if_stmt->then_list.is_empty()) {
-      if (!impl->emit_else_start(if_id))
-         return false;
-
-      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
-            if (!process_cf_node(n)) return false;
-   }
-
-   if (!impl->emit_ifelse_end(if_id))
-      return false;
-
-   m_if_stack.pop();
-   return true;
-}
-
-bool ShaderFromNir::process_loop(nir_loop *node)
-{
-   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
-   int loop_id = m_current_loop_id++;
-
-   if (!impl->emit_loop_start(loop_id))
-      return false;
-
-   foreach_list_typed(nir_cf_node, n, node, &node->body)
-         if (!process_cf_node(n)) return false;
-
-   if (!impl->emit_loop_end(loop_id))
-      return false;
-
-   return true;
-}
-
-bool ShaderFromNir::process_block(nir_block *block)
-{
-   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
-   nir_foreach_instr(instr, block) {
-      int r = emit_instruction(instr);
-      if (!r) {
-         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
-                 << *instr << "\n";
-         return false;
-      }
-   }
-   return true;
-}
-
-
-ShaderFromNir::~ShaderFromNir()
-{
-}
-
-pipe_shader_type ShaderFromNir::processor_type() const
-{
-   return impl->m_processor_type;
-}
-
-
-bool ShaderFromNir::emit_instruction(nir_instr *instr)
-{
-   assert(impl);
-
-   sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";
-
-   switch (instr->type) {
-   case nir_instr_type_alu:
-      return impl->emit_alu_instruction(instr);
-   case nir_instr_type_deref:
-      return impl->emit_deref_instruction(nir_instr_as_deref(instr));
-   case nir_instr_type_intrinsic:
-      return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
-   case nir_instr_type_load_const: /* const values are loaded when needed */
-      return true;
-   case nir_instr_type_tex:
-      return impl->emit_tex_instruction(instr);
-   case nir_instr_type_jump:
-      return impl->emit_jump_instruction(nir_instr_as_jump(instr));
-   default:
-      fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
-      nir_print_instr(instr, stderr);
-      fprintf(stderr, "'\n");
-      return false;
-   case nir_instr_type_ssa_undef:
-      return impl->create_undef(nir_instr_as_ssa_undef(instr));
-      return true;
-   }
-}
-
-bool ShaderFromNir::process_declaration()
-{
-   impl->set_shader_info(sh);
-
-   if (!impl->scan_inputs_read(sh))
-      return false;
-
-   // scan declarations
-   nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
-                                                 nir_var_mem_ubo |
-                                                 nir_var_mem_ssbo) {
-      if (!impl->process_uniforms(variable)) {
-         fprintf(stderr, "R600: error parsing outputs variable %s\n", variable->name);
-         return false;
-      }
-   }
-
-   return true;
-}
-
-const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
-{
-   assert(impl);
-   return impl->m_output;
-}
-
-
 AssemblyFromShader::~AssemblyFromShader()
 {
 }

-bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
+bool AssemblyFromShader::lower(const Shader& ir)
 {
   return do_lower(ir);
 }
@ -557,7 +304,6 @@ r600_nir_lower_atomics(nir_shader *shader)
                                       nir_metadata_dominance,
                                       NULL);
 }
-using r600::r600_nir_lower_int_tg4;
 using r600::r600_lower_scratch_addresses;
 using r600::r600_lower_fs_out_to_vector;
 using r600::r600_lower_ubo_to_align16;
@ -676,6 +422,7 @@ r600_lower_shared_io(nir_shader *nir)
 static nir_ssa_def *
 r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
 {
+   (void)_options;
   auto old_ir = nir_instr_as_intrinsic(instr);
   auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
   nir_ssa_dest_init(&load->instr, &load->dest,
@ -693,6 +440,8 @@ r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)

 bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
 {
+   (void)_options;
+
   if (instr->type != nir_instr_type_intrinsic)
      return false;

@ -713,7 +462,7 @@ bool r600_lower_fs_pos_input(nir_shader *shader)
 };

 static bool
-optimize_once(nir_shader *shader, bool vectorize)
+optimize_once(nir_shader *shader)
 {
   bool progress = false;
   NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
@ -722,9 +471,6 @@ optimize_once(nir_shader *shader, bool vectorize)
   NIR_PASS(progress, shader, nir_opt_algebraic);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
-   if (vectorize)
-      NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
-
   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_trivial_continues(shader)) {
@ -777,13 +523,9 @@ bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
+      return nir_src_bit_size(alu->src[0].src) == 64;
   case nir_op_cube_r600:
      return false;
-   case nir_op_bany_fnequal2:
-   case nir_op_ball_fequal2:
-   case nir_op_bany_inequal2:
-   case nir_op_ball_iequal2:
-      return nir_src_bit_size(alu->src[0].src) != 64;
   default:
      return true;
   }
@ -793,15 +535,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
                         struct r600_pipe_shader *pipeshader,
                         r600_shader_key *key)
 {
-   char filename[4000];
   struct r600_pipe_shader_selector *sel = pipeshader->selector;

-   bool lower_64bit = ((sel->nir->options->lower_int64_options ||
+   bool lower_64bit = (rctx->b.gfx_level < CAYMAN  &&
+                       (sel->nir->options->lower_int64_options ||
                        sel->nir->options->lower_doubles_options) &&
                       (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64);

-   r600::ShaderFromNir convert;
-
   if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
      fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
      nir_print_shader(sel->nir, stderr);
@ -813,10 +553,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
   /* Cayman seems very crashy about accessing images that don't exists or are
    * accessed out of range, this lowering seems to help (but it can also be
    * another problem */
-   if (sel->nir->info.num_images > 0 && rctx->b.gfx_level == CAYMAN)
-       NIR_PASS_V(sel->nir, r600_legalize_image_load_store);

-   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
   nir_lower_idiv_options idiv_options = {0};
   idiv_options.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE;
@ -828,7 +565,7 @@ int r600_shader_from_nir(struct r600_context *rctx,

   if (lower_64bit)
      NIR_PASS_V(sel->nir, nir_lower_int64);
-   while(optimize_once(sel->nir, false));
+   while(optimize_once(sel->nir));

   NIR_PASS_V(sel->nir, r600_lower_shared_io);
   NIR_PASS_V(sel->nir, r600_nir_lower_atomics);
@ -839,8 +576,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
   lower_tex_options.lower_invalid_implicit_lod = true;

   NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
-   NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
-   NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray);
+   NIR_PASS_V(sel->nir, r600_nir_lower_txl_txf_array_or_cube);
+   NIR_PASS_V(sel->nir, r600_nir_lower_cube_to_2darray);

   NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);

@ -851,30 +588,11 @@ int r600_shader_from_nir(struct r600_context *rctx,
      NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans);
      NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
   }
+   nir_variable_mode io_modes = nir_var_uniform |
+                                nir_var_shader_in |
+                                nir_var_shader_out;

-   nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in;
-
-   //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
-      io_modes |= nir_var_shader_out;
-
-   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
-
-      /* Lower IO to temporaries late, because otherwise we get into trouble
-       * with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug
-       * somewhere that results in the input alweas reading from the same temp
-       * regardless of interpolation when the lowering is done early */
-      NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
-              true, true);
-
-      /* Since we're doing nir_lower_io_to_temporaries late, we need
-       * to lower all the copy_deref's introduced by
-       * lower_io_to_temporaries before calling nir_lower_io.
-       */
-      NIR_PASS_V(sel->nir, nir_split_var_copies);
-      NIR_PASS_V(sel->nir, nir_lower_var_copies);
-      NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
-   }
-
+   NIR_PASS_V(sel->nir, nir_opt_combine_stores, nir_var_shader_out);
   NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
                 nir_lower_io_lower_64bit_to_32);

@ -916,14 +634,27 @@ int r600_shader_from_nir(struct r600_context *rctx,
      NIR_PASS_V(sh, r600_lower_tess_coord, u_tess_prim_from_shader(sh->info.tess._primitive_mode));
   }

+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+
+   NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
+   NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
+   NIR_PASS_V(sh, nir_split_64bit_vec3_and_vec4);
+   NIR_PASS_V(sh, nir_lower_int64);
+
   NIR_PASS_V(sh, nir_lower_ubo_vec4);
+
+
   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);

+   NIR_PASS_V(sh, r600::r600_split_64bit_uniforms_and_ubo);
   /* Lower to scalar to let some optimization work out better */
-   while(optimize_once(sh, false));
+   while(optimize_once(sh));

-   NIR_PASS_V(sh, r600::r600_merge_vec2_stores);
+   if (lower_64bit)
+      NIR_PASS_V(sh, r600::r600_merge_vec2_stores);

   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sh, nir_remove_dead_variables,  nir_var_shader_out, NULL);
@ -934,7 +665,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
              40,
              r600_get_natural_size_align_bytes);

-   while (optimize_once(sh, true));
+   while (optimize_once(sh));

   NIR_PASS_V(sh, nir_lower_bool_to_int32);
   NIR_PASS_V(sh, r600_nir_lower_int_tg4);
@ -945,8 +676,6 @@ int r600_shader_from_nir(struct r600_context *rctx,

   NIR_PASS_V(sh, nir_lower_locals_to_regs);

-   //NIR_PASS_V(sh, nir_opt_algebraic);
-   //NIR_PASS_V(sh, nir_copy_prop);
   NIR_PASS_V(sh, nir_lower_to_source_mods,
 	      (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
 					       nir_lower_64bit_source_mods));
@ -974,33 +703,66 @@ int r600_shader_from_nir(struct r600_context *rctx,
      pipeshader->shader.cc_dist_mask = (1 <<  (sh->info.cull_distance_array_size +
                                                sh->info.clip_distance_array_size)) - 1;
   }
-
   struct r600_shader* gs_shader = nullptr;   
   if (rctx->gs_shader)
      gs_shader = &rctx->gs_shader->current->shader;
   r600_screen *rscreen = rctx->screen;

-   bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.gfx_level);
-   if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
-      static int shnr = 0;
+   r600::Shader *shader = r600::Shader::translate_from_nir(sh, &sel->so, gs_shader,
+                                                           *key, rctx->isa->hw_class);

-      snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++);
+   assert(shader);
+   if (!shader)
+      return -2;

-      if (access(filename, F_OK) == -1) {
-         FILE *f = fopen(filename, "w");
+   pipeshader->enabled_stream_buffers_mask = shader->enabled_stream_buffers_mask();
+   pipeshader->selector->info.file_count[TGSI_FILE_HW_ATOMIC] += shader->atomic_file_count();
+   pipeshader->selector->info.writes_memory = shader->has_flag(r600::Shader::sh_writes_memory);

-         if (f) {
-            fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name);
-            nir_print_shader(sh, f);
-            fprintf(f, ")\";\n");
-            fclose(f);
-         }
-      }
-      if (!r)
-         return -2;
+   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
+      std::cerr << "Shader after conversion from nir\n";
+      shader->print(std::cerr);
   }

-   auto shader = convert.shader();
+   if (!r600::sfn_log.has_debug_flag(r600::SfnLog::noopt)) {
+      optimize(*shader);
+
+      if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
+         std::cerr << "Shader after optimization\n";
+         shader->print(std::cerr);
+      }
+   }
+
+   auto scheduled_shader = r600::schedule(shader);
+   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
+      std::cerr << "Shader after scheduling\n";
+      /* Dump the result of the scheduler, not its input */
+      scheduled_shader->print(std::cerr);
+   }
+
+   if (!r600::sfn_log.has_debug_flag(r600::SfnLog::nomerge)) {
+
+      if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge)) {
+         r600::sfn_log << r600::SfnLog::merge << "Shader before RA\n";
+         scheduled_shader->print(std::cerr);
+      }
+
+      r600::sfn_log << r600::SfnLog::trans << "Merge registers\n";
+      auto lrm = r600::LiveRangeEvaluator().run(*scheduled_shader);
+
+      if (!r600::register_allocation(lrm)) {
+         R600_ERR("%s: Register allocation failed\n", __func__);
+         /* For now crash if the shader could not be generated */
+         assert(0);
+         return -1;
+      } else if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge) ||
+                 r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
+         r600::sfn_log << "Shader after RA\n";
+         scheduled_shader->print(std::cerr);
+      }
+   }
+
+   scheduled_shader->get_shader_info(&pipeshader->shader);
+   pipeshader->shader.uses_doubles = sh->info.bit_sizes_float & 64 ? 1 : 0;

   r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.gfx_level, rscreen->b.family,
                      rscreen->has_compressed_msaa_texturing);
@ -1012,9 +774,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
   pipeshader->shader.bc.type = pipeshader->shader.processor_type;
   pipeshader->shader.bc.isa = rctx->isa;

-   r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
-   if (!afs.lower(shader.m_ir)) {
+   r600::Assembler afs(&pipeshader->shader, *key);
+   if (!afs.lower(scheduled_shader)) {
      R600_ERR("%s: Lowering to assembly failed\n", __func__);
+
+      scheduled_shader->print(std::cerr);
+      /* For now crash if the shader could not be generated */
+      assert(0);
      return -1;
   }

@ -1025,8 +791,5 @@ int r600_shader_from_nir(struct r600_context *rctx,
   } else {
      r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
   }
-   if (pipeshader->shader.bc.ngpr < 6)
-      pipeshader->shader.bc.ngpr = 6;
-
   return 0;
 }
--- a/src/gallium/drivers/r600/sfn/sfn_nir.h
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.h
@ -31,7 +31,7 @@
 #include "nir_builder.h"

 #ifdef __cplusplus
-#include "sfn_shader_base.h"
+#include "sfn_shader.h"
 #include <vector>

 namespace r600 {
@ -64,56 +64,16 @@ bool r600_nir_64_to_vec2(nir_shader *sh);

 bool r600_merge_vec2_stores(nir_shader *shader);

-class Shader {
-public:
-   std::vector<InstructionBlock>& m_ir;
-   ValueMap m_temp;
-};
-
-class ShaderFromNir {
-public:
-   ShaderFromNir();
-   ~ShaderFromNir();
-
-   unsigned ninputs() const;
-
-   bool lower(const nir_shader *shader, r600_pipe_shader *sh,
-              r600_pipe_shader_selector *sel, r600_shader_key &key,
-              r600_shader *gs_shader, enum amd_gfx_level gfx_level);
-
-   bool process_declaration();
-
-   pipe_shader_type processor_type() const;
-
-   bool emit_instruction(nir_instr *instr);
-
-   const std::vector<InstructionBlock> &shader_ir() const;
-
-   Shader shader() const;
-private:
-
-   bool process_block();
-   bool process_cf_node(nir_cf_node *node);
-   bool process_if(nir_if *node);
-   bool process_loop(nir_loop *node);
-   bool process_block(nir_block *node);
-
-   std::unique_ptr<ShaderFromNirProcessor> impl;
-   const nir_shader *sh;
-
-   enum amd_gfx_level gfx_level;
-   int m_current_if_id;
-   int m_current_loop_id;
-   std::stack<int> m_if_stack;
-   int scratch_size;
-};
+bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh);
+bool r600_lower_64bit_to_vec2(nir_shader *sh);
+bool r600_split_64bit_alu_and_phi(nir_shader *sh);

 class AssemblyFromShader {
 public:
   virtual ~AssemblyFromShader();
-   bool lower(const std::vector<InstructionBlock> &ir);
+   bool lower(const Shader& s);
 private:
-   virtual bool do_lower(const std::vector<InstructionBlock>& ir)  = 0 ;
+   virtual bool do_lower(const Shader& s)  = 0 ;
 };

 }
--- a/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp
@ -32,7 +32,8 @@


 static nir_ssa_def *
-r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_options)
+r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr,
+                                    UNUSED void *_options)
 {
   b->cursor = nir_before_instr(instr);
   auto ir = nir_instr_as_intrinsic(instr);
@ -143,7 +144,8 @@ r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_opt
 }

 static bool
-r600_legalize_image_load_store_filter(const nir_instr *instr, const void *_options)
+r600_legalize_image_load_store_filter(const nir_instr *instr,
+                                      UNUSED const void *_options)
 {
   if (instr->type != nir_instr_type_intrinsic)
      return false;
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
@ -108,6 +108,193 @@ private:
 };


+/* Lowering pass for 64 bit uniform and UBO loads: filter() selects the
+ * instructions, lower() rewrites them. */
+class LowerLoad64Uniform : public NirLowerInstruction {
+   bool filter(const nir_instr *instr) const override;
+   nir_ssa_def *lower(nir_instr *instr) override;
+};
+
+/* Accept load_uniform, load_ubo and load_ubo_vec4 intrinsics whose
+ * destination is 64 bit wide. */
+bool LowerLoad64Uniform::filter(const nir_instr *instr) const
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   auto load = nir_instr_as_intrinsic(instr);
+   switch (load->intrinsic) {
+   case nir_intrinsic_load_uniform:
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_load_ubo_vec4:
+      return nir_dest_bit_size(load->dest) == 64;
+   default:
+      return false;
+   }
+}
+
+
+/* Turn a load of one or two 64 bit values into a load of twice as many
+ * 32 bit values and re-create the 64 bit values by packing pairs of
+ * channels. Returns the packed value, or a vec2 of both packed values. */
+nir_ssa_def *LowerLoad64Uniform::lower(nir_instr *instr)
+{
+   auto load = nir_instr_as_intrinsic(instr);
+   const int num_64bit = nir_dest_num_components(load->dest);
+   assert(num_64bit <= 2);
+   assert(load->dest.is_ssa);
+
+   /* Re-type the load in place: twice the components at 32 bit */
+   load->dest.ssa.num_components *= 2;
+   load->dest.ssa.bit_size = 32;
+   load->num_components *= 2;
+
+   switch (load->intrinsic) {
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_load_ubo_vec4:
+      /* The start component counts 32 bit slots now */
+      nir_intrinsic_set_component(load, 2 * nir_intrinsic_component(load));
+      break;
+   default:
+      break;
+   }
+
+   nir_ssa_def *packed[2] = {nullptr, nullptr};
+
+   for (int c = 0; c < num_64bit; ++c) {
+      packed[c] = nir_pack_64_2x32_split(b,
+                                         nir_channel(b, &load->dest.ssa, 2 * c),
+                                         nir_channel(b, &load->dest.ssa, 2 * c + 1));
+   }
+
+   return num_64bit == 1 ? packed[0] : nir_vec2(b, packed[0], packed[1]);
+}
+
+/* Run the LowerLoad64Uniform pass on sh and hand back its result. */
+bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh)
+{
+   LowerLoad64Uniform pass;
+   return pass.run(sh);
+}
+
+/* Lowering pass for a fixed set of 64 bit ALU operations (bcsel and
+ * conversions from/to 64 bit values) and for phi instructions; see
+ * filter() for the exact list. The operations are rewritten in terms of
+ * other NIR operations, taking 64 bit values apart and putting them
+ * together with the 64_2x32 unpack/pack helpers where needed. */
+class LowerSplit64op : public NirLowerInstruction {
+   /* Select the instructions handled by lower(). */
+   bool filter(const nir_instr *instr) const override {
+      switch (instr->type) {
+      case nir_instr_type_alu: {
+         auto alu = nir_instr_as_alu(instr);
+         switch (alu->op) {
+         case nir_op_bcsel:
+            return nir_dest_bit_size(alu->dest.dest) == 64;
+         case nir_op_f2b1:
+         case nir_op_f2i32:
+         case nir_op_f2u32:
+         case nir_op_f2i64:
+         case nir_op_f2u64:
+         case nir_op_u2f64:
+         case nir_op_i2f64:
+            return nir_src_bit_size(alu->src[0].src) == 64;
+         default:
+            return false;
+         }
+      }
+      case nir_instr_type_phi: {
+         auto phi = nir_instr_as_phi(instr);
+         /* NOTE(review): this compares the number of components with 64;
+          * a bit size test like the one used for bcsel above looks
+          * intended. Confirm before changing it, because that would
+          * enable the phi lowering below. */
+         return nir_dest_num_components(phi->dest) == 64;
+      }
+      default:
+         return false;
+      }
+   }
+
+   /* Create the replacement for an instruction accepted by filter() and
+    * return the resulting value. */
+   nir_ssa_def *lower(nir_instr *instr) override {
+
+      switch (instr->type) {
+      case nir_instr_type_alu: {
+         auto alu = nir_instr_as_alu(instr);
+         switch (alu->op) {
+
+         case nir_op_bcsel: {
+            /* Select the low and the high dword separately and pack the
+             * two results into the 64 bit value */
+            auto lo = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1),
+                  nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 1)),
+                  nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 2)));
+            auto hi = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1),
+                  nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 1)),
+                  nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 2)));
+            return nir_pack_64_2x32_split(b, lo, hi);
+         }
+         case nir_op_f2b1: {
+            /* Expressed as "source != 0.0" with a 64 bit zero */
+            auto mask = nir_component_mask(nir_dest_num_components(alu->dest.dest));
+            return nir_fneu(b, nir_channels(b, nir_ssa_for_alu_src(b, alu, 0), mask),
+                  nir_imm_zero(b, nir_dest_num_components(alu->dest.dest), 64));
+         }
+         case nir_op_f2i32: {
+            /* Convert the absolute value as unsigned and negate the
+             * result unless 0.0 < src */
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
+            auto abs_src = nir_fabs(b, src);
+            auto value = nir_f2u32(b, abs_src);
+            return nir_bcsel(b, gt0, value, nir_ineg(b, value));
+         }
+         case nir_op_f2u32: {
+            /* fp32 doesn't hold sufficient bits to represent the full range of
+             * u32, therefore we have to split the values, and because f2f32
+             * rounds, we have to remove the fractional part in the hi bits
+             * For values > UINT_MAX the result is undefined */
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
+            auto highval = nir_fmul_imm(b, src, 1.0/65536.0);
+            auto fract = nir_ffract(b, highval);
+            auto high = nir_f2u32(b, nir_f2f32(b, nir_fsub(b, highval, fract)));
+            auto lowval = nir_fmul_imm(b, fract, 65536.0);
+            auto low = nir_f2u32(b, nir_f2f32(b, lowval));
+            return nir_bcsel(b, gt0, nir_ior(b, nir_ishl_imm(b, high, 16), low),
+                             nir_imm_int(b, 0));
+         }
+         case nir_op_f2i64: {
+            /* Same scheme as f2i32: convert the absolute value as
+             * unsigned, use 0 - value unless 0.0 < src */
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
+            auto abs_src = nir_fabs(b, src);
+            auto value = nir_f2u64(b, abs_src);
+            return nir_bcsel(b, gt0, value, nir_isub(b, nir_imm_zero(b, 1, 64), value));
+         }
+         case nir_op_f2u64: {
+            /* Split at 2^32: the high dword is the integer part of
+             * src / 2^32, the low dword is the fractional part scaled
+             * back by 2^32. The result is zero unless 0.0 < src */
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
+            auto highval = nir_fmul_imm(b, src, 1.0/(65536.0 * 65536.0));
+            auto fract = nir_ffract(b, highval);
+            auto high = nir_f2u32(b, nir_fsub(b, highval, fract));
+            auto low = nir_f2u32(b, nir_fmul_imm(b, fract, 65536.0 * 65536.0));
+            return nir_bcsel(b, gt0, nir_pack_64_2x32_split(b, low, high),
+                             nir_imm_zero(b, 1, 64));
+         }
+         case nir_op_u2f64: {
+            /* Convert both dwords as unsigned: high * 2^32 + low */
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto low = nir_unpack_64_2x32_split_x(b, src);
+            auto high = nir_unpack_64_2x32_split_y(b, src);
+            auto flow = nir_u2f64(b, low);
+            auto fhigh = nir_u2f64(b, high);
+            return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
+         }
+         case nir_op_i2f64: {
+            /* Like u2f64, but the high dword is converted as signed */
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto low = nir_unpack_64_2x32_split_x(b, src);
+            auto high = nir_unpack_64_2x32_split_y(b, src);
+            auto flow = nir_u2f64(b, low);
+            auto fhigh = nir_i2f64(b, high);
+            return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
+         }
+         default:
+            unreachable("trying to lower instruction that was not in filter");
+         }
+      }
+      case nir_instr_type_phi: {
+         /* NOTE(review): this path looks unfinished -- verify before
+          * relying on it:
+          *  - both halves are extracted with nir_unpack_32_2x16_split_x,
+          *    whereas for a 64 bit source one would expect
+          *    nir_unpack_64_2x32_split_x and nir_unpack_64_2x32_split_y,
+          *  - the new phis are created with twice the component count
+          *    although each source contributes one component,
+          *  - the new phi instructions are not inserted within this
+          *    function. */
+         auto phi = nir_instr_as_phi(instr);
+         auto phi_lo = nir_phi_instr_create(b->shader);
+         auto phi_hi = nir_phi_instr_create(b->shader);
+         nir_ssa_dest_init(&phi_lo->instr, &phi_lo->dest, phi->dest.ssa.num_components * 2, 32, "");
+         nir_ssa_dest_init(&phi_hi->instr, &phi_hi->dest, phi->dest.ssa.num_components * 2, 32, "");
+         nir_foreach_phi_src(s, phi) {
+            auto lo = nir_unpack_32_2x16_split_x(b, nir_ssa_for_src(b, s->src, 1));
+            auto hi = nir_unpack_32_2x16_split_x(b, nir_ssa_for_src(b, s->src, 1));
+            nir_phi_instr_add_src(phi_lo, s->pred, nir_src_for_ssa(lo));
+            nir_phi_instr_add_src(phi_hi, s->pred, nir_src_for_ssa(hi));
+         }
+         return nir_pack_64_2x32_split(b, &phi_lo->dest.ssa, &phi_hi->dest.ssa);
+      }
+      default:
+         unreachable("Trying to lower instruction that was not in filter");
+      }
+   }
+};
+
+/* Run the LowerSplit64op pass on sh and hand back its result. */
+bool r600_split_64bit_alu_and_phi(nir_shader *sh)
+{
+   LowerSplit64op pass;
+   return pass.run(sh);
+}
+
+
 bool
 LowerSplit64BitVar::filter(const nir_instr *instr) const
 {
@ -271,7 +458,7 @@ LowerSplit64BitVar::split_store_deref_array(nir_intrinsic_instr *intr, nir_deref
 }

 nir_ssa_def *
-LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, nir_deref_instr *deref)
+LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, UNUSED nir_deref_instr *deref)
 {
   auto old_var = nir_intrinsic_get_var(intr, 0);
   unsigned old_components = old_var->type->without_array()->components();
@ -556,8 +743,6 @@ LowerSplit64BitVar::lower(nir_instr *instr)
   }
   case  nir_instr_type_alu: {
      auto alu = nir_instr_as_alu(instr);
-      nir_print_instr(instr, stderr);
-      fprintf(stderr, "\n");
      switch (alu->op) {
      case nir_op_bany_fnequal3:
         return split_reduction3(alu, nir_op_bany_fnequal2, nir_op_fneu, nir_op_ior);
@ -845,7 +1030,7 @@ static bool store_64bit_intr(nir_src *src, void *state)
   return !*s;
 }

-static bool double2vec2(nir_src *src, void *state)
+static bool double2vec2(nir_src *src, UNUSED void *state)
 {
   if (nir_src_bit_size(*src) != 64)
      return true;
@ -1058,6 +1243,206 @@ bool r600_merge_vec2_stores(nir_shader *shader)
   return merger.combine();
 }

+/* Split a 64-bit load/store intrinsic with more than two components into
+ * two intrinsics: "first" handles components 0 and 1, "second" handles the
+ * remaining one or two components.
+ *
+ * Returns false (and changes nothing) when the intrinsic is not one of the
+ * handled kinds, has at most two components, or is not 64 bit wide.
+ * Returns true after the original instruction has been replaced. */
+static bool
+r600_lower_64bit_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
+{
+   b->cursor = nir_after_instr(&instr->instr);
+
+   /* Only these intrinsics are split; everything else is left alone. */
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_load_ubo_vec4:
+   case nir_intrinsic_load_uniform:
+   case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_ssbo:
+      break;
+   default:
+      return false;
+   }
+
+   if (instr->num_components <= 2)
+      return false;
+
+   /* Loads are checked by their destination size, stores by the size of
+    * the value in src[0]. */
+   bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest;
+   if (has_dest) {
+      if (nir_dest_bit_size(instr->dest) != 64)
+         return false;
+   } else  {
+      if (nir_src_bit_size(instr->src[0]) != 64)
+          return false;
+   }
+
+   /* Both halves start out as clones of the original instruction. */
+   nir_intrinsic_instr *first =
+      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
+   nir_intrinsic_instr *second =
+      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_load_ubo_vec4:
+   case nir_intrinsic_load_uniform:
+   case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_store_ssbo:
+      break;
+
+   default: {
+      /* Input/output intrinsics: the second half moves on to the next
+       * location and base, and covers one slot less. */
+      nir_io_semantics semantics = nir_intrinsic_io_semantics(second);
+      semantics.location++;
+      semantics.num_slots--;
+      nir_intrinsic_set_io_semantics(second, semantics);
+
+      nir_intrinsic_set_base(second, nir_intrinsic_base(second) + 1);
+      break;
+   }
+   }
+
+   first->num_components = 2;
+   second->num_components -= 2;
+   if (has_dest) {
+      first->dest.ssa.num_components = 2;
+      second->dest.ssa.num_components -= 2;
+   }
+
+   nir_builder_instr_insert(b, &first->instr);
+   nir_builder_instr_insert(b, &second->instr);
+
+   if (has_dest) {
+      /* Merge the two loads' results back into a vector. */
+      nir_ssa_scalar channels[4] = {
+         nir_get_ssa_scalar(&first->dest.ssa, 0),
+         nir_get_ssa_scalar(&first->dest.ssa, 1),
+         nir_get_ssa_scalar(&second->dest.ssa, 0),
+         /* For a three-component load the fourth entry only repeats
+          * channel 0; nir_vec_scalars below is given num_components
+          * entries, so it is not consumed in that case. */
+         nir_get_ssa_scalar(&second->dest.ssa, second->num_components > 1 ? 1 : 0),
+      };
+      nir_ssa_def *new_ir = nir_vec_scalars(b, channels, instr->num_components);
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa, new_ir);
+   } else {
+      /* Split the src value across the two stores. */
+      b->cursor = nir_before_instr(&instr->instr);
+
+      nir_ssa_def *src0 = instr->src[0].ssa;
+      nir_ssa_scalar channels[4] = { 0 };
+      for (int i = 0; i < instr->num_components; i++)
+         channels[i] = nir_get_ssa_scalar(src0, i);
+
+      /* Low two write-mask bits go to the first store, the rest (shifted
+       * down) to the second. */
+      nir_intrinsic_set_write_mask(first, nir_intrinsic_write_mask(instr) & 3);
+      nir_intrinsic_set_write_mask(second, nir_intrinsic_write_mask(instr) >> 2);
+
+      nir_instr_rewrite_src(&first->instr, &first->src[0],
+                            nir_src_for_ssa(nir_vec_scalars(b, channels, 2)));
+      nir_instr_rewrite_src(&second->instr, &second->src[0],
+                            nir_src_for_ssa(nir_vec_scalars(b, &channels[2],
+                                                           second->num_components)));
+   }
+
+   /* Index of the source that holds the offset (-1: no offset source to
+    * adjust) and the amount added to it for the second half.
+    * NOTE(review): 16 is presumably the byte size of two 64-bit values and
+    * 1 presumably one vec4 slot -- confirm against the intrinsic
+    * definitions. */
+   int offset_src = -1;
+   uint32_t offset_amount = 16;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_load_ubo:
+      offset_src = 1;
+      break;
+   case nir_intrinsic_load_ubo_vec4:
+   case nir_intrinsic_load_uniform:
+      offset_src = 0;
+      offset_amount = 1;
+      break;
+   case nir_intrinsic_store_ssbo:
+      offset_src = 2;
+      break;
+   default:
+      break;
+   }
+   if (offset_src != -1) {
+      b->cursor = nir_before_instr(&second->instr);
+      nir_ssa_def *second_offset =
+         nir_iadd_imm(b, second->src[offset_src].ssa, offset_amount);
+      nir_instr_rewrite_src(&second->instr, &second->src[offset_src],
+                            nir_src_for_ssa(second_offset));
+   }
+
+   /* DCE stores we generated with no writemask (nothing else does this
+    * currently).
+    */
+   if (!has_dest) {
+      if (nir_intrinsic_write_mask(first) == 0)
+         nir_instr_remove(&first->instr);
+      if (nir_intrinsic_write_mask(second) == 0)
+         nir_instr_remove(&second->instr);
+   }
+
+   /* The original instruction is fully replaced by the two halves. */
+   nir_instr_remove(&instr->instr);
+
+   return true;
+}
+
+/* Replace a 64-bit constant with three or four components by two smaller
+ * constants (two components plus the remainder) and re-assemble them into
+ * a vector that takes over all uses of the original.
+ *
+ * Returns false when the constant is not 64 bit or has at most two
+ * components, true once the replacement has been made. */
+static bool
+r600_lower_64bit_load_const(nir_builder *b, nir_load_const_instr *instr)
+{
+   const int total = instr->def.num_components;
+   const bool needs_split = instr->def.bit_size == 64 && total > 2;
+
+   if (!needs_split)
+      return false;
+
+   b->cursor = nir_before_instr(&instr->instr);
+
+   nir_load_const_instr *head = nir_load_const_instr_create(b->shader, 2, 64);
+   nir_load_const_instr *tail =
+      nir_load_const_instr_create(b->shader, total - 2, 64);
+
+   /* Components 0/1 go to the head, 2 (and 3 if present) to the tail. */
+   for (int c = 0; c < 2; ++c)
+      head->value[c] = instr->value[c];
+
+   tail->value[0] = instr->value[2];
+   if (total == 4)
+      tail->value[1] = instr->value[3];
+
+   nir_builder_instr_insert(b, &head->instr);
+   nir_builder_instr_insert(b, &tail->instr);
+
+   /* Collect the channels in order; the fourth slot stays NULL for a
+    * three-component constant. */
+   nir_ssa_def *parts[4];
+   parts[0] = nir_channel(b, &head->def, 0);
+   parts[1] = nir_channel(b, &head->def, 1);
+   parts[2] = nir_channel(b, &tail->def, 0);
+   if (total == 4)
+      parts[3] = nir_channel(b, &tail->def, 1);
+   else
+      parts[3] = NULL;
+
+   nir_ssa_def *merged = nir_vec(b, parts, total);
+   nir_ssa_def_rewrite_uses(&instr->def, merged);
+   nir_instr_remove(&instr->instr);
+
+   return true;
+}
+
+/* Per-instruction callback for r600_lower_64bit_to_vec2(): dispatches
+ * load_const and intrinsic instructions to their lowering helpers and
+ * reports whether the instruction was changed. All other instruction
+ * types are left untouched.
+ *
+ * The callback state pointer is not needed; it is marked UNUSED like the
+ * other unused parameters in this file. */
+static bool
+r600_lower_64bit_to_vec2_instr(nir_builder *b, nir_instr *instr, UNUSED void *data)
+{
+   switch (instr->type) {
+   case nir_instr_type_load_const:
+      return r600_lower_64bit_load_const(b, nir_instr_as_load_const(instr));
+
+   case nir_instr_type_intrinsic:
+      return r600_lower_64bit_intrinsic(b, nir_instr_as_intrinsic(instr));
+   default:
+      return false;
+   }
+}
+
+/* Run r600_lower_64bit_to_vec2_instr over every instruction of the shader,
+ * declaring block indices and dominance information as preserved. */
+bool
+r600_lower_64bit_to_vec2(nir_shader *s)
+{
+   const nir_metadata preserved =
+      static_cast<nir_metadata>(nir_metadata_block_index |
+                                nir_metadata_dominance);
+
+   return nir_shader_instructions_pass(s, r600_lower_64bit_to_vec2_instr,
+                                       preserved, NULL);
+}
+
+
 } // end namespace r600


--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
@ -95,13 +95,14 @@ emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_i

   auto idx2 = nir_src_as_const_value(op->src[1]);
   if (!idx2 || idx2->u32 != 0)
-      offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));
+      offset = nir_iadd(b, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)), offset);

   return nir_iadd(b, addr, offset);
 }

 static nir_ssa_def *
-emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
+emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op,
+                  UNUSED nir_variable_mode mode, int src_offset)
 {

   nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
@ -552,7 +553,7 @@ r600_lower_tess_coord_filter(const nir_instr *instr, UNUSED const void *_options
 }

 static nir_ssa_def *
-r600_lower_tess_coord_impl(nir_builder *b, nir_instr *instr, void *_options)
+r600_lower_tess_coord_impl(nir_builder *b, UNUSED nir_instr *instr, void *_options)
 {
   pipe_prim_type prim_type = *(pipe_prim_type *)_options;

--- a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp
@ -1,142 +1,9 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
+#include "sfn_nir_lower_tex.h"

-#include "sfn_instruction_tex.h"
+#include "nir.h"
 #include "nir_builder.h"
 #include "nir_builtin_builder.h"

-namespace r600 {
-
-TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
-                               unsigned sid, unsigned rid, PValue sampler_offset):
-   Instruction(tex),
-   m_opcode(op),
-   m_dst(dest),
-   m_src(src),
-   m_sampler_id(sid),
-   m_resource_id(rid),
-   m_flags(0),
-   m_inst_mode(0),
-   m_dest_swizzle{0,1,2,3},
-   m_sampler_offset(sampler_offset)
-
-{
-   memset(m_offset, 0, sizeof (m_offset));
-
-   add_remappable_src_value(&m_src);
-   add_remappable_src_value(&m_sampler_offset);
-   add_remappable_dst_value(&m_dst);
-}
-
-void TexInstruction::set_gather_comp(int cmp)
-{
-   m_inst_mode = cmp;
-}
-
-void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   // I wonder whether we can actually end up here ...
-   for (auto c: candidates) {
-      if (*c == *m_src.reg_i(c->chan()))
-         m_src.set_reg_i(c->chan(), new_value);
-      if (*c == *m_dst.reg_i(c->chan()))
-         m_dst.set_reg_i(c->chan(), new_value);
-   }
-}
-
-void TexInstruction::set_offset(unsigned index, int32_t val)
-{
-   assert(index < 3);
-   m_offset[index] = val;
-}
-
-int TexInstruction::get_offset(unsigned index) const
-{
-   assert(index < 3);
-   return (m_offset[index] << 1 & 0x1f);
-}
-
-bool TexInstruction::is_equal_to(const Instruction& rhs) const
-{
-   assert(rhs.type() == tex);
-   const auto& r = static_cast<const TexInstruction&>(rhs);
-   return (m_opcode == r.m_opcode &&
-           m_dst == r.m_dst &&
-           m_src == r.m_src &&
-           m_sampler_id == r.m_sampler_id &&
-           m_resource_id == r.m_resource_id);
-}
-
-void TexInstruction::do_print(std::ostream& os) const
-{
-   const char *map_swz = "xyzw01?_";
-   os << opname(m_opcode) << " R" << m_dst.sel() << ".";
-   for (int i = 0; i < 4; ++i)
-      os << map_swz[m_dest_swizzle[i]];
-
-   os << " " << m_src
-      << " RESID:"  << m_resource_id << " SAMPLER:"
-      << m_sampler_id;
-}
-
-const char *TexInstruction::opname(Opcode op)
-{
-   switch (op) {
-   case ld: return "LD";
-   case get_resinfo: return "GET_TEXTURE_RESINFO";
-   case get_nsampled: return "GET_NUMBER_OF_SAMPLES";
-   case get_tex_lod: return "GET_LOD";
-   case get_gradient_h: return "GET_GRADIENTS_H";
-   case get_gradient_v: return "GET_GRADIENTS_V";
-   case set_offsets: return "SET_TEXTURE_OFFSETS";
-   case keep_gradients: return "KEEP_GRADIENTS";
-   case set_gradient_h: return "SET_GRADIENTS_H";
-   case set_gradient_v: return "SET_GRADIENTS_V";
-   case sample: return "SAMPLE";
-   case sample_l: return "SAMPLE_L";
-   case sample_lb: return "SAMPLE_LB";
-   case sample_lz: return "SAMPLE_LZ";
-   case sample_g: return "SAMPLE_G";
-   case sample_g_lb: return "SAMPLE_G_L";
-   case gather4: return "GATHER4";
-   case gather4_o: return "GATHER4_O";
-   case sample_c: return "SAMPLE_C";
-   case sample_c_l: return "SAMPLE_C_L";
-   case sample_c_lb: return "SAMPLE_C_LB";
-   case sample_c_lz: return "SAMPLE_C_LZ";
-   case sample_c_g: return "SAMPLE_C_G";
-   case sample_c_g_lb: return "SAMPLE_C_G_L";
-   case gather4_c: return "GATHER4_C";
-   case gather4_c_o: return "OP_GATHER4_C_O";
-   }
-   return "ERROR";
-}
-
-
-
 static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
 {
   b->cursor = nir_before_instr(&tex->instr);
@ -273,14 +140,14 @@ bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
   nir_ssa_def *lambda_exp =  nir_fexp2(b, lod);
   nir_ssa_def *scale = NULL;

-   if  (tex->is_array) {
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+         unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
+         scale = nir_frcp(b, nir_channels(b, size, 1));
+         scale = nir_swizzle(b, scale, swizzle, 3);
+   } else if  (tex->is_array) {
      int cmp_mask = (1 << (size->num_components - 1)) - 1;
      scale = nir_frcp(b, nir_channels(b, size,
                                       (nir_component_mask_t)cmp_mask));
-   } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
-      unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
-      scale = nir_frcp(b, nir_channels(b, size, 1));
-      scale = nir_swizzle(b, scale, swizzle, 3);
   }

   nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);
@ -408,7 +275,3 @@ r600_nir_lower_cube_to_2darray(nir_shader *shader)
                                        r600_nir_lower_cube_to_2darray_filer,
                                        r600_nir_lower_cube_to_2darray_impl, nullptr);
 }
-
-
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h
@ -0,0 +1,10 @@
+#ifndef SFN_NIR_LOWER_TEX_H
+#define SFN_NIR_LOWER_TEX_H
+
+struct nir_shader;
+
+bool r600_nir_lower_int_tg4(nir_shader *nir);
+bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
+bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
+
+#endif // SFN_NIR_LOWER_TEX_H
--- a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
@ -0,0 +1,627 @@
+#include "sfn_optimizer.h"
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_tex.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_lds.h"
+#include "sfn_peephole.h"
+#include "sfn_debug.h"
+
+#include <sstream>
+
+namespace r600 {
+
+/* Run the optimization passes in a fixed order, repeating the whole
+ * sequence until one complete round reports no progress.
+ *
+ * Returns true if any round reported progress. The loop only terminates
+ * when the per-round flag is false, so the overall result is tracked in a
+ * separate variable. */
+bool optimize(Shader& shader)
+{
+   bool any_progress = false;
+   bool progress;
+
+   sfn_log << SfnLog::opt  << "Shader before optimization\n";
+   if (sfn_log.has_debug_flag(SfnLog::opt)) {
+      std::stringstream ss;
+      shader.print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   do {
+      progress = false;
+      progress |= copy_propagation_fwd(shader);
+      progress |= dead_code_elimination(shader);
+      progress |= copy_propagation_backward(shader);
+      progress |= dead_code_elimination(shader);
+      progress |= simplify_source_vectors(shader);
+      progress |= peephole(shader);
+      progress |= dead_code_elimination(shader);
+      any_progress |= progress;
+   } while (progress);
+
+   return any_progress;
+}
+
+/* Visitor used by dead_code_elimination(). Instruction kinds whose visit()
+ * has an empty inline body are never modified by this visitor; the others
+ * are handled by the out-of-line definitions that follow the class. */
+class DCEVisitor : public InstrVisitor {
+public:
+   DCEVisitor();
+
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr  *instr) override;
+   void visit(ExportInstr *instr) override {(void)instr;};
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+
+   void visit(ControlFlowInstr *instr) override {(void)instr;};
+   void visit(IfInstr *instr) override {(void)instr;};
+   void visit(WriteScratchInstr *instr) override {(void)instr;};
+   void visit(StreamOutInstr *instr) override {(void)instr;};
+   void visit(MemRingOutInstr *instr) override {(void)instr;};
+   void visit(EmitVertexInstr *instr) override {(void)instr;};
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override;
+   void visit(RatInstr *instr) override {(void)instr;};
+
+
+   /* Set by the visit() methods when something was changed; reset by
+    * dead_code_elimination() before every sweep. */
+   bool progress;
+};
+
+/* Sweep all blocks of the shader with a DCEVisitor until one complete
+ * sweep changes nothing, then optionally log the resulting shader.
+ *
+ * Returns true if any sweep made a change. The sweep loop only exits once
+ * dce.progress is false, so that flag cannot serve as the result; the
+ * outcome is accumulated separately. */
+bool dead_code_elimination(Shader& shader)
+{
+   DCEVisitor dce;
+   bool any_progress = false;
+
+   do {
+
+      sfn_log << SfnLog::opt << "start dce run\n";
+
+      dce.progress = false;
+      for (auto& b : shader.func())
+         b->accept(dce);
+
+      sfn_log << SfnLog::opt << "finished dce run\n\n";
+
+      any_progress |= dce.progress;
+
+   }  while (dce.progress);
+
+   sfn_log << SfnLog::opt  << "Shader after DCE\n";
+   if (sfn_log.has_debug_flag(SfnLog::opt)) {
+      std::stringstream ss;
+      shader.print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   return any_progress;
+}
+
+/* A fresh visitor has not changed anything yet. */
+DCEVisitor::DCEVisitor() :
+   progress(false)
+{}
+
+/* Try to mark an ALU instruction dead. Instructions that are already
+ * flagged dead, whose destination still has uses, or whose opcode is one
+ * of the kill/barrier opcodes listed below are left alone. */
+void DCEVisitor::visit(AluInstr *instr)
+{
+   sfn_log << SfnLog::opt << "DCE: visit '" << *instr;
+
+   /* Note: this early return leaves the log line above unterminated. */
+   if (instr->has_instr_flag(Instr::dead))
+      return;
+
+   if (instr->dest() && instr->dest()->has_uses()) {
+      sfn_log << SfnLog::opt << " dest used\n";
+      return;
+   }
+
+   /* These opcodes are kept even though their result is unused. */
+   switch (instr->opcode()) {
+   case op2_kille:
+   case op2_killne:
+   case op2_kille_int:
+   case op2_killne_int:
+   case op2_killge:
+   case op2_killge_int:
+   case op2_killge_uint:
+   case op2_killgt:
+   case op2_killgt_int:
+   case op2_killgt_uint:
+   case op0_group_barrier:
+      sfn_log << SfnLog::opt << " never kill\n";
+      return;
+   default:
+      ;
+   }
+
+   /* set_dead() reports whether the instruction was actually marked. */
+   bool dead = instr->set_dead();
+   sfn_log << SfnLog::opt << (dead ? "dead" : "alive") << "\n";
+   progress |= dead;
+}
+
+/* Let the LDS read drop its unused components and record whether it
+ * reported a change. */
+void DCEVisitor::visit(LDSReadInstr *instr)
+{
+   sfn_log << SfnLog::opt << "visit " << *instr << "\n";
+
+   const bool changed = instr->remove_unused_components();
+   if (changed)
+      progress = true;
+}
+
+/* ALU groups are skipped entirely by dead code elimination. */
+void DCEVisitor::visit(AluGroup *instr)
+{
+   /* Groups are created because the instructions are used together
+    * so don't try to eliminate code there */
+   (void)instr;
+}
+
+/* Set the destination swizzle of every unused component to 7 and, when no
+ * component is used at all, try to mark the instruction dead.
+ * NOTE(review): 7 is presumably the "masked / not written" swizzle
+ * selector -- confirm against the swizzle definitions. */
+void DCEVisitor::visit(TexInstr *instr)
+{
+   auto& dst_vec = instr->dst();
+   RegisterVec4::Swizzle new_swizzle = instr->all_dest_swizzle();
+
+   bool any_component_used = false;
+   for (int chan = 0; chan < 4; ++chan) {
+      if (dst_vec[chan]->has_uses()) {
+         any_component_used = true;
+         continue;
+      }
+      new_swizzle[chan] = 7;
+   }
+   instr->set_dest_swizzle(new_swizzle);
+
+   if (!any_component_used)
+      progress |= instr->set_dead();
+}
+
+/* Same handling as for texture instructions: set the swizzle of unused
+ * destination components to 7 and try to mark the fetch dead when none of
+ * its components is used.
+ * NOTE(review): 7 is presumably the "masked / not written" swizzle
+ * selector -- confirm against the swizzle definitions. */
+void DCEVisitor::visit(FetchInstr *instr)
+{
+   auto& dst_vec = instr->dst();
+   RegisterVec4::Swizzle new_swizzle = instr->all_dest_swizzle();
+
+   bool any_component_used = false;
+   for (int chan = 0; chan < 4; ++chan) {
+      if (dst_vec[chan]->has_uses()) {
+         any_component_used = true;
+         continue;
+      }
+      new_swizzle[chan] = 7;
+   }
+   instr->set_dest_swizzle(new_swizzle);
+
+   if (any_component_used)
+      return;
+
+   sfn_log << SfnLog::opt << "set dead: " << *instr << "\n";
+
+   progress |= instr->set_dead();
+}
+
+/* Visit every instruction of the block that is not flagged keep() and
+ * erase those that end up dead. The iterator is advanced before the
+ * current element may be erased. */
+void DCEVisitor::visit(Block *block)
+{
+   auto cur = block->begin();
+   auto last = block->end();
+
+   while (cur != last) {
+      auto candidate = cur;
+      ++cur;
+
+      if ((*candidate)->keep())
+         continue;
+
+      (*candidate)->accept(*this);
+      if ((*candidate)->is_dead())
+         block->erase(candidate);
+   }
+}
+
+/* NOTE(review): this is a free function in namespace r600, not a member of
+ * any visitor (DCEVisitor already defines its ControlFlowInstr overload
+ * inline). It looks like a leftover -- confirm nothing references it
+ * before removing it. */
+void visit(ControlFlowInstr *instr)
+{
+   (void)instr;
+}
+
+/* NOTE(review): this is a free function in namespace r600, not a member of
+ * any visitor (DCEVisitor already defines its IfInstr overload inline).
+ * It looks like a leftover -- confirm nothing references it before
+ * removing it. */
+void visit(IfInstr *instr)
+{
+   (void)instr;
+}
+
+/* Visitor used by copy_propagation_fwd(). Instruction kinds with an empty
+ * inline body are ignored by this pass. */
+class CopyPropFwdVisitor : public InstrVisitor {
+public:
+   CopyPropFwdVisitor();
+
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr *instr) override;
+   void visit(ExportInstr *instr) override {(void)instr;}
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override {(void)instr;}
+   void visit(IfInstr *instr) override {(void)instr;}
+   void visit(WriteScratchInstr *instr) override {(void)instr;}
+   void visit(StreamOutInstr *instr) override {(void)instr;}
+   void visit(MemRingOutInstr *instr) override {(void)instr;}
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(RatInstr *instr) override {(void)instr;};
+
+   // TODO: these two should use copy propagation
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override {(void)instr;};
+
+   /* Set when a source was replaced; reset by copy_propagation_fwd()
+    * before every sweep. */
+   bool progress;
+};
+
+
+/* Visitor used by copy_propagation_backward(). Instruction kinds with an
+ * empty inline body are ignored by this pass. */
+class CopyPropBackVisitor : public InstrVisitor {
+public:
+   CopyPropBackVisitor();
+
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr *instr) override;
+   void visit(ExportInstr *instr) override {(void)instr;}
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override {(void)instr;}
+   void visit(IfInstr *instr) override {(void)instr;}
+   void visit(WriteScratchInstr *instr) override {(void)instr;}
+   void visit(StreamOutInstr *instr) override {(void)instr;}
+   void visit(MemRingOutInstr *instr) override {(void)instr;}
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override {(void)instr;};
+   void visit(RatInstr *instr) override {(void)instr;};
+
+   /* Set when a destination was replaced; reset by
+    * copy_propagation_backward() before every sweep. */
+   bool progress;
+};
+
+/* Sweep all blocks of the shader with a CopyPropFwdVisitor until one
+ * complete sweep changes nothing, then optionally log the shader.
+ *
+ * Returns true if any sweep replaced a source. The sweep loop only exits
+ * once copy_prop.progress is false, so the result is accumulated in a
+ * separate variable. */
+bool copy_propagation_fwd(Shader& shader)
+{
+   auto& root = shader.func();
+   CopyPropFwdVisitor copy_prop;
+   bool any_progress = false;
+
+   do {
+      copy_prop.progress = false;
+      for (auto b : root)
+         b->accept(copy_prop);
+      any_progress |= copy_prop.progress;
+   }  while (copy_prop.progress);
+
+   sfn_log << SfnLog::opt  << "Shader after Copy Prop forward\n";
+   if (sfn_log.has_debug_flag(SfnLog::opt)) {
+      std::stringstream ss;
+      shader.print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   return any_progress;
+}
+
+/* Sweep all blocks of the shader with a CopyPropBackVisitor until one
+ * complete sweep changes nothing, then optionally log the shader.
+ *
+ * Returns true if any sweep replaced a destination. The sweep loop only
+ * exits once copy_prop.progress is false, so the result is accumulated in
+ * a separate variable. */
+bool copy_propagation_backward(Shader& shader)
+{
+   CopyPropBackVisitor copy_prop;
+   bool any_progress = false;
+
+   do {
+      copy_prop.progress = false;
+      for (auto b: shader.func())
+         b->accept(copy_prop);
+      any_progress |= copy_prop.progress;
+   }  while (copy_prop.progress);
+
+   sfn_log << SfnLog::opt  << "Shader after Copy Prop backwards\n";
+   if (sfn_log.has_debug_flag(SfnLog::opt)) {
+      std::stringstream ss;
+      shader.print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   return any_progress;
+}
+
+/* A fresh visitor has not propagated anything yet. */
+CopyPropFwdVisitor::CopyPropFwdVisitor() : progress(false)
+{
+}
+
+/* If the instruction allows its source to be propagated, replace its
+ * destination by its first source in the instructions that use the
+ * destination.
+ *
+ * The use list is walked with an iterator that is advanced before
+ * replace_source() is called on the current element.
+ * NOTE(review): replace_source() presumably removes the user from this
+ * very use list when it succeeds, which would invalidate a range-for
+ * iterator -- confirm against AluInstr::replace_source. */
+void CopyPropFwdVisitor::visit(AluInstr *instr)
+{
+   sfn_log << SfnLog::opt << "CopyPropFwdVisitor:["
+           << instr->block_id() << ":" << instr->index() << "] " << *instr
+           << " dset=" << instr->dest() << " ";
+
+   if (instr->dest()) {
+      sfn_log << SfnLog::opt << "has uses; "
+              << instr->dest()->uses().size();
+   }
+
+   sfn_log << SfnLog::opt << "\n";
+
+   if (!instr->can_propagate_src()) {
+      return;
+   }
+
+   auto src = instr->psrc(0);
+   auto dest = instr->dest();
+
+   auto&& uses = dest->uses();
+   auto ii = uses.begin();
+   auto ie = uses.end();
+
+   while (ii != ie && !uses.empty()) {
+      /* Fetch the user and step on first, so a removal of this user from
+       * the list does not touch the iterator we continue with. */
+      auto i = *ii;
+      ++ii;
+
+      /* SSA can always be propagated, registers only in the same block
+       * and only if they are not assigned to more than once */
+      if (dest->is_ssa() ||
+          (instr->block_id() == i->block_id() &&
+           instr->index() < i->index() &&
+           dest->uses().size() == 1)) {
+         sfn_log << SfnLog::opt << "   Try replace in "
+                 << i->block_id() << ":" << i->index()
+                 << *i<< "\n";
+         progress |= i->replace_source(dest, src);
+      }
+   }
+   if (instr->dest()) {
+      sfn_log << SfnLog::opt << "has uses; "
+              << instr->dest()->uses().size();
+   }
+   sfn_log << SfnLog::opt << "  done\n";
+}
+
+
+/* ALU groups are not touched by forward copy propagation. */
+void CopyPropFwdVisitor::visit(AluGroup *instr)
+{
+   (void)instr;
+}
+
+/* Texture instructions are not touched by forward copy propagation. */
+void CopyPropFwdVisitor::visit(TexInstr *instr)
+{
+   (void)instr;
+}
+
+/* Fetch instructions are not touched by forward copy propagation. */
+void CopyPropFwdVisitor::visit(FetchInstr *instr)
+{
+   (void)instr;
+}
+
+/* Visit all instructions of the block in forward order. */
+void CopyPropFwdVisitor::visit(Block *instr)
+{
+   auto it = instr->begin();
+   auto last = instr->end();
+
+   for (; it != last; ++it) {
+      (*it)->accept(*this);
+   }
+}
+
+/* A fresh visitor has not propagated anything yet. */
+CopyPropBackVisitor::CopyPropBackVisitor() : progress(false)
+{
+}
+
+/* Backward copy propagation for one ALU instruction: if the instruction
+ * allows it, let the instructions that write its source register write
+ * its destination instead, and mark this instruction dead when at least
+ * one such replacement succeeded. */
+void CopyPropBackVisitor::visit(AluInstr *instr)
+{
+   bool local_progress = false;
+
+   sfn_log << SfnLog::opt << "CopyPropBackVisitor:["
+           << instr->block_id() << ":" << instr->index() << "] " << *instr << "\n";
+
+
+   if (!instr->can_propagate_dest()) {
+      return;
+   }
+
+   /* Only a register source can have its writers redirected. */
+   auto src_reg = instr->psrc(0)->as_register();
+   if (!src_reg) {
+      return;
+   }
+
+   /* The source must not be read by any other instruction. */
+   if (src_reg->uses().size() > 1)
+      return;
+
+   auto dest = instr->dest();
+   if (!dest ||
+       !instr->has_alu_flag(alu_write)) {
+      return;
+   }
+
+   /* A non-SSA destination with more than one writer is left alone. */
+   if (!dest->is_ssa() && dest->parents().size() > 1)
+      return;
+
+  for (auto& i: src_reg->parents()) {
+     sfn_log << SfnLog::opt << "Try replace dest in "
+             << i->block_id() << ":" << i->index()
+             << *i<< "\n";
+
+     if (i->replace_dest(dest, instr))  {
+        /* The writer of the source now writes dest: move the parent link
+         * and make everything that depended on this instruction depend on
+         * the writer instead. */
+        dest->del_parent(instr);
+        dest->add_parent(i);
+        for (auto d : instr->dependend_instr()) {
+           d->add_required_instr(i);
+        }
+        local_progress = true;
+     }
+  }
+
+  if (local_progress)
+     instr->set_dead();
+
+  progress |= local_progress;
+}
+
+/* Visit every occupied slot of the ALU group; empty slots are skipped. */
+void CopyPropBackVisitor::visit(AluGroup *instr)
+{
+   for (auto& slot : *instr) {
+      if (!slot)
+         continue;
+      slot->accept(*this);
+   }
+}
+
+/* Texture instructions are not touched by backward copy propagation. */
+void CopyPropBackVisitor::visit(TexInstr *instr)
+{
+   (void)instr;
+}
+
+/* Fetch instructions are not touched by backward copy propagation. */
+void CopyPropBackVisitor::visit(FetchInstr *instr)
+{
+   (void)instr;
+}
+
+/* Walk the block from its last instruction to its first and visit every
+ * instruction that is not already dead. */
+void CopyPropBackVisitor::visit(Block *instr)
+{
+   auto cur = instr->rbegin();
+   while (cur != instr->rend()) {
+      if (!(*cur)->is_dead()) {
+         (*cur)->accept(*this);
+      }
+      ++cur;
+   }
+}
+
+/* Visitor used by simplify_source_vectors(). Only texture and export
+ * instructions (and blocks, which forward to their instructions) do
+ * actual work; all other visit() overloads are empty. */
+class SimplifySourceVecVisitor : public InstrVisitor {
+public:
+   SimplifySourceVecVisitor():progress(false) {}
+
+   void visit(AluInstr *instr) override{(void)instr;}
+   void visit(AluGroup *instr) override{(void)instr;}
+   void visit(TexInstr *instr) override;
+   void visit(ExportInstr *instr) override;
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override;
+   void visit(IfInstr *instr) override;
+   void visit(WriteScratchInstr *instr) override;
+   void visit(StreamOutInstr *instr) override;
+   void visit(MemRingOutInstr *instr) override;
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override {(void)instr;};
+   void visit(RatInstr *instr) override {(void)instr;};
+
+   /* Shared worker: tries to simplify each of the four components of the
+    * source vector reg4 used by instr. */
+   void replace_src(Instr *instr, RegisterVec4& reg4);
+
+   /* Set when at least one vector component was replaced. */
+   bool progress;
+};
+
+/* Run a SimplifySourceVecVisitor once over all blocks of the shader and
+ * return its progress flag. */
+bool simplify_source_vectors(Shader& sh)
+{
+   SimplifySourceVecVisitor simplifier;
+
+   for (auto block : sh.func()) {
+      block->accept(simplifier);
+   }
+
+   const bool changed = simplifier.progress;
+   return changed;
+}
+
+/* Simplify the source vector of a texture instruction; get_resinfo
+ * instructions are left untouched. */
+void SimplifySourceVecVisitor::visit(TexInstr *instr)
+{
+   if (instr->opcode() == TexInstr::get_resinfo)
+      return;
+
+   replace_src(instr, instr->src());
+}
+
+/* Scratch writes are not modified by this pass. */
+void SimplifySourceVecVisitor::visit(WriteScratchInstr *instr)
+{
+   (void) instr;
+}
+
+/* Helper visitor for SimplifySourceVecVisitor::replace_src(): it is applied
+ * to the ALU instruction that writes component `index` of `vreg` and, when
+ * possible, replaces that component (see visit(AluInstr) below). */
+class ReplaceConstSource : public AluInstrVisitor {
+public:
+   ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i):
+       old_use(old_use_), vreg(vreg_), index(i),success(false) {}
+
+   using AluInstrVisitor::visit;
+
+   void visit(AluInstr *alu) override;
+
+   Instr *old_use;      /* instruction that reads the vector */
+   RegisterVec4& vreg;  /* vector whose component may be replaced */
+   int index;           /* component of vreg under consideration */
+   bool success;        /* set when the component was replaced */
+};
+
+/* Try to simplify the value vector of an export instruction. */
+void SimplifySourceVecVisitor::visit(ExportInstr *instr)
+{
+   replace_src(instr, instr->value());
+}
+
+/* For each of the four components of reg4, hand the single instruction
+ * that writes it to a ReplaceConstSource visitor, which may replace the
+ * component. Components are skipped when their channel is above 3, when
+ * they are not SSA values, or when they do not have exactly one parent. */
+void SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4)
+{
+   for (int comp = 0; comp < 4; ++comp) {
+      auto value = reg4[comp];
+
+      /* Cayman trans ops have more than one parent for one dest, hence
+       * the exact-one-parent requirement. */
+      const bool eligible = value->chan() <= 3 &&
+                            value->is_ssa() &&
+                            value->parents().size() == 1;
+      if (!eligible)
+         continue;
+
+      auto& writer = *value->parents().begin();
+
+      ReplaceConstSource replacer(instr, reg4, comp);
+      writer->accept(replacer);
+
+      if (replacer.success)
+         progress = true;
+   }
+}
+
+/* Stream-out instructions are not modified by this pass. */
+void SimplifySourceVecVisitor::visit(StreamOutInstr *instr)
+{
+   (void)instr;
+}
+
+/* Memory ring output instructions are not modified by this pass. */
+void SimplifySourceVecVisitor::visit(MemRingOutInstr *instr)
+{
+   (void)instr;
+}
+
+/* If the writer is a plain mov (no abs/neg source modifier) of the inline
+ * constants ALU_SRC_0 / ALU_SRC_1 or of the literals 0 / 0x3F800000 (the
+ * bit pattern of 1.0f), replace the vector component by a register on
+ * channel 4 or 5 respectively and drop the old use.
+ * NOTE(review): channels 4 and 5 presumably select the constants 0 and 1
+ * in a source swizzle -- confirm against the swizzle encoding. */
+void ReplaceConstSource::visit(AluInstr *alu)
+{
+   if (alu->opcode() != op1_mov)
+      return;
+
+   const bool has_src_modifier = alu->has_alu_flag(alu_src0_abs) ||
+                                 alu->has_alu_flag(alu_src0_neg);
+   if (has_src_modifier)
+      return;
+
+   auto src = alu->psrc(0);
+   assert(src);
+
+   int const_chan = -1;
+
+   if (auto inline_const = src->as_inline_const()) {
+      if (inline_const->sel() == ALU_SRC_0)
+         const_chan = 4;
+
+      if (inline_const->sel() == ALU_SRC_1)
+         const_chan = 5;
+   }
+
+   if (auto lit = src->as_literal()) {
+      if (lit->value() == 0)
+         const_chan = 4;
+
+      if (lit->value() == 0x3F800000)
+         const_chan = 5;
+   }
+
+   if (const_chan < 0)
+      return;
+
+   vreg[index]->del_use(old_use);
+   auto replacement = new Register(vreg.sel(), const_chan, vreg[index]->pin());
+   vreg.set_value(index, replacement);
+   success = true;
+}
+
+/* Fetch instructions are not modified by this pass. */
+void SimplifySourceVecVisitor::visit(FetchInstr *instr)
+{
+   (void) instr;
+}
+
+/* Walk the block from its last instruction to its first and visit every
+ * instruction that is not already dead. */
+void SimplifySourceVecVisitor::visit(Block *instr)
+{
+   auto cur = instr->rbegin();
+   while (cur != instr->rend()) {
+      if (!(*cur)->is_dead()) {
+         (*cur)->accept(*this);
+      }
+      ++cur;
+   }
+}
+
+/* Control flow instructions are not modified by this pass. */
+void SimplifySourceVecVisitor::visit(ControlFlowInstr *instr)
+{
+   (void) instr;
+}
+
+/* If instructions are not modified by this pass. */
+void SimplifySourceVecVisitor::visit(IfInstr *instr)
+{
+   (void) instr;
+}
+
+
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_optimizer.h
+++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.h
@ -0,0 +1,17 @@
+#ifndef OPTIMIZER_H
+#define OPTIMIZER_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+bool dead_code_elimination(Shader& shader);
+bool copy_propagation_fwd(Shader& shader);
+bool copy_propagation_backward(Shader& shader);
+bool simplify_source_vectors(Shader& sh);
+
+bool optimize(Shader& shader);
+
+}
+
+#endif // OPTIMIZER_H
--- a/src/gallium/drivers/r600/sfn/sfn_optimizers.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_optimizers.cpp
@ -1,12 +0,0 @@
-#include "sfn_optimizers.h"
-#include "sfn_instruction_block.h"
-
-namespace r600 {
-
-std::vector<PInstruction>
-flatten_shader(const std::vector<InstructionBlock> &ir)
-{
-
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_optimizers.h
+++ b/src/gallium/drivers/r600/sfn/sfn_optimizers.h
@ -1,14 +0,0 @@
-#ifndef SFN_OPTIMIZERS_H
-#define SFN_OPTIMIZERS_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-std::vector<PInstruction>
-flatten_alu_ops(const std::vector<InstructionBlock> &ir);
-
-
-}
-
-#endif // SFN_OPTIMIZERS_H
--- a/src/gallium/drivers/r600/sfn/sfn_peephole.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_peephole.cpp
@ -0,0 +1,212 @@
+#include "sfn_peephole.h"
+
+namespace r600 {
+
+
+/* Visitor implementing the peephole pass: it rewrites single ALU
+ * instructions, recurses into blocks, and tries to fold the comparison that
+ * feeds an if-predicate into the predicate. All other instruction kinds are
+ * ignored. */
+class PeepholeVisitor : public InstrVisitor {
+public:
+   /* Instruction kinds that are actually handled */
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(Block *instr) override;
+   void visit(IfInstr *instr) override;
+
+   /* Instruction kinds without any peephole rule */
+   void visit(TexInstr *) override {}
+   void visit(ExportInstr *) override {}
+   void visit(FetchInstr *) override {}
+   void visit(ControlFlowInstr *) override {}
+   void visit(WriteScratchInstr *) override {}
+   void visit(StreamOutInstr *) override {}
+   void visit(MemRingOutInstr *) override {}
+   void visit(EmitVertexInstr *) override {}
+   void visit(GDSInstr *) override {}
+   void visit(WriteTFInstr *) override {}
+   void visit(LDSAtomicInstr *) override {}
+   void visit(LDSReadInstr *) override {}
+   void visit(RatInstr *) override {}
+
+   /* Constant tests on a source value (inline constant or literal) */
+   bool src_is_zero(PVirtualValue value);
+   bool src_is_one(PVirtualValue value);
+
+   /* Turn alu into a mov of its source number src_idx */
+   void convert_to_mov(AluInstr *alu, int src_idx);
+
+   /* Set as soon as one instruction was rewritten */
+   bool progress = false;
+};
+
+
+/* Run the peephole visitor over every block of the shader function and
+ * return whether any instruction was rewritten. */
+bool peephole(Shader& sh)
+{
+   PeepholeVisitor visitor;
+
+   for (auto block : sh.func()) {
+      block->accept(visitor);
+   }
+
+   return visitor.progress;
+}
+
+/* Replace trivially reducible ALU operations by a mov:
+ *   add    : x + 0 and 0 + x     -> x
+ *   mul    : x * 1 and 1 * x     -> x
+ *   muladd : a * b + c with a or b being zero -> c
+ * Every other opcode is left alone. */
+void PeepholeVisitor::visit(AluInstr *instr)
+{
+   const auto opcode = instr->opcode();
+
+   if (opcode == op2_add || opcode == op2_add_int) {
+      if (src_is_zero(instr->psrc(0)))
+         convert_to_mov(instr, 1);
+      else if (src_is_zero(instr->psrc(1)))
+         convert_to_mov(instr, 0);
+      return;
+   }
+
+   if (opcode == op2_mul || opcode == op2_mul_ieee) {
+      if (src_is_one(instr->psrc(0)))
+         convert_to_mov(instr, 1);
+      else if (src_is_one(instr->psrc(1)))
+         convert_to_mov(instr, 0);
+      return;
+   }
+
+   if (opcode == op3_muladd || opcode == op3_muladd_ieee) {
+      const bool product_is_zero = src_is_zero(instr->psrc(0)) ||
+                                   src_is_zero(instr->psrc(1));
+      if (product_is_zero)
+         convert_to_mov(instr, 2);
+   }
+}
+
+/* True if value is the inline constant ALU_SRC_0 or a literal whose value
+ * is 0. */
+bool PeepholeVisitor::src_is_zero(PVirtualValue value)
+{
+   auto inline_const = value->as_inline_const();
+   if (inline_const && inline_const->sel() == ALU_SRC_0)
+      return true;
+
+   auto literal = value->as_literal();
+   return literal && literal->value() == 0;
+}
+
+/* True if value is the inline constant ALU_SRC_1 or a literal holding
+ * 0x3f800000 (the bit pattern of the float 1.0). */
+bool PeepholeVisitor::src_is_one(PVirtualValue value)
+{
+   auto inline_const = value->as_inline_const();
+   if (inline_const && inline_const->sel() == ALU_SRC_1)
+      return true;
+
+   auto literal = value->as_literal();
+   return literal && literal->value() == 0x3f800000;
+}
+
+/* Rewrite alu in place so that it becomes a mov of its source number
+ * src_idx, and record that the pass made progress.
+ *
+ * NOTE(review): only the source list and the opcode are replaced. Source
+ * modifier flags (alu_src0_neg, alu_src1_neg, alu_src0_abs, ...) remain on
+ * the instruction as they were, so a modifier attached to slot src_idx is
+ * presumably not carried over to slot 0 -- confirm that the callers never
+ * reach this with modifiers set. */
+void PeepholeVisitor::convert_to_mov(AluInstr *alu, int src_idx)
+{
+   AluInstr::SrcValues mov_source{alu->psrc(src_idx)};
+
+   alu->set_sources(mov_source);
+   alu->set_op(op1_mov);
+
+   progress = true;
+}
+
+
+/* ALU groups are not inspected by the peephole pass. The parameter is
+ * explicitly marked unused, matching the other no-op visit() overloads. */
+void PeepholeVisitor::visit(AluGroup *instr)
+{
+   (void)instr;
+}
+
+/* Dispatch the visitor to every instruction of the block, in order. */
+void PeepholeVisitor::visit(Block *instr)
+{
+   for (auto& member : *instr) {
+      member->accept(*this);
+   }
+}
+
+/* ALU visitor that is applied to the instruction producing the value tested
+ * by an if-predicate. When the producer is a supported integer comparison,
+ * the predicate instruction m_pred is rewritten to do that comparison
+ * itself and success is set. */
+class ReplaceIfPredicate : public AluInstrVisitor {
+public:
+   ReplaceIfPredicate(AluInstr *pred) : m_pred{pred} {}
+
+   using AluInstrVisitor::visit;
+
+   void visit(AluInstr *alu) override;
+
+   AluInstr *m_pred;
+   bool success = false;
+};
+
+/* Try to fold the instruction that computes the tested value into the
+ * predicate of the if. This is attempted only when the predicate compares
+ * an SSA register against the inline constant zero. */
+void PeepholeVisitor::visit(IfInstr *instr)
+{
+   auto pred = instr->predicate();
+
+   /* The second operand must be the inline constant 0 */
+   auto& rhs = pred->src(1);
+   auto rhs_const = rhs.as_inline_const();
+   if (!rhs_const || rhs_const->sel() != ALU_SRC_0)
+      return;
+
+   /* The first operand must be an SSA register */
+   auto tested = pred->src(0).as_register();
+   if (!tested || !tested->is_ssa())
+      return;
+
+   assert(!tested->parents().empty());
+   auto producer = *tested->parents().begin();
+
+   ReplaceIfPredicate replacer(pred);
+   producer->accept(replacer);
+   progress |= replacer.success;
+}
+
+/* Map the pair (current predicate opcode, opcode of the instruction that
+ * produced the tested value) to the predicate opcode that evaluates the
+ * comparison directly. op0_nop is returned when the pair can't be merged.
+ *
+ * With a "!= 0" predicate the comparison is taken over as is, with an
+ * "== 0" predicate only sete/setne are supported and they get inverted. */
+static EAluOp pred_from_op(EAluOp pred_op, EAluOp op)
+{
+   if (pred_op == op2_pred_setne_int) {
+      switch (op) {
+      /* The *_dx10 comparisons are disabled:
+       case op2_setge_dx10 : return op2_pred_setge_int;
+       case op2_setgt_dx10 : return op2_pred_setgt_int;
+       case op2_sete_dx10 : return op2_prede_int;
+       case op2_setne_dx10 : return op2_pred_setne_int;
+      */
+      case op2_setge_int:
+         return op2_pred_setge_int;
+      case op2_setgt_int:
+         return op2_pred_setgt_int;
+      case op2_setge_uint:
+         return op2_pred_setge_uint;
+      case op2_setgt_uint:
+         return op2_pred_setgt_uint;
+      case op2_sete_int:
+         return op2_prede_int;
+      case op2_setne_int:
+         return op2_pred_setne_int;
+      default:
+         return op0_nop;
+      }
+   }
+
+   if (pred_op == op2_prede_int) {
+      if (op == op2_sete_int)
+         return op2_pred_setne_int;
+      if (op == op2_setne_int)
+         return op2_prede_int;
+   }
+
+   return op0_nop;
+}
+
+/* If alu is a comparison that can be expressed as predicate operation,
+ * give the predicate instruction the merged opcode, the sources of alu and
+ * the abs/neg modifier flags of its first two sources. */
+void ReplaceIfPredicate::visit(AluInstr *alu)
+{
+   const auto merged_op = pred_from_op(m_pred->opcode(), alu->opcode());
+   if (merged_op == op0_nop)
+      return;
+
+   /* Have to figure out how to pass the dependency correctly */
+   /*for (auto& s : alu->sources()) {
+      if (s->as_register() && s->as_register()->addr())
+         return;
+   }*/
+
+   m_pred->set_op(merged_op);
+   m_pred->set_sources(alu->sources());
+
+   /* Copy one flag from alu to the predicate if it is set on alu */
+   auto inherit_flag = [this, alu](auto flag) {
+      if (alu->has_alu_flag(flag))
+         m_pred->set_alu_flag(flag);
+   };
+
+   inherit_flag(alu_src0_abs);
+   inherit_flag(alu_src1_abs);
+   inherit_flag(alu_src0_neg);
+   inherit_flag(alu_src1_neg);
+
+   success = true;
+}
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_peephole.h
+++ b/src/gallium/drivers/r600/sfn/sfn_peephole.h
@ -0,0 +1,13 @@
+#ifndef PEEPHOLE_H
+#define PEEPHOLE_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+/* Apply the peephole optimizations to all blocks of sh. The definition in
+ * sfn_peephole.cpp returns true when at least one instruction was changed. */
+bool peephole(Shader& sh);
+
+}
+
+
+#endif // PEEPHOLE_H
--- a/src/gallium/drivers/r600/sfn/sfn_ra.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_ra.cpp
@ -0,0 +1,268 @@
+#include "sfn_debug.h"
+#include "sfn_ra.h"
+
+#include <cassert>
+#include <queue>
+
+namespace r600 {
+
+/* Resize the row table to row + 1 entries so that index row can be used. */
+void ComponentInterference::prepare_row(int row)
+{
+   const auto required_rows = row + 1;
+   m_rows.resize(required_rows);
+}
+
+/* Record that the entries idx1 and idx2 interfere; the relation is stored
+ * in both rows. idx1 must be the larger index and its row must already
+ * exist. */
+void ComponentInterference::add(size_t idx1, size_t idx2)
+{
+   assert(idx1 > idx2);
+   assert(m_rows.size() > idx1);
+
+   auto& upper_row = m_rows[idx1];
+   upper_row.push_back(idx2);
+
+   auto& lower_row = m_rows[idx2];
+   lower_row.push_back(idx1);
+}
+
+
+/* Keep a reference to the live range map and build the interference
+ * information for all four components right away. */
+Interference::Interference(LiveRangeMap& map):
+   m_map{map}
+{
+   initialize();
+}
+
+/* Build the per-component interference tables for components 0..3. */
+void Interference::initialize()
+{
+   for (int comp = 0; comp < 4; ++comp)
+      initialize(m_components_maps[comp], m_map.component(comp));
+}
+
+/* Fill comp_interference for one component: every pair of live ranges
+ * whose closed intervals [m_start, m_end] overlap is recorded as
+ * interfering. Only pairs with col < row are tested, add() stores the
+ * relation for both entries. */
+void Interference::initialize(ComponentInterference& comp_interference,
+                              LiveRangeMap::ChannelLiveRange& clr)
+{
+   const size_t count = clr.size();
+
+   for (size_t row = 0; row < count; ++row) {
+      comp_interference.prepare_row(row);
+      auto& current = clr[row];
+
+      for (size_t col = 0; col < row; ++col) {
+         auto& other = clr[col];
+         const bool overlap = current.m_end >= other.m_start &&
+                              current.m_start <= other.m_end;
+         if (overlap)
+            comp_interference.add(row, col);
+      }
+   }
+}
+
+/* A set of up to four registers (one slot per channel, unused slots are
+ * nullptr) that must be allocated to the same register index. */
+struct Group {
+   /* Sort key for the priority queue; groups with a larger value are
+    * colored first. */
+   int priority;
+   std::array<PRegister, 4> channels;
+};
+
+/* Order groups by their priority value only. */
+static inline bool operator < (const Group& lhs, const Group& rhs)
+{
+   return rhs.priority > lhs.priority;
+}
+
+using GroupRegisters = std::priority_queue<Group>;
+
+/* Assign one common color (register index) to all channels of each register
+ * group. Groups are taken from the priority queue, highest priority first.
+ * A color is usable for a group if none of the live ranges interfering with
+ * any of the group's channels already has that color.
+ * Returns false when a group can't be colored with a color below 124. */
+static bool
+group_allocation (LiveRangeMap& lrm, const Interference&  interference, GroupRegisters& groups)
+{
+   int color = 0;
+   // allocate grouped registers
+   while (!groups.empty()) {
+      auto group = groups.top();
+      groups.pop();
+
+      /* Find the first channel that is actually used by the group */
+      int start_comp = 0;
+      while (!group.channels[start_comp])
+         ++start_comp;
+
+      sfn_log << SfnLog::merge << "Color group with " << *group.channels[start_comp] << "\n";
+
+      // don't restart registers for exports, we may be able to merge the
+      // export calls, if the registers are consecutive
+      /* NOTE(review): this relies on export groups having a priority <= 0
+       * (see how register_allocation() computes the priority) */
+      if (group.priority > 0)
+         color = 0;
+
+      while (color < 124) {
+         /* Find the coloring for the first channel */
+         bool color_in_use = false;
+         int comp = start_comp;
+
+         auto& adjecency = interference.row(start_comp, group.channels[comp]->index());
+         auto& regs = lrm.component(comp);
+
+         sfn_log << SfnLog::merge << "Try color "<< color;
+
+         for (auto adj : adjecency) {
+            if (regs[adj].m_color == color) {
+               color_in_use = true;
+               sfn_log << SfnLog::merge << " in use\n";
+               break;
+            }
+         }
+
+         if (color_in_use) {
+            ++color;
+            continue;
+         }
+
+         /* First channel color found, check whether it can be used for all channels */
+         while (comp < 4) {
+            sfn_log << SfnLog::merge << " interference: ";
+            if (group.channels[comp]) {
+               auto& component_life_ranges = lrm.component(comp);
+               auto& adjecencies = interference.row(comp, group.channels[comp]->index());
+
+               for (auto adj_index : adjecencies) {
+                  sfn_log << SfnLog::merge << *component_life_ranges[adj_index].m_register << " ";
+                  if (component_life_ranges[adj_index].m_color == color) {
+                     color_in_use = true;
+                     sfn_log << SfnLog::merge << "used";
+                     break;
+                  }
+               }
+
+               if (color_in_use)
+                  break;
+            }
+            ++comp;
+         }
+
+         /* We couldn't allocate all channels with this color, so try next */
+         if (color_in_use) {
+            ++color;
+            sfn_log << SfnLog::merge << "\n";
+            continue;
+         }
+         sfn_log << SfnLog::merge << " success\n";
+
+         /* Coloring successful */
+         for (auto reg : group.channels) {
+            if (reg) {
+               auto& vregs = lrm.component(reg->chan());
+               auto& vreg_cmp = vregs[reg->index()];
+               assert(vreg_cmp.m_start != -1 || vreg_cmp.m_end != -1);
+               vreg_cmp.m_color = color;
+            }
+         }
+         break;
+      }
+
+      /* The search loop ran out of colors without finding a free one */
+      if (color == 124)
+         return false;
+   }
+
+   return true;
+}
+
+/* Color all live ranges that have no color yet. For each component the
+ * ranges are processed in order and get the lowest color that is not used
+ * by any interfering live range of the same component. Ranges with
+ * m_start == m_end == -1 are skipped.
+ * Returns false if a range can't be colored with a color below 124. */
+static bool
+scalar_allocation (LiveRangeMap& lrm, const Interference&  interference)
+{
+   for (int comp = 0; comp < 4; ++comp) {
+      auto& live_ranges = lrm.component(comp);
+
+      for (auto& range : live_ranges) {
+         const bool already_colored = range.m_color != -1;
+         const bool unused = range.m_start == -1 && range.m_end == -1;
+         if (already_colored || unused)
+            continue;
+
+         sfn_log << SfnLog::merge << "Color " << *range.m_register << "\n";
+
+         auto& neighbours = interference.row(comp, range.m_register->index());
+
+         int color = 0;
+         for (; color < 124; ++color) {
+            bool taken = false;
+            for (auto neighbour : neighbours) {
+               if (live_ranges[neighbour].m_color == color) {
+                  taken = true;
+                  break;
+               }
+            }
+
+            if (!taken) {
+               range.m_color = color;
+               break;
+            }
+         }
+
+         /* All colors were tried and every one was in use */
+         if (color == 124)
+            return false;
+      }
+   }
+   return true;
+}
+
+/* Assign a register index (color) to every live range in lrm:
+ *  1. fully pinned and array registers keep their sel() as color, registers
+ *     pinned as group are collected into groups keyed by their sel(),
+ *  2. the groups are colored by group_allocation(),
+ *  3. everything still uncolored is handled by scalar_allocation(),
+ *  4. the colors are written back as sel() and the pinning is cleared.
+ * Returns false if one of the allocation steps fails. */
+bool register_allocation(LiveRangeMap& lrm)
+{
+   Interference interference(lrm);
+
+   std::map<int, Group> groups;
+
+   // setup fixed colors and group relationships
+   for (int i = 0; i < 4; ++i) {
+      auto& comp = lrm.component(i);
+      for (auto& entry : comp) {
+         sfn_log << SfnLog::merge << "Prepare RA for "
+                 << *entry.m_register
+                 << " [" << entry.m_start << ", " << entry.m_end << "]\n";
+         auto pin = entry.m_register->pin();
+         /* Entries without a live range are not allocated; group members
+          * get channel 7.
+          * NOTE(review): presumably 7 marks the channel as unused -- confirm */
+         if (entry.m_start == -1 && entry.m_end == -1) {
+            if (pin == pin_group || pin == pin_chgr)
+               entry.m_register->set_chan(7);
+            continue;
+         }
+
+         auto sel = entry.m_register->sel();
+         /* fully pinned registers contain system values with the
+          * definite register index, and array values are allocated
+          * right after the system registers, so just reuse the IDs (for now)  */
+         if (pin == pin_fully || pin == pin_array) {
+            /* Must set all array element entries */
+            sfn_log << SfnLog::merge << "Pin color " << sel << " to " << *entry.m_register << "\n";
+            entry.m_color = sel;
+         } else if (pin == pin_group || pin == pin_chgr) {
+            /* Groups must all have the same sel() value, because they are used
+             * as vec4 registers */
+            auto igroup = groups.find(sel);
+            if (igroup != groups.end()) {
+               igroup->second.channels[i] = entry.m_register;
+               assert(comp[entry.m_register->index()].m_register->index() == entry.m_register->index());
+            } else {
+               /* The priority is fixed by the first member seen: export
+                * groups get the negated end of the live range, all other
+                * groups the start of the live range */
+               int priority = entry.m_use.test(LiveRangeEntry::use_export) ? - entry.m_end : entry.m_start;
+               Group group{priority, {nullptr, nullptr, nullptr, nullptr}};
+               group.channels[i] = entry.m_register;
+               assert(comp[group.channels[i]->index()].m_register->index() == entry.m_register->index());
+               groups[sel] = group;
+            }
+         }
+      }
+   }
+
+   /* Move the groups into the priority queue consumed by group_allocation() */
+   GroupRegisters groups_sorted;
+   for (auto& [sel, group] : groups)
+      groups_sorted.push(group);
+
+   if (!group_allocation (lrm, interference, groups_sorted))
+      return false;
+
+   if (!scalar_allocation(lrm, interference))
+      return false;
+
+   /* Write the allocated colors back to the registers.
+    * NOTE(review): this also runs for entries that were skipped above
+    * because they have no live range -- verify that their m_color is
+    * meaningful. */
+   for (int i = 0; i < 4; ++i) {
+      auto& comp = lrm.component(i);
+      for (auto& entry : comp) {
+         sfn_log << SfnLog::merge << "Set " << *entry.m_register << " to ";
+         entry.m_register->set_sel(entry.m_color);
+         entry.m_register->set_pin(pin_none);
+         sfn_log << SfnLog::merge << *entry.m_register << "\n";
+      }
+   }
+
+   return true;
+}
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_ra.h
+++ b/src/gallium/drivers/r600/sfn/sfn_ra.h
@ -0,0 +1,51 @@
+#ifndef INTERFERENCE_H
+#define INTERFERENCE_H
+
+#include "sfn_valuefactory.h"
+
+#include <vector>
+
+namespace r600 {
+
+/* Interference information for the live ranges of one component, stored as
+ * adjacency lists: row(i) holds the indices of all entries that were
+ * registered as interfering with entry i. */
+class ComponentInterference
+{
+public:
+
+   using Row = std::vector<int>;
+
+   /* Resize the table to row + 1 rows */
+   void prepare_row(int row);
+
+   /* Register an interference between idx1 and idx2 in both rows; the
+    * implementation asserts idx1 > idx2 */
+   void add(size_t idx1, size_t idx2);
+
+   /* Read-only access to the adjacency list of entry idx */
+   auto row(int idx) const -> const Row& {
+      assert((size_t)idx < m_rows.size()); return m_rows[idx];}
+
+private:
+
+   std::vector<Row> m_rows;
+};
+
+/* Interference information for all four components of a LiveRangeMap. The
+ * tables are built once in the constructor. */
+class Interference {
+public:
+   Interference(LiveRangeMap& map);
+
+   /* Adjacency list of the live range with the given index in component
+    * comp (must be below 4) */
+   const auto& row(int comp, int index) const {
+      assert(comp < 4);
+      return m_components_maps[comp].row(index);
+   }
+
+private:
+   /* Build the tables of all components, respectively of one component */
+   void initialize();
+   void initialize(ComponentInterference& comp, LiveRangeMap::ChannelLiveRange& clr);
+
+   LiveRangeMap& m_map;
+   std::array<ComponentInterference, 4> m_components_maps;
+
+
+};
+
+bool register_allocation(LiveRangeMap& lrm);
+
+}
+
+#endif // INTERFERENCE_H
--- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
@ -0,0 +1,890 @@
+#include "sfn_scheduler.h"
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_lds.h"
+#include "sfn_instr_tex.h"
+#include "sfn_debug.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace r600 {
+
+/* Visitor that sorts the instructions of a block into one list per
+ * instruction kind, which the scheduler then picks from. Instructions that
+ * consist of several parts (multi-slot ALU ops, LDS ops) are split here. */
+class CollectInstructions : public InstrVisitor {
+
+public:
+   CollectInstructions(ValueFactory& vf):
+      m_value_factory(vf)  {}
+
+   /* ALU ops flagged alu_is_trans and single slot ops are stored as they
+    * are, ops that use more than one slot are split into an ALU group */
+   void visit(AluInstr *instr) override {
+      if (instr->has_alu_flag(alu_is_trans))
+         alu_trans.push_back(instr);
+      else {
+         if (instr->alu_slots() == 1)
+            alu_vec.push_back(instr);
+         else
+            alu_groups.push_back(instr->split(m_value_factory));
+      }
+   }
+   void visit(AluGroup *instr) override {
+      alu_groups.push_back(instr);
+   }
+   void visit(TexInstr *instr) override {
+      tex.push_back(instr);
+   }
+   void visit(ExportInstr *instr) override {
+      exports.push_back(instr);
+   }
+   void visit(FetchInstr *instr)  override {
+      fetches.push_back(instr);
+   }
+   void visit(Block *instr) override {
+      for (auto& i: *instr)
+         i->accept(*this);
+   }
+
+   /* The following three kinds share m_cf_instr; the asserts allow only
+    * one such instruction per collected block */
+   void visit(ControlFlowInstr *instr) override {
+      assert(!m_cf_instr);
+      m_cf_instr = instr;
+   }
+
+   void visit(IfInstr *instr) override {
+      assert(!m_cf_instr);
+      m_cf_instr = instr;
+   }
+
+   void visit(EmitVertexInstr *instr) override {
+      assert(!m_cf_instr);
+      m_cf_instr = instr;
+   }
+
+   void visit(WriteScratchInstr *instr) override {
+      mem_write_instr.push_back(instr);
+   }
+
+   void visit(StreamOutInstr *instr) override {
+      mem_write_instr.push_back(instr);
+   }
+
+   void visit(MemRingOutInstr *instr) override {
+      mem_ring_writes.push_back(instr);
+   }
+
+   void visit(GDSInstr *instr) override {
+      gds_op.push_back(instr);
+   }
+
+   void visit(WriteTFInstr *instr) override {
+      write_tf.push_back(instr);
+   }
+
+   /* LDS instructions are split into ALU instructions that are collected
+    * like any other ALU instruction. The previous LDS instruction is passed
+    * to split() and replaced by its result. */
+   void visit(LDSReadInstr *instr) override {
+      std::vector<AluInstr*> buffer;
+      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
+      for (auto& i: buffer) {
+         i->accept(*this);
+      }      
+   }
+
+   void visit(LDSAtomicInstr *instr) override {
+      std::vector<AluInstr*> buffer;
+      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
+      for (auto& i: buffer) {
+         i->accept(*this);
+      }
+   }
+
+   void visit(RatInstr *instr) override {
+      rat_instr.push_back(instr);
+   }
+
+
+   /* Collected instructions, one list per kind */
+   std::list<AluInstr *> alu_trans;
+   std::list<AluInstr *> alu_vec;
+   std::list<TexInstr *> tex;
+   std::list<AluGroup *> alu_groups;
+   std::list<ExportInstr *> exports;
+   std::list<FetchInstr *> fetches;
+   std::list<WriteOutInstr *> mem_write_instr;
+   std::list<MemRingOutInstr *> mem_ring_writes;
+   std::list<GDSInstr *> gds_op;
+   std::list<WriteTFInstr *> write_tf;
+   std::list<RatInstr *> rat_instr;
+
+   /* Control flow, if or emit-vertex instruction of the block (if any) */
+   Instr *m_cf_instr{nullptr};
+   ValueFactory& m_value_factory;
+
+   /* Result of the most recent LDS split, chained into the next one */
+   AluInstr *m_last_lds_instr{nullptr};
+};
+
+/* Scheduler that rebuilds the blocks of a shader: for each input block the
+ * instructions are collected by kind, moved to "ready" lists, and emitted
+ * into new blocks of the matching type. */
+class BlockSheduler {
+public:
+   BlockSheduler();
+
+   /* Schedule all blocks of the shader and replace its function with the
+    * scheduled blocks */
+   void run(Shader *shader);
+
+   /* Flag the remembered last exports as "last export" */
+   void finalize();
+
+private:
+
+   void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf);
+
+   /* Helpers that move instructions from the collected lists to the ready
+    * lists; the bool result is used as "something is ready" */
+   bool collect_ready(CollectInstructions &available);
+
+   template <typename T>
+   bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
+
+   bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available);
+
+   /* Emit ready instructions of one kind into the current block, starting
+    * a new block when required */
+   bool schedule_tex(Shader::ShaderBlocks& out_blocks);
+   bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
+
+   template <typename I>
+   bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
+
+   template <typename I>
+   bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
+
+   bool schedule_alu(Shader::ShaderBlocks& out_blocks);
+   void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type);
+
+   bool schedule_alu_to_group_vec(AluGroup *group);
+   bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist);
+
+   bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list);
+
+   template <typename I>
+   bool schedule(std::list<I *>& ready_list);
+
+   template <typename I>
+   bool schedule_block(std::list<I *>& ready_list);
+
+   /* Instructions that are ready to be scheduled, one list per kind */
+   std::list<AluInstr *> alu_vec_ready;
+   std::list<AluInstr *> alu_trans_ready;
+   std::list<AluGroup *> alu_groups_ready;
+   std::list<TexInstr *> tex_ready;
+   std::list<ExportInstr *> exports_ready;
+   std::list<FetchInstr *> fetches_ready;
+   std::list<WriteOutInstr *> memops_ready;
+   std::list<MemRingOutInstr *> mem_ring_writes_ready;
+   std::list<GDSInstr *> gds_ready;
+   std::list<WriteTFInstr *> write_tf_ready;
+   std::list<RatInstr *> rat_instr_ready;
+
+   /* Kind of instruction the scheduling loop tries to emit next */
+   enum {
+      sched_alu,
+      sched_tex,
+      sched_fetch,
+      sched_free,
+      sched_mem_ring,
+      sched_gds,
+      sched_write_tf,
+      sched_rat,
+   } current_shed;
+
+   /* Exports that finalize() marks as last export (may stay nullptr) */
+   ExportInstr *m_last_pos;
+   ExportInstr *m_last_pixel;
+   ExportInstr *m_last_param;
+
+   /* Output block that is currently being filled */
+   Block *m_current_block;
+
+   int m_lds_addr_count{0};
+   int m_alu_groups_schduled{0};
+
+};
+
+/* Schedule the instructions of the shader. The shader is modified in place
+ * and the same pointer is returned. When the schedule debug flag is set the
+ * shader is printed to the log before and after scheduling. */
+Shader *schedule(Shader *original)
+{
+   AluGroup::set_chipclass(original->chip_class());
+
+   /* Log the headline and, with the debug flag set, the whole shader */
+   auto log_shader = [](const char *headline, Shader *sh) {
+      sfn_log << SfnLog::schedule << headline;
+      if (!sfn_log.has_debug_flag(SfnLog::schedule))
+         return;
+
+      std::stringstream ss;
+      sh->print(ss);
+      sfn_log << ss.str() << "\n\n";
+   };
+
+   log_shader("Original shader\n", original);
+
+   // TODO later it might be necessary to clone the shader
+   // to be able to re-start scheduling
+
+   auto scheduled_shader = original;
+
+   BlockSheduler scheduler;
+   scheduler.run(scheduled_shader);
+   scheduler.finalize();
+
+   log_shader("Scheduled shader\n", scheduled_shader);
+
+   return scheduled_shader;
+}
+
+/* Start in ALU scheduling mode with no export remembered and no current
+ * output block. */
+BlockSheduler::BlockSheduler():
+   current_shed{sched_alu},
+   m_last_pos{nullptr},
+   m_last_pixel{nullptr},
+   m_last_param{nullptr},
+   m_current_block{nullptr}
+{
+}
+
+/* Schedule every block of the shader function in order and then replace
+ * the function of the shader with the list of scheduled blocks. */
+void BlockSheduler::run( Shader *shader)
+{
+   Shader::ShaderBlocks scheduled_blocks;
+   const bool dump_blocks = sfn_log.has_debug_flag(SfnLog::schedule);
+
+   for (auto& block : shader->func()) {
+      sfn_log << SfnLog::schedule  << "Process block " << block->id() <<"\n";
+
+      if (dump_blocks) {
+         std::stringstream ss;
+         block->print(ss);
+         sfn_log << ss.str() << "\n";
+      }
+
+      schedule_block(*block, scheduled_blocks, shader->value_factory());
+   }
+
+   shader->reset_function(scheduled_blocks);
+}
+
+/* Schedule the instructions of in_block into one or more new blocks that
+ * are appended to out_blocks.
+ *
+ * The instructions are first collected by kind. A small state machine
+ * (current_shed) then cycles through the instruction kinds and emits what is
+ * ready; after each emitted instruction the ready lists are refreshed.
+ * Exports are emitted at the end, followed by the control flow instruction
+ * of the block, if there is one. Instructions that could not be scheduled
+ * are reported on stderr and trigger an assertion. */
+void BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)
+{
+   assert(in_block.id() >= 0);
+
+   current_shed = sched_fetch;
+   auto last_shed = sched_fetch;
+
+   CollectInstructions cir(vf);
+   in_block.accept(cir);
+
+   bool have_instr = collect_ready(cir);
+
+   m_current_block = new Block(in_block.nesting_depth(), in_block.id());
+
+   assert(m_current_block->id() >= 0);
+
+   while (have_instr) {
+
+      sfn_log << SfnLog::schedule << "Have ready instructions\n";
+
+      if (alu_vec_ready.size())
+         sfn_log << SfnLog::schedule << "  ALU V:" << alu_vec_ready.size() << "\n";
+
+      if (alu_trans_ready.size())
+         sfn_log << SfnLog::schedule <<  "  ALU T:" << alu_trans_ready.size() << "\n";
+
+      if (alu_groups_ready.size())
+         sfn_log << SfnLog::schedule << "  ALU G:" << alu_groups_ready.size() << "\n";
+
+      if (exports_ready.size())
+         sfn_log << SfnLog::schedule << "  EXP:" << exports_ready.size()
+                 << "\n";
+      if (tex_ready.size())
+         sfn_log << SfnLog::schedule << "  TEX:" << tex_ready.size()
+                 << "\n";
+      if (fetches_ready.size())
+         sfn_log << SfnLog::schedule << "  FETCH:" << fetches_ready.size()
+                 << "\n";
+      if (mem_ring_writes_ready.size())
+         sfn_log << SfnLog::schedule << "  MEM_RING:" << mem_ring_writes_ready.size()
+                 << "\n";
+      /* Report the number of memory ops, not the number of ring writes */
+      if (memops_ready.size())
+         sfn_log << SfnLog::schedule << "  MEM_OPS:" << memops_ready.size()
+                 << "\n";
+
+      /* Outside of an LDS group, switch directly to an instruction kind
+       * that has many ready instructions pending */
+      if (!m_current_block->lds_group_active()) {
+         if (last_shed != sched_free && memops_ready.size() > 8)
+            current_shed = sched_free;
+         else if (mem_ring_writes_ready.size() > 5)
+            current_shed = sched_mem_ring;
+         else if (rat_instr_ready.size() > 3)
+            current_shed = sched_rat;
+         else if (gds_ready.size() > 3)
+            current_shed = sched_gds;
+         else if (tex_ready.size() > 3)
+            current_shed = sched_tex;
+      }
+
+      /* "continue" moves on to the next kind without refreshing the ready
+       * lists, "break" refreshes them below */
+      switch (current_shed) {
+      case sched_alu:
+         if (!schedule_alu(out_blocks)) {
+            assert(!m_current_block->lds_group_active());
+            current_shed = sched_tex;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_tex:
+         if (tex_ready.empty() || !schedule_tex(out_blocks)) {
+            current_shed = sched_fetch;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_fetch:
+         if (!fetches_ready.empty()) {
+            schedule_vtx(out_blocks);
+            last_shed = current_shed;
+         }
+         current_shed = sched_gds;
+         continue;
+      case sched_gds:
+         if (!gds_ready.empty()) {
+            schedule_gds(out_blocks, gds_ready);
+            last_shed = current_shed;
+         }
+         current_shed = sched_mem_ring;
+         continue;
+      case sched_mem_ring:
+         if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) {
+            current_shed = sched_write_tf;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_write_tf:
+         if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) {
+            current_shed = sched_rat;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_rat:
+         if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) {
+            current_shed = sched_free;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_free:
+         if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) {
+            current_shed = sched_alu;
+            break;
+         }
+         last_shed = current_shed;
+         break;
+      }
+
+      have_instr = collect_ready(cir);
+   }
+
+   /* Emit exports always at end of a block */
+   while (collect_ready_type(exports_ready, cir.exports))
+      schedule_exports(out_blocks, exports_ready);
+
+   bool fail = false;
+
+   /* Print all instructions that are left in a list and flag the failure */
+   auto report_unscheduled = [&fail](const char *headline, const auto& leftover) {
+      if (leftover.empty())
+         return;
+
+      std::cerr << headline;
+      for (auto& a : leftover) {
+         std::cerr << "   " << *a << "\n";
+      }
+      fail = true;
+   };
+
+   report_unscheduled("Unscheduled ALU groups:\n", cir.alu_groups);
+   report_unscheduled("Unscheduled ALU vec ops:\n", cir.alu_vec);
+   report_unscheduled("Unscheduled ALU trans ops:\n", cir.alu_trans);
+   report_unscheduled("Unscheduled MEM ops:\n", cir.mem_write_instr);
+   report_unscheduled("Unscheduled Fetch ops:\n", cir.fetches);
+   report_unscheduled("Unscheduled Tex ops:\n", cir.tex);
+
+   assert(cir.tex.empty());
+   assert(cir.exports.empty());
+   assert(cir.fetches.empty());
+   assert(cir.alu_vec.empty());
+   assert(cir.mem_write_instr.empty());
+   assert(cir.mem_ring_writes.empty());
+
+   assert (!fail);
+
+   if (cir.m_cf_instr) {
+      // Assert that if condition is ready
+      m_current_block->push_back(cir.m_cf_instr);
+      cir.m_cf_instr->set_scheduled();
+   }
+
+   out_blocks.push_back(m_current_block);
+}
+
+/* Mark each remembered export (position, pixel, parameter) that is set as
+ * being the last export. */
+void BlockSheduler::finalize()
+{
+   for (auto last_export : {m_last_pos, m_last_pixel, m_last_param}) {
+      if (last_export)
+         last_export->set_is_last_export(true);
+   }
+}
+
+/* Emit one ALU group into the current ALU block.
+ *
+ * Either a ready, pre-built ALU group is taken, or a new group is created
+ * and filled from the ready vector and trans instruction lists. If the
+ * current block is not an ALU block, or the kcache constants needed by the
+ * group can't be reserved in it, a new ALU block is started first.
+ *
+ * Returns true if a ready group was taken or at least one instruction was
+ * added to the group. */
+bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
+{
+   bool success = false;
+   AluGroup *group = nullptr;
+
+   bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty();
+
+   bool has_lds_ready = !alu_vec_ready.empty() &&
+                        (*alu_vec_ready.begin())->has_lds_access();
+
+   /* Schedule groups first, unless we have a pending LDS instruction.
+    * We don't want the LDS instructions to be too far apart because the
+    * fetch + read from queue has to be in the same ALU CF block */
+   if (!alu_groups_ready.empty() && !has_lds_ready) {
+      group = *alu_groups_ready.begin();
+      alu_groups_ready.erase(alu_groups_ready.begin());
+      sfn_log << SfnLog::schedule << "Schedule ALU group\n";
+      success = true;
+   } else {
+      if (has_alu_ready) {
+         group = new AluGroup();
+         sfn_log << SfnLog::schedule << "START new ALU group\n";
+      }
+   }
+
+   if (group) {
+      int free_slots = group->free_slots();
+
+      if (free_slots && has_alu_ready) {
+         if (!alu_vec_ready.empty())
+            success |= schedule_alu_to_group_vec(group);
+
+         /* Apparently one can't schedule a t-slot if there is already
+          * an LDS instruction scheduled.
+          * TODO: check whether this is only relevant for actual LDS instructions
+          * or also for instructions that read from the LDS return value queue */
+
+         if (free_slots & 0x10 && !has_lds_ready) {
+            sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
+            if (!alu_trans_ready.empty())
+               success |= schedule_alu_to_group_trans(group, alu_trans_ready);
+            if (!alu_vec_ready.empty())
+               success |= schedule_alu_to_group_trans(group, alu_vec_ready);
+         }
+      }
+
+      sfn_log << SfnLog::schedule << "Finalize ALU group\n";
+      group->set_scheduled();
+      group->fix_last_flag();
+      group->set_nesting_depth(m_current_block->nesting_depth());
+
+
+      if (m_current_block->type() != Block::alu) {
+         start_new_block(out_blocks, Block::alu);
+         m_alu_groups_schduled = 0;
+      }
+
+      /* Pessimistic hack: If we have started an LDS group,
+       * make sure 8 instructions groups still fit into the CF
+       * TODO: take care of Address slot emission
+       * TODO: maybe do this CF split only in the assembler
+       */
+      /*if (group->slots() > m_current_block->remaining_slots() ||
+          (group->has_lds_group_start() &&
+           m_current_block->remaining_slots() < 7 * 8)) {
+         //assert(!m_current_block->lds_group_active());
+         start_new_block(out_blocks, Block::alu);
+      }*/
+
+      if (!m_current_block->try_reserve_kcache(*group)) {
+         assert(!m_current_block->lds_group_active());
+         start_new_block(out_blocks, Block::alu);
+         m_current_block->set_instr_flag(Instr::force_cf);
+
+         /* The reservation in the new block has side effects, so it must
+          * not be placed inside assert(): with NDEBUG the call would be
+          * dropped and the kcache of the new block would never be set up. */
+         bool kcache_reserved = m_current_block->try_reserve_kcache(*group);
+         assert(kcache_reserved);
+         (void)kcache_reserved;
+      }
+
+      if (group->has_lds_group_start())
+         m_current_block->lds_group_start(*group->begin());
+
+      m_current_block->push_back(group);
+      if (group->has_lds_group_end())
+         m_current_block->lds_group_end();
+   }
+
+   if (success)
+      ++m_alu_groups_schduled;
+
+   return success;
+}
+
+/* Emit the first ready texture instruction, preceded by its prepare
+ * instructions, into a TEX block. Returns true if an instruction was
+ * emitted. */
+bool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
+{
+   /* Make sure the current block is a TEX block that has room left */
+   if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() ==  0) {
+      start_new_block(out_blocks, Block::tex);
+      m_current_block->set_instr_flag(Instr::force_cf);
+   }
+
+
+   if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) {
+      auto ii = tex_ready.begin();
+      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
+
+      /* The instruction and all its prepare instructions must end up in
+       * the same block */
+      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
+         start_new_block(out_blocks, Block::tex);
+
+      for (auto prep : (*ii)->prepare_instr()) {
+         prep->set_scheduled();
+         m_current_block->push_back(prep);
+      }
+
+      (*ii)->set_scheduled();
+      m_current_block->push_back(*ii);
+      tex_ready.erase(ii);
+      return true;
+   }
+   return false;
+}
+
+/* Schedule as many ready fetch instructions as fit into a VTX block.
+ * A new VTX block flagged force_cf is opened if the current block has
+ * another type or is full. */
+bool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks)
+{
+   bool need_new_block = m_current_block->type() != Block::vtx;
+   if (!need_new_block)
+      need_new_block = m_current_block->remaining_slots() == 0;
+
+   if (need_new_block) {
+      start_new_block(out_blocks, Block::vtx);
+      m_current_block->set_instr_flag(Instr::force_cf);
+   }
+
+   return schedule_block(fetches_ready);
+}
+
+/* Schedule as many instructions from ready_list as fit into a GDS block.
+ * A new GDS block is opened if the current block has another type or is
+ * full; force_cf is only set when the switch was caused by a full block. */
+template <typename I>
+bool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
+{
+   const bool was_full = m_current_block->remaining_slots() == 0;
+   const bool is_gds_block = m_current_block->type() == Block::gds;
+
+   if (!is_gds_block || was_full) {
+      start_new_block(out_blocks, Block::gds);
+      if (was_full) {
+         m_current_block->set_instr_flag(Instr::force_cf);
+      }
+   }
+
+   return schedule_block(ready_list);
+}
+
+
+/* Make sure the current block has the requested type. A non-empty
+ * current block is appended to out_blocks and replaced by a new block
+ * with the same nesting depth and id; an empty one is simply re-typed. */
+void BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)
+{
+   const bool has_content = !m_current_block->empty();
+   if (has_content) {
+      sfn_log << SfnLog::schedule << "Start new block\n";
+      /* An LDS group must not be split across two blocks */
+      assert(!m_current_block->lds_group_active());
+
+      auto finished = m_current_block;
+      out_blocks.push_back(finished);
+      m_current_block = new Block(finished->nesting_depth(), finished->id());
+   }
+   m_current_block->set_type(type);
+}
+
+/* Schedule one instruction from ready_list into a CF block, switching
+ * the current block to type cf first if needed. Does nothing and
+ * returns false when ready_list is empty. */
+template <typename I>
+bool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
+{
+   if (!ready_list.empty()) {
+      const bool in_cf_block = m_current_block->type() == Block::cf;
+      if (!in_cf_block)
+         start_new_block(out_blocks, Block::cf);
+      return schedule(ready_list);
+   }
+   return false;
+}
+
+
+/* Try to add every instruction of alu_vec_ready to the vector slots of
+ * the given group. Instructions accepted by the group are removed from
+ * the ready list. Returns true if at least one instruction was added. */
+bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
+{
+   assert(group);
+   assert(!alu_vec_ready.empty());
+
+   bool scheduled_any = false;
+   for (auto it = alu_vec_ready.begin(); it != alu_vec_ready.end();) {
+      sfn_log << SfnLog::schedule << "Try schedule to vec " << **it;
+
+      if (!group->add_vec_instructions(*it)) {
+         sfn_log << SfnLog::schedule << " failed\n";
+         ++it;
+         continue;
+      }
+
+      /* Scheduling an LDS instruction lowers the LDS address counter
+       * that is used to limit the LDS address registers kept ready */
+      if ((*it)->has_alu_flag(alu_is_lds))
+         --m_lds_addr_count;
+
+      it = alu_vec_ready.erase(it);
+      scheduled_any = true;
+      sfn_log << SfnLog::schedule << " success\n";
+   }
+   return scheduled_any;
+}
+
+/* Try to put one instruction from readylist into the trans slot of the
+ * given group. The list is walked in order and the first instruction the
+ * group accepts is removed from the list; the search stops there.
+ * Returns true if an instruction was scheduled. */
+bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)
+{
+   assert(group);
+
+   for (auto i = readylist.begin(); i != readylist.end(); ++i) {
+      sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
+      if (group->add_trans_instructions(*i)) {
+         readylist.erase(i);
+         /* Same spelling as in the vec variant so that the schedule
+          * log can be searched consistently */
+         sfn_log << SfnLog::schedule << " success\n";
+         return true;
+      }
+      sfn_log << SfnLog::schedule << " failed\n";
+   }
+   return false;
+}
+
+/* Move the first instruction of ready_list into the current block.
+ * Returns false if the list is empty or the block has no slots left. */
+template <typename I>
+bool BlockSheduler::schedule(std::list<I *>& ready_list)
+{
+   if (ready_list.empty() || !(m_current_block->remaining_slots() > 0))
+      return false;
+
+   auto first = ready_list.begin();
+   auto instr = *first;
+   sfn_log << SfnLog::schedule << "Schedule: " << *instr << "\n";
+
+   instr->set_scheduled();
+   m_current_block->push_back(instr);
+   ready_list.erase(first);
+   return true;
+}
+
+/* Move instructions from the front of ready_list into the current block
+ * until the list is empty or the block has no slots left.
+ * Returns true if at least one instruction was moved. */
+template <typename I>
+bool BlockSheduler::schedule_block(std::list<I *>& ready_list)
+{
+   bool scheduled_any = false;
+   while (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
+      auto instr = ready_list.front();
+      sfn_log << SfnLog::schedule << "Schedule: " << *instr << " "
+              << m_current_block->remaining_slots() << "\n";
+
+      instr->set_scheduled();
+      m_current_block->push_back(instr);
+      ready_list.pop_front();
+      scheduled_any = true;
+   }
+   return scheduled_any;
+}
+
+
+/* Schedule the first ready export into a CF block and remember it as the
+ * most recent export of its type (pos, param or pixel). The current block
+ * is switched to type cf even when there is nothing to schedule.
+ * Returns true if an export was scheduled. */
+bool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)
+{
+   if (m_current_block->type() != Block::cf)
+      start_new_block(out_blocks, Block::cf);
+
+   if (ready_list.empty())
+      return false;
+
+   auto exp = ready_list.front();
+   sfn_log << SfnLog::schedule << "Schedule: " << *exp << "\n";
+   exp->set_scheduled();
+   m_current_block->push_back(exp);
+
+   switch (exp->export_type()) {
+   case ExportInstr::pos:
+      m_last_pos = exp;
+      break;
+   case ExportInstr::param:
+      m_last_param = exp;
+      break;
+   case ExportInstr::pixel:
+      m_last_pixel = exp;
+      break;
+   }
+
+   /* The last-export flag is cleared here for every scheduled export */
+   exp->set_is_last_export(false);
+   ready_list.pop_front();
+   return true;
+}
+
+/* Move all instructions that have become ready from the lists in
+ * "available" to the matching ready lists of the scheduler. Every list
+ * is always processed; the result is true if any ready list is non-empty
+ * afterwards. */
+bool BlockSheduler::collect_ready(CollectInstructions &available)
+{
+   sfn_log << SfnLog::schedule << "Ready instructions\n";
+
+   /* The elements of a braced initializer list are evaluated in order,
+    * so the lists are collected (and logged) in the order written here */
+   const bool has_ready[] = {
+      collect_ready_alu_vec(alu_vec_ready, available.alu_vec),
+      collect_ready_type(alu_trans_ready, available.alu_trans),
+      collect_ready_type(alu_groups_ready, available.alu_groups),
+      collect_ready_type(gds_ready, available.gds_op),
+      collect_ready_type(tex_ready, available.tex),
+      collect_ready_type(fetches_ready, available.fetches),
+      collect_ready_type(memops_ready, available.mem_write_instr),
+      collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes),
+      collect_ready_type(write_tf_ready, available.write_tf),
+      collect_ready_type(rat_instr_ready, available.rat_instr),
+   };
+
+   bool result = false;
+   for (bool flag : has_ready)
+      result |= flag;
+
+   sfn_log << SfnLog::schedule << "\n";
+   return result;
+}
+
+/* Move ready ALU instructions from "available" to "ready", assign them a
+ * scheduling priority, and sort "ready" by descending priority.
+ * At most 32 entries of "available" are examined per call, and no new
+ * instruction is taken once "ready" holds 32 entries.
+ * Returns true if "ready" is not empty afterwards. */
+bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available)
+{
+   auto i = available.begin();
+   auto e = available.end();
+
+   /* Instructions that are still in the ready list from an earlier call
+    * get their priority raised again on every call */
+   for (auto alu : ready) {
+      alu->add_priority(100 * alu->register_priority());
+   }
+
+   /* max_check counts every examined entry, including the ones that are
+    * skipped below because of the LDS address limit */
+   int max_check = 0;
+   while (i != e && max_check++ < 32) {
+      if (ready.size() < 32 && (*i)->ready()) {
+
+         int priority = 0;
+         /* LDS fetches that use static offsets are usually ready very fast,
+          * so they would get scheduled early, and this leaves the problem
+          * that we allocate too many registers with just constant values,
+          * and this will cause problems with RA. So limit the number of LDS
+          * address registers.
+          */
+         if ((*i)->has_alu_flag(alu_lds_address)) {
+            if (m_lds_addr_count > 64) {
+               ++i;
+               continue;
+            } else {
+               ++m_lds_addr_count;
+            }
+         }
+
+         /* LDS instructions are scheduled with high priority.
+          * Instructions that can go into the t slot and don't have
+          * indirect access are put in last, so that they don't block
+          * vec-only instructions when scheduling to the vector slots.
+          * For everything else we look at the register use. */
+
+         if ((*i)->has_lds_access())
+             priority = 100000;
+         else if (AluGroup::has_t()) {
+            auto opinfo = alu_ops.find((*i)->opcode());
+            assert(opinfo != alu_ops.end());
+            if (opinfo->second.can_channel(AluOp::t) && !(*i)->indirect_addr().first)
+               priority = -1;
+         }
+
+         priority += 100 * (*i)->register_priority();
+
+         (*i)->add_priority(priority);
+         ready.push_back(*i);
+
+         /* Advance before erasing so that the loop iterator stays valid */
+         auto old_i = i;
+         ++i;
+         available.erase(old_i);
+      } else
+         ++i;
+   }
+
+   /* Log the ready list before and after sorting */
+   for (auto& i: ready)
+      sfn_log << SfnLog::schedule << "V:  " << *i << "\n";
+
+   /* Highest priority first */
+   ready.sort([](const AluInstr *lhs, const AluInstr *rhs) {
+                 return lhs->priority() > rhs->priority();});
+
+   for (auto& i: ready)
+      sfn_log << SfnLog::schedule << "V (S):  " << *i << "\n";
+
+   return !ready.empty();
+}
+
+/* Maps an instruction type to the single character that tags it in the
+ * schedule log. The primary template deliberately has no "value" member,
+ * so using an unmapped type fails to compile. */
+template <typename T>
+struct type_char {
+};
+
+/* Shared holder of the tag character for the specializations below */
+template <char tag>
+struct type_char_value {
+   static constexpr char value = tag;
+};
+
+template <> struct type_char<AluInstr> : type_char_value<'A'> {};
+template <> struct type_char<AluGroup> : type_char_value<'G'> {};
+template <> struct type_char<ExportInstr> : type_char_value<'E'> {};
+template <> struct type_char<TexInstr> : type_char_value<'T'> {};
+template <> struct type_char<FetchInstr> : type_char_value<'F'> {};
+template <> struct type_char<WriteOutInstr> : type_char_value<'M'> {};
+template <> struct type_char<MemRingOutInstr> : type_char_value<'R'> {};
+template <> struct type_char<WriteTFInstr> : type_char_value<'X'> {};
+template <> struct type_char<GDSInstr> : type_char_value<'S'> {};
+template <> struct type_char<RatInstr> : type_char_value<'I'> {};
+
+
+/* Move every instruction of "available" that reports ready() to the end
+ * of "ready", then log the whole ready list tagged with the type
+ * character of T. Returns true if "ready" is not empty afterwards. */
+template <typename T>
+bool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available)
+{
+   for (auto it = available.begin(); it != available.end();) {
+      if ((*it)->ready()) {
+         ready.push_back(*it);
+         it = available.erase(it);
+      } else {
+         ++it;
+      }
+   }
+
+   for (auto& instr : ready)
+      sfn_log << SfnLog::schedule << type_char<T>::value << ";  " << *instr << "\n";
+
+   return !ready.empty();
+}
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_scheduler.h
+++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.h
@ -0,0 +1,13 @@
+#ifndef SFN_SCHEDULER_H
+#define SFN_SCHEDULER_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+/* Scheduler entry point. NOTE(review): presumably returns the scheduled shader; ownership not visible here - confirm */
+Shader *schedule(Shader *original);
+
+}
+
+#endif // SFN_SCHEDULER_H
--- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_shader.h
+++ b/src/gallium/drivers/r600/sfn/sfn_shader.h
@ -0,0 +1,365 @@
+#ifndef SHADER_H
+#define SHADER_H
+
+#include "sfn_instr.h"
+#include "sfn_instrfactory.h"
+#include "sfn_instr_controlflow.h"
+#include "gallium/drivers/r600/r600_shader.h"
+#include "sfn_liverangeevaluator.h"
+
+#include <bitset>
+#include <memory>
+#include <stack>
+#include <vector>
+
+struct nir_shader;
+struct nir_cf_node;
+struct nir_if;
+struct nir_block;
+struct nir_instr;
+
+namespace r600 {
+
+/* Common part of the description of one shader input or output: its
+ * location and name, the sid/spi_sid pair, a position, a flag telling
+ * whether it is a parameter, and the GPR assigned to it.
+ * The class is abstract; derived classes implement do_print(). */
+class ShaderIO {
+public:
+   void set_sid(int sid);
+   void override_spi_sid(int spi_sid);
+   void print(std::ostream& os) const;
+
+   int spi_sid() const { return m_spi_sid;}
+   unsigned sid() const { return m_sid;}
+
+   int location() const {return m_location;}
+   unsigned name() const { return m_name;}
+
+   int pos() const { return m_pos;}
+   void set_pos(int pos) {m_pos = pos;}
+
+   bool is_param() const { return m_is_param;}
+   void set_is_param(bool val) { m_is_param = val;}
+
+   void set_gpr(int gpr) {m_gpr = gpr;}
+   int gpr() const {return m_gpr;}
+
+protected:
+   /* Only derived classes may create an IO description */
+   ShaderIO(const char *type, int loc, int name);
+
+private:
+
+   /* Type specific printing hook.
+    * NOTE(review): presumably invoked by print() - confirm in the .cpp */
+   virtual void do_print(std::ostream& os) const = 0;
+
+   const char *m_type;
+   int m_location{-1};
+   int m_name{-1};
+   int m_sid{0};
+   int m_spi_sid{0};
+   int m_pos{0};
+   /* A flag: stored as bool to match is_param() and set_is_param() */
+   bool m_is_param{false};
+   int m_gpr{0};
+};
+
+/* Description of one shader output: the common IO data plus the
+ * write mask of the output. */
+class ShaderOutput : public ShaderIO {
+public:
+   ShaderOutput();
+   ShaderOutput(int location, int name, int writemask);
+
+   int writemask() const
+   {
+      return m_writemask;
+   }
+
+private:
+   void do_print(std::ostream& os) const override;
+
+   int m_writemask{0};
+};
+
+
+/* Description of one shader input: the common IO data plus the
+ * interpolation settings, the LDS position and the ring offset. */
+class ShaderInput : public ShaderIO {
+public:
+   ShaderInput();
+   ShaderInput(int location, int name);
+
+   /* Interpolation setup */
+   void set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid);
+   void set_uses_interpolate_at_centroid();
+   int interpolator() const { return m_interpolator; }
+   int interpolate_loc() const { return m_interpolate_loc; }
+   int ij_index() const { return m_ij_index; }
+   bool uses_interpolate_at_centroid() const { return m_uses_interpolate_at_centroid; }
+
+   /* LDS position */
+   void set_need_lds_pos() { m_need_lds_pos = true; }
+   bool need_lds_pos() const { return m_need_lds_pos; }
+   void set_lds_pos(int pos) { m_lds_pos = pos; }
+   int lds_pos() const { return m_lds_pos; }
+
+   /* Ring offset */
+   void set_ring_offset(int offs) { m_ring_offset = offs; }
+   int ring_offset() const { return m_ring_offset; }
+
+private:
+   void do_print(std::ostream& os) const override;
+
+   int m_interpolator{0};
+   int m_interpolate_loc{0};
+   int m_ij_index{0};
+   bool m_uses_interpolate_at_centroid{false};
+   bool m_need_lds_pos{false};
+   int m_lds_pos{0};
+   int m_ring_offset{0};
+};
+
+/* Abstract base class of a shader in the sfn backend. It holds the list
+ * of instruction blocks, the input and output descriptions, and the
+ * per-shader flags and atomic/SSBO bookkeeping. Stage specific behaviour
+ * is supplied by derived classes through the pure virtual methods.
+ * Copy construction is disabled. */
+class Shader : public Allocate {
+public:
+   /* Map type that stores the inputs and outputs, keyed by location */
+   template <typename T>
+   using IOMap = std::map<int, T, std::less<int>, Allocator<std::pair<const int, T>>>;
+
+   /* The iterator types are taken from the container type that is really
+    * stored: a map with a different allocator is a different type and its
+    * iterator is not guaranteed to be interchangeable */
+   using InputIterator = IOMap<ShaderInput>::iterator;
+   using OutputIterator = IOMap<ShaderOutput>::iterator;
+
+   using ShaderBlocks = std::list<Block::Pointer, Allocator<Block::Pointer>>;
+
+   Shader(const Shader& orig) = delete;
+
+   virtual ~Shader() {}
+
+   bool add_info_from_string(std::istream& is);
+
+   static Shader *translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info, r600_shader *gs_shader,
+                                     r600_shader_key& key, r600_chip_class chip_class);
+
+   bool process(nir_shader *nir);
+
+   bool process_cf_node(nir_cf_node *node);
+   bool process_if(nir_if *node);
+   bool process_loop(nir_loop *node);
+   bool process_block(nir_block *node);
+   bool process_instr(nir_instr *instr);
+   void emit_instruction(PInst instr);
+   bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
+
+   void print(std::ostream& os ) const;
+   void print_header(std::ostream& os ) const;
+
+   bool process_intrinsic(nir_intrinsic_instr *intr);
+
+   /* Input loads and output stores are stage specific */
+   virtual bool load_input(nir_intrinsic_instr *intr) = 0;
+   virtual bool store_output(nir_intrinsic_instr *intr) = 0;
+
+   bool load_uniform(nir_intrinsic_instr *intr);
+   bool load_ubo(nir_intrinsic_instr *intr);
+
+   ValueFactory& value_factory();
+
+   /* Register an output; an existing entry at the same location is overwritten */
+   void add_output(const ShaderOutput& output) {
+      m_outputs[output.location()] = output;
+   }
+
+   /* Register an input; an existing entry at the same location is overwritten */
+   void add_input(const ShaderInput& input) {
+      m_inputs[input.location()] = input;
+   }
+
+   void set_input_gpr(int driver_lcation, int gpr);
+
+   /* Lookup by location; compare the result with input_not_found() */
+   InputIterator find_input(int location) { return m_inputs.find(location);}
+
+   InputIterator input_not_found() {return m_inputs.end();}
+
+   /* Lookup by location; compare the result with output_not_found() */
+   OutputIterator find_output(int location);
+   OutputIterator output_not_found() {return m_outputs.end();}
+
+   ShaderBlocks& func() { return m_root; }
+   void reset_function(ShaderBlocks& new_root);
+
+   void emit_instruction_from_string(const std::string &s);
+
+   void set_info(nir_shader *nir);
+   void get_shader_info(r600_shader *sh_info);
+
+   r600_chip_class chip_class() const {return m_chip_class;}
+   void set_chip_class(r600_chip_class cls) {m_chip_class = cls;}
+
+   void start_new_block(int nesting_depth);
+
+   const ShaderOutput& output(int base) const;
+
+   LiveRangeMap prepare_live_range_map();
+
+   void set_last_txd(Instr *txd){m_last_txd = txd;}
+   Instr *last_txd(){return m_last_txd;}
+
+   // Needed for keeping the memory access in order
+   void chain_scratch_read(Instr *instr);
+   void chain_ssbo_read(Instr *instr);
+
+   /* Default: no stream buffers enabled; derived classes may override */
+   virtual uint32_t enabled_stream_buffers_mask() const {return 0;}
+
+   size_t noutputs() const { return m_outputs.size();}
+   size_t ninputs() const { return m_inputs.size();}
+
+   /* Bit indices into m_flags; sh_flags_count is the number of flags */
+   enum Flags {
+      sh_indirect_const_file,
+      sh_needs_scratch_space,
+      sh_needs_sbo_ret_address,
+      sh_uses_atomics,
+      sh_uses_images,
+      sh_uses_tex_buffer,
+      sh_writes_memory,
+      sh_txs_cube_array_comp,
+      sh_indirect_atomic,
+      sh_mem_barrier,
+      sh_flags_count
+   };
+
+   void set_flag(Flags f) {m_flags.set(f);}
+   bool has_flag(Flags f) const {return m_flags.test(f);}
+
+   int atomic_file_count() const { return m_atomic_file_count; }
+
+   PRegister atomic_update();
+   int remap_atomic_base(int base);
+   auto evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair<int, PRegister>;
+   int ssbo_image_offset() const {return m_ssbo_image_offset;}
+   /* The RAT return address register must have been set before this is called */
+   PRegister rat_return_address() {assert(m_rat_return_address); return m_rat_return_address;}
+
+   PRegister emit_load_to_register(PVirtualValue src);
+
+protected:
+   /* Bit indices into m_sv_values; es_last is the number of slots */
+   enum ESlots {
+      es_face,
+      es_instanceid,
+      es_invocation_id,
+      es_patch_id,
+      es_pos,
+      es_rel_patch_id,
+      es_sample_mask_in,
+      es_sample_id,
+      es_sample_pos,
+      es_tess_factor_base,
+      es_vertexid,
+      es_tess_coord,
+      es_primitive_id,
+      es_helper_invocation,
+      es_last
+   };
+
+   std::bitset<es_last> m_sv_values;
+
+   /* Only derived classes can create a shader */
+   Shader(const char *type_id);
+
+   const ShaderInput& input(int base) const;
+
+   bool emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin = pin_free);
+
+private:
+   virtual bool process_stage_intrinsic(nir_intrinsic_instr *intr) = 0;
+
+   bool allocate_registers_from_string(std::istream& is, Pin pin);
+   bool allocate_arrays_from_string(std::istream& is);
+
+   bool read_chipclass(std::istream& is);
+
+   bool load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr, int offset , int buffer_id);
+
+   bool scan_shader(const nir_function *impl);
+   bool scan_uniforms(nir_variable *uniform);
+   void allocate_reserved_registers();
+
+   void allocate_local_registers(const exec_list *registers);
+
+   virtual int do_allocate_reserved_registers() = 0;
+
+   bool scan_instruction(nir_instr *instr);
+   virtual bool do_scan_instruction(nir_instr *instr) = 0;
+
+   void print_properties(std::ostream& os) const;
+   virtual void do_print_properties(std::ostream& os) const = 0;
+
+   bool read_output(std::istream& is);
+   bool read_input(std::istream& is);
+   virtual bool read_prop(std::istream& is) = 0;
+
+   bool emit_if_start(nir_if *if_stmt);
+   bool emit_control_flow(ControlFlowInstr::CFType type);
+   bool emit_store_scratch(nir_intrinsic_instr *intr);
+   bool emit_load_scratch(nir_intrinsic_instr *intr);
+   bool emit_local_store(nir_intrinsic_instr *intr);
+   bool emit_local_load(nir_intrinsic_instr* instr);
+   bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
+   bool emit_barrier(nir_intrinsic_instr* intr);
+   bool emit_shader_clock(nir_intrinsic_instr* instr);
+   bool emit_wait_ack();
+
+   bool equal_to(const Shader& other) const;
+   void finalize();
+   virtual void do_finalize();
+
+   virtual void do_get_shader_info(r600_shader *sh_info);
+
+   ShaderBlocks m_root;
+   Block::Pointer m_current_block;
+
+   InstrFactory *m_instr_factory;
+   const char *m_type_id;
+
+   IOMap<ShaderOutput> m_outputs;
+   IOMap<ShaderInput> m_inputs;
+   r600_chip_class m_chip_class;
+
+   int m_scratch_size;
+   int m_next_block;
+   bool m_indirect_const_file{false};
+
+   Instr *m_last_txd {nullptr};
+
+   uint32_t m_indirect_files{0};
+   std::bitset<sh_flags_count> m_flags;
+   uint32_t nhwatomic_ranges{0};
+   std::vector<r600_shader_atomic> m_atomics;
+
+   uint32_t m_nhwatomic{0};
+   uint32_t m_atomic_base{0};
+   uint32_t m_next_hwatomic_loc{0};
+   std::unordered_map<int, int> m_atomic_base_map;
+   uint32_t m_atomic_file_count{0};
+   PRegister m_atomic_update{nullptr};
+   PRegister m_rat_return_address{nullptr};
+
+   int32_t m_ssbo_image_offset{0};
+   uint32_t m_nloops{0};
+
+   /* Visitor that ignores most instruction types; only the scratch
+    * write, GDS and RAT visits are implemented out of line.
+    * NOTE(review): presumably used to chain memory instructions so that
+    * they stay in order - confirm against the definitions in the .cpp */
+   class InstructionChain : public InstrVisitor {
+   public:
+      void visit(AluInstr  *instr) override {(void) instr;}
+      void visit(AluGroup *instr) override {(void) instr;}
+      void visit(TexInstr *instr) override {(void) instr;}
+      void visit(ExportInstr *instr) override {(void) instr;}
+      void visit(FetchInstr *instr) override {(void) instr;}
+      void visit(Block *instr) override {(void) instr;}
+      void visit(ControlFlowInstr *instr) override {(void) instr;}
+      void visit(IfInstr *instr) override {(void) instr;}
+      void visit(StreamOutInstr *instr) override {(void) instr;}
+      void visit(MemRingOutInstr *instr) override {(void) instr;}
+      void visit(EmitVertexInstr *instr) override {(void) instr;}
+      void visit(WriteTFInstr *instr) override {(void) instr;}
+      void visit(LDSAtomicInstr *instr) override {(void) instr;}
+      void visit(LDSReadInstr *instr) override {(void) instr;}
+
+      void visit(WriteScratchInstr *instr) override;
+      void visit(GDSInstr *instr) override;
+      void visit(RatInstr *instr) override;
+
+      void apply(Instr *current, Instr **last);
+
+      Shader *this_shader{nullptr};
+      Instr *last_scratch_instr{nullptr};
+      Instr *last_gds_instr{nullptr};
+      Instr *last_ssbo_instr{nullptr};
+      bool prepare_mem_barrier{false};
+   };
+
+   InstructionChain m_chain_instr;
+   std::vector<Instr *> m_loops;
+};
+
+
+/* Maps a varying location to a pair of unsigned values.
+ * NOTE(review): presumably (semantic name, semantic index) - confirm
+ * against the definition, which is not part of this header. */
+std::pair<unsigned, unsigned>
+r600_get_varying_semantic(unsigned varying_location);
+
+}
+
+#endif // SHADER_H
--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.h
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.h
@ -1,231 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_shader_from_nir_h
-#define sfn_shader_from_nir_h
-
-
-#include "gallium/drivers/r600/r600_shader.h"
-
-#include "compiler/nir/nir.h"
-#include "compiler/nir_types.h"
-
-#include "sfn_instruction_block.h"
-#include "sfn_instruction_export.h"
-#include "sfn_alu_defines.h"
-#include "sfn_valuepool.h"
-#include "sfn_debug.h"
-#include "sfn_instruction_cf.h"
-#include "sfn_emittexinstruction.h"
-#include "sfn_emitaluinstruction.h"
-#include "sfn_emitssboinstruction.h"
-
-#include <vector>
-#include <set>
-#include <stack>
-#include <unordered_map>
-
-struct nir_instr;
-
-namespace r600 {
-
-extern SfnLog sfn_log;
-
-class ShaderFromNirProcessor : public ValuePool {
-public:
-   ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel,
-                          r600_shader& sh_info, int scratch_size, enum amd_gfx_level _chip_class,
-                          int atomic_base);
-   virtual ~ShaderFromNirProcessor();
-
-   void emit_instruction(Instruction *ir);
-
-   PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
-   GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
-                                              const GPRVector::Swizzle& swizzle, bool match = false);
-
-   bool emit_instruction(EAluOp opcode, PValue dest,
-                         std::vector<PValue> src0,
-                         const std::set<AluModifiers>& m_flags);
-   void emit_export_instruction(WriteoutInstruction *ir);
-   void emit_instruction(AluInstruction *ir);
-
-   bool use_legacy_math_rules(void) {
-      return m_sel.nir->info.use_legacy_math_rules;
-   };
-
-   void split_constants(nir_alu_instr* instr);
-   void remap_registers();
-
-   const nir_variable *get_deref_location(const nir_src& src) const;
-
-   r600_shader& sh_info() {return m_sh_info;}
-   void add_param_output_reg(int loc, const GPRVector *gpr);
-   void set_output(unsigned pos, int sel);
-   const GPRVector *output_register(unsigned location) const;
-   void evaluate_spi_sid(r600_shader_io &io);
-
-   enum amd_gfx_level get_chip_class() const;
-
-   int remap_atomic_base(int base) {
-      return m_atomic_base_map[base];
-   }
-
-   void get_array_info(r600_shader& shader) const;
-
-   virtual bool scan_inputs_read(const nir_shader *sh);
-   void set_shader_info(const nir_shader *sh);
-
-protected:
-
-   void set_var_address(nir_deref_instr *instr);
-   void set_input(unsigned pos, PValue var);
-
-   bool scan_instruction(nir_instr *instr);
-
-   virtual bool scan_sysvalue_access(nir_instr *instr) = 0;
-
-   bool emit_if_start(int if_id, nir_if *if_stmt);
-   bool emit_else_start(int if_id);
-   bool emit_ifelse_end(int if_id);
-
-   bool emit_loop_start(int loop_id);
-   bool emit_loop_end(int loop_id);
-   bool emit_jump_instruction(nir_jump_instr *instr);
-
-   bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
-   bool emit_load_local_shared(nir_intrinsic_instr* instr);
-   bool emit_store_local_shared(nir_intrinsic_instr* instr);
-   bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
-
-   bool emit_barrier(nir_intrinsic_instr* instr);
-
-   bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
-                             bool as_last = true);
-
-   void inc_atomic_file_count();
-
-   virtual void do_set_shader_info(const nir_shader *sh);
-
-   enum ESlots {
-      es_face,
-      es_instanceid,
-      es_invocation_id,
-      es_patch_id,
-      es_pos,
-      es_rel_patch_id,
-      es_sample_mask_in,
-      es_sample_id,
-      es_sample_pos,
-      es_tess_factor_base,
-      es_vertexid,
-      es_tess_coord,
-      es_primitive_id,
-      es_helper_invocation,
-      es_last
-   };
-
-   std::bitset<es_last> m_sv_values;
-
-   bool allocate_reserved_registers();
-
-
-private:
-   virtual bool do_allocate_reserved_registers() = 0;
-
-
-   void emit_instruction_internal(Instruction *ir);
-
-   bool emit_alu_instruction(nir_instr *instr);
-   bool emit_deref_instruction(nir_deref_instr* instr);
-   bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
-   virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr);
-   bool emit_tex_instruction(nir_instr* instr);
-   bool emit_discard_if(nir_intrinsic_instr* instr);
-   bool emit_load_ubo_vec4(nir_intrinsic_instr* instr);
-   bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr);
-   bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufid);
-
-   /* Code creating functions */
-   bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr);
-   AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask);
-
-   bool load_uniform(nir_intrinsic_instr* instr);
-   bool process_uniforms(nir_variable *uniform);
-
-   void append_block(int nesting_change);
-
-   virtual void emit_shader_start();
-   virtual bool emit_deref_instruction_override(nir_deref_instr* instr);
-
-   bool emit_store_scratch(nir_intrinsic_instr* instr);
-   bool emit_load_scratch(nir_intrinsic_instr* instr);
-   bool emit_shader_clock(nir_intrinsic_instr* instr);
-   virtual void do_finalize() = 0;
-
-   void finalize();
-   friend class ShaderFromNir;
-
-   std::set<nir_variable*> m_arrays;
-
-   std::map<unsigned, PValue> m_inputs;
-   std::map<unsigned, int> m_outputs;
-
-   std::map<unsigned, nir_variable*> m_var_derefs;
-   std::map<const nir_variable *, nir_variable_mode> m_var_mode;
-
-   std::map<unsigned, const glsl_type*>  m_uniform_type_map;
-   std::map<int, IfElseInstruction *> m_if_block_start_map;
-   std::map<int, LoopBeginInstruction *> m_loop_begin_block_map;
-
-   pipe_shader_type m_processor_type;
-
-   std::vector<InstructionBlock> m_output;
-   unsigned m_nesting_depth;
-   unsigned m_block_number;
-   InstructionBlock m_export_output;
-   r600_shader& m_sh_info;
-   enum amd_gfx_level m_chip_class;
-   EmitTexInstruction m_tex_instr;
-   EmitAluInstruction m_alu_instr;
-   EmitSSBOInstruction m_ssbo_instr;
-   OutputRegisterMap m_output_register_map;
-
-   IfElseInstruction *m_pending_else;
-   int m_scratch_size;
-   int m_next_hwatomic_loc;
-
-   r600_pipe_shader_selector& m_sel;
-   int m_atomic_base ;
-   int m_image_count;
-
-   std::unordered_map<int, int> m_atomic_base_map;
-   AluInstruction *last_emitted_alu;
-};
-
-}
-
-#endif
--- a/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
@ -1,112 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_shader_compute.h"
-#include "sfn_instruction_fetch.h"
-
-namespace r600 {
-
-ComputeShaderFromNir::ComputeShaderFromNir(r600_pipe_shader *sh,
-                                           r600_pipe_shader_selector& sel,
-                                           UNUSED const r600_shader_key& key,
-                                           enum amd_gfx_level gfx_level):
-     ShaderFromNirProcessor (PIPE_SHADER_COMPUTE, sel, sh->shader,
-                             sh->scratch_space_needed, gfx_level, 0),
-     m_reserved_registers(0)
-{
-}
-
-bool ComputeShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
-{
-   return true;
-}
-bool ComputeShaderFromNir::do_allocate_reserved_registers()
-{
-   int thread_id_sel = m_reserved_registers++;
-   int wg_id_sel = m_reserved_registers++;
-
-   for (int i = 0; i < 3; ++i) {
-      auto tmp = new GPRValue(thread_id_sel, i);
-      tmp->set_as_input();
-      tmp->set_keep_alive();
-      m_local_invocation_id[i] = PValue(tmp);
-      inject_register(tmp->sel(), i, m_local_invocation_id[i], false);
-
-      tmp = new GPRValue(wg_id_sel, i);
-      tmp->set_as_input();
-      tmp->set_keep_alive();
-      m_workgroup_id[i] = PValue(tmp);
-      inject_register(tmp->sel(), i, m_workgroup_id[i], false);
-   }
-   return true;
-}
-
-bool ComputeShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_local_invocation_id:
-      return emit_load_3vec(instr, m_local_invocation_id);
-   case nir_intrinsic_load_workgroup_id:
-      return emit_load_3vec(instr, m_workgroup_id);
-   case nir_intrinsic_load_num_workgroups:
-      return emit_load_num_workgroups(instr);
-   default:
-      return false;
-   }
-}
-
-bool ComputeShaderFromNir::emit_load_3vec(nir_intrinsic_instr* instr,
-                                          const std::array<PValue,3>& src)
-{
-   for (int i = 0; i < 3; ++i)
-      load_preloaded_value(instr->dest, i, src[i], i == 2);
-   return true;
-}
-
-bool ComputeShaderFromNir::emit_load_num_workgroups(nir_intrinsic_instr* instr)
-{
-   PValue a_zero = get_temp_register(1);
-   emit_instruction(new AluInstruction(op1_mov, a_zero, Value::zero, EmitInstruction::last_write));
-   GPRVector dest;
-   for (int i = 0; i < 3; ++i)
-      dest.set_reg_i(i, from_nir(instr->dest, i));
-   dest.set_reg_i(3, from_nir(instr->dest, 7));
-
-   auto ir = new FetchInstruction(vc_fetch, no_index_offset,
-                                  fmt_32_32_32_32, vtx_nf_int, vtx_es_none, a_zero, dest, 16,
-                                  false, 16, R600_BUFFER_INFO_CONST_BUFFER, 0,
-                                  bim_none, false, false, 0, 0, 0, PValue(), {0,1,2,7});
-   ir->set_flag(vtx_srf_mode);
-   emit_instruction(ir);
-   return true;
-}
-
-void ComputeShaderFromNir::do_finalize()
-{
-
-}
-
-}
--- a/src/gallium/drivers/r600/sfn/sfn_shader_compute.h
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_compute.h
@ -1,62 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_COMPUTE_SHADER_FROM_NIR_H
-#define SFN_COMPUTE_SHADER_FROM_NIR_H
-
-#include "sfn_shader_base.h"
-#include "sfn_shaderio.h"
-#include <bitset>
-
-namespace r600 {
-
-class ComputeShaderFromNir : public ShaderFromNirProcessor
-{
-public:
-   ComputeShaderFromNir(r600_pipe_shader *sh,
-                        r600_pipe_shader_selector& sel,
-                        const r600_shader_key &key,
-                        enum amd_gfx_level gfx_level);
-
-   bool scan_sysvalue_access(nir_instr *instr) override;
-
-private:
-   bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-
-   bool do_allocate_reserved_registers() override;
-   void do_finalize() override;
-
-   bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PValue,3>& src);
-   bool emit_load_num_workgroups(nir_intrinsic_instr* instr);
-
-   int m_reserved_registers;
-   std::array<PValue,3> m_workgroup_id;
-   std::array<PValue,3> m_local_invocation_id;
-};
-
-}
-
-#endif // SFN_COMPUTE_SHADER_FROM_NIR_H
--- a/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp
@ -0,0 +1,95 @@
+#include "sfn_shader_cs.h"
+#include "sfn_instr_fetch.h"
+
+
+namespace r600 {
+
+/* Constructs the compute-stage shader object. The base class is initialised
+ * with the string "CS"; the shader key is accepted but not used. */
+ComputeShader::ComputeShader(UNUSED const r600_shader_key& key)
+   : Shader("CS")
+{
+}
+
+/* Stage-specific instruction scan hook: the instruction is not inspected and
+ * false is returned unconditionally. */
+bool ComputeShader::do_scan_instruction(UNUSED nir_instr *instr)
+{
+   return false;
+}
+
+/* Allocates the pinned registers holding the invocation and workgroup IDs.
+ *
+ * Channels 0..2 of register selector 0 are stored in m_local_invocation_id
+ * and channels 0..2 of register selector 1 in m_workgroup_id; every one of
+ * them gets pin_live_range(true) applied right after allocation.
+ *
+ * Returns 2 -- presumably the number of registers reserved here; confirm
+ * against the caller in the Shader base class. */
+int ComputeShader::do_allocate_reserved_registers()
+{
+   constexpr int local_id_gpr = 0;
+   constexpr int workgroup_id_gpr = 1;
+   constexpr int reserved_gpr_count = 2;
+   constexpr int id_components = 3;
+
+   auto& factory = value_factory();
+
+   for (int chan = 0; chan != id_components; ++chan) {
+      PRegister local_id = factory.allocate_pinned_register(local_id_gpr, chan);
+      m_local_invocation_id[chan] = local_id;
+      local_id->pin_live_range(true);
+
+      PRegister group_id = factory.allocate_pinned_register(workgroup_id_gpr, chan);
+      m_workgroup_id[chan] = group_id;
+      group_id->pin_live_range(true);
+   }
+
+   return reserved_gpr_count;
+}
+
+/* Dispatches the intrinsics handled by the compute stage itself.
+ *
+ * load_local_invocation_id and load_workgroup_id are served from the
+ * registers kept in m_local_invocation_id / m_workgroup_id, while
+ * load_num_workgroups goes through emit_load_num_workgroups().
+ *
+ * Returns the result of the selected emitter, or false for any other
+ * intrinsic. */
+bool ComputeShader::process_stage_intrinsic(nir_intrinsic_instr *instr)
+{
+   const auto op = instr->intrinsic;
+
+   if (op == nir_intrinsic_load_local_invocation_id)
+      return emit_load_3vec(instr, m_local_invocation_id);
+
+   if (op == nir_intrinsic_load_workgroup_id)
+      return emit_load_3vec(instr, m_workgroup_id);
+
+   if (op == nir_intrinsic_load_num_workgroups)
+      return emit_load_num_workgroups(instr);
+
+   return false;
+}
+
+/* Stage-specific part of filling in the shader info: sets processor_type to
+ * PIPE_SHADER_COMPUTE. No other field of sh_info is written here. */
+void ComputeShader::do_get_shader_info(r600_shader *sh_info)
+{
+   sh_info->processor_type = PIPE_SHADER_COMPUTE;
+}
+
+/* Property-reading hook: nothing is read from the stream and true is
+ * returned unconditionally. */
+bool ComputeShader::read_prop(UNUSED std::istream& is)
+{
+   return true;
+}
+
+/* Property-printing hook: intentionally empty, nothing is written to the
+ * stream. */
+void ComputeShader::do_print_properties(UNUSED std::ostream& os) const
+{
+
+}
+
+/* Emits the code for nir_intrinsic_load_num_workgroups.
+ *
+ * A temporary register is first written with a MOV from the inline constant
+ * ALU_SRC_0. That register is then handed to a LoadFromBuffer on
+ * R600_BUFFER_INFO_CONST_BUFFER using fmt_32_32_32_32 and the destination
+ * swizzle {0,1,2,7}; the load has srf_mode set, format_comp_signed cleared
+ * and its number format switched to vtx_nf_int.
+ *
+ * NOTE(review): the temporary and the literal 16 are presumably the address
+ * register and the offset of the fetch, and swizzle value 7 presumably marks
+ * an unused component -- confirm against the LoadFromBuffer constructor.
+ *
+ * Always returns true. */
+bool ComputeShader::emit_load_num_workgroups(nir_intrinsic_instr* instr)
+{
+   auto& vf = value_factory();
+
+   auto addr = vf.temp_register();
+   auto clear_addr = new AluInstr(op1_mov, addr, vf.inline_const(ALU_SRC_0, 0),
+                                  AluInstr::last_write);
+   emit_instruction(clear_addr);
+
+   auto result = vf.dest_vec4(instr->dest, pin_group);
+
+   auto fetch = new LoadFromBuffer(result, {0,1,2,7}, addr, 16,
+                                   R600_BUFFER_INFO_CONST_BUFFER,
+                                   nullptr, fmt_32_32_32_32);
+   fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
+   fetch->reset_fetch_flag(LoadFromBuffer::format_comp_signed);
+   fetch->set_num_format(vtx_nf_int);
+   emit_instruction(fetch);
+
+   return true;
+}
+
+/* Copies a three-component preloaded value into the destination of instr.
+ *
+ * One MOV is emitted per component, from src[i] into component i of
+ * instr->dest (requested with pin_none). The first two MOVs carry
+ * AluInstr::write, the third one AluInstr::last_write.
+ *
+ * Always returns true. */
+bool ComputeShader::emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PRegister,3>& src)
+{
+   constexpr int last_chan = 2;
+
+   auto& factory = value_factory();
+
+   for (int chan = 0; chan <= last_chan; ++chan) {
+      const auto& flags = (chan == last_chan) ? AluInstr::last_write : AluInstr::write;
+      auto target = factory.dest(instr->dest, chan, pin_none);
+      emit_instruction(new AluInstr(op1_mov, target, src[chan], flags));
+   }
+
+   return true;
+}
+
+}
--- a/src/gallium/drivers/r600/sfn/sfn_shader_cs.h
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_cs.h
@ -0,0 +1,39 @@
+#ifndef COMPUTE_H
+#define COMPUTE_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+/* Shader implementation for the compute stage.
+ *
+ * It keeps the registers holding the local invocation ID and the workgroup
+ * ID and handles the compute-only intrinsics load_local_invocation_id,
+ * load_workgroup_id and load_num_workgroups. */
+class ComputeShader : public Shader
+{
+public:
+   /* The key is accepted for interface symmetry but not used. */
+   ComputeShader(const r600_shader_key& key);
+
+private:
+   /* Always returns false; no instruction is inspected during the scan. */
+   bool do_scan_instruction(nir_instr *instr) override;
+
+   /* Allocates the pinned ID registers (selectors 0 and 1) and returns 2. */
+   int do_allocate_reserved_registers() override;
+
+   /* Returns false for every intrinsic not handled by this stage. */
+   bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
+
+   /* Sets sh_info->processor_type to PIPE_SHADER_COMPUTE. */
+   void do_get_shader_info(r600_shader *sh_info) override;
+
+   /* Marked unreachable: this stage does not handle load_input. */
+   bool load_input(UNUSED nir_intrinsic_instr *intr) override {
+      unreachable("compute shaders have no inputs");
+   }
+
+   /* Marked unreachable: this stage does not handle store_output. */
+   bool store_output(UNUSED nir_intrinsic_instr *intr) override {
+      unreachable("compute shaders have no outputs");
+   }
+
+   /* No stage-specific properties: read_prop() consumes nothing and returns
+    * true, do_print_properties() prints nothing. */
+   bool read_prop(std::istream& is) override;
+   void do_print_properties(std::ostream& os) const override;
+
+   bool emit_load_num_workgroups(nir_intrinsic_instr* instr);
+   bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PRegister,3>& src);
+
+   /* One register per component; filled in by
+    * do_allocate_reserved_registers(). */
+   std::array<PRegister,3> m_workgroup_id{nullptr};
+   std::array<PRegister,3> m_local_invocation_id{nullptr};
+};
+
+}
+
+#endif // COMPUTE_H
--- a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp
--- a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h
@ -1,117 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_fragment_shader_from_nir_h
-#define sfn_fragment_shader_from_nir_h
-
-#include "sfn_shader_base.h"
-#include "sfn_shaderio.h"
-#include <bitset>
-
-namespace r600 {
-
-class FragmentShaderFromNir : public ShaderFromNirProcessor {
-public:
-   FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info,
-                         r600_pipe_shader_selector &sel, const r600_shader_key &key,
-                         enum amd_gfx_level gfx_level);
-   bool scan_sysvalue_access(nir_instr *instr) override;
-private:
-
-   struct Interpolator {
-      bool enabled;
-      unsigned ij_index;
-      PValue i;
-      PValue j;
-   };
-
-   void emit_shader_start() override;
-   bool do_allocate_reserved_registers() override;
-   bool process_store_output(nir_intrinsic_instr *instr);
-
-   bool emit_store_output(nir_intrinsic_instr* instr);
-
-   bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs);
-   bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs);
-   bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip,
-                          int num_components, int start_comp);
-   bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op);
-   bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip,EAluOp op, int writemask);
-   bool load_interpolated_two_comp_for_one(GPRVector &dest,
-                                           ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp);
-
-   bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-   void do_finalize() override;
-
-   void load_front_face();
-
-   bool emit_load_input(nir_intrinsic_instr* instr);
-   bool emit_load_front_face(nir_intrinsic_instr* instr);
-   bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
-   bool emit_load_sample_pos(nir_intrinsic_instr* instr);
-   bool emit_load_sample_id(nir_intrinsic_instr* instr);
-
-   bool process_load_input(nir_intrinsic_instr *instr, bool interpolated);
-   bool emit_load_interpolated_input(nir_intrinsic_instr* instr);
-   bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
-   bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
-
-
-   unsigned m_max_color_exports;
-   unsigned m_max_counted_color_exports;
-   bool m_two_sided_color;
-   ExportInstruction *m_last_pixel_export;
-   const nir_shader& m_nir;
-
-
-   std::array<Interpolator, 6> m_interpolator;
-   unsigned m_reserved_registers;
-   unsigned m_frag_pos_index;
-   PGPRValue m_front_face_reg;
-   PGPRValue m_sample_mask_reg;
-   PGPRValue m_sample_id_reg;
-   PGPRValue m_helper_invocation;
-   GPRVector m_frag_pos;
-   bool m_need_back_color;
-   bool m_front_face_loaded;
-   ShaderIO m_shaderio;
-   unsigned m_depth_exports;
-
-   std::map<unsigned, PValue> m_input_cache;
-
-   static const int s_max_interpolators = 6;
-
-   std::bitset<s_max_interpolators> m_interpolators_used;
-
-   unsigned m_apply_sample_mask;
-   bool m_dual_source_blend;
-   ShaderInput *m_pos_input;
-
-};
-	
-}
-
-#endif
--- a/Show More
+++ b/Show More