Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2024-11-23 02:04:41 +08:00)
aco: reformat according to its .clang-format
Signed-off-by: Eric Engestrom <eric@igalia.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23253>
This commit is contained in:
parent 8b319c6db8
commit 6b21653ab4
@ -52,7 +52,7 @@ struct asm_context {
|
||||
// TODO: keep track of branch instructions referring blocks
|
||||
// and, when emitting the block, correct the offset in instr
|
||||
asm_context(Program* program_, std::vector<struct aco_symbol>* symbols_)
|
||||
: program(program_), gfx_level(program->gfx_level), symbols(symbols_)
|
||||
: program(program_), gfx_level(program->gfx_level), symbols(symbols_)
|
||||
{
|
||||
if (gfx_level <= GFX7)
|
||||
opcode = &instr_info.opcode_gfx7[0];
|
||||
@ -1160,8 +1160,7 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
|
||||
/* create the s_setpc_b64 to jump */
|
||||
instr.reset(
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
|
||||
instr.reset(bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
}
|
||||
|
||||
@ -1218,8 +1217,7 @@ fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
|
||||
}
|
||||
|
||||
unsigned
|
||||
emit_program(Program* program, std::vector<uint32_t>& code,
|
||||
std::vector<struct aco_symbol>* symbols)
|
||||
emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct aco_symbol>* symbols)
|
||||
{
|
||||
asm_context ctx(program, symbols);
|
||||
|
||||
@ -1252,8 +1250,8 @@ emit_program(Program* program, std::vector<uint32_t>& code,
|
||||
code.insert(code.end(), (uint32_t*)program->constant_data.data(),
|
||||
(uint32_t*)(program->constant_data.data() + program->constant_data.size()));
|
||||
|
||||
program->config->scratch_bytes_per_wave = align(
|
||||
program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
|
||||
program->config->scratch_bytes_per_wave =
|
||||
align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
|
||||
|
||||
return exec_size;
|
||||
}
|
||||
|
@ -254,8 +254,7 @@ public:
|
||||
void join_min(const VGPRCounterMap& other)
|
||||
{
|
||||
unsigned i;
|
||||
BITSET_FOREACH_SET(i, other.resident, 256)
|
||||
{
|
||||
BITSET_FOREACH_SET (i, other.resident, 256) {
|
||||
if (BITSET_TEST(resident, i))
|
||||
val[i] = MIN2(val[i] + base, other.val[i] + other.base) - base;
|
||||
else
|
||||
@ -270,8 +269,7 @@ public:
|
||||
return false;
|
||||
|
||||
unsigned i;
|
||||
BITSET_FOREACH_SET(i, other.resident, 256)
|
||||
{
|
||||
BITSET_FOREACH_SET (i, other.resident, 256) {
|
||||
if (!BITSET_TEST(resident, i))
|
||||
return false;
|
||||
if (val[i] + base != other.val[i] + other.base)
|
||||
@ -365,11 +363,11 @@ search_backwards_internal(State& state, GlobalState& global_state, BlockState bl
|
||||
return;
|
||||
}
|
||||
|
||||
PRAGMA_DIAGNOSTIC_PUSH
|
||||
PRAGMA_DIAGNOSTIC_IGNORED(-Waddress)
|
||||
PRAGMA_DIAGNOSTIC_PUSH
|
||||
PRAGMA_DIAGNOSTIC_IGNORED(-Waddress)
|
||||
if (block_cb != nullptr && !block_cb(global_state, block_state, block))
|
||||
return;
|
||||
PRAGMA_DIAGNOSTIC_POP
|
||||
PRAGMA_DIAGNOSTIC_POP
|
||||
|
||||
for (unsigned lin_pred : block->linear_preds) {
|
||||
search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>(
|
||||
|
@ -52,8 +52,7 @@ struct wqm_ctx {
|
||||
/* state for WQM propagation */
|
||||
std::set<unsigned> worklist;
|
||||
std::vector<bool> branch_wqm; /* true if the branch condition in this block should be in wqm */
|
||||
wqm_ctx(Program* program_)
|
||||
: program(program_), branch_wqm(program->blocks.size())
|
||||
wqm_ctx(Program* program_) : program(program_), branch_wqm(program->blocks.size())
|
||||
{
|
||||
for (unsigned i = 0; i < program->blocks.size(); i++)
|
||||
worklist.insert(i);
|
||||
@ -137,8 +136,7 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
|
||||
propagate_wqm = true;
|
||||
|
||||
bool pred_by_exec = needs_exec_mask(instr.get()) ||
|
||||
instr->opcode == aco_opcode::p_logical_end ||
|
||||
instr->isBranch();
|
||||
instr->opcode == aco_opcode::p_logical_end || instr->isBranch();
|
||||
|
||||
if (needs_exact(instr))
|
||||
instr_needs[i] = Exact;
|
||||
@ -574,7 +572,8 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
|
||||
* WQM again.
|
||||
*/
|
||||
ctx.info[block->index].exec.resize(1);
|
||||
assert(ctx.info[block->index].exec[0].second == (mask_type_exact | mask_type_global));
|
||||
assert(ctx.info[block->index].exec[0].second ==
|
||||
(mask_type_exact | mask_type_global));
|
||||
current_exec = get_exec_op(ctx.info[block->index].exec.back().first);
|
||||
ctx.info[block->index].exec[0].first = Operand(bld.lm);
|
||||
}
|
||||
|
@ -91,9 +91,8 @@ enum vmem_type : uint8_t {
|
||||
vmem_bvh = 1 << 2,
|
||||
};
|
||||
|
||||
static const uint16_t exp_events =
|
||||
event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock |
|
||||
event_ldsdir;
|
||||
static const uint16_t exp_events = event_exp_pos | event_exp_param | event_exp_mrt_null |
|
||||
event_gds_gpr_lock | event_vmem_gpr_lock | event_ldsdir;
|
||||
static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg;
|
||||
static const uint16_t vm_events = event_vmem | event_flat;
|
||||
static const uint16_t vs_events = event_vmem_store;
|
||||
@ -580,7 +579,8 @@ kill(wait_imm& imm, alu_delay_info& delay, Instruction* instr, wait_ctx& ctx,
|
||||
}
|
||||
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
update_alu(ctx, false, false, false, MAX3(delay.salu_cycles, delay.valu_cycles, delay.trans_cycles));
|
||||
update_alu(ctx, false, false, false,
|
||||
MAX3(delay.salu_cycles, delay.valu_cycles, delay.trans_cycles));
|
||||
}
|
||||
|
||||
/* remove all gprs with higher counter from map */
|
||||
@ -775,8 +775,7 @@ insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_
|
||||
*/
|
||||
uint32_t ds_vmem_events = event_lds | event_gds | event_vmem | event_flat;
|
||||
uint32_t alu_events = event_trans | event_valu | event_salu;
|
||||
bool force_linear =
|
||||
ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
|
||||
bool force_linear = ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
|
||||
|
||||
insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles,
|
||||
force_linear);
|
||||
|
@ -26,8 +26,8 @@
|
||||
#include "aco_instruction_selection.h"
|
||||
|
||||
#include "aco_builder.h"
|
||||
#include "aco_ir.h"
|
||||
#include "aco_interface.h"
|
||||
#include "aco_ir.h"
|
||||
|
||||
#include "common/ac_nir.h"
|
||||
#include "common/sid.h"
|
||||
@ -661,8 +661,8 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
|
||||
Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
|
||||
} else {
|
||||
assert(src_bits < 32);
|
||||
bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), Operand::c32(src_bits),
|
||||
Operand::c32((unsigned)sign_extend));
|
||||
bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(),
|
||||
Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
|
||||
}
|
||||
|
||||
if (dst_bits == 64) {
|
||||
@ -1894,8 +1894,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
}
|
||||
case nir_op_uadd_sat: {
|
||||
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
|
||||
Instruction* add_instr =
|
||||
emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
|
||||
Instruction* add_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
|
||||
add_instr->valu().clamp = 1;
|
||||
break;
|
||||
}
|
||||
@ -1977,8 +1976,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
}
|
||||
case nir_op_iadd_sat: {
|
||||
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
|
||||
Instruction* add_instr =
|
||||
emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
|
||||
Instruction* add_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
|
||||
add_instr->valu().clamp = 1;
|
||||
break;
|
||||
}
|
||||
@ -3316,8 +3314,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
exponent_large);
|
||||
Temp cond =
|
||||
bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand::c32(64u), exponent);
|
||||
mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa,
|
||||
Operand::c64(~0llu), cond);
|
||||
mantissa =
|
||||
bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa, Operand::c64(~0llu), cond);
|
||||
Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa);
|
||||
Temp cond_small =
|
||||
@ -3483,9 +3481,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
case nir_op_unpack_64_4x16:
|
||||
case nir_op_unpack_32_4x8:
|
||||
bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
|
||||
emit_split_vector(ctx, dst,
|
||||
instr->op == nir_op_unpack_32_4x8 ||
|
||||
instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
|
||||
emit_split_vector(
|
||||
ctx, dst, instr->op == nir_op_unpack_32_4x8 || instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
|
||||
break;
|
||||
case nir_op_pack_64_2x32_split: {
|
||||
Temp src0 = get_alu_src(ctx, instr->src[0]);
|
||||
@ -4029,7 +4026,7 @@ struct LoadEmitInfo {
|
||||
unsigned num_components;
|
||||
unsigned component_size;
|
||||
Temp resource = Temp(0, s1); /* buffer resource or base 64-bit address */
|
||||
Temp idx = Temp(0, v1); /* buffer index */
|
||||
Temp idx = Temp(0, v1); /* buffer index */
|
||||
unsigned component_stride = 0;
|
||||
unsigned const_offset = 0;
|
||||
unsigned align_mul = 0;
|
||||
@ -4176,9 +4173,10 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
||||
aligned_offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), lo, hi);
|
||||
}
|
||||
}
|
||||
Temp aligned_offset_tmp =
|
||||
aligned_offset.isTemp() ? aligned_offset.getTemp() :
|
||||
aligned_offset.isConstant() ? bld.copy(bld.def(s1), aligned_offset) : Temp(0, s1);
|
||||
Temp aligned_offset_tmp = aligned_offset.isTemp() ? aligned_offset.getTemp()
|
||||
: aligned_offset.isConstant()
|
||||
? bld.copy(bld.def(s1), aligned_offset)
|
||||
: Temp(0, s1);
|
||||
|
||||
Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align,
|
||||
reduced_const_offset, byte_align ? Temp() : info.dst);
|
||||
@ -4508,8 +4506,7 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
|
||||
mubuf->offen = offen;
|
||||
mubuf->idxen = idxen;
|
||||
mubuf->glc = info.glc;
|
||||
mubuf->dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->slc = info.slc;
|
||||
mubuf->sync = info.sync;
|
||||
mubuf->offset = const_offset;
|
||||
@ -4552,40 +4549,20 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
|
||||
aco_opcode op = aco_opcode::num_opcodes;
|
||||
if (info.component_size == 2) {
|
||||
switch (bytes_needed) {
|
||||
case 2:
|
||||
op = aco_opcode::buffer_load_format_d16_x;
|
||||
break;
|
||||
case 4:
|
||||
op = aco_opcode::buffer_load_format_d16_xy;
|
||||
break;
|
||||
case 6:
|
||||
op = aco_opcode::buffer_load_format_d16_xyz;
|
||||
break;
|
||||
case 8:
|
||||
op = aco_opcode::buffer_load_format_d16_xyzw;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid buffer load format size");
|
||||
break;
|
||||
case 2: op = aco_opcode::buffer_load_format_d16_x; break;
|
||||
case 4: op = aco_opcode::buffer_load_format_d16_xy; break;
|
||||
case 6: op = aco_opcode::buffer_load_format_d16_xyz; break;
|
||||
case 8: op = aco_opcode::buffer_load_format_d16_xyzw; break;
|
||||
default: unreachable("invalid buffer load format size"); break;
|
||||
}
|
||||
} else {
|
||||
assert(info.component_size == 4);
|
||||
switch (bytes_needed) {
|
||||
case 4:
|
||||
op = aco_opcode::buffer_load_format_x;
|
||||
break;
|
||||
case 8:
|
||||
op = aco_opcode::buffer_load_format_xy;
|
||||
break;
|
||||
case 12:
|
||||
op = aco_opcode::buffer_load_format_xyz;
|
||||
break;
|
||||
case 16:
|
||||
op = aco_opcode::buffer_load_format_xyzw;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid buffer load format size");
|
||||
break;
|
||||
case 4: op = aco_opcode::buffer_load_format_x; break;
|
||||
case 8: op = aco_opcode::buffer_load_format_xy; break;
|
||||
case 12: op = aco_opcode::buffer_load_format_xyz; break;
|
||||
case 16: op = aco_opcode::buffer_load_format_xyzw; break;
|
||||
default: unreachable("invalid buffer load format size"); break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4596,8 +4573,7 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
|
||||
mubuf->offen = offen;
|
||||
mubuf->idxen = idxen;
|
||||
mubuf->glc = info.glc;
|
||||
mubuf->dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->slc = info.slc;
|
||||
mubuf->sync = info.sync;
|
||||
mubuf->offset = const_offset;
|
||||
@ -5229,9 +5205,9 @@ resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_off
|
||||
}
|
||||
|
||||
void
|
||||
emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp idx, Temp vdata,
|
||||
unsigned const_offset, memory_sync_info sync, bool glc, bool slc,
|
||||
bool swizzled)
|
||||
emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp idx,
|
||||
Temp vdata, unsigned const_offset, memory_sync_info sync, bool glc,
|
||||
bool slc, bool swizzled)
|
||||
{
|
||||
assert(vdata.id());
|
||||
assert(vdata.size() != 3 || ctx->program->gfx_level != GFX6);
|
||||
@ -5256,8 +5232,8 @@ emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp s
|
||||
vaddr_op = Operand(idx);
|
||||
|
||||
Builder::Result r =
|
||||
bld.mubuf(op, Operand(descriptor), vaddr_op, soffset_op, Operand(vdata), const_offset,
|
||||
offen, swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc,
|
||||
bld.mubuf(op, Operand(descriptor), vaddr_op, soffset_op, Operand(vdata), const_offset, offen,
|
||||
swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc,
|
||||
/* dlc*/ false, slc);
|
||||
|
||||
r->mubuf().sync = sync;
|
||||
@ -5269,7 +5245,8 @@ store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Tem
|
||||
bool swizzled, memory_sync_info sync, bool glc, bool slc)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
assert(elem_size_bytes == 1 || elem_size_bytes == 2 || elem_size_bytes == 4 || elem_size_bytes == 8);
|
||||
assert(elem_size_bytes == 1 || elem_size_bytes == 2 || elem_size_bytes == 4 ||
|
||||
elem_size_bytes == 8);
|
||||
assert(write_mask);
|
||||
write_mask = util_widen_mask(write_mask, elem_size_bytes);
|
||||
|
||||
@ -5282,8 +5259,8 @@ store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Tem
|
||||
|
||||
for (unsigned i = 0; i < write_count; i++) {
|
||||
unsigned const_offset = offsets[i] + base_const_offset;
|
||||
emit_single_mubuf_store(ctx, descriptor, voffset, soffset, idx, write_datas[i], const_offset, sync,
|
||||
glc, slc, swizzled);
|
||||
emit_single_mubuf_store(ctx, descriptor, voffset, soffset, idx, write_datas[i], const_offset,
|
||||
sync, glc, slc, swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5387,7 +5364,7 @@ visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
/* LS pass output to TCS by temp if they have same in/out patch size. */
|
||||
bool ls_need_output = ctx->stage == vertex_tess_control_hs &&
|
||||
ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->tcs_in_out_eq;
|
||||
ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->tcs_in_out_eq;
|
||||
|
||||
bool ps_need_output = ctx->stage == fragment_fs;
|
||||
|
||||
@ -6331,8 +6308,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
|
||||
opcode = aco_opcode::image_load;
|
||||
} else {
|
||||
bool level_zero =
|
||||
nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
|
||||
bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
|
||||
opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip;
|
||||
}
|
||||
|
||||
@ -6391,8 +6367,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
|
||||
unsigned access = nir_intrinsic_access(instr);
|
||||
bool glc = ctx->options->gfx_level == GFX6 ||
|
||||
((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
|
||||
ctx->program->gfx_level < GFX11);
|
||||
((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) && ctx->program->gfx_level < GFX11);
|
||||
|
||||
if (dim == GLSL_SAMPLER_DIM_BUF) {
|
||||
Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
|
||||
@ -6463,7 +6438,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)};
|
||||
uint32_t index = 0;
|
||||
u_foreach_bit(bit, dmask) {
|
||||
u_foreach_bit (bit, dmask) {
|
||||
vec->operands[index++] = Operand(emit_extract_vector(ctx, data, bit, rc));
|
||||
}
|
||||
data = bld.tmp(RegClass::get(RegType::vgpr, dmask_count * rc.bytes()));
|
||||
@ -6491,9 +6466,8 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
}
|
||||
|
||||
void
|
||||
translate_buffer_image_atomic_op(const nir_atomic_op op,
|
||||
aco_opcode *buf_op, aco_opcode *buf_op64,
|
||||
aco_opcode *image_op)
|
||||
translate_buffer_image_atomic_op(const nir_atomic_op op, aco_opcode* buf_op, aco_opcode* buf_op64,
|
||||
aco_opcode* image_op)
|
||||
{
|
||||
switch (op) {
|
||||
case nir_atomic_op_iadd:
|
||||
@ -6571,8 +6545,7 @@ translate_buffer_image_atomic_op(const nir_atomic_op op,
|
||||
*buf_op64 = aco_opcode::buffer_atomic_fmax_x2;
|
||||
*image_op = aco_opcode::image_atomic_fmax;
|
||||
break;
|
||||
default:
|
||||
unreachable("unsupported atomic operation");
|
||||
default: unreachable("unsupported atomic operation");
|
||||
}
|
||||
}
|
||||
|
||||
@ -6682,9 +6655,8 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
|
||||
|
||||
memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
|
||||
bool glc =
|
||||
(nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
|
||||
ctx->program->gfx_level < GFX11;
|
||||
bool glc = (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
|
||||
ctx->program->gfx_level < GFX11;
|
||||
|
||||
unsigned write_count = 0;
|
||||
Temp write_datas[32];
|
||||
@ -6805,7 +6777,7 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
/* Don't expand global loads when they use MUBUF or SMEM.
|
||||
* Global loads don't have the bounds checking that buffer loads have that
|
||||
* makes this safe.
|
||||
*/
|
||||
*/
|
||||
unsigned align = nir_intrinsic_align(instr);
|
||||
bool byte_align_for_smem_mubuf =
|
||||
can_use_byte_align_for_global_load(num_components, component_size, align, false);
|
||||
@ -6836,9 +6808,8 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
|
||||
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
|
||||
memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
|
||||
bool glc =
|
||||
(nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
|
||||
ctx->program->gfx_level < GFX11;
|
||||
bool glc = (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
|
||||
ctx->program->gfx_level < GFX11;
|
||||
|
||||
unsigned write_count = 0;
|
||||
Temp write_datas[32];
|
||||
@ -6999,8 +6970,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
op32 = global ? aco_opcode::global_atomic_fmax : aco_opcode::flat_atomic_fmax;
|
||||
op64 = global ? aco_opcode::global_atomic_fmax_x2 : aco_opcode::flat_atomic_fmax_x2;
|
||||
break;
|
||||
default:
|
||||
unreachable("unsupported atomic operation");
|
||||
default: unreachable("unsupported atomic operation");
|
||||
}
|
||||
|
||||
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
|
||||
@ -7192,8 +7162,8 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
||||
memory_sync_info sync(aco_storage_mode_from_nir_mem_mode(mem_mode),
|
||||
written_once ? semantic_can_reorder : semantic_none);
|
||||
|
||||
store_vmem_mubuf(ctx, store_src, descriptor, v_offset, s_offset, idx, const_offset, elem_size_bytes,
|
||||
write_mask, swizzled, sync, glc, slc);
|
||||
store_vmem_mubuf(ctx, store_src, descriptor, v_offset, s_offset, idx, const_offset,
|
||||
elem_size_bytes, write_mask, swizzled, sync, glc, slc);
|
||||
}
|
||||
|
||||
void
|
||||
@ -7206,8 +7176,8 @@ visit_load_smem(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
|
||||
/* If base address is 32bit, convert to 64bit with the high 32bit part. */
|
||||
if (base.bytes() == 4) {
|
||||
base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
|
||||
base, Operand::c32(ctx->options->address32_hi));
|
||||
base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), base,
|
||||
Operand::c32(ctx->options->address32_hi));
|
||||
}
|
||||
|
||||
aco_opcode opcode = aco_opcode::s_load_dword;
|
||||
@ -7535,10 +7505,10 @@ get_scratch_resource(isel_context* ctx)
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp scratch_addr = ctx->program->private_segment_buffer;
|
||||
if (!scratch_addr.bytes()) {
|
||||
Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
|
||||
Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
|
||||
Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
Temp addr_lo =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
Temp addr_hi =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
} else if (ctx->stage.hw != HWStage::CS) {
|
||||
scratch_addr =
|
||||
@ -8093,8 +8063,7 @@ Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i);
|
||||
Temp lanecount_to_mask(isel_context* ctx, Temp count);
|
||||
|
||||
Temp
|
||||
get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
|
||||
enum glsl_interp_mode interp)
|
||||
get_interp_param(isel_context* ctx, nir_intrinsic_op intrin, enum glsl_interp_mode interp)
|
||||
{
|
||||
bool linear = interp == INTERP_MODE_NOPERSPECTIVE;
|
||||
if (intrin == nir_intrinsic_load_barycentric_pixel ||
|
||||
@ -8109,9 +8078,8 @@ get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
|
||||
}
|
||||
|
||||
void
|
||||
ds_ordered_count_offsets(isel_context *ctx, unsigned index_operand,
|
||||
unsigned wave_release, unsigned wave_done,
|
||||
unsigned *offset0, unsigned *offset1)
|
||||
ds_ordered_count_offsets(isel_context* ctx, unsigned index_operand, unsigned wave_release,
|
||||
unsigned wave_done, unsigned* offset0, unsigned* offset1)
|
||||
{
|
||||
unsigned ordered_count_index = index_operand & 0x3f;
|
||||
unsigned count_dword = (index_operand >> 24) & 0xf;
|
||||
@ -8189,7 +8157,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
RegClass rc = RegClass(offset.type(), 1);
|
||||
Temp pos1 = bld.tmp(rc), pos2 = bld.tmp(rc);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), offset);
|
||||
Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
|
||||
Temp bary = get_interp_param(ctx, instr->intrinsic,
|
||||
(glsl_interp_mode)nir_intrinsic_interp_mode(instr));
|
||||
emit_interp_center(ctx, get_ssa_temp(ctx, &instr->dest.ssa), bary, pos1, pos2);
|
||||
break;
|
||||
}
|
||||
@ -8977,8 +8946,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
if (ctx->args->merged_wave_info.used)
|
||||
bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(2u),
|
||||
Operand::c32(8u), Operand::zero());
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(2u), Operand::c32(8u),
|
||||
Operand::zero());
|
||||
else if (ctx->args->gs_wave_id.used)
|
||||
bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_wave_id));
|
||||
else
|
||||
@ -9025,8 +8994,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
case nir_intrinsic_overwrite_tes_arguments_amd: {
|
||||
ctx->arg_temps[ctx->args->tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
ctx->arg_temps[ctx->args->tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
|
||||
ctx->arg_temps[ctx->args->tes_rel_patch_id.arg_index] =
|
||||
get_ssa_temp(ctx, instr->src[3].ssa);
|
||||
ctx->arg_temps[ctx->args->tes_rel_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[3].ssa);
|
||||
ctx->arg_temps[ctx->args->tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[2].ssa);
|
||||
break;
|
||||
}
|
||||
@ -9036,7 +9004,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
Temp src = ctx->arg_temps[nir_intrinsic_base(instr)];
|
||||
assert(src.id());
|
||||
assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr : RegType::vgpr));
|
||||
assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr
|
||||
: RegType::vgpr));
|
||||
bld.copy(Definition(dst), src);
|
||||
emit_split_vector(ctx, dst, dst.size());
|
||||
break;
|
||||
@ -9048,35 +9017,34 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
|
||||
Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u));
|
||||
unsigned offset0, offset1;
|
||||
Instruction *ds_instr;
|
||||
Instruction* ds_instr;
|
||||
Operand m;
|
||||
|
||||
/* Lock a GDS mutex. */
|
||||
ds_ordered_count_offsets(ctx, 1 << 24u, false, false, &offset0, &offset1);
|
||||
m = bld.m0(bld.as_uniform(ordered_id));
|
||||
ds_instr = bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m,
|
||||
offset0, offset1, true);
|
||||
ds_instr =
|
||||
bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
|
||||
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
|
||||
|
||||
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)};
|
||||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
|
||||
bool use_gds_registers =
|
||||
ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
|
||||
bool use_gds_registers = ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
|
||||
|
||||
for (unsigned i = 0; i < instr->num_components; i++) {
|
||||
if (write_mask & (1 << i)) {
|
||||
Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
|
||||
|
||||
if (use_gds_registers) {
|
||||
ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1),
|
||||
Operand(), chan_counter, i * 4, 0u, true);
|
||||
ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1), Operand(),
|
||||
chan_counter, i * 4, 0u, true);
|
||||
} else {
|
||||
m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
|
||||
|
||||
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1),
|
||||
gds_base, chan_counter, m, i * 4, 0u, true);
|
||||
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1), gds_base, chan_counter, m,
|
||||
i * 4, 0u, true);
|
||||
}
|
||||
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
|
||||
|
||||
@ -9092,33 +9060,32 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
/* Unlock a GDS mutex. */
|
||||
ds_ordered_count_offsets(ctx, 1 << 24u, true, true, &offset0, &offset1);
|
||||
m = bld.m0(bld.as_uniform(ordered_id));
|
||||
ds_instr = bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m,
|
||||
offset0, offset1, true);
|
||||
ds_instr =
|
||||
bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
|
||||
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
|
||||
|
||||
emit_split_vector(ctx, dst, instr->num_components);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_xfb_counter_sub_amd: {
|
||||
bool use_gds_registers =
|
||||
ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
|
||||
bool use_gds_registers = ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
|
||||
|
||||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
Temp counter = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u));
|
||||
|
||||
u_foreach_bit(i, write_mask) {
|
||||
u_foreach_bit (i, write_mask) {
|
||||
Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
|
||||
Instruction *ds_instr;
|
||||
Instruction* ds_instr;
|
||||
|
||||
if (use_gds_registers) {
|
||||
ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1),
|
||||
Operand(), chan_counter, i * 4, 0u, true);
|
||||
ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1), Operand(), chan_counter,
|
||||
i * 4, 0u, true);
|
||||
} else {
|
||||
Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
|
||||
|
||||
ds_instr = bld.ds(aco_opcode::ds_sub_rtn_u32, bld.def(v1),
|
||||
gds_base, chan_counter, m, i * 4, 0u, true);
|
||||
ds_instr = bld.ds(aco_opcode::ds_sub_rtn_u32, bld.def(v1), gds_base, chan_counter, m,
|
||||
i * 4, 0u, true);
|
||||
}
|
||||
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
|
||||
}
|
||||
@ -9162,15 +9129,14 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
exp->valid_mask = false;
|
||||
|
||||
/* Compressed export uses two bits for a channel. */
|
||||
uint32_t channel_mask = exp->compressed ?
|
||||
(write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) :
|
||||
write_mask;
|
||||
uint32_t channel_mask =
|
||||
exp->compressed ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : write_mask;
|
||||
|
||||
Temp value = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
exp->operands[i] = channel_mask & BITFIELD_BIT(i) ?
|
||||
Operand(emit_extract_vector(ctx, value, i, v1)) :
|
||||
Operand(v1);
|
||||
exp->operands[i] = channel_mask & BITFIELD_BIT(i)
|
||||
? Operand(emit_extract_vector(ctx, value, i, v1))
|
||||
: Operand(v1);
|
||||
}
|
||||
|
||||
ctx->block->instructions.emplace_back(std::move(exp));
|
||||
@ -9183,13 +9149,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
|
||||
struct aco_export_mrt mrt0, mrt1;
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
mrt0.out[i] = write_mask & BITFIELD_BIT(i) ?
|
||||
Operand(emit_extract_vector(ctx, val0, i, v1)) :
|
||||
Operand(v1);
|
||||
mrt0.out[i] = write_mask & BITFIELD_BIT(i) ? Operand(emit_extract_vector(ctx, val0, i, v1))
|
||||
: Operand(v1);
|
||||
|
||||
mrt1.out[i] = write_mask & BITFIELD_BIT(i) ?
|
||||
Operand(emit_extract_vector(ctx, val1, i, v1)) :
|
||||
Operand(v1);
|
||||
mrt1.out[i] = write_mask & BITFIELD_BIT(i) ? Operand(emit_extract_vector(ctx, val1, i, v1))
|
||||
: Operand(v1);
|
||||
}
|
||||
mrt0.enabled_channels = mrt1.enabled_channels = write_mask;
|
||||
|
||||
@ -9383,7 +9347,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
||||
}
|
||||
|
||||
if (has_wqm_coord) {
|
||||
assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb || instr->op == nir_texop_lod);
|
||||
assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb ||
|
||||
instr->op == nir_texop_lod);
|
||||
assert(wqm_coord.regClass().is_linear_vgpr());
|
||||
assert(!a16 && !g16);
|
||||
}
|
||||
@ -9701,9 +9666,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
||||
if (dst.regClass() == s1) {
|
||||
Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
|
||||
emit_extract_vector(ctx, resource, 1, s1));
|
||||
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst),
|
||||
bld.as_uniform(tmp_dst), Operand::c32(0x76543210),
|
||||
bld.scc(is_not_null));
|
||||
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bld.as_uniform(tmp_dst),
|
||||
Operand::c32(0x76543210), bld.scc(is_not_null));
|
||||
} else {
|
||||
Temp is_not_null = bld.tmp(bld.lm);
|
||||
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
|
||||
@ -10782,10 +10746,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
|
||||
|
||||
/* Replace NaN by zero (only 32-bit) to fix game bugs if requested. */
|
||||
if (out->enable_mrt_output_nan_fixup && !is_16bit &&
|
||||
(out->col_format == V_028714_SPI_SHADER_32_R || out->col_format == V_028714_SPI_SHADER_32_GR ||
|
||||
out->col_format == V_028714_SPI_SHADER_32_AR || out->col_format == V_028714_SPI_SHADER_32_ABGR ||
|
||||
(out->col_format == V_028714_SPI_SHADER_32_R ||
|
||||
out->col_format == V_028714_SPI_SHADER_32_GR ||
|
||||
out->col_format == V_028714_SPI_SHADER_32_AR ||
|
||||
out->col_format == V_028714_SPI_SHADER_32_ABGR ||
|
||||
out->col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
|
||||
u_foreach_bit(i, out->write_mask) {
|
||||
u_foreach_bit (i, out->write_mask) {
|
||||
Temp is_not_nan =
|
||||
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), values[i], values[i]);
|
||||
values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), values[i],
|
||||
@ -10847,7 +10813,6 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case V_028714_SPI_SHADER_SNORM16_ABGR:
|
||||
if (is_16bit && ctx->options->gfx_level >= GFX9) {
|
||||
compr_op = aco_opcode::v_cvt_pknorm_i16_f16;
|
||||
@ -10862,13 +10827,13 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
|
||||
/* clamp */
|
||||
uint32_t max_rgb = out->is_int8 ? 255 : out->is_int10 ? 1023 : 0;
|
||||
|
||||
u_foreach_bit(i, out->write_mask) {
|
||||
u_foreach_bit (i, out->write_mask) {
|
||||
uint32_t max = i == 3 && out->is_int10 ? 3 : max_rgb;
|
||||
|
||||
values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]);
|
||||
}
|
||||
} else if (is_16bit) {
|
||||
u_foreach_bit(i, out->write_mask) {
|
||||
u_foreach_bit (i, out->write_mask) {
|
||||
Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, false);
|
||||
values[i] = Operand(tmp);
|
||||
}
|
||||
@ -10882,7 +10847,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
|
||||
uint32_t max_rgb = out->is_int8 ? 127 : out->is_int10 ? 511 : 0;
|
||||
uint32_t min_rgb = out->is_int8 ? -128 : out->is_int10 ? -512 : 0;
|
||||
|
||||
u_foreach_bit(i, out->write_mask) {
|
||||
u_foreach_bit (i, out->write_mask) {
|
||||
uint32_t max = i == 3 && out->is_int10 ? 1 : max_rgb;
|
||||
uint32_t min = i == 3 && out->is_int10 ? -2u : min_rgb;
|
||||
|
||||
@ -10890,7 +10855,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
|
||||
values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]);
|
||||
}
|
||||
} else if (is_16bit) {
|
||||
u_foreach_bit(i, out->write_mask) {
|
||||
u_foreach_bit (i, out->write_mask) {
|
||||
Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, true);
|
||||
values[i] = Operand(tmp);
|
||||
}
|
||||
@ -10996,8 +10961,7 @@ create_fs_jump_to_epilog(isel_context* ctx)
|
||||
}
|
||||
}
|
||||
|
||||
Temp continue_pc =
|
||||
convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog_pc));
|
||||
Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog_pc));
|
||||
|
||||
aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + color_exports.size(), 0)};
|
||||
@ -11068,12 +11032,13 @@ add_startpgm(struct isel_context* ctx)
|
||||
Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
|
||||
scratch_offset.setLateKill(true);
|
||||
|
||||
Operand scratch_addr = ctx->args->ring_offsets.used ?
|
||||
Operand(get_arg(ctx, ctx->args->ring_offsets)) : Operand(s2);
|
||||
Operand scratch_addr = ctx->args->ring_offsets.used
|
||||
? Operand(get_arg(ctx, ctx->args->ring_offsets))
|
||||
: Operand(s2);
|
||||
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
|
||||
scratch_addr, scratch_offset);
|
||||
bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), scratch_addr,
|
||||
scratch_offset);
|
||||
}
|
||||
|
||||
return startpgm;
|
||||
@ -11085,9 +11050,9 @@ fix_ls_vgpr_init_bug(isel_context* ctx, Pseudo_instruction* startpgm)
|
||||
assert(ctx->shader->info.stage == MESA_SHADER_VERTEX);
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
constexpr unsigned hs_idx = 1u;
|
||||
Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info),
|
||||
Operand::c32((8u << 16) | (hs_idx * 8u)));
|
||||
Builder::Result hs_thread_count =
|
||||
bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32((8u << 16) | (hs_idx * 8u)));
|
||||
Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
|
||||
|
||||
/* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. */
|
||||
@ -11218,10 +11183,9 @@ merged_wave_info_to_mask(isel_context* ctx, unsigned i)
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
/* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */
|
||||
Temp count = i == 0
|
||||
? get_arg(ctx, ctx->args->merged_wave_info)
|
||||
: bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(i * 8u));
|
||||
Temp count = i == 0 ? get_arg(ctx, ctx->args->merged_wave_info)
|
||||
: bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(i * 8u));
|
||||
|
||||
return lanecount_to_mask(ctx, count);
|
||||
}
|
||||
@ -11276,10 +11240,10 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
|
||||
void
|
||||
select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
|
||||
ac_shader_config* config, const struct aco_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
const struct ac_shader_args* args)
|
||||
const struct aco_shader_info* info, const struct ac_shader_args* args)
|
||||
{
|
||||
isel_context ctx = setup_isel_context(program, shader_count, shaders, config, options, info, args, false);
|
||||
isel_context ctx =
|
||||
setup_isel_context(program, shader_count, shaders, config, options, info, args, false);
|
||||
|
||||
if (ctx.stage == raytracing_cs)
|
||||
return select_program_rt(ctx, shader_count, shaders, args);
|
||||
@ -11391,8 +11355,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
|
||||
void
|
||||
select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shader_config* config,
|
||||
const struct aco_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
const struct ac_shader_args* args)
|
||||
const struct aco_shader_info* info, const struct ac_shader_args* args)
|
||||
{
|
||||
assert(options->gfx_level == GFX8);
|
||||
|
||||
|
@ -660,8 +660,8 @@ cleanup_context(isel_context* ctx)
|
||||
isel_context
|
||||
setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
|
||||
ac_shader_config* config, const struct aco_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
const struct ac_shader_args* args, bool is_ps_epilog)
|
||||
const struct aco_shader_info* info, const struct ac_shader_args* args,
|
||||
bool is_ps_epilog)
|
||||
{
|
||||
SWStage sw_stage = SWStage::None;
|
||||
for (unsigned i = 0; i < shader_count; i++) {
|
||||
|
@ -80,8 +80,7 @@ validate(aco::Program* program)
|
||||
}
|
||||
|
||||
static std::string
|
||||
get_disasm_string(aco::Program* program, std::vector<uint32_t>& code,
|
||||
unsigned exec_size)
|
||||
get_disasm_string(aco::Program* program, std::vector<uint32_t>& code, unsigned exec_size)
|
||||
{
|
||||
std::string disasm;
|
||||
|
||||
@ -111,8 +110,7 @@ get_disasm_string(aco::Program* program, std::vector<uint32_t>& code,
|
||||
|
||||
static std::string
|
||||
aco_postprocess_shader(const struct aco_compiler_options* options,
|
||||
const struct aco_shader_info *info,
|
||||
std::unique_ptr<aco::Program>& program)
|
||||
const struct aco_shader_info* info, std::unique_ptr<aco::Program>& program)
|
||||
{
|
||||
std::string llvm_ir;
|
||||
|
||||
@ -211,12 +209,9 @@ aco_postprocess_shader(const struct aco_compiler_options* options,
|
||||
}
|
||||
|
||||
void
|
||||
aco_compile_shader(const struct aco_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
aco_compile_shader(const struct aco_compiler_options* options, const struct aco_shader_info* info,
|
||||
unsigned shader_count, struct nir_shader* const* shaders,
|
||||
const struct ac_shader_args *args,
|
||||
aco_callback *build_binary,
|
||||
void **binary)
|
||||
const struct ac_shader_args* args, aco_callback* build_binary, void** binary)
|
||||
{
|
||||
aco::init();
|
||||
|
||||
@ -335,13 +330,8 @@ aco_compile_vs_prolog(const struct aco_compiler_options* options,
|
||||
if (get_disasm)
|
||||
disasm = get_disasm_string(program.get(), code, exec_size);
|
||||
|
||||
(*build_prolog)(binary,
|
||||
config.num_sgprs,
|
||||
config.num_vgprs,
|
||||
code.data(),
|
||||
code.size(),
|
||||
disasm.data(),
|
||||
disasm.size());
|
||||
(*build_prolog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
|
||||
disasm.data(), disasm.size());
|
||||
}
|
||||
|
||||
void
|
||||
@ -377,11 +367,6 @@ aco_compile_ps_epilog(const struct aco_compiler_options* options,
|
||||
if (get_disasm)
|
||||
disasm = get_disasm_string(program.get(), code, exec_size);
|
||||
|
||||
(*build_epilog)(binary,
|
||||
config.num_sgprs,
|
||||
config.num_vgprs,
|
||||
code.data(),
|
||||
code.size(),
|
||||
disasm.data(),
|
||||
disasm.size());
|
||||
(*build_epilog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
|
||||
disasm.data(), disasm.size());
|
||||
}
|
||||
|
@ -25,9 +25,9 @@
|
||||
#ifndef ACO_INTERFACE_H
|
||||
#define ACO_INTERFACE_H
|
||||
|
||||
#include "amd_family.h"
|
||||
|
||||
#include "aco_shader_info.h"
|
||||
|
||||
#include "amd_family.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -47,24 +47,18 @@ typedef void(aco_callback)(void** priv_ptr, const struct ac_shader_config* confi
|
||||
const char* llvm_ir_str, unsigned llvm_ir_size, const char* disasm_str,
|
||||
unsigned disasm_size, uint32_t* statistics, uint32_t stats_size,
|
||||
uint32_t exec_size, const uint32_t* code, uint32_t code_dw,
|
||||
const struct aco_symbol *symbols, unsigned num_symbols);
|
||||
const struct aco_symbol* symbols, unsigned num_symbols);
|
||||
|
||||
typedef void (aco_shader_part_callback)(void **priv_ptr,
|
||||
uint32_t num_sgprs,
|
||||
uint32_t num_vgprs,
|
||||
const uint32_t *code,
|
||||
uint32_t code_size,
|
||||
const char *disasm_str,
|
||||
uint32_t disasm_size);
|
||||
typedef void(aco_shader_part_callback)(void** priv_ptr, uint32_t num_sgprs, uint32_t num_vgprs,
|
||||
const uint32_t* code, uint32_t code_size,
|
||||
const char* disasm_str, uint32_t disasm_size);
|
||||
|
||||
extern const struct aco_compiler_statistic_info* aco_statistic_infos;
|
||||
|
||||
void aco_compile_shader(const struct aco_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
unsigned shader_count, struct nir_shader* const* shaders,
|
||||
const struct ac_shader_args *args,
|
||||
aco_callback *build_binary,
|
||||
void **binary);
|
||||
const struct aco_shader_info* info, unsigned shader_count,
|
||||
struct nir_shader* const* shaders, const struct ac_shader_args* args,
|
||||
aco_callback* build_binary, void** binary);
|
||||
|
||||
void aco_compile_rt_prolog(const struct aco_compiler_options* options,
|
||||
const struct aco_shader_info* info, const struct ac_shader_args* in_args,
|
||||
|
@ -98,8 +98,9 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
program->wave_size = info->wave_size;
|
||||
program->lane_mask = program->wave_size == 32 ? s1 : s2;
|
||||
|
||||
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 :
|
||||
gfx_level >= GFX7 ? 512 : 256;
|
||||
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024
|
||||
: gfx_level >= GFX7 ? 512
|
||||
: 256;
|
||||
program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
|
||||
|
||||
/* GFX6: There is 64KB LDS per CU, but a single workgroup can only use 32KB. */
|
||||
|
@ -140,9 +140,9 @@ enum storage_class : uint8_t {
|
||||
storage_buffer = 0x1, /* SSBOs and global memory */
|
||||
storage_gds = 0x2,
|
||||
storage_image = 0x4,
|
||||
storage_shared = 0x8, /* or TCS output */
|
||||
storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
|
||||
storage_task_payload = 0x20,/* Task-Mesh payload */
|
||||
storage_shared = 0x8, /* or TCS output */
|
||||
storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
|
||||
storage_task_payload = 0x20, /* Task-Mesh payload */
|
||||
storage_scratch = 0x40,
|
||||
storage_vgpr_spill = 0x80,
|
||||
storage_count = 8, /* not counting storage_none */
|
||||
@ -823,7 +823,8 @@ public:
|
||||
assert(bytes() == 2 || bytes() == 4);
|
||||
if (opsel) {
|
||||
if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
|
||||
return int16_t(data_.i) >> 16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
|
||||
return int16_t(data_.i) >>
|
||||
16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
|
||||
else
|
||||
return data_.i >> 16;
|
||||
}
|
||||
@ -1418,7 +1419,8 @@ struct VINTERP_inreg_instruction : public VALU_instruction {
|
||||
uint8_t padding5;
|
||||
uint8_t padding6;
|
||||
};
|
||||
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
|
||||
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
|
||||
"Unexpected padding");
|
||||
|
||||
/**
|
||||
* Data Parallel Primitives Format:
|
||||
@ -1809,8 +1811,7 @@ memory_sync_info get_sync_info(const Instruction* instr);
|
||||
inline bool
|
||||
is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
|
||||
{
|
||||
if (instr->definitions.empty() || instr->isBranch() ||
|
||||
instr->opcode == aco_opcode::p_startpgm ||
|
||||
if (instr->definitions.empty() || instr->isBranch() || instr->opcode == aco_opcode::p_startpgm ||
|
||||
instr->opcode == aco_opcode::p_init_scratch ||
|
||||
instr->opcode == aco_opcode::p_dual_src_export_gfx11)
|
||||
return false;
|
||||
@ -2216,8 +2217,7 @@ void init_program(Program* program, Stage stage, const struct aco_shader_info* i
|
||||
|
||||
void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
|
||||
ac_shader_config* config, const struct aco_compiler_options* options,
|
||||
const struct aco_shader_info* info,
|
||||
const struct ac_shader_args* args);
|
||||
const struct aco_shader_info* info, const struct ac_shader_args* args);
|
||||
void select_trap_handler_shader(Program* program, struct nir_shader* shader,
|
||||
ac_shader_config* config,
|
||||
const struct aco_compiler_options* options,
|
||||
@ -2258,7 +2258,7 @@ bool dealloc_vgprs(Program* program);
|
||||
void insert_NOPs(Program* program);
|
||||
void form_hard_clauses(Program* program);
|
||||
unsigned emit_program(Program* program, std::vector<uint32_t>& code,
|
||||
std::vector<struct aco_symbol> *symbols);
|
||||
std::vector<struct aco_symbol>* symbols);
|
||||
/**
|
||||
* Returns true if print_asm can disassemble the given program for the current build/runtime
|
||||
* configuration
|
||||
|
@ -2181,7 +2181,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
||||
instr->mimg().strict_wqm = false;
|
||||
|
||||
if ((3 + num_vaddr) > instr->operands.size()) {
|
||||
MIMG_instruction *new_instr = create_instruction<MIMG_instruction>(
|
||||
MIMG_instruction* new_instr = create_instruction<MIMG_instruction>(
|
||||
instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
|
||||
std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
|
||||
new_instr->definitions.begin());
|
||||
@ -2346,8 +2346,8 @@ lower_to_hw_instr(Program* program)
|
||||
target =
|
||||
program->has_color_exports ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_MRTZ;
|
||||
if (program->stage == fragment_fs)
|
||||
bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1),
|
||||
0, target, false, true, true);
|
||||
bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), 0,
|
||||
target, false, true, true);
|
||||
if (should_dealloc_vgprs)
|
||||
bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs);
|
||||
bld.sopp(aco_opcode::s_endpgm);
|
||||
@ -2518,8 +2518,7 @@ lower_to_hw_instr(Program* program)
|
||||
create_bperm(bld, ext_swiz, dst, Operand::zero());
|
||||
}
|
||||
} else {
|
||||
SDWA_instruction& sdwa =
|
||||
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa();
|
||||
SDWA_instruction& sdwa = bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa();
|
||||
sdwa.sel[0] = SubdwordSel(bits / 8, offset / 8, signext);
|
||||
}
|
||||
}
|
||||
@ -2574,7 +2573,8 @@ lower_to_hw_instr(Program* program)
|
||||
} else {
|
||||
assert(dst.regClass() == v2b);
|
||||
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
|
||||
->sdwa().sel[1] = SubdwordSel::ubyte;
|
||||
->sdwa()
|
||||
.sel[1] = SubdwordSel::ubyte;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1369,7 +1369,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
if (instr->isSALU() || instr->isPseudo()) {
|
||||
unsigned bits = get_operand_size(instr, i);
|
||||
if ((info.is_constant(bits) || (info.is_literal(bits) && instr->isPseudo())) &&
|
||||
alu_can_accept_constant(instr, i)) {
|
||||
alu_can_accept_constant(instr, i)) {
|
||||
instr->operands[i] = get_constant_op(ctx, info, bits);
|
||||
continue;
|
||||
}
|
||||
@ -2116,9 +2116,10 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
case aco_opcode::v_mbcnt_hi_u32_b32_e64: {
|
||||
if (instr->operands[0].constantEquals(-1) && instr->operands[1].isTemp() &&
|
||||
ctx.info[instr->operands[1].tempId()].is_usedef()) {
|
||||
Instruction *usedef_instr = ctx.info[instr->operands[1].tempId()].instr;
|
||||
Instruction* usedef_instr = ctx.info[instr->operands[1].tempId()].instr;
|
||||
if (usedef_instr->opcode == aco_opcode::v_mbcnt_lo_u32_b32 &&
|
||||
usedef_instr->operands[0].constantEquals(-1) && usedef_instr->operands[1].constantEquals(0))
|
||||
usedef_instr->operands[0].constantEquals(-1) &&
|
||||
usedef_instr->operands[1].constantEquals(0))
|
||||
ctx.info[instr->definitions[0].tempId()].set_subgroup_invocation(instr.get());
|
||||
}
|
||||
break;
|
||||
@ -2370,7 +2371,9 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
return false;
|
||||
|
||||
/* Find the constant operand or return early if there isn't one. */
|
||||
const int const_op_idx = instr->operands[0].isConstant() ? 0 : instr->operands[1].isConstant() ? 1 : -1;
|
||||
const int const_op_idx = instr->operands[0].isConstant() ? 0
|
||||
: instr->operands[1].isConstant() ? 1
|
||||
: -1;
|
||||
if (const_op_idx == -1)
|
||||
return false;
|
||||
|
||||
@ -2413,11 +2416,10 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
first_bit = val + 1;
|
||||
num_bits = val >= wave_size ? 0 : (wave_size - val - 1);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
default: return false;
|
||||
}
|
||||
|
||||
Instruction *cpy = NULL;
|
||||
Instruction* cpy = NULL;
|
||||
const uint64_t mask = BITFIELD64_RANGE(first_bit, num_bits);
|
||||
if (wave_size == 64 && mask > 0x7fffffff && mask != -1ull) {
|
||||
/* Mask can't be represented as a 64-bit constant or literal, use s_bfm_b64. */
|
||||
@ -2426,7 +2428,8 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
cpy->operands[1] = Operand::c32(first_bit);
|
||||
} else {
|
||||
/* Copy mask as a literal constant. */
|
||||
cpy = create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1);
|
||||
cpy =
|
||||
create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1);
|
||||
cpy->operands[0] = wave_size == 32 ? Operand::c32((uint32_t)mask) : Operand::c64(mask);
|
||||
}
|
||||
|
||||
@ -4821,10 +4824,12 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
*/
|
||||
if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_and_b64) {
|
||||
if (instr->operands[0].isTemp() && fixed_to_exec(instr->operands[1]) &&
|
||||
ctx.uses[instr->operands[0].tempId()] == 1 && ctx.uses[instr->definitions[1].tempId()] == 0 &&
|
||||
ctx.uses[instr->operands[0].tempId()] == 1 &&
|
||||
ctx.uses[instr->definitions[1].tempId()] == 0 &&
|
||||
can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) {
|
||||
ctx.uses[instr->operands[0].tempId()]--;
|
||||
ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(instr->definitions[0].getTemp());
|
||||
ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(
|
||||
instr->definitions[0].getTemp());
|
||||
instr.reset();
|
||||
return;
|
||||
}
|
||||
|
@ -516,7 +516,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
||||
if (mimg.lwe)
|
||||
fprintf(output, " lwe");
|
||||
if (mimg.r128)
|
||||
fprintf(output, " r128");
|
||||
fprintf(output, " r128");
|
||||
if (mimg.a16)
|
||||
fprintf(output, " a16");
|
||||
if (mimg.d16)
|
||||
|
@ -460,8 +460,7 @@ print_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file)
|
||||
printf("%u/%u used, %u/%u free\n", regs.size - free_regs, regs.size, free_regs, regs.size);
|
||||
|
||||
/* print assignments ordered by registers */
|
||||
std::map<PhysReg, std::pair<unsigned, unsigned>>
|
||||
regs_to_vars; /* maps to byte size and temp id */
|
||||
std::map<PhysReg, std::pair<unsigned, unsigned>> regs_to_vars; /* maps to byte size and temp id */
|
||||
for (unsigned id : find_vars(ctx, reg_file, regs)) {
|
||||
const assignment& var = ctx.assignments[id];
|
||||
PhysReg reg = var.reg;
|
||||
@ -1088,8 +1087,8 @@ get_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file,
|
||||
instr->operands[i].regClass() == info.rc) {
|
||||
assignment& op = ctx.assignments[instr->operands[i].tempId()];
|
||||
/* if everything matches, create parallelcopy for the killed operand */
|
||||
if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) &&
|
||||
op.reg != scc && reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
|
||||
if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) && op.reg != scc &&
|
||||
reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
|
||||
Definition pc_def = Definition(reg, info.rc);
|
||||
parallelcopies.emplace_back(instr->operands[i], pc_def);
|
||||
return op.reg;
|
||||
@ -1655,8 +1654,7 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
|
||||
return vcc;
|
||||
}
|
||||
if (ctx.assignments[temp.id()].m0) {
|
||||
if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) &&
|
||||
can_write_m0(instr))
|
||||
if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) && can_write_m0(instr))
|
||||
return m0;
|
||||
}
|
||||
|
||||
|
@ -587,8 +587,10 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
|
||||
/* don't move non-reorderable instructions */
|
||||
if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime ||
|
||||
instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 ||
|
||||
instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog ||
|
||||
instr->opcode == aco_opcode::s_sendmsg_rtn_b32 || instr->opcode == aco_opcode::s_sendmsg_rtn_b64)
|
||||
instr->opcode == aco_opcode::p_init_scratch ||
|
||||
instr->opcode == aco_opcode::p_jump_to_epilog ||
|
||||
instr->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
|
||||
instr->opcode == aco_opcode::s_sendmsg_rtn_b64)
|
||||
return hazard_fail_unreorderable;
|
||||
|
||||
memory_event_set instr_set;
|
||||
@ -663,8 +665,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
|
||||
int16_t k = 0;
|
||||
|
||||
/* don't move s_memtime/s_memrealtime */
|
||||
if (current->opcode == aco_opcode::s_memtime ||
|
||||
current->opcode == aco_opcode::s_memrealtime ||
|
||||
if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime ||
|
||||
current->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
|
||||
current->opcode == aco_opcode::s_sendmsg_rtn_b64)
|
||||
return;
|
||||
|
@ -35,10 +35,10 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ACO_MAX_SO_OUTPUTS 64
|
||||
#define ACO_MAX_SO_BUFFERS 4
|
||||
#define ACO_MAX_SO_OUTPUTS 64
|
||||
#define ACO_MAX_SO_BUFFERS 4
|
||||
#define ACO_MAX_VERTEX_ATTRIBS 32
|
||||
#define ACO_MAX_VBS 32
|
||||
#define ACO_MAX_VBS 32
|
||||
|
||||
struct aco_vs_input_state {
|
||||
uint32_t instance_rate_inputs;
|
||||
@ -133,8 +133,8 @@ struct aco_compiler_options {
|
||||
enum amd_gfx_level gfx_level;
|
||||
uint32_t address32_hi;
|
||||
struct {
|
||||
void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message);
|
||||
void *private_data;
|
||||
void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
|
||||
void* private_data;
|
||||
} debug;
|
||||
};
|
||||
|
||||
|
@ -94,7 +94,8 @@ struct spill_ctx {
|
||||
spill_ctx(const RegisterDemand target_pressure_, Program* program_,
|
||||
std::vector<std::vector<RegisterDemand>> register_demand_)
|
||||
: target_pressure(target_pressure_), program(program_), memory(),
|
||||
register_demand(std::move(register_demand_)), renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
|
||||
register_demand(std::move(register_demand_)),
|
||||
renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
|
||||
spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
|
||||
spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
|
||||
processed(program->blocks.size(), false),
|
||||
@ -226,10 +227,11 @@ next_uses_per_block(spill_ctx& ctx, unsigned block_idx, uint32_t& worklist)
|
||||
|
||||
std::pair<uint32_t, uint32_t> distance{block_idx, 0};
|
||||
|
||||
auto it = instr->definitions[0].isTemp() ? next_use_distances_start.find(instr->definitions[0].getTemp())
|
||||
: next_use_distances_start.end();
|
||||
auto it = instr->definitions[0].isTemp()
|
||||
? next_use_distances_start.find(instr->definitions[0].getTemp())
|
||||
: next_use_distances_start.end();
|
||||
if (it != next_use_distances_start.end() &&
|
||||
phi_defs.insert(instr->definitions[0].getTemp()).second) {
|
||||
phi_defs.insert(instr->definitions[0].getTemp()).second) {
|
||||
distance = it->second;
|
||||
}
|
||||
|
||||
@ -388,7 +390,7 @@ get_rematerialize_info(spill_ctx& ctx)
|
||||
|
||||
void
|
||||
update_local_next_uses(spill_ctx& ctx, Block* block,
|
||||
std::vector<std::vector<std::pair<Temp, uint32_t>>>& local_next_uses)
|
||||
std::vector<std::vector<std::pair<Temp, uint32_t>>>& local_next_uses)
|
||||
{
|
||||
if (local_next_uses.size() < block->instructions.size()) {
|
||||
/* Allocate more next-use-maps. Note that by never reducing the vector size, we enable
|
||||
@ -1006,7 +1008,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
|
||||
ctx.renames[pred_idx].find(phi->operands[i].getTemp());
|
||||
if (it != ctx.renames[pred_idx].end()) {
|
||||
phi->operands[i].setTemp(it->second);
|
||||
/* prevent the defining instruction from being DCE'd if it could be rematerialized */
|
||||
/* prevent the defining instruction from being DCE'd if it could be rematerialized */
|
||||
} else {
|
||||
auto remat_it = ctx.remat.find(phi->operands[i].getTemp());
|
||||
if (remat_it != ctx.remat.end()) {
|
||||
@ -1407,7 +1409,8 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
|
||||
continue;
|
||||
|
||||
/* find p_logical_end */
|
||||
std::vector<aco_ptr<Instruction>>& prev_instructions = ctx.program->blocks[block_idx].instructions;
|
||||
std::vector<aco_ptr<Instruction>>& prev_instructions =
|
||||
ctx.program->blocks[block_idx].instructions;
|
||||
unsigned idx = prev_instructions.size() - 1;
|
||||
while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
|
||||
idx--;
|
||||
@ -1422,10 +1425,10 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
|
||||
|
||||
Temp private_segment_buffer = ctx.program->private_segment_buffer;
|
||||
if (!private_segment_buffer.bytes()) {
|
||||
Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
|
||||
Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
|
||||
Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
Temp addr_lo =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
Temp addr_hi =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
private_segment_buffer =
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
} else if (ctx.program->stage.hw != HWStage::CS) {
|
||||
@ -1471,8 +1474,7 @@ setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
|
||||
if (ctx.scratch_rsrc == Temp()) {
|
||||
int32_t saddr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size -
|
||||
ctx.program->dev.scratch_global_offset_min;
|
||||
ctx.scratch_rsrc =
|
||||
load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
|
||||
ctx.scratch_rsrc = load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
|
||||
}
|
||||
} else {
|
||||
bool add_offset_to_sgpr =
|
||||
|
@ -35,8 +35,8 @@
|
||||
namespace aco {
|
||||
|
||||
static void
|
||||
aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix,
|
||||
const char* file, unsigned line, const char* fmt, va_list args)
|
||||
aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix, const char* file,
|
||||
unsigned line, const char* fmt, va_list args)
|
||||
{
|
||||
char* msg;
|
||||
|
||||
@ -270,8 +270,7 @@ validate_ir(Program* program)
|
||||
(instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) ||
|
||||
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
|
||||
((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
|
||||
(instr->isScratch() && i == 0) ||
|
||||
(instr->isDS() && i == 0) ||
|
||||
(instr->isScratch() && i == 0) || (instr->isDS() && i == 0) ||
|
||||
(instr->opcode == aco_opcode::p_init_scratch && i == 0);
|
||||
check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
|
||||
} else {
|
||||
@ -393,7 +392,7 @@ validate_ir(Program* program)
|
||||
"OPSEL_LO set for unsupported instruction format", instr.get());
|
||||
check(!instr->valu().opsel_hi || instr->isVOP3P(),
|
||||
"OPSEL_HI set for unsupported instruction format", instr.get());
|
||||
check(!instr->valu().omod || instr->isVOP3() ||instr->isSDWA(),
|
||||
check(!instr->valu().omod || instr->isVOP3() || instr->isSDWA(),
|
||||
"OMOD set for unsupported instruction format", instr.get());
|
||||
check(!instr->valu().clamp || instr->isVOP3() || instr->isVOP3P() ||
|
||||
instr->isSDWA() || instr->isVINTERP_INREG(),
|
||||
@ -562,7 +561,8 @@ validate_ir(Program* program)
|
||||
instr->definitions[2].regClass().size() == 1,
|
||||
"Third definition of p_dual_src_export_gfx11 must be a v1", instr.get());
|
||||
check(instr->definitions[3].regClass() == program->lane_mask,
|
||||
"Fourth definition of p_dual_src_export_gfx11 must be a lane mask", instr.get());
|
||||
"Fourth definition of p_dual_src_export_gfx11 must be a lane mask",
|
||||
instr.get());
|
||||
check(instr->definitions[4].physReg() == vcc,
|
||||
"Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
|
||||
check(instr->definitions[5].physReg() == scc,
|
||||
@ -627,26 +627,28 @@ validate_ir(Program* program)
|
||||
check(instr->operands.size() < 4 || instr->operands[3].isOfType(RegType::vgpr),
|
||||
"VMEM write data must be vgpr", instr.get());
|
||||
|
||||
const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
|
||||
instr->opcode == aco_opcode::buffer_load_ubyte ||
|
||||
instr->opcode == aco_opcode::buffer_load_sbyte ||
|
||||
instr->opcode == aco_opcode::buffer_load_ushort ||
|
||||
instr->opcode == aco_opcode::buffer_load_sshort ||
|
||||
instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
|
||||
instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
|
||||
instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
|
||||
instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
|
||||
instr->opcode == aco_opcode::buffer_load_short_d16 ||
|
||||
instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_x ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
|
||||
instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
|
||||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
|
||||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
|
||||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
|
||||
const bool d16 =
|
||||
instr->opcode ==
|
||||
aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
|
||||
instr->opcode == aco_opcode::buffer_load_ubyte ||
|
||||
instr->opcode == aco_opcode::buffer_load_sbyte ||
|
||||
instr->opcode == aco_opcode::buffer_load_ushort ||
|
||||
instr->opcode == aco_opcode::buffer_load_sshort ||
|
||||
instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
|
||||
instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
|
||||
instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
|
||||
instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
|
||||
instr->opcode == aco_opcode::buffer_load_short_d16 ||
|
||||
instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_x ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
|
||||
instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
|
||||
instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
|
||||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
|
||||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
|
||||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
|
||||
if (instr->definitions.size()) {
|
||||
check(instr->definitions[0].regClass().type() == RegType::vgpr,
|
||||
"VMEM definitions[0] (VDATA) must be VGPR", instr.get());
|
||||
@ -763,11 +765,14 @@ validate_ir(Program* program)
|
||||
break;
|
||||
}
|
||||
case Format::LDSDIR: {
|
||||
check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1, "LDSDIR must have an v1 definition", instr.get());
|
||||
check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1,
|
||||
"LDSDIR must have an v1 definition", instr.get());
|
||||
check(instr->operands.size() == 1, "LDSDIR must have an operand", instr.get());
|
||||
if (!instr->operands.empty()) {
|
||||
check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand", instr.get());
|
||||
check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0, "LDSDIR must have an operand fixed to m0", instr.get());
|
||||
check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand",
|
||||
instr.get());
|
||||
check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0,
|
||||
"LDSDIR must have an operand fixed to m0", instr.get());
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -35,19 +35,20 @@
|
||||
#include <string>
|
||||
|
||||
struct TestDef {
|
||||
const char *name;
|
||||
const char *source_file;
|
||||
const char* name;
|
||||
const char* source_file;
|
||||
void (*func)();
|
||||
};
|
||||
|
||||
extern std::map<std::string, TestDef> tests;
|
||||
extern FILE *output;
|
||||
extern FILE* output;
|
||||
|
||||
bool set_variant(const char *name);
|
||||
bool set_variant(const char* name);
|
||||
|
||||
inline bool set_variant(amd_gfx_level cls, const char *rest="")
|
||||
inline bool
|
||||
set_variant(amd_gfx_level cls, const char* rest = "")
|
||||
{
|
||||
char buf[8+strlen(rest)];
|
||||
char buf[8 + strlen(rest)];
|
||||
if (cls != GFX10_3) {
|
||||
snprintf(buf, sizeof(buf), "gfx%d%s", cls - GFX6 + 6 - (cls > GFX10_3), rest);
|
||||
} else {
|
||||
@ -56,18 +57,21 @@ inline bool set_variant(amd_gfx_level cls, const char *rest="")
|
||||
return set_variant(buf);
|
||||
}
|
||||
|
||||
void fail_test(const char *fmt, ...);
|
||||
void skip_test(const char *fmt, ...);
|
||||
void fail_test(const char* fmt, ...);
|
||||
void skip_test(const char* fmt, ...);
|
||||
|
||||
#define _BEGIN_TEST(name, struct_name) static void struct_name(); static __attribute__((constructor)) void CONCAT2(add_test_, __COUNTER__)() {\
|
||||
tests[#name] = (TestDef){#name, ACO_TEST_BUILD_ROOT "/" __FILE__, &struct_name};\
|
||||
}\
|
||||
static void struct_name() {\
|
||||
#define _BEGIN_TEST(name, struct_name) \
|
||||
static void struct_name(); \
|
||||
static __attribute__((constructor)) void CONCAT2(add_test_, __COUNTER__)() \
|
||||
{ \
|
||||
tests[#name] = (TestDef){#name, ACO_TEST_BUILD_ROOT "/" __FILE__, &struct_name}; \
|
||||
} \
|
||||
static void struct_name() \
|
||||
{
|
||||
|
||||
#define BEGIN_TEST(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
|
||||
#define BEGIN_TEST(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
|
||||
#define BEGIN_TEST_TODO(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
|
||||
#define BEGIN_TEST_FAIL(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
|
||||
#define END_TEST \
|
||||
}
|
||||
#define END_TEST }
|
||||
|
||||
#endif /* ACO_TEST_COMMON_H */
|
||||
|
@ -22,19 +22,20 @@
|
||||
*
|
||||
*/
|
||||
#include "helpers.h"
|
||||
#include "vulkan/vk_format.h"
|
||||
|
||||
#include "common/amd_family.h"
|
||||
#include <stdio.h>
|
||||
#include <sstream>
|
||||
#include "vulkan/vk_format.h"
|
||||
|
||||
#include <llvm-c/Target.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace aco;
|
||||
|
||||
extern "C" {
|
||||
PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
|
||||
VkInstance instance,
|
||||
const char* pName);
|
||||
PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char* pName);
|
||||
}
|
||||
|
||||
ac_shader_config config;
|
||||
@ -47,32 +48,34 @@ static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
|
||||
static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
|
||||
static std::mutex create_device_mutex;
|
||||
|
||||
#define FUNCTION_LIST\
|
||||
ITEM(CreateInstance)\
|
||||
ITEM(DestroyInstance)\
|
||||
ITEM(EnumeratePhysicalDevices)\
|
||||
ITEM(GetPhysicalDeviceProperties2)\
|
||||
ITEM(CreateDevice)\
|
||||
ITEM(DestroyDevice)\
|
||||
ITEM(CreateShaderModule)\
|
||||
ITEM(DestroyShaderModule)\
|
||||
ITEM(CreateGraphicsPipelines)\
|
||||
ITEM(CreateComputePipelines)\
|
||||
ITEM(DestroyPipeline)\
|
||||
ITEM(CreateDescriptorSetLayout)\
|
||||
ITEM(DestroyDescriptorSetLayout)\
|
||||
ITEM(CreatePipelineLayout)\
|
||||
ITEM(DestroyPipelineLayout)\
|
||||
ITEM(CreateRenderPass)\
|
||||
ITEM(DestroyRenderPass)\
|
||||
ITEM(GetPipelineExecutablePropertiesKHR)\
|
||||
#define FUNCTION_LIST \
|
||||
ITEM(CreateInstance) \
|
||||
ITEM(DestroyInstance) \
|
||||
ITEM(EnumeratePhysicalDevices) \
|
||||
ITEM(GetPhysicalDeviceProperties2) \
|
||||
ITEM(CreateDevice) \
|
||||
ITEM(DestroyDevice) \
|
||||
ITEM(CreateShaderModule) \
|
||||
ITEM(DestroyShaderModule) \
|
||||
ITEM(CreateGraphicsPipelines) \
|
||||
ITEM(CreateComputePipelines) \
|
||||
ITEM(DestroyPipeline) \
|
||||
ITEM(CreateDescriptorSetLayout) \
|
||||
ITEM(DestroyDescriptorSetLayout) \
|
||||
ITEM(CreatePipelineLayout) \
|
||||
ITEM(DestroyPipelineLayout) \
|
||||
ITEM(CreateRenderPass) \
|
||||
ITEM(DestroyRenderPass) \
|
||||
ITEM(GetPipelineExecutablePropertiesKHR) \
|
||||
ITEM(GetPipelineExecutableInternalRepresentationsKHR)
|
||||
|
||||
#define ITEM(n) PFN_vk##n n;
|
||||
FUNCTION_LIST
|
||||
#undef ITEM
|
||||
|
||||
void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family)
|
||||
void
|
||||
create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size,
|
||||
enum radeon_family family)
|
||||
{
|
||||
memset(&config, 0, sizeof(config));
|
||||
info.wave_size = wave_size;
|
||||
@ -90,7 +93,7 @@ void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_siz
|
||||
program->debug.func = nullptr;
|
||||
program->debug.private_data = nullptr;
|
||||
|
||||
Block *block = program->create_and_insert_block();
|
||||
Block* block = program->create_and_insert_block();
|
||||
block->kind = block_kind_top_level;
|
||||
|
||||
bld = Builder(program.get(), &program->blocks[0]);
|
||||
@ -98,9 +101,9 @@ void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_siz
|
||||
config.float_mode = program->blocks[0].fp_mode.val;
|
||||
}
|
||||
|
||||
bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
|
||||
enum radeon_family family, const char* subvariant,
|
||||
unsigned wave_size)
|
||||
bool
|
||||
setup_cs(const char* input_spec, enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||
const char* subvariant, unsigned wave_size)
|
||||
{
|
||||
if (!set_variant(gfx_level, subvariant))
|
||||
return false;
|
||||
@ -117,7 +120,8 @@ bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
|
||||
input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
|
||||
|
||||
input_spec += 2 + in_bytes;
|
||||
while (input_spec[0] == ' ') input_spec++;
|
||||
while (input_spec[0] == ' ')
|
||||
input_spec++;
|
||||
}
|
||||
|
||||
aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>(
|
||||
@ -132,7 +136,8 @@ bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
|
||||
return true;
|
||||
}
|
||||
|
||||
void finish_program(Program *prog)
|
||||
void
|
||||
finish_program(Program* prog)
|
||||
{
|
||||
for (Block& BB : prog->blocks) {
|
||||
for (unsigned idx : BB.linear_preds)
|
||||
@ -149,7 +154,8 @@ void finish_program(Program *prog)
|
||||
}
|
||||
}
|
||||
|
||||
void finish_validator_test()
|
||||
void
|
||||
finish_validator_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
aco_print_program(program.get(), output);
|
||||
@ -160,7 +166,8 @@ void finish_validator_test()
|
||||
fprintf(output, "Validation failed\n");
|
||||
}
|
||||
|
||||
void finish_opt_test()
|
||||
void
|
||||
finish_opt_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
if (!aco::validate_ir(program.get())) {
|
||||
@ -175,7 +182,8 @@ void finish_opt_test()
|
||||
aco_print_program(program.get(), output);
|
||||
}
|
||||
|
||||
void finish_setup_reduce_temp_test()
|
||||
void
|
||||
finish_setup_reduce_temp_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
if (!aco::validate_ir(program.get())) {
|
||||
@ -190,7 +198,8 @@ void finish_setup_reduce_temp_test()
|
||||
aco_print_program(program.get(), output);
|
||||
}
|
||||
|
||||
void finish_ra_test(ra_test_policy policy, bool lower)
|
||||
void
|
||||
finish_ra_test(ra_test_policy policy, bool lower)
|
||||
{
|
||||
finish_program(program.get());
|
||||
if (!aco::validate_ir(program.get())) {
|
||||
@ -215,42 +224,48 @@ void finish_ra_test(ra_test_policy policy, bool lower)
|
||||
aco_print_program(program.get(), output);
|
||||
}
|
||||
|
||||
void finish_optimizer_postRA_test()
|
||||
void
|
||||
finish_optimizer_postRA_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
aco::optimize_postRA(program.get());
|
||||
aco_print_program(program.get(), output);
|
||||
}
|
||||
|
||||
void finish_to_hw_instr_test()
|
||||
void
|
||||
finish_to_hw_instr_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
aco::lower_to_hw_instr(program.get());
|
||||
aco_print_program(program.get(), output);
|
||||
}
|
||||
|
||||
void finish_waitcnt_test()
|
||||
void
|
||||
finish_waitcnt_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
aco::insert_wait_states(program.get());
|
||||
aco_print_program(program.get(), output);
|
||||
}
|
||||
|
||||
void finish_insert_nops_test()
|
||||
void
|
||||
finish_insert_nops_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
aco::insert_NOPs(program.get());
|
||||
aco_print_program(program.get(), output);
|
||||
}
|
||||
|
||||
void finish_form_hard_clause_test()
|
||||
void
|
||||
finish_form_hard_clause_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
aco::form_hard_clauses(program.get());
|
||||
aco_print_program(program.get(), output);
|
||||
}
|
||||
|
||||
void finish_assembler_test()
|
||||
void
|
||||
finish_assembler_test()
|
||||
{
|
||||
finish_program(program.get());
|
||||
std::vector<uint32_t> binary;
|
||||
@ -261,13 +276,14 @@ void finish_assembler_test()
|
||||
if (program->gfx_level >= GFX8) {
|
||||
print_asm(program.get(), binary, exec_size / 4u, output);
|
||||
} else {
|
||||
//TODO: maybe we should use CLRX and skip this test if it's not available?
|
||||
// TODO: maybe we should use CLRX and skip this test if it's not available?
|
||||
for (uint32_t dword : binary)
|
||||
fprintf(output, "%.8x\n", dword);
|
||||
}
|
||||
}
|
||||
|
||||
void writeout(unsigned i, Temp tmp)
|
||||
void
|
||||
writeout(unsigned i, Temp tmp)
|
||||
{
|
||||
if (tmp.id())
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
|
||||
@ -275,22 +291,26 @@ void writeout(unsigned i, Temp tmp)
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
|
||||
}
|
||||
|
||||
void writeout(unsigned i, aco::Builder::Result res)
|
||||
void
|
||||
writeout(unsigned i, aco::Builder::Result res)
|
||||
{
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
|
||||
}
|
||||
|
||||
void writeout(unsigned i, Operand op)
|
||||
void
|
||||
writeout(unsigned i, Operand op)
|
||||
{
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
|
||||
}
|
||||
|
||||
void writeout(unsigned i, Operand op0, Operand op1)
|
||||
void
|
||||
writeout(unsigned i, Operand op0, Operand op1)
|
||||
{
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
|
||||
}
|
||||
|
||||
Temp fneg(Temp src, Builder b)
|
||||
Temp
|
||||
fneg(Temp src, Builder b)
|
||||
{
|
||||
if (src.bytes() == 2)
|
||||
return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
|
||||
@ -298,35 +318,42 @@ Temp fneg(Temp src, Builder b)
|
||||
return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
|
||||
}
|
||||
|
||||
Temp fabs(Temp src, Builder b)
|
||||
Temp
|
||||
fabs(Temp src, Builder b)
|
||||
{
|
||||
if (src.bytes() == 2) {
|
||||
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
|
||||
Builder::Result res =
|
||||
b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
|
||||
res->valu().abs[1] = true;
|
||||
return res;
|
||||
} else {
|
||||
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
|
||||
Builder::Result res =
|
||||
b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
|
||||
res->valu().abs[1] = true;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
Temp f2f32(Temp src, Builder b)
|
||||
Temp
|
||||
f2f32(Temp src, Builder b)
|
||||
{
|
||||
return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
|
||||
}
|
||||
|
||||
Temp f2f16(Temp src, Builder b)
|
||||
Temp
|
||||
f2f16(Temp src, Builder b)
|
||||
{
|
||||
return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
|
||||
}
|
||||
|
||||
Temp u2u16(Temp src, Builder b)
|
||||
Temp
|
||||
u2u16(Temp src, Builder b)
|
||||
{
|
||||
return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
|
||||
}
|
||||
|
||||
Temp fadd(Temp src0, Temp src1, Builder b)
|
||||
Temp
|
||||
fadd(Temp src0, Temp src1, Builder b)
|
||||
{
|
||||
if (src0.bytes() == 2)
|
||||
return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
|
||||
@ -334,7 +361,8 @@ Temp fadd(Temp src0, Temp src1, Builder b)
|
||||
return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
|
||||
}
|
||||
|
||||
Temp fmul(Temp src0, Temp src1, Builder b)
|
||||
Temp
|
||||
fmul(Temp src0, Temp src1, Builder b)
|
||||
{
|
||||
if (src0.bytes() == 2)
|
||||
return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
|
||||
@ -342,7 +370,8 @@ Temp fmul(Temp src0, Temp src1, Builder b)
|
||||
return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
|
||||
}
|
||||
|
||||
Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
|
||||
Temp
|
||||
fma(Temp src0, Temp src1, Temp src2, Builder b)
|
||||
{
|
||||
if (src0.bytes() == 2)
|
||||
return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
|
||||
@ -350,40 +379,46 @@ Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
|
||||
return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
|
||||
}
|
||||
|
||||
Temp fsat(Temp src, Builder b)
|
||||
Temp
|
||||
fsat(Temp src, Builder b)
|
||||
{
|
||||
if (src.bytes() == 2)
|
||||
return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u),
|
||||
Operand::c16(0x3c00u), src);
|
||||
return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), Operand::c16(0x3c00u),
|
||||
src);
|
||||
else
|
||||
return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(),
|
||||
Operand::c32(0x3f800000u), src);
|
||||
return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
|
||||
src);
|
||||
}
|
||||
|
||||
Temp fmin(Temp src0, Temp src1, Builder b)
|
||||
Temp
|
||||
fmin(Temp src0, Temp src1, Builder b)
|
||||
{
|
||||
return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1);
|
||||
}
|
||||
|
||||
Temp fmax(Temp src0, Temp src1, Builder b)
|
||||
Temp
|
||||
fmax(Temp src0, Temp src1, Builder b)
|
||||
{
|
||||
return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1);
|
||||
}
|
||||
|
||||
Temp ext_ushort(Temp src, unsigned idx, Builder b)
|
||||
Temp
|
||||
ext_ushort(Temp src, unsigned idx, Builder b)
|
||||
{
|
||||
return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
|
||||
Operand::c32(16u), Operand::c32(false));
|
||||
}
|
||||
|
||||
Temp ext_ubyte(Temp src, unsigned idx, Builder b)
|
||||
Temp
|
||||
ext_ubyte(Temp src, unsigned idx, Builder b)
|
||||
{
|
||||
return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
|
||||
Operand::c32(8u), Operand::c32(false));
|
||||
}
|
||||
|
||||
void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
|
||||
std::function<void()> els)
|
||||
void
|
||||
emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
|
||||
std::function<void()> els)
|
||||
{
|
||||
prog->blocks.reserve(prog->blocks.size() + 6);
|
||||
|
||||
@ -418,8 +453,10 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
|
||||
PhysReg saved_exec_reg(84);
|
||||
|
||||
b.reset(if_block);
|
||||
Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg), Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
|
||||
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index, then_linear->index);
|
||||
Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
|
||||
Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
|
||||
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index,
|
||||
then_linear->index);
|
||||
|
||||
b.reset(then_logical);
|
||||
b.pseudo(aco_opcode::p_logical_start);
|
||||
@ -431,8 +468,10 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
|
||||
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);
|
||||
|
||||
b.reset(invert);
|
||||
b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1), Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
|
||||
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index, else_linear->index);
|
||||
b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
|
||||
Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
|
||||
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index,
|
||||
else_linear->index);
|
||||
|
||||
b.reset(else_logical);
|
||||
b.pseudo(aco_opcode::p_logical_start);
|
||||
@ -444,42 +483,29 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
|
||||
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);
|
||||
|
||||
b.reset(endif_block);
|
||||
b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), Operand(saved_exec, saved_exec_reg));
|
||||
b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
|
||||
Operand(saved_exec, saved_exec_reg));
|
||||
}
|
||||
|
||||
VkDevice get_vk_device(enum amd_gfx_level gfx_level)
|
||||
VkDevice
|
||||
get_vk_device(enum amd_gfx_level gfx_level)
|
||||
{
|
||||
enum radeon_family family;
|
||||
switch (gfx_level) {
|
||||
case GFX6:
|
||||
family = CHIP_TAHITI;
|
||||
break;
|
||||
case GFX7:
|
||||
family = CHIP_BONAIRE;
|
||||
break;
|
||||
case GFX8:
|
||||
family = CHIP_POLARIS10;
|
||||
break;
|
||||
case GFX9:
|
||||
family = CHIP_VEGA10;
|
||||
break;
|
||||
case GFX10:
|
||||
family = CHIP_NAVI10;
|
||||
break;
|
||||
case GFX10_3:
|
||||
family = CHIP_NAVI21;
|
||||
break;
|
||||
case GFX11:
|
||||
family = CHIP_GFX1100;
|
||||
break;
|
||||
default:
|
||||
family = CHIP_UNKNOWN;
|
||||
break;
|
||||
case GFX6: family = CHIP_TAHITI; break;
|
||||
case GFX7: family = CHIP_BONAIRE; break;
|
||||
case GFX8: family = CHIP_POLARIS10; break;
|
||||
case GFX9: family = CHIP_VEGA10; break;
|
||||
case GFX10: family = CHIP_NAVI10; break;
|
||||
case GFX10_3: family = CHIP_NAVI21; break;
|
||||
case GFX11: family = CHIP_GFX1100; break;
|
||||
default: family = CHIP_UNKNOWN; break;
|
||||
}
|
||||
return get_vk_device(family);
|
||||
}
|
||||
|
||||
VkDevice get_vk_device(enum radeon_family family)
|
||||
VkDevice
|
||||
get_vk_device(enum radeon_family family)
|
||||
{
|
||||
assert(family != CHIP_UNKNOWN);
|
||||
|
||||
@ -496,12 +522,13 @@ VkDevice get_vk_device(enum radeon_family family)
|
||||
VkInstanceCreateInfo instance_create_info = {};
|
||||
instance_create_info.pApplicationInfo = &app_info;
|
||||
instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
||||
ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
|
||||
ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(
|
||||
NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
#define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
|
||||
#define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
|
||||
FUNCTION_LIST
|
||||
#undef ITEM
|
||||
#undef ITEM
|
||||
|
||||
uint32_t device_count = 1;
|
||||
VkPhysicalDevice device = VK_NULL_HANDLE;
|
||||
@ -511,7 +538,7 @@ VkDevice get_vk_device(enum radeon_family family)
|
||||
|
||||
VkDeviceCreateInfo device_create_info = {};
|
||||
device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"};
|
||||
static const char* extensions[] = {"VK_KHR_pipeline_executable_properties"};
|
||||
device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
|
||||
device_create_info.ppEnabledExtensionNames = extensions;
|
||||
result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
|
||||
@ -520,7 +547,8 @@ VkDevice get_vk_device(enum radeon_family family)
|
||||
}
|
||||
|
||||
static struct DestroyDevices {
|
||||
~DestroyDevices() {
|
||||
~DestroyDevices()
|
||||
{
|
||||
for (unsigned i = 0; i < CHIP_LAST; i++) {
|
||||
if (!device_cache[i])
|
||||
continue;
|
||||
@ -530,8 +558,9 @@ static struct DestroyDevices {
|
||||
}
|
||||
} destroy_devices;
|
||||
|
||||
void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
|
||||
const char *name, bool remove_encoding)
|
||||
void
|
||||
print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
|
||||
const char* name, bool remove_encoding)
|
||||
{
|
||||
uint32_t executable_count = 16;
|
||||
VkPipelineExecutablePropertiesKHR executables[16];
|
||||
@ -539,7 +568,8 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
|
||||
pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
|
||||
pipeline_info.pNext = NULL;
|
||||
pipeline_info.pipeline = pipeline;
|
||||
ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
|
||||
ASSERTED VkResult result =
|
||||
GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
uint32_t executable = 0;
|
||||
@ -570,13 +600,13 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
|
||||
}
|
||||
assert(requested_ir && "Could not find requested IR");
|
||||
|
||||
char *data = (char*)malloc(requested_ir->dataSize);
|
||||
char* data = (char*)malloc(requested_ir->dataSize);
|
||||
requested_ir->pData = data;
|
||||
result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
if (remove_encoding) {
|
||||
for (char *c = data; *c; c++) {
|
||||
for (char* c = data; *c; c++) {
|
||||
if (*c == ';') {
|
||||
for (; *c && *c != '\n'; c++)
|
||||
*c = ' ';
|
||||
@ -588,23 +618,25 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
|
||||
free(data);
|
||||
}
|
||||
|
||||
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info)
|
||||
VkShaderModule
|
||||
__qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* module_info)
|
||||
{
|
||||
VkShaderModuleCreateInfo vk_module_info;
|
||||
vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
vk_module_info.pNext = NULL;
|
||||
vk_module_info.flags = 0;
|
||||
vk_module_info.codeSize = module_info->spirvSize;
|
||||
vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
|
||||
VkShaderModuleCreateInfo vk_module_info;
|
||||
vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
vk_module_info.pNext = NULL;
|
||||
vk_module_info.flags = 0;
|
||||
vk_module_info.codeSize = module_info->spirvSize;
|
||||
vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
|
||||
|
||||
VkShaderModule module;
|
||||
ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
|
||||
assert(result == VK_SUCCESS);
|
||||
VkShaderModule module;
|
||||
ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
return module;
|
||||
return module;
|
||||
}
|
||||
|
||||
PipelineBuilder::PipelineBuilder(VkDevice dev) {
|
||||
PipelineBuilder::PipelineBuilder(VkDevice dev)
|
||||
{
|
||||
memset(this, 0, sizeof(*this));
|
||||
topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
device = dev;
|
||||
@ -615,7 +647,7 @@ PipelineBuilder::~PipelineBuilder()
|
||||
DestroyPipeline(device, pipeline, NULL);
|
||||
|
||||
for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
|
||||
VkPipelineShaderStageCreateInfo *stage_info = &stages[i];
|
||||
VkPipelineShaderStageCreateInfo* stage_info = &stages[i];
|
||||
if (owned_stages & stage_info->stage)
|
||||
DestroyShaderModule(device, stage_info->module, NULL);
|
||||
}
|
||||
@ -628,72 +660,87 @@ PipelineBuilder::~PipelineBuilder()
|
||||
DestroyRenderPass(device, render_pass, NULL);
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
|
||||
uint32_t binding, VkDescriptorType type, uint32_t count)
|
||||
void
|
||||
PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
|
||||
VkDescriptorType type, uint32_t count)
|
||||
{
|
||||
desc_layouts_used |= 1ull << layout;
|
||||
desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
|
||||
void
|
||||
PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
|
||||
{
|
||||
vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
|
||||
void
|
||||
PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format,
|
||||
uint32_t offset)
|
||||
{
|
||||
vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module)
|
||||
void
|
||||
PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo* module)
|
||||
{
|
||||
for (unsigned i = 0; i < module->declarationCount; i++) {
|
||||
const QoShaderDecl *decl = &module->pDeclarations[i];
|
||||
const QoShaderDecl* decl = &module->pDeclarations[i];
|
||||
switch (decl->decl_type) {
|
||||
case QoShaderDeclType_ubo:
|
||||
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
||||
add_desc_binding(module->stage, decl->set, decl->binding,
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
||||
break;
|
||||
case QoShaderDeclType_ssbo:
|
||||
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
|
||||
add_desc_binding(module->stage, decl->set, decl->binding,
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
|
||||
break;
|
||||
case QoShaderDeclType_img_buf:
|
||||
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
|
||||
add_desc_binding(module->stage, decl->set, decl->binding,
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
|
||||
break;
|
||||
case QoShaderDeclType_img:
|
||||
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
|
||||
add_desc_binding(module->stage, decl->set, decl->binding,
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
|
||||
break;
|
||||
case QoShaderDeclType_tex_buf:
|
||||
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
|
||||
add_desc_binding(module->stage, decl->set, decl->binding,
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
|
||||
break;
|
||||
case QoShaderDeclType_combined:
|
||||
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
|
||||
add_desc_binding(module->stage, decl->set, decl->binding,
|
||||
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
|
||||
break;
|
||||
case QoShaderDeclType_tex:
|
||||
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
|
||||
add_desc_binding(module->stage, decl->set, decl->binding,
|
||||
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
|
||||
break;
|
||||
case QoShaderDeclType_samp:
|
||||
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
|
||||
void
|
||||
PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo* module)
|
||||
{
|
||||
unsigned next_vtx_offset = 0;
|
||||
for (unsigned i = 0; i < module->declarationCount; i++) {
|
||||
const QoShaderDecl *decl = &module->pDeclarations[i];
|
||||
const QoShaderDecl* decl = &module->pDeclarations[i];
|
||||
switch (decl->decl_type) {
|
||||
case QoShaderDeclType_in:
|
||||
if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
|
||||
if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
|
||||
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset);
|
||||
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
|
||||
next_vtx_offset);
|
||||
else if (decl->type[0] == 'u')
|
||||
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset);
|
||||
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT,
|
||||
next_vtx_offset);
|
||||
else if (decl->type[0] == 'i')
|
||||
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset);
|
||||
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT,
|
||||
next_vtx_offset);
|
||||
next_vtx_offset += 16;
|
||||
}
|
||||
break;
|
||||
@ -707,17 +754,17 @@ void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
|
||||
color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
if (next_vtx_offset)
|
||||
add_vertex_binding(0, next_vtx_offset);
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name)
|
||||
void
|
||||
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name)
|
||||
{
|
||||
VkPipelineShaderStageCreateInfo *stage_info;
|
||||
VkPipelineShaderStageCreateInfo* stage_info;
|
||||
if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
|
||||
stage_info = &stages[0];
|
||||
else
|
||||
@ -732,40 +779,50 @@ void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule modu
|
||||
owned_stages |= stage;
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name)
|
||||
void
|
||||
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
|
||||
const char* name)
|
||||
{
|
||||
add_stage(stage, __qoCreateShaderModule(device, &module), name);
|
||||
add_resource_decls(&module);
|
||||
add_io_decls(&module);
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
|
||||
void
|
||||
PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
|
||||
{
|
||||
add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
|
||||
add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
|
||||
void
|
||||
PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
|
||||
{
|
||||
add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
|
||||
add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_cs(VkShaderModule cs)
|
||||
void
|
||||
PipelineBuilder::add_cs(VkShaderModule cs)
|
||||
{
|
||||
add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
|
||||
}
|
||||
|
||||
void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
|
||||
void
|
||||
PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
|
||||
{
|
||||
add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
|
||||
}
|
||||
|
||||
bool PipelineBuilder::is_compute() {
|
||||
bool
|
||||
PipelineBuilder::is_compute()
|
||||
{
|
||||
return gfx_pipeline_info.stageCount == 0;
|
||||
}
|
||||
|
||||
void PipelineBuilder::create_compute_pipeline() {
|
||||
void
|
||||
PipelineBuilder::create_compute_pipeline()
|
||||
{
|
||||
VkComputePipelineCreateInfo create_info;
|
||||
create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
|
||||
create_info.pNext = NULL;
|
||||
@ -775,11 +832,14 @@ void PipelineBuilder::create_compute_pipeline() {
|
||||
create_info.basePipelineHandle = VK_NULL_HANDLE;
|
||||
create_info.basePipelineIndex = 0;
|
||||
|
||||
ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
|
||||
ASSERTED VkResult result =
|
||||
CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
|
||||
assert(result == VK_SUCCESS);
|
||||
}
|
||||
|
||||
void PipelineBuilder::create_graphics_pipeline() {
|
||||
void
|
||||
PipelineBuilder::create_graphics_pipeline()
|
||||
{
|
||||
/* create the create infos */
|
||||
if (!samples)
|
||||
samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
@ -792,7 +852,7 @@ void PipelineBuilder::create_graphics_pipeline() {
|
||||
if (color_outputs[i] == VK_FORMAT_UNDEFINED)
|
||||
continue;
|
||||
|
||||
VkAttachmentDescription *desc = &attachment_descs[num_color_attachments];
|
||||
VkAttachmentDescription* desc = &attachment_descs[num_color_attachments];
|
||||
desc->flags = 0;
|
||||
desc->format = color_outputs[i];
|
||||
desc->samples = samples;
|
||||
@ -803,16 +863,14 @@ void PipelineBuilder::create_graphics_pipeline() {
|
||||
desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkAttachmentReference *ref = &color_attachments[num_color_attachments];
|
||||
VkAttachmentReference* ref = &color_attachments[num_color_attachments];
|
||||
ref->attachment = num_color_attachments;
|
||||
ref->layout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments];
|
||||
VkPipelineColorBlendAttachmentState* blend = &blend_attachment_states[num_color_attachments];
|
||||
blend->blendEnable = false;
|
||||
blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
|
||||
VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT |
|
||||
VK_COLOR_COMPONENT_A_BIT;
|
||||
blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
|
||||
|
||||
num_color_attachments++;
|
||||
}
|
||||
@ -820,7 +878,7 @@ void PipelineBuilder::create_graphics_pipeline() {
|
||||
unsigned num_attachments = num_color_attachments;
|
||||
VkAttachmentReference ds_attachment;
|
||||
if (ds_output != VK_FORMAT_UNDEFINED) {
|
||||
VkAttachmentDescription *desc = &attachment_descs[num_attachments];
|
||||
VkAttachmentDescription* desc = &attachment_descs[num_attachments];
|
||||
desc->flags = 0;
|
||||
desc->format = ds_output;
|
||||
desc->samples = samples;
|
||||
@ -902,8 +960,7 @@ void PipelineBuilder::create_graphics_pipeline() {
|
||||
ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
|
||||
ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
|
||||
ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
|
||||
ds_state.front.compareMask = 0xffffffff,
|
||||
ds_state.front.writeMask = 0;
|
||||
ds_state.front.compareMask = 0xffffffff, ds_state.front.writeMask = 0;
|
||||
ds_state.front.reference = 0;
|
||||
ds_state.back = ds_state.front;
|
||||
|
||||
@ -915,17 +972,15 @@ void PipelineBuilder::create_graphics_pipeline() {
|
||||
color_blend_state.attachmentCount = num_color_attachments;
|
||||
color_blend_state.pAttachments = blend_attachment_states;
|
||||
|
||||
VkDynamicState dynamic_states[9] = {
|
||||
VK_DYNAMIC_STATE_VIEWPORT,
|
||||
VK_DYNAMIC_STATE_SCISSOR,
|
||||
VK_DYNAMIC_STATE_LINE_WIDTH,
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS,
|
||||
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||||
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
|
||||
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_REFERENCE
|
||||
};
|
||||
VkDynamicState dynamic_states[9] = {VK_DYNAMIC_STATE_VIEWPORT,
|
||||
VK_DYNAMIC_STATE_SCISSOR,
|
||||
VK_DYNAMIC_STATE_LINE_WIDTH,
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS,
|
||||
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||||
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
|
||||
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_REFERENCE};
|
||||
|
||||
VkPipelineDynamicStateCreateInfo dynamic_state;
|
||||
dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
|
||||
@ -985,7 +1040,9 @@ void PipelineBuilder::create_graphics_pipeline() {
|
||||
assert(result == VK_SUCCESS);
|
||||
}
|
||||
|
||||
void PipelineBuilder::create_pipeline() {
|
||||
void
|
||||
PipelineBuilder::create_pipeline()
|
||||
{
|
||||
unsigned num_desc_layouts = 0;
|
||||
for (unsigned i = 0; i < 64; i++) {
|
||||
if (!(desc_layouts_used & (1ull << i)))
|
||||
@ -998,7 +1055,8 @@ void PipelineBuilder::create_pipeline() {
|
||||
desc_layout_info.bindingCount = num_desc_bindings[i];
|
||||
desc_layout_info.pBindings = desc_bindings[i];
|
||||
|
||||
ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]);
|
||||
ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL,
|
||||
&desc_layouts[num_desc_layouts]);
|
||||
assert(result == VK_SUCCESS);
|
||||
num_desc_layouts++;
|
||||
}
|
||||
@ -1012,7 +1070,8 @@ void PipelineBuilder::create_pipeline() {
|
||||
pipeline_layout_info.setLayoutCount = num_desc_layouts;
|
||||
pipeline_layout_info.pSetLayouts = desc_layouts;
|
||||
|
||||
ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
|
||||
ASSERTED VkResult result =
|
||||
CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
if (is_compute())
|
||||
@ -1021,7 +1080,8 @@ void PipelineBuilder::create_pipeline() {
|
||||
create_graphics_pipeline();
|
||||
}
|
||||
|
||||
void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding)
|
||||
void
|
||||
PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char* name, bool remove_encoding)
|
||||
{
|
||||
if (!pipeline)
|
||||
create_pipeline();
|
||||
|
@ -24,8 +24,9 @@
|
||||
#ifndef ACO_TEST_HELPERS_H
|
||||
#define ACO_TEST_HELPERS_H
|
||||
|
||||
#include "framework.h"
|
||||
#include "vulkan/vulkan.h"
|
||||
|
||||
#include "framework.h"
|
||||
#include <functional>
|
||||
|
||||
enum QoShaderDeclType {
|
||||
@ -42,10 +43,10 @@ enum QoShaderDeclType {
|
||||
};
|
||||
|
||||
struct QoShaderDecl {
|
||||
const char *name;
|
||||
const char *type;
|
||||
const char* name;
|
||||
const char* type;
|
||||
QoShaderDeclType decl_type;
|
||||
//TODO: array size?
|
||||
// TODO: array size?
|
||||
unsigned location;
|
||||
unsigned component;
|
||||
unsigned binding;
|
||||
@ -53,12 +54,12 @@ struct QoShaderDecl {
|
||||
};
|
||||
|
||||
struct QoShaderModuleCreateInfo {
|
||||
void *pNext;
|
||||
size_t spirvSize;
|
||||
const void *pSpirv;
|
||||
uint32_t declarationCount;
|
||||
const QoShaderDecl *pDeclarations;
|
||||
VkShaderStageFlagBits stage;
|
||||
void* pNext;
|
||||
size_t spirvSize;
|
||||
const void* pSpirv;
|
||||
uint32_t declarationCount;
|
||||
const QoShaderDecl* pDeclarations;
|
||||
VkShaderStageFlagBits stage;
|
||||
};
|
||||
|
||||
extern ac_shader_config config;
|
||||
@ -71,17 +72,17 @@ namespace aco {
|
||||
struct ra_test_policy;
|
||||
}
|
||||
|
||||
void create_program(enum amd_gfx_level gfx_level, aco::Stage stage,
|
||||
unsigned wave_size=64, enum radeon_family family=CHIP_UNKNOWN);
|
||||
bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
|
||||
enum radeon_family family=CHIP_UNKNOWN, const char* subvariant = "",
|
||||
unsigned wave_size=64);
|
||||
void create_program(enum amd_gfx_level gfx_level, aco::Stage stage, unsigned wave_size = 64,
|
||||
enum radeon_family family = CHIP_UNKNOWN);
|
||||
bool setup_cs(const char* input_spec, enum amd_gfx_level gfx_level,
|
||||
enum radeon_family family = CHIP_UNKNOWN, const char* subvariant = "",
|
||||
unsigned wave_size = 64);
|
||||
|
||||
void finish_program(aco::Program *program);
|
||||
void finish_program(aco::Program* program);
|
||||
void finish_validator_test();
|
||||
void finish_opt_test();
|
||||
void finish_setup_reduce_temp_test();
|
||||
void finish_ra_test(aco::ra_test_policy, bool lower=false);
|
||||
void finish_ra_test(aco::ra_test_policy, bool lower = false);
|
||||
void finish_optimizer_postRA_test();
|
||||
void finish_to_hw_instr_test();
|
||||
void finish_waitcnt_test();
|
||||
@ -89,35 +90,35 @@ void finish_insert_nops_test();
|
||||
void finish_form_hard_clause_test();
|
||||
void finish_assembler_test();
|
||||
|
||||
void writeout(unsigned i, aco::Temp tmp=aco::Temp(0, aco::s1));
|
||||
void writeout(unsigned i, aco::Temp tmp = aco::Temp(0, aco::s1));
|
||||
void writeout(unsigned i, aco::Builder::Result res);
|
||||
void writeout(unsigned i, aco::Operand op);
|
||||
void writeout(unsigned i, aco::Operand op0, aco::Operand op1);
|
||||
|
||||
aco::Temp fneg(aco::Temp src, aco::Builder b=bld);
|
||||
aco::Temp fabs(aco::Temp src, aco::Builder b=bld);
|
||||
aco::Temp f2f32(aco::Temp src, aco::Builder b=bld);
|
||||
aco::Temp f2f16(aco::Temp src, aco::Builder b=bld);
|
||||
aco::Temp u2u16(aco::Temp src, aco::Builder b=bld);
|
||||
aco::Temp fadd(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
|
||||
aco::Temp fmul(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
|
||||
aco::Temp fma(aco::Temp src0, aco::Temp src1, aco::Temp src2, aco::Builder b=bld);
|
||||
aco::Temp fsat(aco::Temp src, aco::Builder b=bld);
|
||||
aco::Temp fmin(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
|
||||
aco::Temp fmax(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
|
||||
aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b=bld);
|
||||
aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b=bld);
|
||||
void emit_divergent_if_else(aco::Program* prog, aco::Builder& b, aco::Operand cond, std::function<void()> then,
|
||||
std::function<void()> els);
|
||||
aco::Temp fneg(aco::Temp src, aco::Builder b = bld);
|
||||
aco::Temp fabs(aco::Temp src, aco::Builder b = bld);
|
||||
aco::Temp f2f32(aco::Temp src, aco::Builder b = bld);
|
||||
aco::Temp f2f16(aco::Temp src, aco::Builder b = bld);
|
||||
aco::Temp u2u16(aco::Temp src, aco::Builder b = bld);
|
||||
aco::Temp fadd(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
|
||||
aco::Temp fmul(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
|
||||
aco::Temp fma(aco::Temp src0, aco::Temp src1, aco::Temp src2, aco::Builder b = bld);
|
||||
aco::Temp fsat(aco::Temp src, aco::Builder b = bld);
|
||||
aco::Temp fmin(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
|
||||
aco::Temp fmax(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
|
||||
aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b = bld);
|
||||
aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b = bld);
|
||||
void emit_divergent_if_else(aco::Program* prog, aco::Builder& b, aco::Operand cond,
|
||||
std::function<void()> then, std::function<void()> els);
|
||||
|
||||
/* vulkan helpers */
|
||||
VkDevice get_vk_device(enum amd_gfx_level gfx_level);
|
||||
VkDevice get_vk_device(enum radeon_family family);
|
||||
|
||||
void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
|
||||
const char *name, bool remove_encoding=false);
|
||||
const char* name, bool remove_encoding = false);
|
||||
|
||||
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *info);
|
||||
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* info);
|
||||
|
||||
class PipelineBuilder {
|
||||
public:
|
||||
@ -152,19 +153,21 @@ public:
|
||||
~PipelineBuilder();
|
||||
|
||||
PipelineBuilder(const PipelineBuilder&) = delete;
|
||||
PipelineBuilder& operator = (const PipelineBuilder&) = delete;
|
||||
PipelineBuilder& operator=(const PipelineBuilder&) = delete;
|
||||
|
||||
void add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
|
||||
uint32_t binding, VkDescriptorType type, uint32_t count=1);
|
||||
void add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
|
||||
VkDescriptorType type, uint32_t count = 1);
|
||||
|
||||
void add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate=VK_VERTEX_INPUT_RATE_VERTEX);
|
||||
void add_vertex_binding(uint32_t binding, uint32_t stride,
|
||||
VkVertexInputRate rate = VK_VERTEX_INPUT_RATE_VERTEX);
|
||||
void add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset);
|
||||
|
||||
void add_resource_decls(QoShaderModuleCreateInfo *module);
|
||||
void add_io_decls(QoShaderModuleCreateInfo *module);
|
||||
void add_resource_decls(QoShaderModuleCreateInfo* module);
|
||||
void add_io_decls(QoShaderModuleCreateInfo* module);
|
||||
|
||||
void add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name="main");
|
||||
void add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name="main");
|
||||
void add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name = "main");
|
||||
void add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
|
||||
const char* name = "main");
|
||||
void add_vsfs(VkShaderModule vs, VkShaderModule fs);
|
||||
void add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs);
|
||||
void add_cs(VkShaderModule cs);
|
||||
@ -174,7 +177,8 @@ public:
|
||||
|
||||
void create_pipeline();
|
||||
|
||||
void print_ir(VkShaderStageFlagBits stages, const char *name, bool remove_encoding=false);
|
||||
void print_ir(VkShaderStageFlagBits stages, const char* name, bool remove_encoding = false);
|
||||
|
||||
private:
|
||||
void create_compute_pipeline();
|
||||
void create_graphics_pipeline();
|
||||
|
@ -21,20 +21,22 @@
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "aco_ir.h"
|
||||
|
||||
#include <llvm-c/Target.h>
|
||||
|
||||
#include "framework.h"
|
||||
#include <getopt.h>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <getopt.h>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include <stdarg.h>
|
||||
#include <llvm-c/Target.h>
|
||||
#include "aco_ir.h"
|
||||
#include "framework.h"
|
||||
#include <vector>
|
||||
|
||||
static const char *help_message =
|
||||
static const char* help_message =
|
||||
"Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n"
|
||||
"\n"
|
||||
"Run ACO unit test(s). If TEST is not provided, all tests are run.\n"
|
||||
@ -50,26 +52,27 @@ static const char *help_message =
|
||||
" --no-check Print test output instead of checking it.\n";
|
||||
|
||||
std::map<std::string, TestDef> tests;
|
||||
FILE *output = NULL;
|
||||
FILE* output = NULL;
|
||||
|
||||
static TestDef current_test;
|
||||
static unsigned tests_written = 0;
|
||||
static FILE *checker_stdin = NULL;
|
||||
static char *checker_stdin_data = NULL;
|
||||
static FILE* checker_stdin = NULL;
|
||||
static char* checker_stdin_data = NULL;
|
||||
static size_t checker_stdin_size = 0;
|
||||
|
||||
static char *output_data = NULL;
|
||||
static char* output_data = NULL;
|
||||
static size_t output_size = 0;
|
||||
static size_t output_offset = 0;
|
||||
|
||||
static char current_variant[64] = {0};
|
||||
static std::set<std::string> *variant_filter = NULL;
|
||||
static std::set<std::string>* variant_filter = NULL;
|
||||
|
||||
bool test_failed = false;
|
||||
bool test_skipped = false;
|
||||
static char fail_message[256] = {0};
|
||||
|
||||
void write_test()
|
||||
void
|
||||
write_test()
|
||||
{
|
||||
if (!checker_stdin) {
|
||||
/* not entirely correct, but shouldn't matter */
|
||||
@ -81,18 +84,18 @@ void write_test()
|
||||
if (output_offset == output_size && !test_skipped && !test_failed)
|
||||
return;
|
||||
|
||||
char *data = output_data + output_offset;
|
||||
char* data = output_data + output_offset;
|
||||
uint32_t size = output_size - output_offset;
|
||||
|
||||
fwrite("test", 1, 4, checker_stdin);
|
||||
fwrite(current_test.name, 1, strlen(current_test.name)+1, checker_stdin);
|
||||
fwrite(current_variant, 1, strlen(current_variant)+1, checker_stdin);
|
||||
fwrite(current_test.source_file, 1, strlen(current_test.source_file)+1, checker_stdin);
|
||||
fwrite(current_test.name, 1, strlen(current_test.name) + 1, checker_stdin);
|
||||
fwrite(current_variant, 1, strlen(current_variant) + 1, checker_stdin);
|
||||
fwrite(current_test.source_file, 1, strlen(current_test.source_file) + 1, checker_stdin);
|
||||
if (test_failed || test_skipped) {
|
||||
const char *res = test_failed ? "failed" : "skipped";
|
||||
const char* res = test_failed ? "failed" : "skipped";
|
||||
fwrite("\x01", 1, 1, checker_stdin);
|
||||
fwrite(res, 1, strlen(res)+1, checker_stdin);
|
||||
fwrite(fail_message, 1, strlen(fail_message)+1, checker_stdin);
|
||||
fwrite(res, 1, strlen(res) + 1, checker_stdin);
|
||||
fwrite(fail_message, 1, strlen(fail_message) + 1, checker_stdin);
|
||||
} else {
|
||||
fwrite("\x00", 1, 1, checker_stdin);
|
||||
}
|
||||
@ -103,7 +106,8 @@ void write_test()
|
||||
output_offset += size;
|
||||
}
|
||||
|
||||
bool set_variant(const char *name)
|
||||
bool
|
||||
set_variant(const char* name)
|
||||
{
|
||||
if (variant_filter && !variant_filter->count(name))
|
||||
return false;
|
||||
@ -118,7 +122,8 @@ bool set_variant(const char *name)
|
||||
return true;
|
||||
}
|
||||
|
||||
void fail_test(const char *fmt, ...)
|
||||
void
|
||||
fail_test(const char* fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
@ -129,7 +134,8 @@ void fail_test(const char *fmt, ...)
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void skip_test(const char *fmt, ...)
|
||||
void
|
||||
skip_test(const char* fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
@ -140,7 +146,8 @@ void skip_test(const char *fmt, ...)
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void run_test(TestDef def)
|
||||
void
|
||||
run_test(TestDef def)
|
||||
{
|
||||
current_test = def;
|
||||
output_data = NULL;
|
||||
@ -163,7 +170,8 @@ void run_test(TestDef def)
|
||||
free(output_data);
|
||||
}
|
||||
|
||||
int check_output(char **argv)
|
||||
int
|
||||
check_output(char** argv)
|
||||
{
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
@ -183,7 +191,8 @@ int check_output(char **argv)
|
||||
close(stdin_pipe[0]);
|
||||
close(stdin_pipe[1]);
|
||||
|
||||
execlp(ACO_TEST_PYTHON_BIN, ACO_TEST_PYTHON_BIN, ACO_TEST_SOURCE_DIR "/check_output.py", NULL);
|
||||
execlp(ACO_TEST_PYTHON_BIN, ACO_TEST_PYTHON_BIN, ACO_TEST_SOURCE_DIR "/check_output.py",
|
||||
NULL);
|
||||
fprintf(stderr, "%s: execlp() failed: %s\n", argv[0], strerror(errno));
|
||||
return 99;
|
||||
} else {
|
||||
@ -197,7 +206,8 @@ int check_output(char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
bool match_test(std::string name, std::string pattern)
|
||||
bool
|
||||
match_test(std::string name, std::string pattern)
|
||||
{
|
||||
if (name.length() < pattern.length())
|
||||
return false;
|
||||
@ -206,33 +216,25 @@ bool match_test(std::string name, std::string pattern)
|
||||
return name == pattern;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
int print_help = 0;
|
||||
int do_list = 0;
|
||||
int do_check = 1;
|
||||
const struct option opts[] = {
|
||||
{ "help", no_argument, &print_help, 1 },
|
||||
{ "list", no_argument, &do_list, 1 },
|
||||
{ "no-check", no_argument, &do_check, 0 },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
const struct option opts[] = {{"help", no_argument, &print_help, 1},
|
||||
{"list", no_argument, &do_list, 1},
|
||||
{"no-check", no_argument, &do_check, 0},
|
||||
{NULL, 0, NULL, 0}};
|
||||
|
||||
int c;
|
||||
while ((c = getopt_long(argc, argv, "hl", opts, NULL)) != -1) {
|
||||
switch (c) {
|
||||
case 'h':
|
||||
print_help = 1;
|
||||
break;
|
||||
case 'l':
|
||||
do_list = 1;
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
case 'h': print_help = 1; break;
|
||||
case 'l': do_list = 1; break;
|
||||
case 0: break;
|
||||
case '?':
|
||||
default:
|
||||
fprintf(stderr, "%s: Invalid argument\n", argv[0]);
|
||||
return 99;
|
||||
default: fprintf(stderr, "%s: Invalid argument\n", argv[0]); return 99;
|
||||
}
|
||||
}
|
||||
|
||||
@ -262,10 +264,10 @@ int main(int argc, char **argv)
|
||||
if (do_check)
|
||||
checker_stdin = open_memstream(&checker_stdin_data, &checker_stdin_size);
|
||||
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTarget();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUDisassembler();
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTarget();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUDisassembler();
|
||||
|
||||
aco::init();
|
||||
|
||||
|
@ -21,11 +21,11 @@
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <llvm/Config/llvm-config.h>
|
||||
|
||||
#include "helpers.h"
|
||||
#include "sid.h"
|
||||
|
||||
#include <llvm/Config/llvm-config.h>
|
||||
|
||||
using namespace aco;
|
||||
|
||||
BEGIN_TEST(assembler.s_memtime)
|
||||
@ -178,7 +178,7 @@ BEGIN_TEST(assembler.long_jump.conditional_backwards)
|
||||
finish_assembler_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.long_jump.3f)
|
||||
BEGIN_TEST(assembler.long_jump .3f)
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
@ -354,25 +354,31 @@ BEGIN_TEST(assembler.vopc_sdwa)
|
||||
|
||||
//~gfx9>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 86860080
|
||||
//~gfx10>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 86860080
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(), Operand::zero());
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(),
|
||||
Operand::zero());
|
||||
|
||||
//~gfx9! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686ac80
|
||||
//~gfx10! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686ac80
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(), Operand::zero());
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(),
|
||||
Operand::zero());
|
||||
|
||||
//~gfx9! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686fe80
|
||||
//~gfx10! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686fe80
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero());
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(),
|
||||
Operand::zero());
|
||||
|
||||
if (i == GFX10) {
|
||||
//~gfx10! v_cmpx_lt_u32_sdwa 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7da300f9 86860080
|
||||
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero());
|
||||
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(),
|
||||
Operand::zero());
|
||||
} else {
|
||||
//~gfx9! v_cmpx_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 86860080
|
||||
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2), Operand::zero(), Operand::zero());
|
||||
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2),
|
||||
Operand::zero(), Operand::zero());
|
||||
|
||||
//~gfx9! v_cmpx_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 8686ac80
|
||||
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2), Definition(exec, s2), Operand::zero(), Operand::zero());
|
||||
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2),
|
||||
Definition(exec, s2), Operand::zero(), Operand::zero());
|
||||
}
|
||||
|
||||
finish_assembler_test();
|
||||
@ -452,48 +458,70 @@ BEGIN_TEST(assembler.gfx11.mubuf)
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true);
|
||||
|
||||
//! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen =
|
||||
true;
|
||||
|
||||
//! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen =
|
||||
true;
|
||||
|
||||
//! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
|
||||
|
||||
/* Various flags */
|
||||
//! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().glc = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.glc = true;
|
||||
|
||||
//! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().dlc = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.dlc = true;
|
||||
|
||||
//! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().slc = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.slc = true;
|
||||
|
||||
//; if llvm_ver >= 16:
|
||||
//; insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80')
|
||||
//; else:
|
||||
//; insert_pattern('buffer_load_b32 v42, off, s[32:35], 0 tfe ; e0500000 80282a80')
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().tfe = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.tfe = true;
|
||||
|
||||
/* LDS */
|
||||
//! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
/* Stores */
|
||||
//! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80
|
||||
@ -532,42 +560,62 @@ BEGIN_TEST(assembler.gfx11.mtbuf)
|
||||
|
||||
/* Addressing */
|
||||
//>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0, false);
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0,
|
||||
false);
|
||||
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt, nfmt, 0, false);
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt,
|
||||
nfmt, 0, false);
|
||||
|
||||
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true);
|
||||
|
||||
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)->mtbuf().idxen = true;
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.idxen = true;
|
||||
|
||||
//! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)->mtbuf().idxen = true;
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)
|
||||
->mtbuf()
|
||||
.idxen = true;
|
||||
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84, false);
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84,
|
||||
false);
|
||||
|
||||
/* Various flags */
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().glc = true;
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.glc = true;
|
||||
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().dlc = true;
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.dlc = true;
|
||||
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().slc = true;
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.slc = true;
|
||||
|
||||
//; if llvm_ver >= 16:
|
||||
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
|
||||
//; else:
|
||||
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80')
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().tfe = true;
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.tfe = true;
|
||||
|
||||
/* Stores */
|
||||
//! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0, false);
|
||||
bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0,
|
||||
false);
|
||||
|
||||
//! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
|
||||
bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true);
|
||||
@ -604,7 +652,8 @@ BEGIN_TEST(assembler.gfx11.mimg)
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1);
|
||||
|
||||
//! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim = ac_image_2d;
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim =
|
||||
ac_image_2d;
|
||||
|
||||
//! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1;
|
||||
@ -636,14 +685,20 @@ BEGIN_TEST(assembler.gfx11.mimg)
|
||||
|
||||
/* NSA */
|
||||
//! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1, Operand(bld.tmp(v1), PhysReg(256 + 40)))->mimg().dim = ac_image_2d;
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1,
|
||||
Operand(bld.tmp(v1), PhysReg(256 + 40)))
|
||||
->mimg()
|
||||
.dim = ac_image_2d;
|
||||
|
||||
/* Stores */
|
||||
//! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
|
||||
bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1);
|
||||
|
||||
//! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
|
||||
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4), op_v1, op_v2)->mimg().dim = ac_image_2d;
|
||||
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
|
||||
op_v1, op_v2)
|
||||
->mimg()
|
||||
.dim = ac_image_2d;
|
||||
|
||||
finish_assembler_test();
|
||||
END_TEST
|
||||
@ -761,13 +816,19 @@ BEGIN_TEST(assembler.gfx11.vinterp)
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0);
|
||||
|
||||
//! v_interp_p10_f32 v42, -v10, v20, v30 ; cd00002a 247a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[0] = true;
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
|
||||
->vinterp_inreg()
|
||||
.neg[0] = true;
|
||||
|
||||
//! v_interp_p10_f32 v42, v10, -v20, v30 ; cd00002a 447a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[1] = true;
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
|
||||
->vinterp_inreg()
|
||||
.neg[1] = true;
|
||||
|
||||
//! v_interp_p10_f32 v42, v10, v20, -v30 ; cd00002a 847a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[2] = true;
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
|
||||
->vinterp_inreg()
|
||||
.neg[2] = true;
|
||||
|
||||
//! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0] ; cd02082a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1);
|
||||
@ -782,7 +843,9 @@ BEGIN_TEST(assembler.gfx11.vinterp)
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8);
|
||||
|
||||
//! v_interp_p10_f32 v42, v10, v20, v30 clamp ; cd00802a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().clamp = true;
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
|
||||
->vinterp_inreg()
|
||||
.clamp = true;
|
||||
|
||||
finish_assembler_test();
|
||||
END_TEST
|
||||
@ -899,16 +962,22 @@ BEGIN_TEST(assembler.gfx11.vop12c_v128)
|
||||
bld.vop1_dpp(aco_opcode::v_rcp_f16, dst_v128, op_v1, dpp_row_rr(1))->dpp16().abs[0] = true;
|
||||
|
||||
//! v_mul_f16_e64_dpp v128, -v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350080 200204fa ff1d2101
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] = true;
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] =
|
||||
true;
|
||||
|
||||
//! v_mul_f16_e64_dpp v128, |v1|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350180 000204fa ff2d2101
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] = true;
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] =
|
||||
true;
|
||||
|
||||
//! v_cmp_eq_f16_e64_dpp vcc, -v129, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402006a 200204fa ff1d2181
|
||||
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))->dpp16().neg[0] = true;
|
||||
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
|
||||
->dpp16()
|
||||
.neg[0] = true;
|
||||
|
||||
//! v_cmp_eq_f16_e64_dpp vcc, |v129|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402016a 000204fa ff2d2181
|
||||
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))->dpp16().abs[0] = true;
|
||||
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
|
||||
->dpp16()
|
||||
.abs[0] = true;
|
||||
|
||||
finish_assembler_test();
|
||||
END_TEST
|
||||
|
@ -633,9 +633,10 @@ BEGIN_TEST(d3d11_derivs.nsa_max)
|
||||
|
||||
//~gfx11! v4: %_:v[0-3] = image_sample_c_b_o s8: undef, s4: undef, v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4-5] 2darray da
|
||||
|
||||
Instruction *instr = bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4),
|
||||
Operand(s8), Operand(s4), Operand(v1), Operand(reg_v0, v6.as_linear()),
|
||||
Operand(reg_v6, v1), Operand(reg_v7, v1), Operand(reg_v8, v1));
|
||||
Instruction* instr =
|
||||
bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4), Operand(s8), Operand(s4),
|
||||
Operand(v1), Operand(reg_v0, v6.as_linear()), Operand(reg_v6, v1),
|
||||
Operand(reg_v7, v1), Operand(reg_v8, v1));
|
||||
instr->mimg().dim = ac_image_2darray;
|
||||
instr->mimg().da = true;
|
||||
instr->mimg().strict_wqm = true;
|
||||
|
@ -26,7 +26,8 @@
|
||||
|
||||
using namespace aco;
|
||||
|
||||
static void create_mubuf(Temp desc=Temp(0, s8))
|
||||
static void
|
||||
create_mubuf(Temp desc = Temp(0, s8))
|
||||
{
|
||||
Operand desc_op(desc);
|
||||
desc_op.setFixed(PhysReg(0));
|
||||
@ -34,13 +35,15 @@ static void create_mubuf(Temp desc=Temp(0, s8))
|
||||
Operand(PhysReg(256), v1), Operand::zero(), 0, false);
|
||||
}
|
||||
|
||||
/* Test helper: emits one buffer_store_dword MUBUF instruction through the
 * externally declared builder `bld`. Every operand is hard-coded: PhysReg(0) as
 * s4, PhysReg(256) as v1 (passed twice) and a zero operand; the two trailing
 * arguments are 0 and false.
 * NOTE(review): this span is a rendered diff - the one-line and the split
 * signature are the before/after of a formatting-only change, not two
 * declarations. */
static void create_mubuf_store()
|
||||
static void
|
||||
create_mubuf_store()
|
||||
{
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
|
||||
Operand(PhysReg(256), v1), Operand::zero(), 0, false);
|
||||
}
|
||||
|
||||
static void create_mtbuf(Temp desc=Temp(0, s8))
|
||||
static void
|
||||
create_mtbuf(Temp desc = Temp(0, s8))
|
||||
{
|
||||
Operand desc_op(desc);
|
||||
desc_op.setFixed(PhysReg(0));
|
||||
@ -49,22 +52,25 @@ static void create_mtbuf(Temp desc=Temp(0, s8))
|
||||
V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false);
|
||||
}
|
||||
|
||||
/* Test helper: emits one flat_load_dword FLAT instruction through the externally
 * declared builder `bld`, defining PhysReg(256) as v1 and reading PhysReg(256)
 * as v2 plus an Operand(s2).
 * NOTE(review): this span is a rendered diff - the signature and the bld.flat
 * call each appear twice (before/after a formatting-only change with identical
 * tokens); the function contains a single call. */
static void create_flat()
|
||||
static void
|
||||
create_flat()
|
||||
{
|
||||
bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1),
|
||||
Operand(PhysReg(256), v2), Operand(s2));
|
||||
bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1), Operand(PhysReg(256), v2),
|
||||
Operand(s2));
|
||||
}
|
||||
|
||||
/* Test helper: emits one global_load_dword instruction through the externally
 * declared builder `bld`, defining PhysReg(256) as v1 and reading PhysReg(256)
 * as v2 plus an Operand(s2).
 * NOTE(review): this span is a rendered diff - the one-line and the split
 * signature are the before/after of a formatting-only change. */
static void create_global()
|
||||
static void
|
||||
create_global()
|
||||
{
|
||||
bld.global(aco_opcode::global_load_dword, Definition(PhysReg(256), v1),
|
||||
Operand(PhysReg(256), v2), Operand(s2));
|
||||
}
|
||||
|
||||
static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
|
||||
static void
|
||||
create_mimg(bool nsa, Temp desc = Temp(0, s8))
|
||||
{
|
||||
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(
|
||||
aco_opcode::image_sample, Format::MIMG, 5, 1)};
|
||||
aco_ptr<MIMG_instruction> mimg{
|
||||
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 5, 1)};
|
||||
mimg->definitions[0] = Definition(PhysReg(256), v1);
|
||||
mimg->operands[0] = Operand(desc);
|
||||
mimg->operands[0].setFixed(PhysReg(0));
|
||||
@ -78,13 +84,15 @@ static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
|
||||
bld.insert(std::move(mimg));
|
||||
}
|
||||
|
||||
/* Test helper: emits one s_load_dword SMEM instruction through the externally
 * declared builder `bld`, defining PhysReg(0) as s1 and reading PhysReg(0) as
 * s2 plus a zero operand.
 * NOTE(review): this span is a rendered diff - the one-line and the split
 * signature are the before/after of a formatting-only change. */
static void create_smem()
|
||||
static void
|
||||
create_smem()
|
||||
{
|
||||
bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1), Operand(PhysReg(0), s2),
|
||||
Operand::zero());
|
||||
}
|
||||
|
||||
static void create_smem_buffer(Temp desc=Temp(0, s4))
|
||||
static void
|
||||
create_smem_buffer(Temp desc = Temp(0, s4))
|
||||
{
|
||||
Operand desc_op(desc);
|
||||
desc_op.setFixed(PhysReg(0));
|
||||
|
@ -25,22 +25,25 @@
|
||||
|
||||
using namespace aco;
|
||||
|
||||
/* Test helper: emits one buffer_load_dword MUBUF instruction through the
 * externally declared builder `bld`.
 *   offset - forwarded as the second-to-last argument of bld.mubuf.
 *   dst    - register defined by the load as v1 (default PhysReg(256)).
 *   vaddr  - register passed as the v1 operand (default PhysReg(256)).
 * The other operands are fixed: PhysReg(0) as s4 and a zero operand; the final
 * argument is true.
 * NOTE(review): this span is a rendered diff - the one-line and the split
 * signature are the before/after of a formatting-only change. */
void create_mubuf(unsigned offset, PhysReg dst=PhysReg(256), PhysReg vaddr=PhysReg(256))
|
||||
void
|
||||
create_mubuf(unsigned offset, PhysReg dst = PhysReg(256), PhysReg vaddr = PhysReg(256))
|
||||
{
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst, v1), Operand(PhysReg(0), s4),
|
||||
Operand(vaddr, v1), Operand::zero(), offset, true);
|
||||
}
|
||||
|
||||
/* Test helper: emits one buffer_store_dword MUBUF instruction through the
 * externally declared builder `bld`.
 *   src - register used for both v1 operands (default PhysReg(256)).
 * The remaining operands are fixed: PhysReg(0) as s4 and a zero operand; the
 * two trailing arguments are 0 and true.
 * NOTE(review): this span is a rendered diff - the signature and the bld.mubuf
 * call each appear twice (before/after a formatting-only change with identical
 * tokens); the function contains a single call. */
void create_mubuf_store(PhysReg src=PhysReg(256))
|
||||
void
|
||||
create_mubuf_store(PhysReg src = PhysReg(256))
|
||||
{
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4),
|
||||
Operand(src, v1), Operand::zero(), Operand(src, v1), 0, true);
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(src, v1),
|
||||
Operand::zero(), Operand(src, v1), 0, true);
|
||||
}
|
||||
|
||||
void create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
|
||||
void
|
||||
create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
|
||||
{
|
||||
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(
|
||||
aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
|
||||
aco_ptr<MIMG_instruction> mimg{
|
||||
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
|
||||
mimg->definitions[0] = Definition(PhysReg(256), v1);
|
||||
mimg->operands[0] = Operand(PhysReg(0), s8);
|
||||
mimg->operands[1] = Operand(PhysReg(0), s4);
|
||||
@ -216,7 +219,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
|
||||
//! s_waitcnt_depctr vm_vsrc(0)
|
||||
//! s1: %0:m0 = s_mov_b32 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
|
||||
Operand(m0, s1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
|
||||
|
||||
//! p_unit_test 5
|
||||
@ -224,7 +228,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
|
||||
//! s_waitcnt_depctr vm_vsrc(0)
|
||||
//! s2: %0:exec = s_mov_b64 -1
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
|
||||
Operand(m0, s1));
|
||||
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(-1));
|
||||
|
||||
/* no hazard: LDS */
|
||||
@ -232,7 +237,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
|
||||
//! v1: %0:v[0] = ds_read_b32 %0:v[0], %0:m0
|
||||
//! s1: %0:s[0] = s_mov_b32 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
|
||||
Operand(m0, s1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
|
||||
|
||||
/* no hazard: LDS with VALU in-between */
|
||||
@ -241,7 +247,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
|
||||
//! v_nop
|
||||
//! s1: %0:m0 = s_mov_b32 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
|
||||
Operand(m0, s1));
|
||||
bld.vop1(aco_opcode::v_nop);
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
|
||||
|
||||
@ -269,7 +276,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
|
||||
//! s_waitcnt lgkmcnt(0)
|
||||
//! s1: %0:m0 = s_mov_b32 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
|
||||
Operand(m0, s1));
|
||||
bld.sopp(aco_opcode::s_waitcnt, -1, 0xc07f);
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
|
||||
|
||||
@ -300,7 +308,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
|
||||
//! s_waitcnt_depctr vm_vsrc(0)
|
||||
//! s1: %0:m0 = s_mov_b32 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
|
||||
Operand(m0, s1));
|
||||
bld.sopp(aco_opcode::s_waitcnt, -1, 0x3f70);
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
|
||||
|
||||
@ -932,8 +941,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
|
||||
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
|
||||
Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
|
||||
|
||||
@ -944,8 +953,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
|
||||
//! s1: %0:s[1] = s_mov_b32 0
|
||||
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
|
||||
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
|
||||
Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(257), v1), Operand(PhysReg(1), s1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
|
||||
@ -957,8 +966,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
|
||||
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
|
||||
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
|
||||
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
|
||||
Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
|
||||
@ -969,8 +978,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
|
||||
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
|
||||
Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
|
||||
bld.sopp(aco_opcode::s_waitcnt_depctr, -1, 0xfffe);
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
|
||||
@ -982,8 +991,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
|
||||
Operand(PhysReg(2), s1), Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand(PhysReg(2), s1),
|
||||
Operand::zero(), Operand(PhysReg(0), s2));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
|
||||
|
||||
|
@ -36,15 +36,14 @@ BEGIN_TEST(insert_waitcnt.ds_ordered_count)
|
||||
Operand chan_counter(PhysReg(260), v1);
|
||||
Operand m(m0, s1);
|
||||
|
||||
Instruction *ds_instr;
|
||||
Instruction* ds_instr;
|
||||
//>> ds_ordered_count %0:v[0], %0:v[3], %0:m0 offset0:3072 gds storage:gds semantics:volatile
|
||||
//! s_waitcnt lgkmcnt(0)
|
||||
ds_instr = bld.ds(aco_opcode::ds_ordered_count, def0, gds_base, m, 3072u, 0u, true);
|
||||
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
|
||||
|
||||
//! ds_add_rtn_u32 %0:v[1], %0:v[3], %0:v[4], %0:m0 gds storage:gds semantics:volatile,atomic,rmw
|
||||
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1,
|
||||
gds_base, chan_counter, m, 0u, 0u, true);
|
||||
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1, gds_base, chan_counter, m, 0u, 0u, true);
|
||||
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
|
||||
|
||||
//! s_waitcnt lgkmcnt(0)
|
||||
|
@ -21,19 +21,18 @@
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <llvm/Config/llvm-config.h>
|
||||
|
||||
#include "helpers.h"
|
||||
#include "test_isel-spirv.h"
|
||||
|
||||
#include <llvm/Config/llvm-config.h>
|
||||
|
||||
using namespace aco;
|
||||
|
||||
BEGIN_TEST(isel.interp.simple)
|
||||
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
||||
layout(location = 0) in vec4 in_color;
|
||||
layout(location = 0) out vec4 out_color;
|
||||
void main() {
|
||||
out_color = in_color;
|
||||
void main() { out_color = in_color;
|
||||
}
|
||||
);
|
||||
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
||||
|
@ -61,7 +61,8 @@ BEGIN_TEST(optimize.neg)
|
||||
|
||||
//! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1
|
||||
//! p_unit_test 5, %res5
|
||||
writeout(5, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
|
||||
writeout(5,
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
|
||||
|
||||
//! v1: %res6 = v_subrev_f32 %a, %b
|
||||
//! p_unit_test 6, %res6
|
||||
@ -264,7 +265,8 @@ BEGIN_TEST(optimize.output_modifiers)
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
||||
/* Test helper: emits a v_subbrev_co_u32 via bld.vop2_e64 with two definitions
 * (bld.def(v1) and bld.def(bld.lm)) and op0, op1, op2 as operands, and returns
 * the builder result as a Temp.
 * NOTE(review): this span is a rendered diff - the one-line and the split
 * signature are the before/after of a formatting-only change. */
Temp create_subbrev_co(Operand op0, Operand op1, Operand op2)
|
||||
Temp
|
||||
create_subbrev_co(Operand op0, Operand op1, Operand op2)
|
||||
{
|
||||
return bld.vop2_e64(aco_opcode::v_subbrev_co_u32, bld.def(v1), bld.def(bld.lm), op0, op1, op2);
|
||||
}
|
||||
@ -438,7 +440,7 @@ BEGIN_TEST(optimize.bcnt)
|
||||
END_TEST
|
||||
|
||||
/* Parameter bundle for one clamp test case: a name string, three opcodes
 * (min, max, med3) and two operands (lb, ub - presumably the lower and upper
 * clamp bounds; confirm at the use site, which is not visible here).
 * NOTE(review): this span is a rendered diff - the two `name` lines are the
 * before/after of a pointer-alignment change; the struct has one `name` field. */
struct clamp_config {
|
||||
const char *name;
|
||||
const char* name;
|
||||
aco_opcode min, max, med3;
|
||||
Operand lb, ub;
|
||||
};
|
||||
@ -863,7 +865,7 @@ enum denorm_op {
|
||||
denorm_fnegabs = 3,
|
||||
};
|
||||
|
||||
static const char *denorm_op_names[] = {
|
||||
static const char* denorm_op_names[] = {
|
||||
"mul1",
|
||||
"fneg",
|
||||
"fabs",
|
||||
@ -877,31 +879,27 @@ struct denorm_config {
|
||||
aco_opcode dest;
|
||||
};
|
||||
|
||||
/* Returns a short label for an opcode: "cndmask" for v_cndmask_b32, "min" for
 * v_min_f32, "rcp" for v_rcp_f32 and "none" for every other opcode.
 * NOTE(review): this span is a rendered diff - the signature and each case
 * appear twice (multi-line before, single-line after a formatting-only
 * change); the function has one switch with four arms. */
static const char *srcdest_op_name(aco_opcode op)
|
||||
static const char*
|
||||
srcdest_op_name(aco_opcode op)
|
||||
{
|
||||
switch (op) {
|
||||
case aco_opcode::v_cndmask_b32:
|
||||
return "cndmask";
|
||||
case aco_opcode::v_min_f32:
|
||||
return "min";
|
||||
case aco_opcode::v_rcp_f32:
|
||||
return "rcp";
|
||||
default:
|
||||
return "none";
|
||||
case aco_opcode::v_cndmask_b32: return "cndmask";
|
||||
case aco_opcode::v_min_f32: return "min";
|
||||
case aco_opcode::v_rcp_f32: return "rcp";
|
||||
default: return "none";
|
||||
}
|
||||
}
|
||||
|
||||
/* Wraps `val` in the instruction selected by `op` and returns the result:
 *   v_cndmask_b32 - bld.vop2 with a zero operand, val and inputs[1];
 *   v_min_f32     - bld.vop2 with a zero operand and val;
 *   v_rcp_f32     - bld.vop1 on val;
 *   anything else - val is returned unchanged, nothing is emitted.
 * NOTE(review): this span is a rendered diff - the signature and the
 * v_rcp_f32/default arms appear twice (before/after a formatting-only change);
 * the function has one switch with four arms. */
static Temp emit_denorm_srcdest(aco_opcode op, Temp val)
|
||||
static Temp
|
||||
emit_denorm_srcdest(aco_opcode op, Temp val)
|
||||
{
|
||||
switch (op) {
|
||||
case aco_opcode::v_cndmask_b32:
|
||||
return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]);
|
||||
case aco_opcode::v_min_f32:
|
||||
return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), val);
|
||||
case aco_opcode::v_rcp_f32:
|
||||
return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val);
|
||||
default:
|
||||
return val;
|
||||
case aco_opcode::v_rcp_f32: return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val);
|
||||
default: return val;
|
||||
}
|
||||
}
|
||||
|
||||
@ -917,7 +915,8 @@ BEGIN_TEST(optimize.denorm_propagation)
|
||||
configs.push_back({flush, op, aco_opcode::num_opcodes, dest});
|
||||
}
|
||||
|
||||
for (aco_opcode src : {aco_opcode::v_cndmask_b32, aco_opcode::v_min_f32, aco_opcode::v_rcp_f32}) {
|
||||
for (aco_opcode src :
|
||||
{aco_opcode::v_cndmask_b32, aco_opcode::v_min_f32, aco_opcode::v_rcp_f32}) {
|
||||
for (denorm_op op : {denorm_mul1, denorm_fneg, denorm_fabs, denorm_fnegabs})
|
||||
configs.push_back({flush, op, src, aco_opcode::num_opcodes});
|
||||
}
|
||||
@ -925,18 +924,18 @@ BEGIN_TEST(optimize.denorm_propagation)
|
||||
|
||||
for (denorm_config cfg : configs) {
|
||||
char subvariant[128];
|
||||
sprintf(subvariant, "_%s_%s_%s_%s",
|
||||
cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
|
||||
sprintf(subvariant, "_%s_%s_%s_%s", cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
|
||||
denorm_op_names[(int)cfg.op], srcdest_op_name(cfg.dest));
|
||||
if (!setup_cs("v1 s2", (amd_gfx_level)i, CHIP_UNKNOWN, subvariant))
|
||||
continue;
|
||||
|
||||
bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.src == aco_opcode::v_min_f32) ||
|
||||
cfg.dest == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.dest == aco_opcode::v_min_f32) ||
|
||||
!cfg.flush;
|
||||
bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 ||
|
||||
(i >= GFX9 && cfg.src == aco_opcode::v_min_f32) ||
|
||||
cfg.dest == aco_opcode::v_rcp_f32 ||
|
||||
(i >= GFX9 && cfg.dest == aco_opcode::v_min_f32) || !cfg.flush;
|
||||
|
||||
fprintf(output, "src, dest, op: %s %s %s\n",
|
||||
srcdest_op_name(cfg.src), srcdest_op_name(cfg.dest), denorm_op_names[(int)cfg.op]);
|
||||
fprintf(output, "src, dest, op: %s %s %s\n", srcdest_op_name(cfg.src),
|
||||
srcdest_op_name(cfg.dest), denorm_op_names[(int)cfg.op]);
|
||||
fprintf(output, "can_propagate: %u\n", can_propagate);
|
||||
//! src, dest, op: $src $dest $op
|
||||
//! can_propagate: #can_propagate
|
||||
@ -976,15 +975,9 @@ BEGIN_TEST(optimize.denorm_propagation)
|
||||
case denorm_mul1:
|
||||
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), val);
|
||||
break;
|
||||
case denorm_fneg:
|
||||
val = fneg(val);
|
||||
break;
|
||||
case denorm_fabs:
|
||||
val = fabs(val);
|
||||
break;
|
||||
case denorm_fnegabs:
|
||||
val = fneg(fabs(val));
|
||||
break;
|
||||
case denorm_fneg: val = fneg(val); break;
|
||||
case denorm_fabs: val = fabs(val); break;
|
||||
case denorm_fnegabs: val = fneg(fabs(val)); break;
|
||||
}
|
||||
val = emit_denorm_srcdest(cfg.dest, val);
|
||||
writeout(
|
||||
@ -1123,13 +1116,15 @@ BEGIN_TEST(optimize.dpp_prop)
|
||||
//! v1: %res2 = v_mul_f32 0x12345678, %a
|
||||
//! p_unit_test 2, %res2
|
||||
Temp literal1 = bld.copy(bld.def(v1), Operand::c32(0x12345678u));
|
||||
writeout(2, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
|
||||
writeout(2,
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
|
||||
|
||||
//! v1: %literal2 = p_parallelcopy 0x12345679
|
||||
//! v1: %res3 = v_mul_f32 %a, %literal row_shl:1 bound_ctrl:1
|
||||
//! p_unit_test 3, %res3
|
||||
Temp literal2 = bld.copy(bld.def(v1), Operand::c32(0x12345679u));
|
||||
writeout(3, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
|
||||
writeout(3,
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
|
||||
|
||||
//! v1: %b_v = p_parallelcopy %b
|
||||
//! v1: %res4 = v_mul_f32 %b, %a
|
||||
@ -1171,7 +1166,9 @@ BEGIN_TEST(optimize.casts)
|
||||
//! v1: %res2_tmp = v_mul_f32 -1.0, %a16
|
||||
//! v2b: %res2 = v_mul_f16 %res2_tmp, %a16
|
||||
//! p_unit_test 2, %res2
|
||||
writeout(2, fmul(u2u16(bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), bld.as_uniform(a16))), a16));
|
||||
writeout(2, fmul(u2u16(bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1),
|
||||
Operand::c32(0xbf800000u), bld.as_uniform(a16))),
|
||||
a16));
|
||||
|
||||
//! v1: %res3_tmp = v_mul_f32 %a, %a
|
||||
//! v2b: %res3 = v_add_f16 %res3_tmp, 0 clamp
|
||||
@ -1191,7 +1188,8 @@ BEGIN_TEST(optimize.casts)
|
||||
//! v2b: %res6_tmp = v_mul_f16 %a16, %a16
|
||||
//! v1: %res6 = v_mul_f32 2.0, %res6_tmp
|
||||
//! p_unit_test 6, %res6
|
||||
writeout(6, fmul(bld.as_uniform(fmul(a16, a16)), bld.copy(bld.def(v1), Operand::c32(0x40000000))));
|
||||
writeout(6,
|
||||
fmul(bld.as_uniform(fmul(a16, a16)), bld.copy(bld.def(v1), Operand::c32(0x40000000))));
|
||||
|
||||
//! v1: %res7_tmp = v_mul_f32 %a, %a
|
||||
//! v2b: %res7 = v_add_f16 %res7_tmp, %a16
|
||||
@ -1211,7 +1209,8 @@ BEGIN_TEST(optimize.casts)
|
||||
//! v2b: %res10_tmp = v_mul_f16 %a16, %a16
|
||||
//! v1: %res10 = v_mul_f32 -1.0, %res10_tmp
|
||||
//! p_unit_test 10, %res10
|
||||
writeout(10, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), bld.as_uniform(fmul(a16, a16))));
|
||||
writeout(10, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u),
|
||||
bld.as_uniform(fmul(a16, a16))));
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
@ -1549,7 +1548,8 @@ BEGIN_TEST(optimize.mad_mix.fma.basic)
|
||||
//! v1: %res2_mul = v_fma_mix_f32 lo(%a16), %b, -0
|
||||
//! v1: %res2 = v_add_f32 %res2_mul, %c *2
|
||||
//! p_unit_test 2, %res2
|
||||
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000), fadd(fmul(f2f32(a16), b), c)));
|
||||
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000),
|
||||
fadd(fmul(f2f32(a16), b), c)));
|
||||
|
||||
/* neg/abs modifiers */
|
||||
//! v1: %res3 = v_fma_mix_f32 -lo(%a16), %b, |lo(%c16)|
|
||||
@ -1730,7 +1730,8 @@ BEGIN_TEST(optimize.mad_mix.cast)
|
||||
}
|
||||
END_TEST
|
||||
|
||||
static void vop3p_constant(unsigned *idx, aco_opcode op, const char *swizzle, uint32_t val)
|
||||
static void
|
||||
vop3p_constant(unsigned* idx, aco_opcode op, const char* swizzle, uint32_t val)
|
||||
{
|
||||
uint32_t halves[2] = {val & 0xffff, val >> 16};
|
||||
uint32_t expected = halves[swizzle[0] - 'x'] | (halves[swizzle[1] - 'x'] << 16);
|
||||
@ -1744,7 +1745,7 @@ static void vop3p_constant(unsigned *idx, aco_opcode op, const char *swizzle, ui
|
||||
|
||||
BEGIN_TEST(optimize.vop3p_constants)
|
||||
for (aco_opcode op : {aco_opcode::v_pk_add_f16, aco_opcode::v_pk_add_u16}) {
|
||||
for (const char *swizzle : {"xx", "yy", "xy", "yx"}) {
|
||||
for (const char* swizzle : {"xx", "yy", "xy", "yx"}) {
|
||||
char variant[16];
|
||||
strcpy(variant, op == aco_opcode::v_pk_add_f16 ? "_f16" : "_u16");
|
||||
strcat(variant, "_");
|
||||
|
@ -27,310 +27,324 @@
|
||||
using namespace aco;
|
||||
|
||||
BEGIN_TEST(optimizer_postRA.vcmp)
|
||||
PhysReg reg_v0(256);
|
||||
PhysReg reg_s0(0);
|
||||
PhysReg reg_s2(2);
|
||||
PhysReg reg_s4(4);
|
||||
PhysReg reg_v0(256);
|
||||
PhysReg reg_s0(0);
|
||||
PhysReg reg_s2(2);
|
||||
PhysReg reg_s4(4);
|
||||
|
||||
//>> v1: %a:v[0] = p_startpgm
|
||||
ASSERTED bool setup_ok = setup_cs("v1", GFX8);
|
||||
assert(setup_ok);
|
||||
//>> v1: %a:v[0] = p_startpgm
|
||||
ASSERTED bool setup_ok = setup_cs("v1", GFX8);
|
||||
assert(setup_ok);
|
||||
|
||||
auto &startpgm = bld.instructions->at(0);
|
||||
assert(startpgm->opcode == aco_opcode::p_startpgm);
|
||||
startpgm->definitions[0].setFixed(reg_v0);
|
||||
auto& startpgm = bld.instructions->at(0);
|
||||
assert(startpgm->opcode == aco_opcode::p_startpgm);
|
||||
startpgm->definitions[0].setFixed(reg_v0);
|
||||
|
||||
Temp v_in = inputs[0];
|
||||
Temp v_in = inputs[0];
|
||||
|
||||
{
|
||||
/* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
|
||||
{
|
||||
/* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
|
||||
|
||||
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %b:vcc
|
||||
//! p_unit_test 0, %e:s[2-3]
|
||||
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(0, Operand(br, reg_s2));
|
||||
}
|
||||
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %b:vcc
|
||||
//! p_unit_test 0, %e:s[2-3]
|
||||
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
|
||||
Operand(exec, bld.lm));
|
||||
auto br =
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(0, Operand(br, reg_s2));
|
||||
}
|
||||
|
||||
//; del b, e
|
||||
//; del b, e
|
||||
|
||||
{
|
||||
/* When VCC is overwritten inbetween, don't optimize. */
|
||||
{
|
||||
/* When VCC is overwritten inbetween, don't optimize. */
|
||||
|
||||
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
|
||||
//! s2: %f:vcc = s_mov_b64 0
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 1, %e:s[2-3], %f:vcc
|
||||
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
|
||||
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
|
||||
}
|
||||
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
|
||||
//! s2: %f:vcc = s_mov_b64 0
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 1, %e:s[2-3], %f:vcc
|
||||
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
|
||||
Operand(exec, bld.lm));
|
||||
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
|
||||
auto br =
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
|
||||
}
|
||||
|
||||
//; del b, c, d, e, f
|
||||
//; del b, c, d, e, f
|
||||
|
||||
{
|
||||
/* When part of VCC is overwritten inbetween, don't optimize. */
|
||||
{
|
||||
/* When part of VCC is overwritten inbetween, don't optimize. */
|
||||
|
||||
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
|
||||
//! s1: %f:s[107] = s_mov_b32 0
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 1, %e:s[2-3], %f:vcc
|
||||
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
|
||||
auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
|
||||
}
|
||||
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
|
||||
//! s1: %f:s[107] = s_mov_b32 0
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 1, %e:s[2-3], %f:vcc
|
||||
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
|
||||
Operand(exec, bld.lm));
|
||||
auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
|
||||
auto br =
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
|
||||
}
|
||||
|
||||
//; del b, c, d, e, f
|
||||
//; del b, c, d, e, f
|
||||
|
||||
{
|
||||
/* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
|
||||
{
|
||||
/* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
|
||||
|
||||
//! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 2, %e:s[2-3]
|
||||
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm));
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(2, Operand(br, reg_s2));
|
||||
}
|
||||
//! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 2, %e:s[2-3]
|
||||
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
|
||||
Operand(vcmp, reg_s4), Operand(exec, bld.lm));
|
||||
auto br =
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(2, Operand(br, reg_s2));
|
||||
}
|
||||
|
||||
//; del b, c, d, e
|
||||
//; del b, c, d, e
|
||||
|
||||
{
|
||||
/* When the VCC isn't written by VOPC, don't optimize */
|
||||
{
|
||||
/* When the VCC isn't written by VOPC, don't optimize */
|
||||
|
||||
//! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 2, %e:s[2-3]
|
||||
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc),
|
||||
Operand::c32(1u), Operand(reg_s4, bld.lm));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm));
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(2, Operand(br, reg_s2));
|
||||
}
|
||||
//! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 2, %e:s[2-3]
|
||||
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand::c32(1u),
|
||||
Operand(reg_s4, bld.lm));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
|
||||
Operand(salu, vcc), Operand(exec, bld.lm));
|
||||
auto br =
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(2, Operand(br, reg_s2));
|
||||
}
|
||||
|
||||
//; del b, c, d, e, f, x
|
||||
//; del b, c, d, e, f, x
|
||||
|
||||
{
|
||||
/* When EXEC is overwritten inbetween, don't optimize. */
|
||||
{
|
||||
/* When EXEC is overwritten inbetween, don't optimize. */
|
||||
|
||||
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
|
||||
//! s2: %f:exec = s_mov_b64 42
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 4, %e:s[2-3], %f:exec
|
||||
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
|
||||
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
|
||||
}
|
||||
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
|
||||
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
|
||||
//! s2: %f:exec = s_mov_b64 42
|
||||
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
|
||||
//! p_unit_test 4, %e:s[2-3], %f:exec
|
||||
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
|
||||
Operand(v_in, reg_v0));
|
||||
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
|
||||
Operand(exec, bld.lm));
|
||||
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
|
||||
auto br =
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
|
||||
writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
|
||||
}
|
||||
|
||||
//; del b, c, d, e, f, x
|
||||
//; del b, c, d, e, f, x
|
||||
|
||||
finish_optimizer_postRA_test();
|
||||
finish_optimizer_postRA_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
|
||||
//>> s1: %a, s2: %y, s1: %z = p_startpgm
|
||||
ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
|
||||
assert(setup_ok);
|
||||
//>> s1: %a, s2: %y, s1: %z = p_startpgm
|
||||
ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
|
||||
assert(setup_ok);
|
||||
|
||||
PhysReg reg_s0{0};
|
||||
PhysReg reg_s2{2};
|
||||
PhysReg reg_s3{3};
|
||||
PhysReg reg_s4{4};
|
||||
PhysReg reg_s6{6};
|
||||
PhysReg reg_s8{8};
|
||||
PhysReg reg_s0{0};
|
||||
PhysReg reg_s2{2};
|
||||
PhysReg reg_s3{3};
|
||||
PhysReg reg_s4{4};
|
||||
PhysReg reg_s6{6};
|
||||
PhysReg reg_s8{8};
|
||||
|
||||
Temp in_0 = inputs[0];
|
||||
Temp in_1 = inputs[1];
|
||||
Temp in_2 = inputs[2];
|
||||
Operand op_in_0(in_0);
|
||||
op_in_0.setFixed(reg_s0);
|
||||
Operand op_in_1(in_1);
|
||||
op_in_1.setFixed(reg_s4);
|
||||
Operand op_in_2(in_2);
|
||||
op_in_2.setFixed(reg_s6);
|
||||
Temp in_0 = inputs[0];
|
||||
Temp in_1 = inputs[1];
|
||||
Temp in_2 = inputs[2];
|
||||
Operand op_in_0(in_0);
|
||||
op_in_0.setFixed(reg_s0);
|
||||
Operand op_in_1(in_1);
|
||||
op_in_1.setFixed(reg_s4);
|
||||
Operand op_in_2(in_2);
|
||||
op_in_2.setFixed(reg_s6);
|
||||
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_nz %e:scc
|
||||
//! p_unit_test 0, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(0, Operand(br, vcc));
|
||||
}
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_nz %e:scc
|
||||
//! p_unit_test 0, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(0, Operand(br, vcc));
|
||||
}
|
||||
|
||||
//; del d, e, f
|
||||
//; del d, e, f
|
||||
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_z %e:scc
|
||||
//! p_unit_test 1, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(1, Operand(br, vcc));
|
||||
}
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_z %e:scc
|
||||
//! p_unit_test 1, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(1, Operand(br, vcc));
|
||||
}
|
||||
|
||||
//; del d, e, f
|
||||
//; del d, e, f
|
||||
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_z %e:scc
|
||||
//! p_unit_test 2, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(2, Operand(br, vcc));
|
||||
}
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_z %e:scc
|
||||
//! p_unit_test 2, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(2, Operand(br, vcc));
|
||||
}
|
||||
|
||||
//; del d, e, f
|
||||
//; del d, e, f
|
||||
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_nz %e:scc
|
||||
//! p_unit_test 3, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(3, Operand(br, vcc));
|
||||
}
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_nz %e:scc
|
||||
//! p_unit_test 3, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(3, Operand(br, vcc));
|
||||
}
|
||||
|
||||
//; del d, e, f
|
||||
//; del d, e, f
|
||||
|
||||
{
|
||||
//! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
|
||||
//! s2: %f:vcc = p_cbranch_z %e:scc
|
||||
//! p_unit_test 4, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
|
||||
Operand::c32(0x12345u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero(8));
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(4, Operand(br, vcc));
|
||||
}
|
||||
{
|
||||
//! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
|
||||
//! s2: %f:vcc = p_cbranch_z %e:scc
|
||||
//! p_unit_test 4, %f:vcc
|
||||
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
|
||||
Operand::c32(0x12345u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero(8));
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(4, Operand(br, vcc));
|
||||
}
|
||||
|
||||
//; del d, e, f
|
||||
//; del d, e, f
|
||||
|
||||
{
|
||||
/* SCC is overwritten in between, don't optimize */
|
||||
{
|
||||
/* SCC is overwritten in between, don't optimize */
|
||||
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
|
||||
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
|
||||
//! s2: %f:vcc = p_cbranch_z %g:scc
|
||||
//! p_unit_test 5, %f:vcc, %h:s[3]
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(1u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
|
||||
}
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
|
||||
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
|
||||
//! s2: %f:vcc = p_cbranch_z %g:scc
|
||||
//! p_unit_test 5, %f:vcc, %h:s[3]
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(1u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
|
||||
}
|
||||
|
||||
//; del d, e, f, g, h, x
|
||||
//; del d, e, f, g, h, x
|
||||
|
||||
{
|
||||
/* SCC is overwritten in between, optimize by pulling down */
|
||||
{
|
||||
/* SCC is overwritten in between, optimize by pulling down */
|
||||
|
||||
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_z %g:scc
|
||||
//! p_unit_test 5, %f:vcc, %h:s[3]
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(1u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
|
||||
}
|
||||
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_z %g:scc
|
||||
//! p_unit_test 5, %f:vcc, %h:s[3]
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(1u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
|
||||
}
|
||||
|
||||
//; del d, e, f, g, h, x
|
||||
//; del d, e, f, g, h, x
|
||||
|
||||
{
|
||||
/* SCC is overwritten in between, optimize by pulling down */
|
||||
{
|
||||
/* SCC is overwritten in between, optimize by pulling down */
|
||||
|
||||
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
|
||||
//! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_z %g:scc
|
||||
//! p_unit_test 5, %f:vcc, %h:s[3]
|
||||
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1,
|
||||
Operand::c32(0x40018u));
|
||||
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(1u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s8),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
|
||||
}
|
||||
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
|
||||
//! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018
|
||||
//! s2: %f:vcc = p_cbranch_z %g:scc
|
||||
//! p_unit_test 5, %f:vcc, %h:s[3]
|
||||
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1,
|
||||
Operand::c32(0x40018u));
|
||||
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(1u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s8),
|
||||
Operand::zero());
|
||||
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
|
||||
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
|
||||
}
|
||||
|
||||
//; del d, e, f, g, h, x
|
||||
//; del d, e, f, g, h, x
|
||||
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
|
||||
//! p_unit_test 6, %f:s[4]
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
|
||||
writeout(6, Operand(br, reg_s4));
|
||||
}
|
||||
{
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
|
||||
//! p_unit_test 6, %f:s[4]
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
|
||||
Operand(op_in_2), bld.scc(scmp));
|
||||
writeout(6, Operand(br, reg_s4));
|
||||
}
|
||||
|
||||
//; del d, e, f
|
||||
//; del d, e, f
|
||||
|
||||
{
|
||||
/* SCC is overwritten in between, don't optimize */
|
||||
{
|
||||
/* SCC is overwritten in between, don't optimize */
|
||||
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
|
||||
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
|
||||
//! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
|
||||
//! p_unit_test 7, %f:s[4], %h:s[3]
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(1u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
|
||||
writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
|
||||
}
|
||||
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
|
||||
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
|
||||
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
|
||||
//! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
|
||||
//! p_unit_test 7, %f:s[4], %h:s[3]
|
||||
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(0x40018u));
|
||||
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
|
||||
Operand::c32(1u));
|
||||
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
|
||||
Operand::zero());
|
||||
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
|
||||
Operand(op_in_2), bld.scc(scmp));
|
||||
writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
|
||||
}
|
||||
|
||||
//; del d, e, f, g, h, x
|
||||
//; del d, e, f, g, h, x
|
||||
|
||||
finish_optimizer_postRA_test();
|
||||
finish_optimizer_postRA_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(optimizer_postRA.dpp)
|
||||
@ -368,7 +382,8 @@ BEGIN_TEST(optimizer_postRA.dpp)
|
||||
//! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
|
||||
//! p_unit_test 2, %res2:v[2]
|
||||
Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror);
|
||||
Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2),
|
||||
dpp_row_half_mirror);
|
||||
writeout(2, Operand(res2, reg_v2));
|
||||
|
||||
/* modifiers */
|
||||
@ -429,14 +444,16 @@ BEGIN_TEST(optimizer_postRA.dpp)
|
||||
//! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
|
||||
//! p_unit_test 8, %res8:v[2]
|
||||
Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
|
||||
Temp res8 =
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
|
||||
writeout(8, Operand(res8, reg_v2));
|
||||
|
||||
//! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
|
||||
//! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
|
||||
//! p_unit_test 9, %res9:v[2]
|
||||
Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
|
||||
Temp res9 =
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
|
||||
writeout(9, Operand(res9, reg_v2));
|
||||
|
||||
/* control flow */
|
||||
@ -485,48 +502,53 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
|
||||
Operand c(inputs[2], PhysReg(258)); /* buffer store address */
|
||||
Operand d(inputs[3], PhysReg(259)); /* buffer store value */
|
||||
Operand e(inputs[4], PhysReg(0)); /* condition */
|
||||
PhysReg reg_v12(268); /* temporary register */
|
||||
PhysReg reg_v12(268); /* temporary register */
|
||||
|
||||
Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
|
||||
|
||||
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
|
||||
|
||||
emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
|
||||
/* --- logical then --- */
|
||||
//! BB1
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
|
||||
//! p_logical_start
|
||||
emit_divergent_if_else(
|
||||
program.get(), bld, e,
|
||||
[&]() -> void
|
||||
{
|
||||
/* --- logical then --- */
|
||||
//! BB1
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
|
||||
//! p_logical_start
|
||||
|
||||
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
|
||||
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
|
||||
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
/* --- linear then --- */
|
||||
//! BB2
|
||||
//! /* logical preds: / linear preds: BB0, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
/* --- linear then --- */
|
||||
//! BB2
|
||||
//! /* logical preds: / linear preds: BB0, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
/* --- invert --- */
|
||||
//! BB3
|
||||
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
|
||||
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
|
||||
}, [&]() -> void {
|
||||
/* --- logical else --- */
|
||||
//! BB4
|
||||
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
|
||||
//! p_logical_start
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
/* --- invert --- */
|
||||
//! BB3
|
||||
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
|
||||
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
|
||||
},
|
||||
[&]() -> void
|
||||
{
|
||||
/* --- logical else --- */
|
||||
//! BB4
|
||||
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
|
||||
//! p_logical_start
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
|
||||
/* --- linear else --- */
|
||||
//! BB5
|
||||
//! /* logical preds: / linear preds: BB3, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
});
|
||||
/* --- linear else --- */
|
||||
//! BB5
|
||||
//! /* logical preds: / linear preds: BB3, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
});
|
||||
|
||||
/* --- merge block --- */
|
||||
//! BB6
|
||||
@ -535,7 +557,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
|
||||
|
||||
//! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
|
||||
//! p_unit_test 10, %res10:v[12]
|
||||
Temp result = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
|
||||
Temp result =
|
||||
bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
|
||||
writeout(10, Operand(result, reg_v12));
|
||||
|
||||
finish_optimizer_postRA_test();
|
||||
@ -560,7 +583,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
|
||||
Operand d(inputs[3], PhysReg(259)); /* buffer store value */
|
||||
Operand e(inputs[4], PhysReg(0)); /* condition */
|
||||
Operand f(inputs[5], PhysReg(2)); /* buffer store address (scalar) */
|
||||
PhysReg reg_v12(268); /* temporary register */
|
||||
PhysReg reg_v12(268); /* temporary register */
|
||||
|
||||
//! v1: %dpp_tmp:v[12] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
|
||||
Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
|
||||
@ -568,44 +591,50 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
|
||||
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
|
||||
|
||||
emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
|
||||
/* --- logical then --- */
|
||||
//! BB1
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
|
||||
//! p_logical_start
|
||||
emit_divergent_if_else(
|
||||
program.get(), bld, e,
|
||||
[&]() -> void
|
||||
{
|
||||
/* --- logical then --- */
|
||||
//! BB1
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
|
||||
//! p_logical_start
|
||||
|
||||
//! v1: %addr:v[0] = p_parallelcopy %f:s[2]
|
||||
Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f);
|
||||
//! v1: %addr:v[0] = p_parallelcopy %f:s[2]
|
||||
Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f);
|
||||
|
||||
//! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d, Operand::zero(), 0, true);
|
||||
//! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d,
|
||||
Operand::zero(), 0, true);
|
||||
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
/* --- linear then --- */
|
||||
//! BB2
|
||||
//! /* logical preds: / linear preds: BB0, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
/* --- linear then --- */
|
||||
//! BB2
|
||||
//! /* logical preds: / linear preds: BB0, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
/* --- invert --- */
|
||||
//! BB3
|
||||
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
|
||||
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
|
||||
}, [&]() -> void {
|
||||
/* --- logical else --- */
|
||||
//! BB4
|
||||
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
|
||||
//! p_logical_start
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
/* --- invert --- */
|
||||
//! BB3
|
||||
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
|
||||
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
|
||||
},
|
||||
[&]() -> void
|
||||
{
|
||||
/* --- logical else --- */
|
||||
//! BB4
|
||||
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
|
||||
//! p_logical_start
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
|
||||
/* --- linear else --- */
|
||||
//! BB5
|
||||
//! /* logical preds: / linear preds: BB3, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
});
|
||||
/* --- linear else --- */
|
||||
//! BB5
|
||||
//! /* logical preds: / linear preds: BB3, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
});
|
||||
|
||||
/* --- merge block --- */
|
||||
//! BB6
|
||||
@ -613,7 +642,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
|
||||
//! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
|
||||
|
||||
//! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1]
|
||||
Temp result = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
|
||||
Temp result =
|
||||
bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
|
||||
//! p_unit_test 10, %result:v[12]
|
||||
writeout(10, Operand(result, reg_v12));
|
||||
|
||||
@ -631,53 +661,58 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
|
||||
startpgm->definitions[2].setFixed(PhysReg(259));
|
||||
startpgm->definitions[3].setFixed(PhysReg(0));
|
||||
|
||||
Operand a(inputs[0], PhysReg(2)); /* source for s_and */
|
||||
Operand a(inputs[0], PhysReg(2)); /* source for s_and */
|
||||
Operand c(inputs[1], PhysReg(258)); /* buffer store address */
|
||||
Operand d(inputs[2], PhysReg(259)); /* buffer store value */
|
||||
Operand e(inputs[3], PhysReg(0)); /* condition */
|
||||
PhysReg reg_s8(8); /* temporary register */
|
||||
PhysReg reg_s8(8); /* temporary register */
|
||||
|
||||
auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a,
|
||||
Operand::c32(0x40018u));
|
||||
Operand::c32(0x40018u));
|
||||
|
||||
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
|
||||
|
||||
emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
|
||||
/* --- logical then --- */
|
||||
//! BB1
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
|
||||
//! p_logical_start
|
||||
emit_divergent_if_else(
|
||||
program.get(), bld, e,
|
||||
[&]() -> void
|
||||
{
|
||||
/* --- logical then --- */
|
||||
//! BB1
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
|
||||
//! p_logical_start
|
||||
|
||||
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
|
||||
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
|
||||
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
/* --- linear then --- */
|
||||
//! BB2
|
||||
//! /* logical preds: / linear preds: BB0, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
/* --- linear then --- */
|
||||
//! BB2
|
||||
//! /* logical preds: / linear preds: BB0, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
/* --- invert --- */
|
||||
//! BB3
|
||||
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
|
||||
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
|
||||
}, [&]() -> void {
|
||||
/* --- logical else --- */
|
||||
//! BB4
|
||||
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
|
||||
//! p_logical_start
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
/* --- invert --- */
|
||||
//! BB3
|
||||
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
|
||||
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
|
||||
},
|
||||
[&]() -> void
|
||||
{
|
||||
/* --- logical else --- */
|
||||
//! BB4
|
||||
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
|
||||
//! p_logical_start
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
|
||||
/* --- linear else --- */
|
||||
//! BB5
|
||||
//! /* logical preds: / linear preds: BB3, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
});
|
||||
/* --- linear else --- */
|
||||
//! BB5
|
||||
//! /* logical preds: / linear preds: BB3, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
});
|
||||
|
||||
/* --- merge block --- */
|
||||
//! BB6
|
||||
@ -695,7 +730,6 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
|
||||
finish_optimizer_postRA_test();
|
||||
END_TEST
|
||||
|
||||
|
||||
BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
|
||||
//>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[4] = p_startpgm
|
||||
if (!setup_cs("s2 v1 v1 s2 s1", GFX10_3))
|
||||
@ -708,59 +742,65 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
|
||||
startpgm->definitions[3].setFixed(PhysReg(0));
|
||||
startpgm->definitions[4].setFixed(PhysReg(4));
|
||||
|
||||
Operand a(inputs[0], PhysReg(2)); /* source for s_and */
|
||||
Operand a(inputs[0], PhysReg(2)); /* source for s_and */
|
||||
Operand c(inputs[1], PhysReg(258)); /* buffer store address */
|
||||
Operand d(inputs[2], PhysReg(259)); /* buffer store value */
|
||||
Operand e(inputs[3], PhysReg(0)); /* condition */
|
||||
Operand f(inputs[4], PhysReg(4)); /* overwrite value */
|
||||
PhysReg reg_s3(3); /* temporary register */
|
||||
PhysReg reg_s8(8); /* temporary register */
|
||||
PhysReg reg_s3(3); /* temporary register */
|
||||
PhysReg reg_s8(8); /* temporary register */
|
||||
|
||||
//! s2: %tmp_salu:s[8-9], s1: %tmp_salu_scc:scc = s_and_b64 %a:s[2-3], 0x40018
|
||||
auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a,
|
||||
Operand::c32(0x40018u));
|
||||
Operand::c32(0x40018u));
|
||||
|
||||
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
|
||||
|
||||
emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
|
||||
/* --- logical then --- */
|
||||
//! BB1
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
|
||||
//! p_logical_start
|
||||
emit_divergent_if_else(
|
||||
program.get(), bld, e,
|
||||
[&]() -> void
|
||||
{
|
||||
/* --- logical then --- */
|
||||
//! BB1
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
|
||||
//! p_logical_start
|
||||
|
||||
//! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4]
|
||||
Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f);
|
||||
//! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4]
|
||||
Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f);
|
||||
|
||||
//! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(), 0, true);
|
||||
//! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(),
|
||||
0, true);
|
||||
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
/* --- linear then --- */
|
||||
//! BB2
|
||||
//! /* logical preds: / linear preds: BB0, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
/* --- linear then --- */
|
||||
//! BB2
|
||||
//! /* logical preds: / linear preds: BB0, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
/* --- invert --- */
|
||||
//! BB3
|
||||
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
|
||||
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
|
||||
}, [&]() -> void {
|
||||
/* --- logical else --- */
|
||||
//! BB4
|
||||
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
|
||||
//! p_logical_start
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
/* --- invert --- */
|
||||
//! BB3
|
||||
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
|
||||
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
|
||||
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
|
||||
},
|
||||
[&]() -> void
|
||||
{
|
||||
/* --- logical else --- */
|
||||
//! BB4
|
||||
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
|
||||
//! p_logical_start
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
|
||||
/* --- linear else --- */
|
||||
//! BB5
|
||||
//! /* logical preds: / linear preds: BB3, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
});
|
||||
/* --- linear else --- */
|
||||
//! BB5
|
||||
//! /* logical preds: / linear preds: BB3, / kind: */
|
||||
//! s2: %0:vcc = p_branch BB6
|
||||
});
|
||||
|
||||
/* --- merge block --- */
|
||||
//! BB6
|
||||
|
@ -35,22 +35,27 @@ BEGIN_TEST(setup_reduce_temp.divergent_if_phi)
|
||||
* use_linear_vgpr(v0)
|
||||
* }
|
||||
* ... = phi ...
|
||||
*/
|
||||
//TODO: fix the RA validator to spot this
|
||||
*/
|
||||
// TODO: fix the RA validator to spot this
|
||||
//>> s2: %_, v1: %a = p_startpgm
|
||||
if (!setup_cs("s2 v1", GFX9))
|
||||
return;
|
||||
|
||||
//>> lv1: %lv = p_start_linear_vgpr
|
||||
emit_divergent_if_else(program.get(), bld, Operand(inputs[0]), [&]() -> void {
|
||||
//>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
|
||||
Instruction* reduce = bld.reduction(aco_opcode::p_reduce, bld.def(s1),
|
||||
bld.def(bld.lm), bld.def(s1, scc), inputs[1],
|
||||
Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
|
||||
reduce->reduction().cluster_size = bld.lm.bytes() * 8;
|
||||
}, [&]() -> void {
|
||||
/* nothing */
|
||||
});
|
||||
emit_divergent_if_else(
|
||||
program.get(), bld, Operand(inputs[0]),
|
||||
[&]() -> void
|
||||
{
|
||||
//>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
|
||||
Instruction* reduce =
|
||||
bld.reduction(aco_opcode::p_reduce, bld.def(s1), bld.def(bld.lm), bld.def(s1, scc),
|
||||
inputs[1], Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
|
||||
reduce->reduction().cluster_size = bld.lm.bytes() * 8;
|
||||
},
|
||||
[&]() -> void
|
||||
{
|
||||
/* nothing */
|
||||
});
|
||||
bld.pseudo(aco_opcode::p_phi, bld.def(v1), Operand::c32(1), Operand::zero());
|
||||
//>> /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
|
||||
//! p_end_linear_vgpr %lv
|
||||
|
@ -37,7 +37,7 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
|
||||
|
||||
/* TODO: is this possible to do on GFX11? */
|
||||
for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) {
|
||||
for (bool pessimistic : { false, true }) {
|
||||
for (bool pessimistic : {false, true}) {
|
||||
const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
|
||||
|
||||
//>> v1: %_:v[#a] = p_startpgm
|
||||
@ -45,7 +45,8 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
|
||||
return;
|
||||
|
||||
//! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
|
||||
Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
|
||||
Builder::Result tmp =
|
||||
bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
|
||||
|
||||
//! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
|
||||
//! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
|
||||
@ -55,7 +56,7 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
|
||||
writeout(0, result1);
|
||||
writeout(1, result2);
|
||||
|
||||
finish_ra_test(ra_test_policy { pessimistic });
|
||||
finish_ra_test(ra_test_policy{pessimistic});
|
||||
}
|
||||
}
|
||||
END_TEST
|
||||
@ -67,7 +68,8 @@ BEGIN_TEST(regalloc._32bit_partial_write)
|
||||
|
||||
/* ensure high 16 bits are occupied */
|
||||
//! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
|
||||
Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
|
||||
Temp hi =
|
||||
bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
|
||||
|
||||
/* This test checks if this instruction uses SDWA. */
|
||||
//! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
|
||||
@ -168,9 +170,9 @@ BEGIN_TEST(regalloc.precolor.multiple_operands)
|
||||
|
||||
//! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
|
||||
//! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256+0)),
|
||||
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[1], PhysReg(256+2)),
|
||||
Operand(inputs[2], PhysReg(256+3)));
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256 + 0)),
|
||||
Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[1], PhysReg(256 + 2)),
|
||||
Operand(inputs[2], PhysReg(256 + 3)));
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
@ -182,8 +184,8 @@ BEGIN_TEST(regalloc.precolor.different_regs)
|
||||
|
||||
//! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
|
||||
//! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256+0)),
|
||||
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[0], PhysReg(256+2)));
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256 + 0)),
|
||||
Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[0], PhysReg(256 + 2)));
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
@ -256,7 +258,8 @@ BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
|
||||
|
||||
//! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
|
||||
Temp s0_tmp = bld.tmp(s1);
|
||||
Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1));
|
||||
Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc),
|
||||
Definition(s0_tmp.id(), PhysReg{0}, s1));
|
||||
|
||||
//! lv1: %tmp1:v[1] = p_unit_test
|
||||
Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
|
||||
@ -273,7 +276,8 @@ BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
|
||||
//>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
|
||||
Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
|
||||
aco_print_instr(program->gfx_level, &parallelcopy, output);
|
||||
fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg());
|
||||
fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc,
|
||||
parallelcopy.scratch_sgpr.reg());
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
|
||||
@ -392,13 +396,15 @@ BEGIN_TEST(regalloc.vinterp_fp16)
|
||||
|
||||
//! v1: %tmp0:v[1] = v_interp_p10_f16_f32_inreg %lo:v[3][0:16], %in1:v[1], hi(%hi:v[3][16:32])
|
||||
//! p_unit_test %tmp0:v[1]
|
||||
Temp tmp0 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
|
||||
Temp tmp0 =
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
|
||||
bld.pseudo(aco_opcode::p_unit_test, tmp0);
|
||||
|
||||
//! v2b: %tmp1:v[0][16:32] = v_interp_p2_f16_f32_inreg %in0:v[0], %in2:v[2], %tmp0:v[1] opsel_hi
|
||||
//! v1: %tmp2:v[0] = p_create_vector 0, %tmp1:v[0][16:32]
|
||||
//! p_unit_test %tmp2:v[0]
|
||||
Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0], inputs[2], tmp0);
|
||||
Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0],
|
||||
inputs[2], tmp0);
|
||||
Temp tmp2 = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::zero(2), tmp1);
|
||||
bld.pseudo(aco_opcode::p_unit_test, tmp2);
|
||||
|
||||
|
@ -34,7 +34,8 @@ BEGIN_TEST(validate.sdwa.allow)
|
||||
//>> Validation results:
|
||||
//! Validation passed
|
||||
|
||||
SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa();
|
||||
SDWA_instruction* sdwa =
|
||||
&bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa();
|
||||
sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true;
|
||||
|
||||
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]);
|
||||
@ -105,7 +106,9 @@ BEGIN_TEST(validate.sdwa.vopc)
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), inputs[0], inputs[1]);
|
||||
|
||||
//~gfx(9|10)! SDWA VOPC clamp only supported on GFX8: s2: %_:vcc = v_cmp_eq_f32 %vgpr0, %vgpr1 clamp src0_sel:dword src1_sel:dword
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])->sdwa().clamp = true;
|
||||
bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])
|
||||
->sdwa()
|
||||
.clamp = true;
|
||||
|
||||
//! Validation failed
|
||||
|
||||
@ -138,11 +141,13 @@ BEGIN_TEST(validate.sdwa.vcc)
|
||||
|
||||
//! 3rd operand must be fixed to vcc with SDWA: v1: %_ = v_cndmask_b32 %vgpr0, %vgpr1, %_ dst_sel:dword src0_sel:dword src1_sel:dword
|
||||
bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], inputs[2]);
|
||||
bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], bld.vcc(inputs[2]));
|
||||
bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1],
|
||||
bld.vcc(inputs[2]));
|
||||
|
||||
//! 2nd definition must be fixed to vcc with SDWA: v1: %_, s2: %_ = v_add_co_u32 %vgpr0, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
|
||||
bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm), inputs[0], inputs[1]);
|
||||
bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0], inputs[1]);
|
||||
bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0],
|
||||
inputs[1]);
|
||||
|
||||
//! Validation failed
|
||||
|
||||
@ -152,125 +157,127 @@ END_TEST
|
||||
|
||||
BEGIN_TEST(optimize.sdwa.extract)
|
||||
for (unsigned i = GFX7; i <= GFX10; i++) {
|
||||
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
|
||||
continue;
|
||||
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN,
|
||||
is_signed ? "_signed" : "_unsigned"))
|
||||
continue;
|
||||
|
||||
//; def standard_test(index, sel):
|
||||
//; res = 'v1: %%res%s = v_mul_f32 %%a, %%b dst_sel:dword src0_sel:dword src1_sel:%c%s\n' % (index, 's' if variant.endswith('_signed') else 'u', sel)
|
||||
//; res += 'p_unit_test %s, %%res%s' % (index, index)
|
||||
//; return res
|
||||
//; funcs['standard_test'] = lambda a: standard_test(*(v for v in a.split(',')))
|
||||
//; def standard_test(index, sel):
|
||||
//; res = 'v1: %%res%s = v_mul_f32 %%a, %%b dst_sel:dword src0_sel:dword src1_sel:%c%s\n' % (index, 's' if variant.endswith('_signed') else 'u', sel)
|
||||
//; res += 'p_unit_test %s, %%res%s' % (index, index)
|
||||
//; return res
|
||||
//; funcs['standard_test'] = lambda a: standard_test(*(v for v in a.split(',')))
|
||||
|
||||
aco_opcode ext = aco_opcode::p_extract;
|
||||
aco_opcode ins = aco_opcode::p_insert;
|
||||
aco_opcode ext = aco_opcode::p_extract;
|
||||
aco_opcode ins = aco_opcode::p_insert;
|
||||
|
||||
{
|
||||
//~gfx[^7].*! @standard_test(0,byte0)
|
||||
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
|
||||
{
|
||||
//~gfx[^7].*! @standard_test(0,byte0)
|
||||
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
|
||||
Operand::c32(8u), Operand::c32(is_signed));
|
||||
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
|
||||
|
||||
//~gfx[^7].*! @standard_test(1,byte1)
|
||||
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
|
||||
//~gfx[^7].*! @standard_test(1,byte1)
|
||||
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
|
||||
Operand::c32(8u), Operand::c32(is_signed));
|
||||
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
|
||||
|
||||
//~gfx[^7].*! @standard_test(2,byte2)
|
||||
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
|
||||
//~gfx[^7].*! @standard_test(2,byte2)
|
||||
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u),
|
||||
Operand::c32(8u), Operand::c32(is_signed));
|
||||
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
|
||||
|
||||
//~gfx[^7].*! @standard_test(3,byte3)
|
||||
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
|
||||
//~gfx[^7].*! @standard_test(3,byte3)
|
||||
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u),
|
||||
Operand::c32(8u), Operand::c32(is_signed));
|
||||
writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
|
||||
|
||||
//~gfx[^7].*! @standard_test(4,word0)
|
||||
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
|
||||
//~gfx[^7].*! @standard_test(4,word0)
|
||||
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
|
||||
Operand::c32(16u), Operand::c32(is_signed));
|
||||
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
|
||||
|
||||
//~gfx[^7].*! @standard_test(5,word1)
|
||||
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
|
||||
Operand::c32(16u), Operand::c32(is_signed));
|
||||
writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
|
||||
//~gfx[^7].*! @standard_test(5,word1)
|
||||
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
|
||||
Operand::c32(16u), Operand::c32(is_signed));
|
||||
writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
|
||||
|
||||
//~gfx[^7]_unsigned! @standard_test(6,byte0)
|
||||
Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
|
||||
writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
|
||||
//~gfx[^7]_unsigned! @standard_test(6,byte0)
|
||||
Temp bfi_byte0_b =
|
||||
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
|
||||
writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
|
||||
|
||||
//~gfx[^7]_unsigned! @standard_test(7,word0)
|
||||
Temp bfi_word0_b =
|
||||
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u));
|
||||
writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b));
|
||||
//~gfx[^7]_unsigned! @standard_test(7,word0)
|
||||
Temp bfi_word0_b =
|
||||
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u));
|
||||
writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b));
|
||||
}
|
||||
|
||||
//>> p_unit_test 63
|
||||
writeout(63);
|
||||
|
||||
{
|
||||
//! v1: %tmp8 = p_insert %b, 1, 8
|
||||
//! v1: %res8 = v_mul_f32 %a, %tmp8
|
||||
//! p_unit_test 8, %res8
|
||||
Temp bfi_byte1_b =
|
||||
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u));
|
||||
writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b));
|
||||
|
||||
/* v_cvt_f32_ubyte[0-3] can be used instead of v_cvt_f32_u32+sdwa */
|
||||
//~gfx7_signed! v1: %bfe_byte0_b = p_extract %b, 0, 8, 1
|
||||
//~gfx7_signed! v1: %res9 = v_cvt_f32_u32 %bfe_byte0_b
|
||||
//~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0
|
||||
//~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
|
||||
//! p_unit_test 9, %res9
|
||||
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
|
||||
Operand::c32(8u), Operand::c32(is_signed));
|
||||
writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
|
||||
|
||||
//~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
|
||||
//~gfx7_signed! v1: %res10 = v_cvt_f32_u32 %bfe_byte1_b
|
||||
//~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1
|
||||
//~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
|
||||
//! p_unit_test 10, %res10
|
||||
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
|
||||
Operand::c32(8u), Operand::c32(is_signed));
|
||||
writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
|
||||
|
||||
//~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
|
||||
//~gfx7_signed! v1: %res11 = v_cvt_f32_u32 %bfe_byte2_b
|
||||
//~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2
|
||||
//~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
|
||||
//! p_unit_test 11, %res11
|
||||
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u),
|
||||
Operand::c32(8u), Operand::c32(is_signed));
|
||||
writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
|
||||
|
||||
//~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
|
||||
//~gfx7_signed! v1: %res12 = v_cvt_f32_u32 %bfe_byte3_b
|
||||
//~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3
|
||||
//~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
|
||||
//! p_unit_test 12, %res12
|
||||
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u),
|
||||
Operand::c32(8u), Operand::c32(is_signed));
|
||||
writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
|
||||
|
||||
/* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
|
||||
//~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
|
||||
//~gfx(9|10).*! p_unit_test 13, %res13
|
||||
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
|
||||
Operand::c32(16u), Operand::c32(is_signed));
|
||||
writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
|
||||
|
||||
//~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
|
||||
//~gfx(9|10).*! p_unit_test 14, %res14
|
||||
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
|
||||
Operand::c32(16u), Operand::c32(is_signed));
|
||||
writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b));
|
||||
}
|
||||
|
||||
finish_opt_test();
|
||||
}
|
||||
|
||||
//>> p_unit_test 63
|
||||
writeout(63);
|
||||
|
||||
{
|
||||
//! v1: %tmp8 = p_insert %b, 1, 8
|
||||
//! v1: %res8 = v_mul_f32 %a, %tmp8
|
||||
//! p_unit_test 8, %res8
|
||||
Temp bfi_byte1_b =
|
||||
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u));
|
||||
writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b));
|
||||
|
||||
/* v_cvt_f32_ubyte[0-3] can be used instead of v_cvt_f32_u32+sdwa */
|
||||
//~gfx7_signed! v1: %bfe_byte0_b = p_extract %b, 0, 8, 1
|
||||
//~gfx7_signed! v1: %res9 = v_cvt_f32_u32 %bfe_byte0_b
|
||||
//~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0
|
||||
//~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
|
||||
//! p_unit_test 9, %res9
|
||||
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
|
||||
|
||||
//~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
|
||||
//~gfx7_signed! v1: %res10 = v_cvt_f32_u32 %bfe_byte1_b
|
||||
//~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1
|
||||
//~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
|
||||
//! p_unit_test 10, %res10
|
||||
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
|
||||
|
||||
//~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
|
||||
//~gfx7_signed! v1: %res11 = v_cvt_f32_u32 %bfe_byte2_b
|
||||
//~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2
|
||||
//~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
|
||||
//! p_unit_test 11, %res11
|
||||
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
|
||||
|
||||
//~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
|
||||
//~gfx7_signed! v1: %res12 = v_cvt_f32_u32 %bfe_byte3_b
|
||||
//~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3
|
||||
//~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
|
||||
//! p_unit_test 12, %res12
|
||||
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
|
||||
|
||||
/* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
|
||||
//~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
|
||||
//~gfx(9|10).*! p_unit_test 13, %res13
|
||||
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
|
||||
Operand::c32(is_signed));
|
||||
writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
|
||||
|
||||
//~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
|
||||
//~gfx(9|10).*! p_unit_test 14, %res14
|
||||
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
|
||||
Operand::c32(16u), Operand::c32(is_signed));
|
||||
writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b));
|
||||
}
|
||||
|
||||
finish_opt_test();
|
||||
}
|
||||
}
|
||||
END_TEST
|
||||
|
||||
|
@ -52,8 +52,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v2b), Definition(v1_lo, v2b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v1_lo, v2b),
|
||||
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
|
||||
|
||||
//~gfx[67]! p_unit_test 1
|
||||
@ -61,9 +60,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
|
||||
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
|
||||
bld.pseudo(aco_opcode::p_create_vector,
|
||||
Definition(v0_lo, v1),
|
||||
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v2b),
|
||||
Operand(v0_lo, v2b));
|
||||
|
||||
//~gfx[67]! p_unit_test 2
|
||||
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
|
||||
@ -71,8 +69,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
|
||||
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
||||
bld.pseudo(aco_opcode::p_create_vector,
|
||||
Definition(v0_lo, v6b), Operand(v1_lo, v2b),
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v6b), Operand(v1_lo, v2b),
|
||||
Operand(v0_lo, v2b), Operand(v2_lo, v2b));
|
||||
|
||||
//~gfx[67]! p_unit_test 3
|
||||
@ -82,10 +79,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
|
||||
//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
|
||||
bld.pseudo(aco_opcode::p_create_vector,
|
||||
Definition(v0_lo, v2),
|
||||
Operand(v1_lo, v2b), Operand(v0_lo, v2b),
|
||||
Operand(v2_lo, v2b), Operand(v3_lo, v2b));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
|
||||
Operand(v0_lo, v2b), Operand(v2_lo, v2b), Operand(v3_lo, v2b));
|
||||
|
||||
//~gfx[67]! p_unit_test 4
|
||||
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
|
||||
@ -96,17 +91,14 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
|
||||
bld.pseudo(aco_opcode::p_create_vector,
|
||||
Definition(v0_lo, v2),
|
||||
Operand(v1_lo, v2b), Operand(v2_lo, v2b),
|
||||
Operand(v0_lo, v2b), Operand(v3_lo, v2b));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
|
||||
Operand(v2_lo, v2b), Operand(v0_lo, v2b), Operand(v3_lo, v2b));
|
||||
|
||||
//~gfx[67]! p_unit_test 5
|
||||
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
|
||||
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
|
||||
bld.pseudo(aco_opcode::p_split_vector,
|
||||
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
|
||||
Operand(v0_lo, v1));
|
||||
|
||||
//~gfx[67]! p_unit_test 6
|
||||
@ -114,8 +106,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
|
||||
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
|
||||
bld.pseudo(aco_opcode::p_split_vector,
|
||||
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
|
||||
Definition(v2_lo, v2b), Operand(v0_lo, v6b));
|
||||
|
||||
//~gfx[67]! p_unit_test 7
|
||||
@ -124,10 +115,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
|
||||
//~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
|
||||
bld.pseudo(aco_opcode::p_split_vector,
|
||||
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
|
||||
Definition(v2_lo, v2b), Definition(v3_lo, v2b),
|
||||
Operand(v0_lo, v2));
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
|
||||
Definition(v2_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));
|
||||
|
||||
//~gfx[67]! p_unit_test 8
|
||||
//~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
|
||||
@ -136,18 +125,15 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
|
||||
bld.pseudo(aco_opcode::p_split_vector,
|
||||
Definition(v1_lo, v2b), Definition(v2_lo, v2b),
|
||||
Definition(v0_lo, v2b), Definition(v3_lo, v2b),
|
||||
Operand(v0_lo, v2));
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v2_lo, v2b),
|
||||
Definition(v0_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));
|
||||
|
||||
//~gfx[67]! p_unit_test 9
|
||||
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v1b), Definition(v1_lo, v1b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v1_lo, v1b),
|
||||
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
|
||||
|
||||
//~gfx[67]! p_unit_test 10
|
||||
@ -155,9 +141,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
|
||||
//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
|
||||
bld.pseudo(aco_opcode::p_create_vector,
|
||||
Definition(v0_lo, v2b),
|
||||
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2b), Operand(v1_lo, v1b),
|
||||
Operand(v0_lo, v1b));
|
||||
|
||||
//~gfx[67]! p_unit_test 11
|
||||
//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
|
||||
@ -166,8 +151,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
|
||||
//~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
|
||||
bld.pseudo(aco_opcode::p_create_vector,
|
||||
Definition(v0_lo, v3b), Operand(v1_lo, v1b),
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v3b), Operand(v1_lo, v1b),
|
||||
Operand(v0_lo, v1b), Operand(v2_lo, v1b));
|
||||
|
||||
//~gfx[67]! p_unit_test 12
|
||||
@ -179,10 +163,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
|
||||
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
|
||||
bld.pseudo(aco_opcode::p_create_vector,
|
||||
Definition(v0_lo, v1),
|
||||
Operand(v1_lo, v1b), Operand(v0_lo, v1b),
|
||||
Operand(v2_lo, v1b), Operand(v3_lo, v1b));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v1b),
|
||||
Operand(v0_lo, v1b), Operand(v2_lo, v1b), Operand(v3_lo, v1b));
|
||||
|
||||
//~gfx[67]! p_unit_test 13
|
||||
//~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
|
||||
@ -193,18 +175,16 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
|
||||
//~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
|
||||
Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
|
||||
Definition(v0_lo, v1),
|
||||
Operand(v0_lo, v1b), Operand(v0_lo, v1b),
|
||||
Operand(v0_lo, v1b), Operand(v0_lo, v1b));
|
||||
Instruction* pseudo =
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v0_lo, v1b),
|
||||
Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b));
|
||||
pseudo->pseudo().scratch_sgpr = m0;
|
||||
|
||||
//~gfx[67]! p_unit_test 14
|
||||
//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
|
||||
//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
|
||||
bld.pseudo(aco_opcode::p_split_vector,
|
||||
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
|
||||
Operand(v0_lo, v2b));
|
||||
|
||||
//~gfx[67]! p_unit_test 15
|
||||
@ -213,10 +193,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
|
||||
//~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
|
||||
bld.pseudo(aco_opcode::p_split_vector,
|
||||
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
|
||||
Definition(v2_lo, v1b), Definition(v3_lo, v1b),
|
||||
Operand(v0_lo, v1));
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
|
||||
Definition(v2_lo, v1b), Definition(v3_lo, v1b), Operand(v0_lo, v1));
|
||||
|
||||
//~gfx[67]! s_endpgm
|
||||
|
||||
@ -231,8 +209,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
|
||||
//~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
|
||||
Operand(v0_hi, v2b), Operand(v0_lo, v2b));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 1
|
||||
@ -243,8 +220,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
|
||||
//~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v1), Definition(v1_lo, v2b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
|
||||
Operand(v1_lo, v1), Operand(v0_lo, v2b));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 2
|
||||
@ -259,9 +235,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
|
||||
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
|
||||
Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
|
||||
Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b),
|
||||
Operand(v0_lo, v2b));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 3
|
||||
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
@ -273,8 +249,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16]
|
||||
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v1), Definition(v1_b3, v1b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b),
|
||||
Operand(v1_lo, v1), Operand(v0_b3, v1b));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 4
|
||||
@ -287,8 +262,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
|
||||
//~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v1), Definition(v1_lo, v1b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
|
||||
Operand(v1_lo, v1), Operand(v0_lo, v1b));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 5
|
||||
@ -301,9 +275,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
|
||||
Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b),
|
||||
Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b),
|
||||
Operand(v0_lo, v1));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 6
|
||||
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
@ -311,9 +285,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
//~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
|
||||
Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
|
||||
Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b),
|
||||
Operand(v0_lo, v1));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 7
|
||||
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
|
||||
@ -322,9 +296,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
|
||||
//~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
|
||||
Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
|
||||
Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b),
|
||||
Operand(v0_lo, v1));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 8
|
||||
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
|
||||
@ -342,8 +316,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
|
||||
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v3b), Definition(v1_lo, v3b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
|
||||
Operand(v1_lo, v3b), Operand(v0_lo, v3b));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 9
|
||||
@ -354,9 +327,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
|
||||
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
|
||||
Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
|
||||
Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b),
|
||||
Operand(v1_b3, v1b));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 10
|
||||
//~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
|
||||
@ -380,8 +353,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
|
||||
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_b1, v2b), Definition(v1_b1, v2b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b),
|
||||
Operand(v1_b1, v2b), Operand(v0_b1, v2b));
|
||||
|
||||
//~gfx(8|9|11)! p_unit_test 11
|
||||
@ -398,8 +370,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
||||
//~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(v0_b1, v1b), Definition(v0_b3, v1b),
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b),
|
||||
Operand(v0_b3, v1b), Operand(v0_b1, v1b));
|
||||
|
||||
//~gfx(8|9|11)! s_endpgm
|
||||
@ -535,8 +506,7 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
|
||||
//~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0d
|
||||
//~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
|
||||
Operand::c16(0x00ff));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x00ff));
|
||||
|
||||
//! p_unit_test 14
|
||||
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
|
||||
@ -544,29 +514,25 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
|
||||
//~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0xd0c0504
|
||||
//~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b),
|
||||
Operand::c16(0xff00));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0xff00));
|
||||
|
||||
//! p_unit_test 15
|
||||
//~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
|
||||
//~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
|
||||
Operand::zero(2));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::zero(2));
|
||||
|
||||
//! p_unit_test 16
|
||||
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
|
||||
//~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050d
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
|
||||
Operand::c8(0xff));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0xff));
|
||||
|
||||
//! p_unit_test 17
|
||||
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
|
||||
//~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050c
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
|
||||
Operand::zero(1));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1));
|
||||
|
||||
//! s_endpgm
|
||||
|
||||
@ -589,12 +555,12 @@ BEGIN_TEST(to_hw_instr.self_intersecting_swap)
|
||||
//! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
|
||||
//! s_endpgm
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
//v[1:2] = v[2:3]
|
||||
//v3 = v7
|
||||
//v7 = v1
|
||||
bld.pseudo(aco_opcode::p_parallelcopy,
|
||||
Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1),
|
||||
Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1));
|
||||
// v[1:2] = v[2:3]
|
||||
// v3 = v7
|
||||
// v7 = v1
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v1, v2), Definition(reg_v3, v1),
|
||||
Definition(reg_v7, v1), Operand(reg_v2, v2), Operand(reg_v7, v1),
|
||||
Operand(reg_v1, v1));
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
END_TEST
|
||||
@ -606,98 +572,98 @@ BEGIN_TEST(to_hw_instr.extract)
|
||||
PhysReg v1_lo{257};
|
||||
|
||||
for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
|
||||
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
|
||||
if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
|
||||
continue;
|
||||
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
|
||||
if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
|
||||
continue;
|
||||
|
||||
#define EXT(idx, size) \
|
||||
bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
|
||||
Operand::c32(size), Operand::c32(is_signed));
|
||||
|
||||
//; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
|
||||
//; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
|
||||
//; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
|
||||
//; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
|
||||
//; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)
|
||||
//; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
|
||||
//; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
|
||||
//; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
|
||||
//; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
|
||||
//; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)
|
||||
|
||||
//>> p_unit_test 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
|
||||
EXT(0, 8)
|
||||
//! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
|
||||
EXT(1, 8)
|
||||
//! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
|
||||
EXT(2, 8)
|
||||
//! v1: %_:v[0] = @v_shr 24, %_:v[1]
|
||||
EXT(3, 8)
|
||||
//~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
|
||||
//~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
|
||||
EXT(0, 16)
|
||||
//! v1: %_:v[0] = @v_shr 16, %_:v[1]
|
||||
EXT(1, 16)
|
||||
//>> p_unit_test 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
|
||||
EXT(0, 8)
|
||||
//! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
|
||||
EXT(1, 8)
|
||||
//! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
|
||||
EXT(2, 8)
|
||||
//! v1: %_:v[0] = @v_shr 24, %_:v[1]
|
||||
EXT(3, 8)
|
||||
//~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
|
||||
//~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
|
||||
EXT(0, 16)
|
||||
//! v1: %_:v[0] = @v_shr 16, %_:v[1]
|
||||
EXT(1, 16)
|
||||
|
||||
#undef EXT
|
||||
#undef EXT
|
||||
|
||||
#define EXT(idx, size) \
|
||||
bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \
|
||||
Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));
|
||||
|
||||
//>> p_unit_test 2
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
||||
//~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
|
||||
//~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
|
||||
EXT(0, 8)
|
||||
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008
|
||||
EXT(1, 8)
|
||||
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010
|
||||
EXT(2, 8)
|
||||
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24
|
||||
EXT(3, 8)
|
||||
//~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000
|
||||
//~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
|
||||
//~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
|
||||
EXT(0, 16)
|
||||
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
|
||||
EXT(1, 16)
|
||||
//>> p_unit_test 2
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
||||
//~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
|
||||
//~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
|
||||
EXT(0, 8)
|
||||
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008
|
||||
EXT(1, 8)
|
||||
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010
|
||||
EXT(2, 8)
|
||||
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24
|
||||
EXT(3, 8)
|
||||
//~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000
|
||||
//~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
|
||||
//~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
|
||||
EXT(0, 16)
|
||||
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
|
||||
EXT(1, 16)
|
||||
|
||||
#undef EXT
|
||||
#undef EXT
|
||||
|
||||
#define EXT(idx, src_b) \
|
||||
bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \
|
||||
Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));
|
||||
|
||||
//>> p_unit_test 4
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
|
||||
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
|
||||
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
|
||||
EXT(0, 0)
|
||||
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
|
||||
if (lvl != GFX7)
|
||||
EXT(0, 2)
|
||||
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
|
||||
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
|
||||
EXT(1, 0)
|
||||
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
|
||||
if (lvl != GFX7)
|
||||
EXT(1, 2)
|
||||
//>> p_unit_test 4
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
|
||||
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
|
||||
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
|
||||
EXT(0, 0)
|
||||
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
|
||||
if (lvl != GFX7)
|
||||
EXT(0, 2)
|
||||
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
|
||||
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
|
||||
EXT(1, 0)
|
||||
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
|
||||
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
|
||||
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
|
||||
if (lvl != GFX7)
|
||||
EXT(1, 2)
|
||||
|
||||
#undef EXT
|
||||
#undef EXT
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
finish_to_hw_instr_test();
|
||||
|
||||
//! s_endpgm
|
||||
}
|
||||
//! s_endpgm
|
||||
}
|
||||
}
|
||||
END_TEST
|
||||
|
||||
@ -736,7 +702,7 @@ BEGIN_TEST(to_hw_instr.insert)
|
||||
//! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
|
||||
INS(1, 16)
|
||||
|
||||
#undef INS
|
||||
#undef INS
|
||||
|
||||
#define INS(idx, size) \
|
||||
bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \
|
||||
@ -759,7 +725,7 @@ BEGIN_TEST(to_hw_instr.insert)
|
||||
//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16
|
||||
INS(1, 16)
|
||||
|
||||
#undef INS
|
||||
#undef INS
|
||||
|
||||
#define INS(idx, def_b) \
|
||||
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \
|
||||
@ -784,7 +750,7 @@ BEGIN_TEST(to_hw_instr.insert)
|
||||
if (lvl != GFX7)
|
||||
INS(1, 2)
|
||||
|
||||
#undef INS
|
||||
#undef INS
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
|
||||
@ -816,10 +782,9 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
|
||||
//! lv1: %0:v[0] = v_mov_b32 %0:v[1]
|
||||
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
|
||||
//! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0
|
||||
Instruction *instr = bld.pseudo(
|
||||
aco_opcode::p_parallelcopy,
|
||||
Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
|
||||
Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
|
||||
Instruction* instr =
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
|
||||
Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
|
||||
instr->pseudo().scratch_sgpr = m0;
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
@ -836,10 +801,9 @@ BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
|
||||
//>> p_unit_test 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
|
||||
Instruction *instr = bld.pseudo(
|
||||
aco_opcode::p_parallelcopy,
|
||||
Definition(reg_v0, v1_linear), Definition(reg_v1, v1_linear),
|
||||
Operand(reg_v1, v1_linear), Operand(reg_v0, v1_linear));
|
||||
Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
|
||||
Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear),
|
||||
Operand(reg_v0, v1_linear));
|
||||
instr->pseudo().scratch_sgpr = m0;
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
|
Loading…
Reference in New Issue
Block a user