aco: reformat according to its .clang-format

Signed-off-by: Eric Engestrom <eric@igalia.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23253>
Eric Engestrom authored on 2023-05-26 12:55:35 +01:00; committed by Marge Bot
parent 8b319c6db8
commit 6b21653ab4
34 changed files with 1556 additions and 1430 deletions
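The rewrapping below is mechanical: it applies the repository's .clang-format. As an illustrative sketch only (option values inferred from the changes in this diff, not the verbatim contents of that file), the settings driving most of the churn look roughly like:

    ColumnLimit: 100
    AllowShortCaseLabelsOnASingleLine: true
    SpaceBeforeParens: ControlStatements
    ForEachMacros: ['BITSET_FOREACH_SET', 'u_foreach_bit']

That is: lines are rewrapped to a 100-column limit, short case labels collapse onto a single line, and foreach-style macros gain a space before their parentheses.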


@ -52,7 +52,7 @@ struct asm_context {
// TODO: keep track of branch instructions referring blocks
// and, when emitting the block, correct the offset in instr
asm_context(Program* program_, std::vector<struct aco_symbol>* symbols_)
: program(program_), gfx_level(program->gfx_level), symbols(symbols_)
: program(program_), gfx_level(program->gfx_level), symbols(symbols_)
{
if (gfx_level <= GFX7)
opcode = &instr_info.opcode_gfx7[0];
@ -1160,8 +1160,7 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
emit_instruction(ctx, out, instr.get());
/* create the s_setpc_b64 to jump */
instr.reset(
bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
instr.reset(bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
emit_instruction(ctx, out, instr.get());
}
@ -1218,8 +1217,7 @@ fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
}
unsigned
emit_program(Program* program, std::vector<uint32_t>& code,
std::vector<struct aco_symbol>* symbols)
emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct aco_symbol>* symbols)
{
asm_context ctx(program, symbols);
@ -1252,8 +1250,8 @@ emit_program(Program* program, std::vector<uint32_t>& code,
code.insert(code.end(), (uint32_t*)program->constant_data.data(),
(uint32_t*)(program->constant_data.data() + program->constant_data.size()));
program->config->scratch_bytes_per_wave = align(
program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
program->config->scratch_bytes_per_wave =
align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
return exec_size;
}


@ -254,8 +254,7 @@ public:
void join_min(const VGPRCounterMap& other)
{
unsigned i;
BITSET_FOREACH_SET(i, other.resident, 256)
{
BITSET_FOREACH_SET (i, other.resident, 256) {
if (BITSET_TEST(resident, i))
val[i] = MIN2(val[i] + base, other.val[i] + other.base) - base;
else
@ -270,8 +269,7 @@ public:
return false;
unsigned i;
BITSET_FOREACH_SET(i, other.resident, 256)
{
BITSET_FOREACH_SET (i, other.resident, 256) {
if (!BITSET_TEST(resident, i))
return false;
if (val[i] + base != other.val[i] + other.base)
@ -365,11 +363,11 @@ search_backwards_internal(State& state, GlobalState& global_state, BlockState bl
return;
}
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_IGNORED(-Waddress)
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_IGNORED(-Waddress)
if (block_cb != nullptr && !block_cb(global_state, block_state, block))
return;
PRAGMA_DIAGNOSTIC_POP
PRAGMA_DIAGNOSTIC_POP
for (unsigned lin_pred : block->linear_preds) {
search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>(


@ -52,8 +52,7 @@ struct wqm_ctx {
/* state for WQM propagation */
std::set<unsigned> worklist;
std::vector<bool> branch_wqm; /* true if the branch condition in this block should be in wqm */
wqm_ctx(Program* program_)
: program(program_), branch_wqm(program->blocks.size())
wqm_ctx(Program* program_) : program(program_), branch_wqm(program->blocks.size())
{
for (unsigned i = 0; i < program->blocks.size(); i++)
worklist.insert(i);
@ -137,8 +136,7 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
propagate_wqm = true;
bool pred_by_exec = needs_exec_mask(instr.get()) ||
instr->opcode == aco_opcode::p_logical_end ||
instr->isBranch();
instr->opcode == aco_opcode::p_logical_end || instr->isBranch();
if (needs_exact(instr))
instr_needs[i] = Exact;
@ -574,7 +572,8 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
* WQM again.
*/
ctx.info[block->index].exec.resize(1);
assert(ctx.info[block->index].exec[0].second == (mask_type_exact | mask_type_global));
assert(ctx.info[block->index].exec[0].second ==
(mask_type_exact | mask_type_global));
current_exec = get_exec_op(ctx.info[block->index].exec.back().first);
ctx.info[block->index].exec[0].first = Operand(bld.lm);
}


@ -91,9 +91,8 @@ enum vmem_type : uint8_t {
vmem_bvh = 1 << 2,
};
static const uint16_t exp_events =
event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock |
event_ldsdir;
static const uint16_t exp_events = event_exp_pos | event_exp_param | event_exp_mrt_null |
event_gds_gpr_lock | event_vmem_gpr_lock | event_ldsdir;
static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg;
static const uint16_t vm_events = event_vmem | event_flat;
static const uint16_t vs_events = event_vmem_store;
@ -580,7 +579,8 @@ kill(wait_imm& imm, alu_delay_info& delay, Instruction* instr, wait_ctx& ctx,
}
if (ctx.program->gfx_level >= GFX11) {
update_alu(ctx, false, false, false, MAX3(delay.salu_cycles, delay.valu_cycles, delay.trans_cycles));
update_alu(ctx, false, false, false,
MAX3(delay.salu_cycles, delay.valu_cycles, delay.trans_cycles));
}
/* remove all gprs with higher counter from map */
@ -775,8 +775,7 @@ insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_
*/
uint32_t ds_vmem_events = event_lds | event_gds | event_vmem | event_flat;
uint32_t alu_events = event_trans | event_valu | event_salu;
bool force_linear =
ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
bool force_linear = ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles,
force_linear);


@ -26,8 +26,8 @@
#include "aco_instruction_selection.h"
#include "aco_builder.h"
#include "aco_ir.h"
#include "aco_interface.h"
#include "aco_ir.h"
#include "common/ac_nir.h"
#include "common/sid.h"
@ -661,8 +661,8 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
} else {
assert(src_bits < 32);
bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), Operand::c32(src_bits),
Operand::c32((unsigned)sign_extend));
bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(),
Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
}
if (dst_bits == 64) {
@ -1894,8 +1894,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
case nir_op_uadd_sat: {
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* add_instr =
emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
Instruction* add_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
add_instr->valu().clamp = 1;
break;
}
@ -1977,8 +1976,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
case nir_op_iadd_sat: {
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* add_instr =
emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
Instruction* add_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
add_instr->valu().clamp = 1;
break;
}
@ -3316,8 +3314,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
exponent_large);
Temp cond =
bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand::c32(64u), exponent);
mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa,
Operand::c64(~0llu), cond);
mantissa =
bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa, Operand::c64(~0llu), cond);
Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa);
Temp cond_small =
@ -3483,9 +3481,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
case nir_op_unpack_64_4x16:
case nir_op_unpack_32_4x8:
bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
emit_split_vector(ctx, dst,
instr->op == nir_op_unpack_32_4x8 ||
instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
emit_split_vector(
ctx, dst, instr->op == nir_op_unpack_32_4x8 || instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
break;
case nir_op_pack_64_2x32_split: {
Temp src0 = get_alu_src(ctx, instr->src[0]);
@ -4029,7 +4026,7 @@ struct LoadEmitInfo {
unsigned num_components;
unsigned component_size;
Temp resource = Temp(0, s1); /* buffer resource or base 64-bit address */
Temp idx = Temp(0, v1); /* buffer index */
Temp idx = Temp(0, v1); /* buffer index */
unsigned component_stride = 0;
unsigned const_offset = 0;
unsigned align_mul = 0;
@ -4176,9 +4173,10 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
aligned_offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), lo, hi);
}
}
Temp aligned_offset_tmp =
aligned_offset.isTemp() ? aligned_offset.getTemp() :
aligned_offset.isConstant() ? bld.copy(bld.def(s1), aligned_offset) : Temp(0, s1);
Temp aligned_offset_tmp = aligned_offset.isTemp() ? aligned_offset.getTemp()
: aligned_offset.isConstant()
? bld.copy(bld.def(s1), aligned_offset)
: Temp(0, s1);
Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align,
reduced_const_offset, byte_align ? Temp() : info.dst);
@ -4508,8 +4506,7 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
mubuf->offen = offen;
mubuf->idxen = idxen;
mubuf->glc = info.glc;
mubuf->dlc =
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->slc = info.slc;
mubuf->sync = info.sync;
mubuf->offset = const_offset;
@ -4552,40 +4549,20 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
aco_opcode op = aco_opcode::num_opcodes;
if (info.component_size == 2) {
switch (bytes_needed) {
case 2:
op = aco_opcode::buffer_load_format_d16_x;
break;
case 4:
op = aco_opcode::buffer_load_format_d16_xy;
break;
case 6:
op = aco_opcode::buffer_load_format_d16_xyz;
break;
case 8:
op = aco_opcode::buffer_load_format_d16_xyzw;
break;
default:
unreachable("invalid buffer load format size");
break;
case 2: op = aco_opcode::buffer_load_format_d16_x; break;
case 4: op = aco_opcode::buffer_load_format_d16_xy; break;
case 6: op = aco_opcode::buffer_load_format_d16_xyz; break;
case 8: op = aco_opcode::buffer_load_format_d16_xyzw; break;
default: unreachable("invalid buffer load format size"); break;
}
} else {
assert(info.component_size == 4);
switch (bytes_needed) {
case 4:
op = aco_opcode::buffer_load_format_x;
break;
case 8:
op = aco_opcode::buffer_load_format_xy;
break;
case 12:
op = aco_opcode::buffer_load_format_xyz;
break;
case 16:
op = aco_opcode::buffer_load_format_xyzw;
break;
default:
unreachable("invalid buffer load format size");
break;
case 4: op = aco_opcode::buffer_load_format_x; break;
case 8: op = aco_opcode::buffer_load_format_xy; break;
case 12: op = aco_opcode::buffer_load_format_xyz; break;
case 16: op = aco_opcode::buffer_load_format_xyzw; break;
default: unreachable("invalid buffer load format size"); break;
}
}
@ -4596,8 +4573,7 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
mubuf->offen = offen;
mubuf->idxen = idxen;
mubuf->glc = info.glc;
mubuf->dlc =
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->slc = info.slc;
mubuf->sync = info.sync;
mubuf->offset = const_offset;
@ -5229,9 +5205,9 @@ resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_off
}
void
emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp idx, Temp vdata,
unsigned const_offset, memory_sync_info sync, bool glc, bool slc,
bool swizzled)
emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp idx,
Temp vdata, unsigned const_offset, memory_sync_info sync, bool glc,
bool slc, bool swizzled)
{
assert(vdata.id());
assert(vdata.size() != 3 || ctx->program->gfx_level != GFX6);
@ -5256,8 +5232,8 @@ emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp s
vaddr_op = Operand(idx);
Builder::Result r =
bld.mubuf(op, Operand(descriptor), vaddr_op, soffset_op, Operand(vdata), const_offset,
offen, swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc,
bld.mubuf(op, Operand(descriptor), vaddr_op, soffset_op, Operand(vdata), const_offset, offen,
swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc,
/* dlc*/ false, slc);
r->mubuf().sync = sync;
@ -5269,7 +5245,8 @@ store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Tem
bool swizzled, memory_sync_info sync, bool glc, bool slc)
{
Builder bld(ctx->program, ctx->block);
assert(elem_size_bytes == 1 || elem_size_bytes == 2 || elem_size_bytes == 4 || elem_size_bytes == 8);
assert(elem_size_bytes == 1 || elem_size_bytes == 2 || elem_size_bytes == 4 ||
elem_size_bytes == 8);
assert(write_mask);
write_mask = util_widen_mask(write_mask, elem_size_bytes);
@ -5282,8 +5259,8 @@ store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Tem
for (unsigned i = 0; i < write_count; i++) {
unsigned const_offset = offsets[i] + base_const_offset;
emit_single_mubuf_store(ctx, descriptor, voffset, soffset, idx, write_datas[i], const_offset, sync,
glc, slc, swizzled);
emit_single_mubuf_store(ctx, descriptor, voffset, soffset, idx, write_datas[i], const_offset,
sync, glc, slc, swizzled);
}
}
@ -5387,7 +5364,7 @@ visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr)
{
/* LS pass output to TCS by temp if they have same in/out patch size. */
bool ls_need_output = ctx->stage == vertex_tess_control_hs &&
ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->tcs_in_out_eq;
ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->tcs_in_out_eq;
bool ps_need_output = ctx->stage == fragment_fs;
@ -6331,8 +6308,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
opcode = aco_opcode::image_load;
} else {
bool level_zero =
nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip;
}
@ -6391,8 +6367,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
unsigned access = nir_intrinsic_access(instr);
bool glc = ctx->options->gfx_level == GFX6 ||
((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
ctx->program->gfx_level < GFX11);
((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) && ctx->program->gfx_level < GFX11);
if (dim == GLSL_SAMPLER_DIM_BUF) {
Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
@ -6463,7 +6438,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)};
uint32_t index = 0;
u_foreach_bit(bit, dmask) {
u_foreach_bit (bit, dmask) {
vec->operands[index++] = Operand(emit_extract_vector(ctx, data, bit, rc));
}
data = bld.tmp(RegClass::get(RegType::vgpr, dmask_count * rc.bytes()));
@ -6491,9 +6466,8 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
}
void
translate_buffer_image_atomic_op(const nir_atomic_op op,
aco_opcode *buf_op, aco_opcode *buf_op64,
aco_opcode *image_op)
translate_buffer_image_atomic_op(const nir_atomic_op op, aco_opcode* buf_op, aco_opcode* buf_op64,
aco_opcode* image_op)
{
switch (op) {
case nir_atomic_op_iadd:
@ -6571,8 +6545,7 @@ translate_buffer_image_atomic_op(const nir_atomic_op op,
*buf_op64 = aco_opcode::buffer_atomic_fmax_x2;
*image_op = aco_opcode::image_atomic_fmax;
break;
default:
unreachable("unsupported atomic operation");
default: unreachable("unsupported atomic operation");
}
}
@ -6682,9 +6655,8 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
bool glc =
(nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
ctx->program->gfx_level < GFX11;
bool glc = (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
ctx->program->gfx_level < GFX11;
unsigned write_count = 0;
Temp write_datas[32];
@ -6805,7 +6777,7 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
/* Don't expand global loads when they use MUBUF or SMEM.
* Global loads don't have the bounds checking that buffer loads have that
* makes this safe.
*/
*/
unsigned align = nir_intrinsic_align(instr);
bool byte_align_for_smem_mubuf =
can_use_byte_align_for_global_load(num_components, component_size, align, false);
@ -6836,9 +6808,8 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
bool glc =
(nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
ctx->program->gfx_level < GFX11;
bool glc = (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
ctx->program->gfx_level < GFX11;
unsigned write_count = 0;
Temp write_datas[32];
@ -6999,8 +6970,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
op32 = global ? aco_opcode::global_atomic_fmax : aco_opcode::flat_atomic_fmax;
op64 = global ? aco_opcode::global_atomic_fmax_x2 : aco_opcode::flat_atomic_fmax_x2;
break;
default:
unreachable("unsupported atomic operation");
default: unreachable("unsupported atomic operation");
}
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
@ -7192,8 +7162,8 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
memory_sync_info sync(aco_storage_mode_from_nir_mem_mode(mem_mode),
written_once ? semantic_can_reorder : semantic_none);
store_vmem_mubuf(ctx, store_src, descriptor, v_offset, s_offset, idx, const_offset, elem_size_bytes,
write_mask, swizzled, sync, glc, slc);
store_vmem_mubuf(ctx, store_src, descriptor, v_offset, s_offset, idx, const_offset,
elem_size_bytes, write_mask, swizzled, sync, glc, slc);
}
void
@ -7206,8 +7176,8 @@ visit_load_smem(isel_context* ctx, nir_intrinsic_instr* instr)
/* If base address is 32bit, convert to 64bit with the high 32bit part. */
if (base.bytes() == 4) {
base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
base, Operand::c32(ctx->options->address32_hi));
base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), base,
Operand::c32(ctx->options->address32_hi));
}
aco_opcode opcode = aco_opcode::s_load_dword;
@ -7535,10 +7505,10 @@ get_scratch_resource(isel_context* ctx)
Builder bld(ctx->program, ctx->block);
Temp scratch_addr = ctx->program->private_segment_buffer;
if (!scratch_addr.bytes()) {
Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
Operand::c32(aco_symbol_scratch_addr_lo));
Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
Operand::c32(aco_symbol_scratch_addr_hi));
Temp addr_lo =
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
Temp addr_hi =
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
} else if (ctx->stage.hw != HWStage::CS) {
scratch_addr =
@ -8093,8 +8063,7 @@ Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i);
Temp lanecount_to_mask(isel_context* ctx, Temp count);
Temp
get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
enum glsl_interp_mode interp)
get_interp_param(isel_context* ctx, nir_intrinsic_op intrin, enum glsl_interp_mode interp)
{
bool linear = interp == INTERP_MODE_NOPERSPECTIVE;
if (intrin == nir_intrinsic_load_barycentric_pixel ||
@ -8109,9 +8078,8 @@ get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
}
void
ds_ordered_count_offsets(isel_context *ctx, unsigned index_operand,
unsigned wave_release, unsigned wave_done,
unsigned *offset0, unsigned *offset1)
ds_ordered_count_offsets(isel_context* ctx, unsigned index_operand, unsigned wave_release,
unsigned wave_done, unsigned* offset0, unsigned* offset1)
{
unsigned ordered_count_index = index_operand & 0x3f;
unsigned count_dword = (index_operand >> 24) & 0xf;
@ -8189,7 +8157,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
RegClass rc = RegClass(offset.type(), 1);
Temp pos1 = bld.tmp(rc), pos2 = bld.tmp(rc);
bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), offset);
Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
Temp bary = get_interp_param(ctx, instr->intrinsic,
(glsl_interp_mode)nir_intrinsic_interp_mode(instr));
emit_interp_center(ctx, get_ssa_temp(ctx, &instr->dest.ssa), bary, pos1, pos2);
break;
}
@ -8977,8 +8946,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
if (ctx->args->merged_wave_info.used)
bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc),
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(2u),
Operand::c32(8u), Operand::zero());
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(2u), Operand::c32(8u),
Operand::zero());
else if (ctx->args->gs_wave_id.used)
bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_wave_id));
else
@ -9025,8 +8994,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_overwrite_tes_arguments_amd: {
ctx->arg_temps[ctx->args->tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
ctx->arg_temps[ctx->args->tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
ctx->arg_temps[ctx->args->tes_rel_patch_id.arg_index] =
get_ssa_temp(ctx, instr->src[3].ssa);
ctx->arg_temps[ctx->args->tes_rel_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[3].ssa);
ctx->arg_temps[ctx->args->tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[2].ssa);
break;
}
@ -9036,7 +9004,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
Temp src = ctx->arg_temps[nir_intrinsic_base(instr)];
assert(src.id());
assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr : RegType::vgpr));
assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr
: RegType::vgpr));
bld.copy(Definition(dst), src);
emit_split_vector(ctx, dst, dst.size());
break;
@ -9048,35 +9017,34 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u));
unsigned offset0, offset1;
Instruction *ds_instr;
Instruction* ds_instr;
Operand m;
/* Lock a GDS mutex. */
ds_ordered_count_offsets(ctx, 1 << 24u, false, false, &offset0, &offset1);
m = bld.m0(bld.as_uniform(ordered_id));
ds_instr = bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m,
offset0, offset1, true);
ds_instr =
bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)};
unsigned write_mask = nir_intrinsic_write_mask(instr);
bool use_gds_registers =
ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
bool use_gds_registers = ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
for (unsigned i = 0; i < instr->num_components; i++) {
if (write_mask & (1 << i)) {
Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
if (use_gds_registers) {
ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1),
Operand(), chan_counter, i * 4, 0u, true);
ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1), Operand(),
chan_counter, i * 4, 0u, true);
} else {
m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1),
gds_base, chan_counter, m, i * 4, 0u, true);
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1), gds_base, chan_counter, m,
i * 4, 0u, true);
}
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
@ -9092,33 +9060,32 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
/* Unlock a GDS mutex. */
ds_ordered_count_offsets(ctx, 1 << 24u, true, true, &offset0, &offset1);
m = bld.m0(bld.as_uniform(ordered_id));
ds_instr = bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m,
offset0, offset1, true);
ds_instr =
bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
emit_split_vector(ctx, dst, instr->num_components);
break;
}
case nir_intrinsic_xfb_counter_sub_amd: {
bool use_gds_registers =
ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
bool use_gds_registers = ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
unsigned write_mask = nir_intrinsic_write_mask(instr);
Temp counter = get_ssa_temp(ctx, instr->src[0].ssa);
Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u));
u_foreach_bit(i, write_mask) {
u_foreach_bit (i, write_mask) {
Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
Instruction *ds_instr;
Instruction* ds_instr;
if (use_gds_registers) {
ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1),
Operand(), chan_counter, i * 4, 0u, true);
ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1), Operand(), chan_counter,
i * 4, 0u, true);
} else {
Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
ds_instr = bld.ds(aco_opcode::ds_sub_rtn_u32, bld.def(v1),
gds_base, chan_counter, m, i * 4, 0u, true);
ds_instr = bld.ds(aco_opcode::ds_sub_rtn_u32, bld.def(v1), gds_base, chan_counter, m,
i * 4, 0u, true);
}
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
}
@ -9162,15 +9129,14 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
exp->valid_mask = false;
/* Compressed export uses two bits for a channel. */
uint32_t channel_mask = exp->compressed ?
(write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) :
write_mask;
uint32_t channel_mask =
exp->compressed ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : write_mask;
Temp value = get_ssa_temp(ctx, instr->src[0].ssa);
for (unsigned i = 0; i < 4; i++) {
exp->operands[i] = channel_mask & BITFIELD_BIT(i) ?
Operand(emit_extract_vector(ctx, value, i, v1)) :
Operand(v1);
exp->operands[i] = channel_mask & BITFIELD_BIT(i)
? Operand(emit_extract_vector(ctx, value, i, v1))
: Operand(v1);
}
ctx->block->instructions.emplace_back(std::move(exp));
@ -9183,13 +9149,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
struct aco_export_mrt mrt0, mrt1;
for (unsigned i = 0; i < 4; i++) {
mrt0.out[i] = write_mask & BITFIELD_BIT(i) ?
Operand(emit_extract_vector(ctx, val0, i, v1)) :
Operand(v1);
mrt0.out[i] = write_mask & BITFIELD_BIT(i) ? Operand(emit_extract_vector(ctx, val0, i, v1))
: Operand(v1);
mrt1.out[i] = write_mask & BITFIELD_BIT(i) ?
Operand(emit_extract_vector(ctx, val1, i, v1)) :
Operand(v1);
mrt1.out[i] = write_mask & BITFIELD_BIT(i) ? Operand(emit_extract_vector(ctx, val1, i, v1))
: Operand(v1);
}
mrt0.enabled_channels = mrt1.enabled_channels = write_mask;
@ -9383,7 +9347,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
}
if (has_wqm_coord) {
assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb || instr->op == nir_texop_lod);
assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb ||
instr->op == nir_texop_lod);
assert(wqm_coord.regClass().is_linear_vgpr());
assert(!a16 && !g16);
}
@ -9701,9 +9666,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
if (dst.regClass() == s1) {
Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
emit_extract_vector(ctx, resource, 1, s1));
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst),
bld.as_uniform(tmp_dst), Operand::c32(0x76543210),
bld.scc(is_not_null));
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bld.as_uniform(tmp_dst),
Operand::c32(0x76543210), bld.scc(is_not_null));
} else {
Temp is_not_null = bld.tmp(bld.lm);
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
@ -10782,10 +10746,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
/* Replace NaN by zero (only 32-bit) to fix game bugs if requested. */
if (out->enable_mrt_output_nan_fixup && !is_16bit &&
(out->col_format == V_028714_SPI_SHADER_32_R || out->col_format == V_028714_SPI_SHADER_32_GR ||
out->col_format == V_028714_SPI_SHADER_32_AR || out->col_format == V_028714_SPI_SHADER_32_ABGR ||
(out->col_format == V_028714_SPI_SHADER_32_R ||
out->col_format == V_028714_SPI_SHADER_32_GR ||
out->col_format == V_028714_SPI_SHADER_32_AR ||
out->col_format == V_028714_SPI_SHADER_32_ABGR ||
out->col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
u_foreach_bit(i, out->write_mask) {
u_foreach_bit (i, out->write_mask) {
Temp is_not_nan =
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), values[i], values[i]);
values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), values[i],
@ -10847,7 +10813,6 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
}
break;
case V_028714_SPI_SHADER_SNORM16_ABGR:
if (is_16bit && ctx->options->gfx_level >= GFX9) {
compr_op = aco_opcode::v_cvt_pknorm_i16_f16;
@ -10862,13 +10827,13 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
/* clamp */
uint32_t max_rgb = out->is_int8 ? 255 : out->is_int10 ? 1023 : 0;
u_foreach_bit(i, out->write_mask) {
u_foreach_bit (i, out->write_mask) {
uint32_t max = i == 3 && out->is_int10 ? 3 : max_rgb;
values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]);
}
} else if (is_16bit) {
u_foreach_bit(i, out->write_mask) {
u_foreach_bit (i, out->write_mask) {
Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, false);
values[i] = Operand(tmp);
}
@ -10882,7 +10847,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
uint32_t max_rgb = out->is_int8 ? 127 : out->is_int10 ? 511 : 0;
uint32_t min_rgb = out->is_int8 ? -128 : out->is_int10 ? -512 : 0;
u_foreach_bit(i, out->write_mask) {
u_foreach_bit (i, out->write_mask) {
uint32_t max = i == 3 && out->is_int10 ? 1 : max_rgb;
uint32_t min = i == 3 && out->is_int10 ? -2u : min_rgb;
@ -10890,7 +10855,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]);
}
} else if (is_16bit) {
u_foreach_bit(i, out->write_mask) {
u_foreach_bit (i, out->write_mask) {
Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, true);
values[i] = Operand(tmp);
}
@ -10996,8 +10961,7 @@ create_fs_jump_to_epilog(isel_context* ctx)
}
}
Temp continue_pc =
convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog_pc));
Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog_pc));
aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + color_exports.size(), 0)};
@ -11068,12 +11032,13 @@ add_startpgm(struct isel_context* ctx)
Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
scratch_offset.setLateKill(true);
Operand scratch_addr = ctx->args->ring_offsets.used ?
Operand(get_arg(ctx, ctx->args->ring_offsets)) : Operand(s2);
Operand scratch_addr = ctx->args->ring_offsets.used
? Operand(get_arg(ctx, ctx->args->ring_offsets))
: Operand(s2);
Builder bld(ctx->program, ctx->block);
bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
scratch_addr, scratch_offset);
bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), scratch_addr,
scratch_offset);
}
return startpgm;
@ -11085,9 +11050,9 @@ fix_ls_vgpr_init_bug(isel_context* ctx, Pseudo_instruction* startpgm)
assert(ctx->shader->info.stage == MESA_SHADER_VERTEX);
Builder bld(ctx->program, ctx->block);
constexpr unsigned hs_idx = 1u;
Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
get_arg(ctx, ctx->args->merged_wave_info),
Operand::c32((8u << 16) | (hs_idx * 8u)));
Builder::Result hs_thread_count =
bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32((8u << 16) | (hs_idx * 8u)));
Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
/* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. */
@ -11218,10 +11183,9 @@ merged_wave_info_to_mask(isel_context* ctx, unsigned i)
Builder bld(ctx->program, ctx->block);
/* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */
Temp count = i == 0
? get_arg(ctx, ctx->args->merged_wave_info)
: bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(i * 8u));
Temp count = i == 0 ? get_arg(ctx, ctx->args->merged_wave_info)
: bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(i * 8u));
return lanecount_to_mask(ctx, count);
}
@ -11276,10 +11240,10 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
void
select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct aco_compiler_options* options,
const struct aco_shader_info* info,
const struct ac_shader_args* args)
const struct aco_shader_info* info, const struct ac_shader_args* args)
{
isel_context ctx = setup_isel_context(program, shader_count, shaders, config, options, info, args, false);
isel_context ctx =
setup_isel_context(program, shader_count, shaders, config, options, info, args, false);
if (ctx.stage == raytracing_cs)
return select_program_rt(ctx, shader_count, shaders, args);
@ -11391,8 +11355,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
void
select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shader_config* config,
const struct aco_compiler_options* options,
const struct aco_shader_info* info,
const struct ac_shader_args* args)
const struct aco_shader_info* info, const struct ac_shader_args* args)
{
assert(options->gfx_level == GFX8);


@ -660,8 +660,8 @@ cleanup_context(isel_context* ctx)
isel_context
setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct aco_compiler_options* options,
const struct aco_shader_info* info,
const struct ac_shader_args* args, bool is_ps_epilog)
const struct aco_shader_info* info, const struct ac_shader_args* args,
bool is_ps_epilog)
{
SWStage sw_stage = SWStage::None;
for (unsigned i = 0; i < shader_count; i++) {


@ -80,8 +80,7 @@ validate(aco::Program* program)
}
static std::string
get_disasm_string(aco::Program* program, std::vector<uint32_t>& code,
unsigned exec_size)
get_disasm_string(aco::Program* program, std::vector<uint32_t>& code, unsigned exec_size)
{
std::string disasm;
@ -111,8 +110,7 @@ get_disasm_string(aco::Program* program, std::vector<uint32_t>& code,
static std::string
aco_postprocess_shader(const struct aco_compiler_options* options,
const struct aco_shader_info *info,
std::unique_ptr<aco::Program>& program)
const struct aco_shader_info* info, std::unique_ptr<aco::Program>& program)
{
std::string llvm_ir;
@ -211,12 +209,9 @@ aco_postprocess_shader(const struct aco_compiler_options* options,
}
void
aco_compile_shader(const struct aco_compiler_options* options,
const struct aco_shader_info* info,
aco_compile_shader(const struct aco_compiler_options* options, const struct aco_shader_info* info,
unsigned shader_count, struct nir_shader* const* shaders,
const struct ac_shader_args *args,
aco_callback *build_binary,
void **binary)
const struct ac_shader_args* args, aco_callback* build_binary, void** binary)
{
aco::init();
@ -335,13 +330,8 @@ aco_compile_vs_prolog(const struct aco_compiler_options* options,
if (get_disasm)
disasm = get_disasm_string(program.get(), code, exec_size);
(*build_prolog)(binary,
config.num_sgprs,
config.num_vgprs,
code.data(),
code.size(),
disasm.data(),
disasm.size());
(*build_prolog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
disasm.data(), disasm.size());
}
void
@ -377,11 +367,6 @@ aco_compile_ps_epilog(const struct aco_compiler_options* options,
if (get_disasm)
disasm = get_disasm_string(program.get(), code, exec_size);
(*build_epilog)(binary,
config.num_sgprs,
config.num_vgprs,
code.data(),
code.size(),
disasm.data(),
disasm.size());
(*build_epilog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
disasm.data(), disasm.size());
}


@ -25,9 +25,9 @@
#ifndef ACO_INTERFACE_H
#define ACO_INTERFACE_H
#include "amd_family.h"
#include "aco_shader_info.h"
#include "amd_family.h"
#ifdef __cplusplus
extern "C" {
#endif
@ -47,24 +47,18 @@ typedef void(aco_callback)(void** priv_ptr, const struct ac_shader_config* confi
const char* llvm_ir_str, unsigned llvm_ir_size, const char* disasm_str,
unsigned disasm_size, uint32_t* statistics, uint32_t stats_size,
uint32_t exec_size, const uint32_t* code, uint32_t code_dw,
const struct aco_symbol *symbols, unsigned num_symbols);
const struct aco_symbol* symbols, unsigned num_symbols);
typedef void (aco_shader_part_callback)(void **priv_ptr,
uint32_t num_sgprs,
uint32_t num_vgprs,
const uint32_t *code,
uint32_t code_size,
const char *disasm_str,
uint32_t disasm_size);
typedef void(aco_shader_part_callback)(void** priv_ptr, uint32_t num_sgprs, uint32_t num_vgprs,
const uint32_t* code, uint32_t code_size,
const char* disasm_str, uint32_t disasm_size);
extern const struct aco_compiler_statistic_info* aco_statistic_infos;
void aco_compile_shader(const struct aco_compiler_options* options,
const struct aco_shader_info* info,
unsigned shader_count, struct nir_shader* const* shaders,
const struct ac_shader_args *args,
aco_callback *build_binary,
void **binary);
const struct aco_shader_info* info, unsigned shader_count,
struct nir_shader* const* shaders, const struct ac_shader_args* args,
aco_callback* build_binary, void** binary);
void aco_compile_rt_prolog(const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct ac_shader_args* in_args,


@ -98,8 +98,9 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
program->wave_size = info->wave_size;
program->lane_mask = program->wave_size == 32 ? s1 : s2;
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 :
gfx_level >= GFX7 ? 512 : 256;
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024
: gfx_level >= GFX7 ? 512
: 256;
program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
/* GFX6: There is 64KB LDS per CU, but a single workgroup can only use 32KB. */


@ -140,9 +140,9 @@ enum storage_class : uint8_t {
storage_buffer = 0x1, /* SSBOs and global memory */
storage_gds = 0x2,
storage_image = 0x4,
storage_shared = 0x8, /* or TCS output */
storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
storage_task_payload = 0x20,/* Task-Mesh payload */
storage_shared = 0x8, /* or TCS output */
storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
storage_task_payload = 0x20, /* Task-Mesh payload */
storage_scratch = 0x40,
storage_vgpr_spill = 0x80,
storage_count = 8, /* not counting storage_none */
@ -823,7 +823,8 @@ public:
assert(bytes() == 2 || bytes() == 4);
if (opsel) {
if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
return int16_t(data_.i) >> 16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
return int16_t(data_.i) >>
16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
else
return data_.i >> 16;
}
@ -1418,7 +1419,8 @@ struct VINTERP_inreg_instruction : public VALU_instruction {
uint8_t padding5;
uint8_t padding6;
};
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
"Unexpected padding");
/**
* Data Parallel Primitives Format:
@ -1809,8 +1811,7 @@ memory_sync_info get_sync_info(const Instruction* instr);
inline bool
is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
{
if (instr->definitions.empty() || instr->isBranch() ||
instr->opcode == aco_opcode::p_startpgm ||
if (instr->definitions.empty() || instr->isBranch() || instr->opcode == aco_opcode::p_startpgm ||
instr->opcode == aco_opcode::p_init_scratch ||
instr->opcode == aco_opcode::p_dual_src_export_gfx11)
return false;
@ -2216,8 +2217,7 @@ void init_program(Program* program, Stage stage, const struct aco_shader_info* i
void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct aco_compiler_options* options,
const struct aco_shader_info* info,
const struct ac_shader_args* args);
const struct aco_shader_info* info, const struct ac_shader_args* args);
void select_trap_handler_shader(Program* program, struct nir_shader* shader,
ac_shader_config* config,
const struct aco_compiler_options* options,
@ -2258,7 +2258,7 @@ bool dealloc_vgprs(Program* program);
void insert_NOPs(Program* program);
void form_hard_clauses(Program* program);
unsigned emit_program(Program* program, std::vector<uint32_t>& code,
std::vector<struct aco_symbol> *symbols);
std::vector<struct aco_symbol>* symbols);
/**
* Returns true if print_asm can disassemble the given program for the current build/runtime
* configuration


@ -2181,7 +2181,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
instr->mimg().strict_wqm = false;
if ((3 + num_vaddr) > instr->operands.size()) {
MIMG_instruction *new_instr = create_instruction<MIMG_instruction>(
MIMG_instruction* new_instr = create_instruction<MIMG_instruction>(
instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
new_instr->definitions.begin());
@ -2346,8 +2346,8 @@ lower_to_hw_instr(Program* program)
target =
program->has_color_exports ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_MRTZ;
if (program->stage == fragment_fs)
bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1),
0, target, false, true, true);
bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), 0,
target, false, true, true);
if (should_dealloc_vgprs)
bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs);
bld.sopp(aco_opcode::s_endpgm);
@ -2518,8 +2518,7 @@ lower_to_hw_instr(Program* program)
create_bperm(bld, ext_swiz, dst, Operand::zero());
}
} else {
SDWA_instruction& sdwa =
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa();
SDWA_instruction& sdwa = bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa();
sdwa.sel[0] = SubdwordSel(bits / 8, offset / 8, signext);
}
}
@ -2574,7 +2573,8 @@ lower_to_hw_instr(Program* program)
} else {
assert(dst.regClass() == v2b);
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
->sdwa().sel[1] = SubdwordSel::ubyte;
->sdwa()
.sel[1] = SubdwordSel::ubyte;
}
break;
}


@ -1369,7 +1369,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (instr->isSALU() || instr->isPseudo()) {
unsigned bits = get_operand_size(instr, i);
if ((info.is_constant(bits) || (info.is_literal(bits) && instr->isPseudo())) &&
alu_can_accept_constant(instr, i)) {
alu_can_accept_constant(instr, i)) {
instr->operands[i] = get_constant_op(ctx, info, bits);
continue;
}
@ -2116,9 +2116,10 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
case aco_opcode::v_mbcnt_hi_u32_b32_e64: {
if (instr->operands[0].constantEquals(-1) && instr->operands[1].isTemp() &&
ctx.info[instr->operands[1].tempId()].is_usedef()) {
Instruction *usedef_instr = ctx.info[instr->operands[1].tempId()].instr;
Instruction* usedef_instr = ctx.info[instr->operands[1].tempId()].instr;
if (usedef_instr->opcode == aco_opcode::v_mbcnt_lo_u32_b32 &&
usedef_instr->operands[0].constantEquals(-1) && usedef_instr->operands[1].constantEquals(0))
usedef_instr->operands[0].constantEquals(-1) &&
usedef_instr->operands[1].constantEquals(0))
ctx.info[instr->definitions[0].tempId()].set_subgroup_invocation(instr.get());
}
break;
@ -2370,7 +2371,9 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return false;
/* Find the constant operand or return early if there isn't one. */
const int const_op_idx = instr->operands[0].isConstant() ? 0 : instr->operands[1].isConstant() ? 1 : -1;
const int const_op_idx = instr->operands[0].isConstant() ? 0
: instr->operands[1].isConstant() ? 1
: -1;
if (const_op_idx == -1)
return false;
@ -2413,11 +2416,10 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
first_bit = val + 1;
num_bits = val >= wave_size ? 0 : (wave_size - val - 1);
break;
default:
return false;
default: return false;
}
Instruction *cpy = NULL;
Instruction* cpy = NULL;
const uint64_t mask = BITFIELD64_RANGE(first_bit, num_bits);
if (wave_size == 64 && mask > 0x7fffffff && mask != -1ull) {
/* Mask can't be represented as a 64-bit constant or literal, use s_bfm_b64. */
@ -2426,7 +2428,8 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
cpy->operands[1] = Operand::c32(first_bit);
} else {
/* Copy mask as a literal constant. */
cpy = create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1);
cpy =
create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1);
cpy->operands[0] = wave_size == 32 ? Operand::c32((uint32_t)mask) : Operand::c64(mask);
}
@ -4821,10 +4824,12 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
*/
if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_and_b64) {
if (instr->operands[0].isTemp() && fixed_to_exec(instr->operands[1]) &&
ctx.uses[instr->operands[0].tempId()] == 1 && ctx.uses[instr->definitions[1].tempId()] == 0 &&
ctx.uses[instr->operands[0].tempId()] == 1 &&
ctx.uses[instr->definitions[1].tempId()] == 0 &&
can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) {
ctx.uses[instr->operands[0].tempId()]--;
ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(instr->definitions[0].getTemp());
ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(
instr->definitions[0].getTemp());
instr.reset();
return;
}


@ -516,7 +516,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
if (mimg.lwe)
fprintf(output, " lwe");
if (mimg.r128)
fprintf(output, " r128");
fprintf(output, " r128");
if (mimg.a16)
fprintf(output, " a16");
if (mimg.d16)


@ -460,8 +460,7 @@ print_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file)
printf("%u/%u used, %u/%u free\n", regs.size - free_regs, regs.size, free_regs, regs.size);
/* print assignments ordered by registers */
std::map<PhysReg, std::pair<unsigned, unsigned>>
regs_to_vars; /* maps to byte size and temp id */
std::map<PhysReg, std::pair<unsigned, unsigned>> regs_to_vars; /* maps to byte size and temp id */
for (unsigned id : find_vars(ctx, reg_file, regs)) {
const assignment& var = ctx.assignments[id];
PhysReg reg = var.reg;
@ -1088,8 +1087,8 @@ get_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file,
instr->operands[i].regClass() == info.rc) {
assignment& op = ctx.assignments[instr->operands[i].tempId()];
/* if everything matches, create parallelcopy for the killed operand */
if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) &&
op.reg != scc && reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) && op.reg != scc &&
reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
Definition pc_def = Definition(reg, info.rc);
parallelcopies.emplace_back(instr->operands[i], pc_def);
return op.reg;
@ -1655,8 +1654,7 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
return vcc;
}
if (ctx.assignments[temp.id()].m0) {
if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) &&
can_write_m0(instr))
if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) && can_write_m0(instr))
return m0;
}


@ -587,8 +587,10 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
/* don't move non-reorderable instructions */
if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime ||
instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 ||
instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog ||
instr->opcode == aco_opcode::s_sendmsg_rtn_b32 || instr->opcode == aco_opcode::s_sendmsg_rtn_b64)
instr->opcode == aco_opcode::p_init_scratch ||
instr->opcode == aco_opcode::p_jump_to_epilog ||
instr->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
instr->opcode == aco_opcode::s_sendmsg_rtn_b64)
return hazard_fail_unreorderable;
memory_event_set instr_set;
@ -663,8 +665,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
int16_t k = 0;
/* don't move s_memtime/s_memrealtime */
if (current->opcode == aco_opcode::s_memtime ||
current->opcode == aco_opcode::s_memrealtime ||
if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime ||
current->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
current->opcode == aco_opcode::s_sendmsg_rtn_b64)
return;


@ -35,10 +35,10 @@
extern "C" {
#endif
#define ACO_MAX_SO_OUTPUTS 64
#define ACO_MAX_SO_BUFFERS 4
#define ACO_MAX_SO_OUTPUTS 64
#define ACO_MAX_SO_BUFFERS 4
#define ACO_MAX_VERTEX_ATTRIBS 32
#define ACO_MAX_VBS 32
#define ACO_MAX_VBS 32
struct aco_vs_input_state {
uint32_t instance_rate_inputs;
@ -133,8 +133,8 @@ struct aco_compiler_options {
enum amd_gfx_level gfx_level;
uint32_t address32_hi;
struct {
void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message);
void *private_data;
void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
void* private_data;
} debug;
};


@ -94,7 +94,8 @@ struct spill_ctx {
spill_ctx(const RegisterDemand target_pressure_, Program* program_,
std::vector<std::vector<RegisterDemand>> register_demand_)
: target_pressure(target_pressure_), program(program_), memory(),
register_demand(std::move(register_demand_)), renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
register_demand(std::move(register_demand_)),
renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
processed(program->blocks.size(), false),
@ -226,10 +227,11 @@ next_uses_per_block(spill_ctx& ctx, unsigned block_idx, uint32_t& worklist)
std::pair<uint32_t, uint32_t> distance{block_idx, 0};
auto it = instr->definitions[0].isTemp() ? next_use_distances_start.find(instr->definitions[0].getTemp())
: next_use_distances_start.end();
auto it = instr->definitions[0].isTemp()
? next_use_distances_start.find(instr->definitions[0].getTemp())
: next_use_distances_start.end();
if (it != next_use_distances_start.end() &&
phi_defs.insert(instr->definitions[0].getTemp()).second) {
phi_defs.insert(instr->definitions[0].getTemp()).second) {
distance = it->second;
}
@ -388,7 +390,7 @@ get_rematerialize_info(spill_ctx& ctx)
void
update_local_next_uses(spill_ctx& ctx, Block* block,
std::vector<std::vector<std::pair<Temp, uint32_t>>>& local_next_uses)
std::vector<std::vector<std::pair<Temp, uint32_t>>>& local_next_uses)
{
if (local_next_uses.size() < block->instructions.size()) {
/* Allocate more next-use-maps. Note that by never reducing the vector size, we enable
@ -1006,7 +1008,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
ctx.renames[pred_idx].find(phi->operands[i].getTemp());
if (it != ctx.renames[pred_idx].end()) {
phi->operands[i].setTemp(it->second);
/* prevent the defining instruction from being DCE'd if it could be rematerialized */
/* prevent the defining instruction from being DCE'd if it could be rematerialized */
} else {
auto remat_it = ctx.remat.find(phi->operands[i].getTemp());
if (remat_it != ctx.remat.end()) {
@ -1407,7 +1409,8 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
continue;
/* find p_logical_end */
std::vector<aco_ptr<Instruction>>& prev_instructions = ctx.program->blocks[block_idx].instructions;
std::vector<aco_ptr<Instruction>>& prev_instructions =
ctx.program->blocks[block_idx].instructions;
unsigned idx = prev_instructions.size() - 1;
while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
idx--;
@ -1422,10 +1425,10 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
Temp private_segment_buffer = ctx.program->private_segment_buffer;
if (!private_segment_buffer.bytes()) {
Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
Operand::c32(aco_symbol_scratch_addr_lo));
Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
Operand::c32(aco_symbol_scratch_addr_hi));
Temp addr_lo =
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
Temp addr_hi =
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
private_segment_buffer =
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
} else if (ctx.program->stage.hw != HWStage::CS) {
@ -1471,8 +1474,7 @@ setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
if (ctx.scratch_rsrc == Temp()) {
int32_t saddr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size -
ctx.program->dev.scratch_global_offset_min;
ctx.scratch_rsrc =
load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
ctx.scratch_rsrc = load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
}
} else {
bool add_offset_to_sgpr =


@ -35,8 +35,8 @@
namespace aco {
static void
aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix,
const char* file, unsigned line, const char* fmt, va_list args)
aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix, const char* file,
unsigned line, const char* fmt, va_list args)
{
char* msg;
@ -270,8 +270,7 @@ validate_ir(Program* program)
(instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) ||
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
(instr->isScratch() && i == 0) ||
(instr->isDS() && i == 0) ||
(instr->isScratch() && i == 0) || (instr->isDS() && i == 0) ||
(instr->opcode == aco_opcode::p_init_scratch && i == 0);
check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
} else {
@ -393,7 +392,7 @@ validate_ir(Program* program)
"OPSEL_LO set for unsupported instruction format", instr.get());
check(!instr->valu().opsel_hi || instr->isVOP3P(),
"OPSEL_HI set for unsupported instruction format", instr.get());
check(!instr->valu().omod || instr->isVOP3() ||instr->isSDWA(),
check(!instr->valu().omod || instr->isVOP3() || instr->isSDWA(),
"OMOD set for unsupported instruction format", instr.get());
check(!instr->valu().clamp || instr->isVOP3() || instr->isVOP3P() ||
instr->isSDWA() || instr->isVINTERP_INREG(),
@ -562,7 +561,8 @@ validate_ir(Program* program)
instr->definitions[2].regClass().size() == 1,
"Third definition of p_dual_src_export_gfx11 must be a v1", instr.get());
check(instr->definitions[3].regClass() == program->lane_mask,
"Fourth definition of p_dual_src_export_gfx11 must be a lane mask", instr.get());
"Fourth definition of p_dual_src_export_gfx11 must be a lane mask",
instr.get());
check(instr->definitions[4].physReg() == vcc,
"Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
check(instr->definitions[5].physReg() == scc,
@ -627,26 +627,28 @@ validate_ir(Program* program)
check(instr->operands.size() < 4 || instr->operands[3].isOfType(RegType::vgpr),
"VMEM write data must be vgpr", instr.get());
const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
instr->opcode == aco_opcode::buffer_load_ubyte ||
instr->opcode == aco_opcode::buffer_load_sbyte ||
instr->opcode == aco_opcode::buffer_load_ushort ||
instr->opcode == aco_opcode::buffer_load_sshort ||
instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
instr->opcode == aco_opcode::buffer_load_short_d16 ||
instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
instr->opcode == aco_opcode::buffer_load_format_d16_x ||
instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
const bool d16 =
instr->opcode ==
aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
instr->opcode == aco_opcode::buffer_load_ubyte ||
instr->opcode == aco_opcode::buffer_load_sbyte ||
instr->opcode == aco_opcode::buffer_load_ushort ||
instr->opcode == aco_opcode::buffer_load_sshort ||
instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
instr->opcode == aco_opcode::buffer_load_short_d16 ||
instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
instr->opcode == aco_opcode::buffer_load_format_d16_x ||
instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
if (instr->definitions.size()) {
check(instr->definitions[0].regClass().type() == RegType::vgpr,
"VMEM definitions[0] (VDATA) must be VGPR", instr.get());
@ -763,11 +765,14 @@ validate_ir(Program* program)
break;
}
case Format::LDSDIR: {
check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1, "LDSDIR must have an v1 definition", instr.get());
check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1,
"LDSDIR must have an v1 definition", instr.get());
check(instr->operands.size() == 1, "LDSDIR must have an operand", instr.get());
if (!instr->operands.empty()) {
check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand", instr.get());
check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0, "LDSDIR must have an operand fixed to m0", instr.get());
check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand",
instr.get());
check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0,
"LDSDIR must have an operand fixed to m0", instr.get());
}
break;
}

@ -35,19 +35,20 @@
#include <string>
struct TestDef {
const char *name;
const char *source_file;
const char* name;
const char* source_file;
void (*func)();
};
extern std::map<std::string, TestDef> tests;
extern FILE *output;
extern FILE* output;
bool set_variant(const char *name);
bool set_variant(const char* name);
inline bool set_variant(amd_gfx_level cls, const char *rest="")
inline bool
set_variant(amd_gfx_level cls, const char* rest = "")
{
char buf[8+strlen(rest)];
char buf[8 + strlen(rest)];
if (cls != GFX10_3) {
snprintf(buf, sizeof(buf), "gfx%d%s", cls - GFX6 + 6 - (cls > GFX10_3), rest);
} else {
@ -56,18 +57,21 @@ inline bool set_variant(amd_gfx_level cls, const char *rest="")
return set_variant(buf);
}
void fail_test(const char *fmt, ...);
void skip_test(const char *fmt, ...);
void fail_test(const char* fmt, ...);
void skip_test(const char* fmt, ...);
#define _BEGIN_TEST(name, struct_name) static void struct_name(); static __attribute__((constructor)) void CONCAT2(add_test_, __COUNTER__)() {\
tests[#name] = (TestDef){#name, ACO_TEST_BUILD_ROOT "/" __FILE__, &struct_name};\
}\
static void struct_name() {\
#define _BEGIN_TEST(name, struct_name) \
static void struct_name(); \
static __attribute__((constructor)) void CONCAT2(add_test_, __COUNTER__)() \
{ \
tests[#name] = (TestDef){#name, ACO_TEST_BUILD_ROOT "/" __FILE__, &struct_name}; \
} \
static void struct_name() \
{
#define BEGIN_TEST(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
#define BEGIN_TEST(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
#define BEGIN_TEST_TODO(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
#define BEGIN_TEST_FAIL(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
#define END_TEST \
}
#define END_TEST }
#endif /* ACO_TEST_COMMON_H */
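For orientation, a minimal sketch of how the test macros and variant helpers above are typically combined; the test name and body are hypothetical and not part of this diff:

/* hypothetical example, not in the commit: BEGIN_TEST/END_TEST register the
 * function, setup_cs() builds a compute program, and writeout()/
 * finish_opt_test() come from the test helpers declared elsewhere here. */
BEGIN_TEST(example.minimal)
   if (!setup_cs(NULL, GFX10_3))
      return;
   /* emit something through the global Builder "bld" and tag it for checking */
   writeout(0, bld.copy(bld.def(v1), Operand::c32(42u)));
   finish_opt_test();
END_TEST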

@ -22,19 +22,20 @@
*
*/
#include "helpers.h"
#include "vulkan/vk_format.h"
#include "common/amd_family.h"
#include <stdio.h>
#include <sstream>
#include "vulkan/vk_format.h"
#include <llvm-c/Target.h>
#include <mutex>
#include <sstream>
#include <stdio.h>
using namespace aco;
extern "C" {
PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
VkInstance instance,
const char* pName);
PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char* pName);
}
ac_shader_config config;
@ -47,32 +48,34 @@ static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static std::mutex create_device_mutex;
#define FUNCTION_LIST\
ITEM(CreateInstance)\
ITEM(DestroyInstance)\
ITEM(EnumeratePhysicalDevices)\
ITEM(GetPhysicalDeviceProperties2)\
ITEM(CreateDevice)\
ITEM(DestroyDevice)\
ITEM(CreateShaderModule)\
ITEM(DestroyShaderModule)\
ITEM(CreateGraphicsPipelines)\
ITEM(CreateComputePipelines)\
ITEM(DestroyPipeline)\
ITEM(CreateDescriptorSetLayout)\
ITEM(DestroyDescriptorSetLayout)\
ITEM(CreatePipelineLayout)\
ITEM(DestroyPipelineLayout)\
ITEM(CreateRenderPass)\
ITEM(DestroyRenderPass)\
ITEM(GetPipelineExecutablePropertiesKHR)\
#define FUNCTION_LIST \
ITEM(CreateInstance) \
ITEM(DestroyInstance) \
ITEM(EnumeratePhysicalDevices) \
ITEM(GetPhysicalDeviceProperties2) \
ITEM(CreateDevice) \
ITEM(DestroyDevice) \
ITEM(CreateShaderModule) \
ITEM(DestroyShaderModule) \
ITEM(CreateGraphicsPipelines) \
ITEM(CreateComputePipelines) \
ITEM(DestroyPipeline) \
ITEM(CreateDescriptorSetLayout) \
ITEM(DestroyDescriptorSetLayout) \
ITEM(CreatePipelineLayout) \
ITEM(DestroyPipelineLayout) \
ITEM(CreateRenderPass) \
ITEM(DestroyRenderPass) \
ITEM(GetPipelineExecutablePropertiesKHR) \
ITEM(GetPipelineExecutableInternalRepresentationsKHR)
#define ITEM(n) PFN_vk##n n;
FUNCTION_LIST
#undef ITEM
void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family)
void
create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size,
enum radeon_family family)
{
memset(&config, 0, sizeof(config));
info.wave_size = wave_size;
@ -90,7 +93,7 @@ void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_siz
program->debug.func = nullptr;
program->debug.private_data = nullptr;
Block *block = program->create_and_insert_block();
Block* block = program->create_and_insert_block();
block->kind = block_kind_top_level;
bld = Builder(program.get(), &program->blocks[0]);
@ -98,9 +101,9 @@ void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_siz
config.float_mode = program->blocks[0].fp_mode.val;
}
bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
enum radeon_family family, const char* subvariant,
unsigned wave_size)
bool
setup_cs(const char* input_spec, enum amd_gfx_level gfx_level, enum radeon_family family,
const char* subvariant, unsigned wave_size)
{
if (!set_variant(gfx_level, subvariant))
return false;
@ -117,7 +120,8 @@ bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
input_spec += 2 + in_bytes;
while (input_spec[0] == ' ') input_spec++;
while (input_spec[0] == ' ')
input_spec++;
}
aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>(
@ -132,7 +136,8 @@ bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
return true;
}
void finish_program(Program *prog)
void
finish_program(Program* prog)
{
for (Block& BB : prog->blocks) {
for (unsigned idx : BB.linear_preds)
@ -149,7 +154,8 @@ void finish_program(Program *prog)
}
}
void finish_validator_test()
void
finish_validator_test()
{
finish_program(program.get());
aco_print_program(program.get(), output);
@ -160,7 +166,8 @@ void finish_validator_test()
fprintf(output, "Validation failed\n");
}
void finish_opt_test()
void
finish_opt_test()
{
finish_program(program.get());
if (!aco::validate_ir(program.get())) {
@ -175,7 +182,8 @@ void finish_opt_test()
aco_print_program(program.get(), output);
}
void finish_setup_reduce_temp_test()
void
finish_setup_reduce_temp_test()
{
finish_program(program.get());
if (!aco::validate_ir(program.get())) {
@ -190,7 +198,8 @@ void finish_setup_reduce_temp_test()
aco_print_program(program.get(), output);
}
void finish_ra_test(ra_test_policy policy, bool lower)
void
finish_ra_test(ra_test_policy policy, bool lower)
{
finish_program(program.get());
if (!aco::validate_ir(program.get())) {
@ -215,42 +224,48 @@ void finish_ra_test(ra_test_policy policy, bool lower)
aco_print_program(program.get(), output);
}
void finish_optimizer_postRA_test()
void
finish_optimizer_postRA_test()
{
finish_program(program.get());
aco::optimize_postRA(program.get());
aco_print_program(program.get(), output);
}
void finish_to_hw_instr_test()
void
finish_to_hw_instr_test()
{
finish_program(program.get());
aco::lower_to_hw_instr(program.get());
aco_print_program(program.get(), output);
}
void finish_waitcnt_test()
void
finish_waitcnt_test()
{
finish_program(program.get());
aco::insert_wait_states(program.get());
aco_print_program(program.get(), output);
}
void finish_insert_nops_test()
void
finish_insert_nops_test()
{
finish_program(program.get());
aco::insert_NOPs(program.get());
aco_print_program(program.get(), output);
}
void finish_form_hard_clause_test()
void
finish_form_hard_clause_test()
{
finish_program(program.get());
aco::form_hard_clauses(program.get());
aco_print_program(program.get(), output);
}
void finish_assembler_test()
void
finish_assembler_test()
{
finish_program(program.get());
std::vector<uint32_t> binary;
@ -261,13 +276,14 @@ void finish_assembler_test()
if (program->gfx_level >= GFX8) {
print_asm(program.get(), binary, exec_size / 4u, output);
} else {
//TODO: maybe we should use CLRX and skip this test if it's not available?
// TODO: maybe we should use CLRX and skip this test if it's not available?
for (uint32_t dword : binary)
fprintf(output, "%.8x\n", dword);
}
}
void writeout(unsigned i, Temp tmp)
void
writeout(unsigned i, Temp tmp)
{
if (tmp.id())
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
@ -275,22 +291,26 @@ void writeout(unsigned i, Temp tmp)
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
}
void writeout(unsigned i, aco::Builder::Result res)
void
writeout(unsigned i, aco::Builder::Result res)
{
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
}
void writeout(unsigned i, Operand op)
void
writeout(unsigned i, Operand op)
{
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
}
void writeout(unsigned i, Operand op0, Operand op1)
void
writeout(unsigned i, Operand op0, Operand op1)
{
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
}
Temp fneg(Temp src, Builder b)
Temp
fneg(Temp src, Builder b)
{
if (src.bytes() == 2)
return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
@ -298,35 +318,42 @@ Temp fneg(Temp src, Builder b)
return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
}
Temp fabs(Temp src, Builder b)
Temp
fabs(Temp src, Builder b)
{
if (src.bytes() == 2) {
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
Builder::Result res =
b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
res->valu().abs[1] = true;
return res;
} else {
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
Builder::Result res =
b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
res->valu().abs[1] = true;
return res;
}
}
Temp f2f32(Temp src, Builder b)
Temp
f2f32(Temp src, Builder b)
{
return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
}
Temp f2f16(Temp src, Builder b)
Temp
f2f16(Temp src, Builder b)
{
return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
}
Temp u2u16(Temp src, Builder b)
Temp
u2u16(Temp src, Builder b)
{
return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
}
Temp fadd(Temp src0, Temp src1, Builder b)
Temp
fadd(Temp src0, Temp src1, Builder b)
{
if (src0.bytes() == 2)
return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
@ -334,7 +361,8 @@ Temp fadd(Temp src0, Temp src1, Builder b)
return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
}
Temp fmul(Temp src0, Temp src1, Builder b)
Temp
fmul(Temp src0, Temp src1, Builder b)
{
if (src0.bytes() == 2)
return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
@ -342,7 +370,8 @@ Temp fmul(Temp src0, Temp src1, Builder b)
return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
}
Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
Temp
fma(Temp src0, Temp src1, Temp src2, Builder b)
{
if (src0.bytes() == 2)
return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
@ -350,40 +379,46 @@ Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
}
Temp fsat(Temp src, Builder b)
Temp
fsat(Temp src, Builder b)
{
if (src.bytes() == 2)
return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u),
Operand::c16(0x3c00u), src);
return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), Operand::c16(0x3c00u),
src);
else
return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(),
Operand::c32(0x3f800000u), src);
return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
src);
}
Temp fmin(Temp src0, Temp src1, Builder b)
Temp
fmin(Temp src0, Temp src1, Builder b)
{
return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1);
}
Temp fmax(Temp src0, Temp src1, Builder b)
Temp
fmax(Temp src0, Temp src1, Builder b)
{
return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1);
}
Temp ext_ushort(Temp src, unsigned idx, Builder b)
Temp
ext_ushort(Temp src, unsigned idx, Builder b)
{
return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
Operand::c32(16u), Operand::c32(false));
}
Temp ext_ubyte(Temp src, unsigned idx, Builder b)
Temp
ext_ubyte(Temp src, unsigned idx, Builder b)
{
return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
Operand::c32(8u), Operand::c32(false));
}
void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
std::function<void()> els)
void
emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
std::function<void()> els)
{
prog->blocks.reserve(prog->blocks.size() + 6);
@ -418,8 +453,10 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
PhysReg saved_exec_reg(84);
b.reset(if_block);
Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg), Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index, then_linear->index);
Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index,
then_linear->index);
b.reset(then_logical);
b.pseudo(aco_opcode::p_logical_start);
@ -431,8 +468,10 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);
b.reset(invert);
b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1), Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index, else_linear->index);
b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index,
else_linear->index);
b.reset(else_logical);
b.pseudo(aco_opcode::p_logical_start);
@ -444,42 +483,29 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);
b.reset(endif_block);
b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), Operand(saved_exec, saved_exec_reg));
b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
Operand(saved_exec, saved_exec_reg));
}
VkDevice get_vk_device(enum amd_gfx_level gfx_level)
VkDevice
get_vk_device(enum amd_gfx_level gfx_level)
{
enum radeon_family family;
switch (gfx_level) {
case GFX6:
family = CHIP_TAHITI;
break;
case GFX7:
family = CHIP_BONAIRE;
break;
case GFX8:
family = CHIP_POLARIS10;
break;
case GFX9:
family = CHIP_VEGA10;
break;
case GFX10:
family = CHIP_NAVI10;
break;
case GFX10_3:
family = CHIP_NAVI21;
break;
case GFX11:
family = CHIP_GFX1100;
break;
default:
family = CHIP_UNKNOWN;
break;
case GFX6: family = CHIP_TAHITI; break;
case GFX7: family = CHIP_BONAIRE; break;
case GFX8: family = CHIP_POLARIS10; break;
case GFX9: family = CHIP_VEGA10; break;
case GFX10: family = CHIP_NAVI10; break;
case GFX10_3: family = CHIP_NAVI21; break;
case GFX11: family = CHIP_GFX1100; break;
default: family = CHIP_UNKNOWN; break;
}
return get_vk_device(family);
}
VkDevice get_vk_device(enum radeon_family family)
VkDevice
get_vk_device(enum radeon_family family)
{
assert(family != CHIP_UNKNOWN);
@ -496,12 +522,13 @@ VkDevice get_vk_device(enum radeon_family family)
VkInstanceCreateInfo instance_create_info = {};
instance_create_info.pApplicationInfo = &app_info;
instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(
NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
assert(result == VK_SUCCESS);
#define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
#define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
FUNCTION_LIST
#undef ITEM
#undef ITEM
uint32_t device_count = 1;
VkPhysicalDevice device = VK_NULL_HANDLE;
@ -511,7 +538,7 @@ VkDevice get_vk_device(enum radeon_family family)
VkDeviceCreateInfo device_create_info = {};
device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"};
static const char* extensions[] = {"VK_KHR_pipeline_executable_properties"};
device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
device_create_info.ppEnabledExtensionNames = extensions;
result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
@ -520,7 +547,8 @@ VkDevice get_vk_device(enum radeon_family family)
}
static struct DestroyDevices {
~DestroyDevices() {
~DestroyDevices()
{
for (unsigned i = 0; i < CHIP_LAST; i++) {
if (!device_cache[i])
continue;
@ -530,8 +558,9 @@ static struct DestroyDevices {
}
} destroy_devices;
void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
const char *name, bool remove_encoding)
void
print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
const char* name, bool remove_encoding)
{
uint32_t executable_count = 16;
VkPipelineExecutablePropertiesKHR executables[16];
@ -539,7 +568,8 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
pipeline_info.pNext = NULL;
pipeline_info.pipeline = pipeline;
ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
ASSERTED VkResult result =
GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
assert(result == VK_SUCCESS);
uint32_t executable = 0;
@ -570,13 +600,13 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
}
assert(requested_ir && "Could not find requested IR");
char *data = (char*)malloc(requested_ir->dataSize);
char* data = (char*)malloc(requested_ir->dataSize);
requested_ir->pData = data;
result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
assert(result == VK_SUCCESS);
if (remove_encoding) {
for (char *c = data; *c; c++) {
for (char* c = data; *c; c++) {
if (*c == ';') {
for (; *c && *c != '\n'; c++)
*c = ' ';
@ -588,23 +618,25 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
free(data);
}
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info)
VkShaderModule
__qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* module_info)
{
VkShaderModuleCreateInfo vk_module_info;
vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
vk_module_info.pNext = NULL;
vk_module_info.flags = 0;
vk_module_info.codeSize = module_info->spirvSize;
vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
VkShaderModuleCreateInfo vk_module_info;
vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
vk_module_info.pNext = NULL;
vk_module_info.flags = 0;
vk_module_info.codeSize = module_info->spirvSize;
vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
VkShaderModule module;
ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
assert(result == VK_SUCCESS);
VkShaderModule module;
ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
assert(result == VK_SUCCESS);
return module;
return module;
}
PipelineBuilder::PipelineBuilder(VkDevice dev) {
PipelineBuilder::PipelineBuilder(VkDevice dev)
{
memset(this, 0, sizeof(*this));
topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
device = dev;
@ -615,7 +647,7 @@ PipelineBuilder::~PipelineBuilder()
DestroyPipeline(device, pipeline, NULL);
for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
VkPipelineShaderStageCreateInfo *stage_info = &stages[i];
VkPipelineShaderStageCreateInfo* stage_info = &stages[i];
if (owned_stages & stage_info->stage)
DestroyShaderModule(device, stage_info->module, NULL);
}
@ -628,72 +660,87 @@ PipelineBuilder::~PipelineBuilder()
DestroyRenderPass(device, render_pass, NULL);
}
void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
uint32_t binding, VkDescriptorType type, uint32_t count)
void
PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
VkDescriptorType type, uint32_t count)
{
desc_layouts_used |= 1ull << layout;
desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
}
void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
void
PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
{
vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
}
void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
void
PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format,
uint32_t offset)
{
vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
}
void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module)
void
PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo* module)
{
for (unsigned i = 0; i < module->declarationCount; i++) {
const QoShaderDecl *decl = &module->pDeclarations[i];
const QoShaderDecl* decl = &module->pDeclarations[i];
switch (decl->decl_type) {
case QoShaderDeclType_ubo:
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
add_desc_binding(module->stage, decl->set, decl->binding,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
break;
case QoShaderDeclType_ssbo:
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
add_desc_binding(module->stage, decl->set, decl->binding,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
break;
case QoShaderDeclType_img_buf:
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
add_desc_binding(module->stage, decl->set, decl->binding,
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
break;
case QoShaderDeclType_img:
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
add_desc_binding(module->stage, decl->set, decl->binding,
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
break;
case QoShaderDeclType_tex_buf:
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
add_desc_binding(module->stage, decl->set, decl->binding,
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
break;
case QoShaderDeclType_combined:
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
add_desc_binding(module->stage, decl->set, decl->binding,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
break;
case QoShaderDeclType_tex:
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
add_desc_binding(module->stage, decl->set, decl->binding,
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
break;
case QoShaderDeclType_samp:
add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
break;
default:
break;
default: break;
}
}
}
void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
void
PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo* module)
{
unsigned next_vtx_offset = 0;
for (unsigned i = 0; i < module->declarationCount; i++) {
const QoShaderDecl *decl = &module->pDeclarations[i];
const QoShaderDecl* decl = &module->pDeclarations[i];
switch (decl->decl_type) {
case QoShaderDeclType_in:
if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset);
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
next_vtx_offset);
else if (decl->type[0] == 'u')
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset);
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT,
next_vtx_offset);
else if (decl->type[0] == 'i')
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset);
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT,
next_vtx_offset);
next_vtx_offset += 16;
}
break;
@ -707,17 +754,17 @@ void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
}
break;
default:
break;
default: break;
}
}
if (next_vtx_offset)
add_vertex_binding(0, next_vtx_offset);
}
void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name)
void
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name)
{
VkPipelineShaderStageCreateInfo *stage_info;
VkPipelineShaderStageCreateInfo* stage_info;
if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
stage_info = &stages[0];
else
@ -732,40 +779,50 @@ void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule modu
owned_stages |= stage;
}
void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name)
void
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
const char* name)
{
add_stage(stage, __qoCreateShaderModule(device, &module), name);
add_resource_decls(&module);
add_io_decls(&module);
}
void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
void
PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
{
add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
}
void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
void
PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
{
add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
}
void PipelineBuilder::add_cs(VkShaderModule cs)
void
PipelineBuilder::add_cs(VkShaderModule cs)
{
add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
}
void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
void
PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
{
add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
}
bool PipelineBuilder::is_compute() {
bool
PipelineBuilder::is_compute()
{
return gfx_pipeline_info.stageCount == 0;
}
void PipelineBuilder::create_compute_pipeline() {
void
PipelineBuilder::create_compute_pipeline()
{
VkComputePipelineCreateInfo create_info;
create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
create_info.pNext = NULL;
@ -775,11 +832,14 @@ void PipelineBuilder::create_compute_pipeline() {
create_info.basePipelineHandle = VK_NULL_HANDLE;
create_info.basePipelineIndex = 0;
ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
ASSERTED VkResult result =
CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
assert(result == VK_SUCCESS);
}
void PipelineBuilder::create_graphics_pipeline() {
void
PipelineBuilder::create_graphics_pipeline()
{
/* create the create infos */
if (!samples)
samples = VK_SAMPLE_COUNT_1_BIT;
@ -792,7 +852,7 @@ void PipelineBuilder::create_graphics_pipeline() {
if (color_outputs[i] == VK_FORMAT_UNDEFINED)
continue;
VkAttachmentDescription *desc = &attachment_descs[num_color_attachments];
VkAttachmentDescription* desc = &attachment_descs[num_color_attachments];
desc->flags = 0;
desc->format = color_outputs[i];
desc->samples = samples;
@ -803,16 +863,14 @@ void PipelineBuilder::create_graphics_pipeline() {
desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
VkAttachmentReference *ref = &color_attachments[num_color_attachments];
VkAttachmentReference* ref = &color_attachments[num_color_attachments];
ref->attachment = num_color_attachments;
ref->layout = VK_IMAGE_LAYOUT_GENERAL;
VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments];
VkPipelineColorBlendAttachmentState* blend = &blend_attachment_states[num_color_attachments];
blend->blendEnable = false;
blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT |
VK_COLOR_COMPONENT_A_BIT;
blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
num_color_attachments++;
}
@ -820,7 +878,7 @@ void PipelineBuilder::create_graphics_pipeline() {
unsigned num_attachments = num_color_attachments;
VkAttachmentReference ds_attachment;
if (ds_output != VK_FORMAT_UNDEFINED) {
VkAttachmentDescription *desc = &attachment_descs[num_attachments];
VkAttachmentDescription* desc = &attachment_descs[num_attachments];
desc->flags = 0;
desc->format = ds_output;
desc->samples = samples;
@ -902,8 +960,7 @@ void PipelineBuilder::create_graphics_pipeline() {
ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
ds_state.front.compareMask = 0xffffffff,
ds_state.front.writeMask = 0;
ds_state.front.compareMask = 0xffffffff, ds_state.front.writeMask = 0;
ds_state.front.reference = 0;
ds_state.back = ds_state.front;
@ -915,17 +972,15 @@ void PipelineBuilder::create_graphics_pipeline() {
color_blend_state.attachmentCount = num_color_attachments;
color_blend_state.pAttachments = blend_attachment_states;
VkDynamicState dynamic_states[9] = {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE
};
VkDynamicState dynamic_states[9] = {VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE};
VkPipelineDynamicStateCreateInfo dynamic_state;
dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
@ -985,7 +1040,9 @@ void PipelineBuilder::create_graphics_pipeline() {
assert(result == VK_SUCCESS);
}
void PipelineBuilder::create_pipeline() {
void
PipelineBuilder::create_pipeline()
{
unsigned num_desc_layouts = 0;
for (unsigned i = 0; i < 64; i++) {
if (!(desc_layouts_used & (1ull << i)))
@ -998,7 +1055,8 @@ void PipelineBuilder::create_pipeline() {
desc_layout_info.bindingCount = num_desc_bindings[i];
desc_layout_info.pBindings = desc_bindings[i];
ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]);
ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL,
&desc_layouts[num_desc_layouts]);
assert(result == VK_SUCCESS);
num_desc_layouts++;
}
@ -1012,7 +1070,8 @@ void PipelineBuilder::create_pipeline() {
pipeline_layout_info.setLayoutCount = num_desc_layouts;
pipeline_layout_info.pSetLayouts = desc_layouts;
ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
ASSERTED VkResult result =
CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
assert(result == VK_SUCCESS);
if (is_compute())
@ -1021,7 +1080,8 @@ void PipelineBuilder::create_pipeline() {
create_graphics_pipeline();
}
void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding)
void
PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char* name, bool remove_encoding)
{
if (!pipeline)
create_pipeline();
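A short sketch of how the PipelineBuilder pieces defined in this file are driven from a test; vs_info/fs_info are hypothetical stand-ins for QoShaderModuleCreateInfo values and are not part of this diff:

/* hypothetical usage, not in the commit */
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs_info, fs_info);                       /* attach VS/FS modules and their resource/IO decls */
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); /* creates the pipeline on first use, then prints the IR */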

@ -24,8 +24,9 @@
#ifndef ACO_TEST_HELPERS_H
#define ACO_TEST_HELPERS_H
#include "framework.h"
#include "vulkan/vulkan.h"
#include "framework.h"
#include <functional>
enum QoShaderDeclType {
@ -42,10 +43,10 @@ enum QoShaderDeclType {
};
struct QoShaderDecl {
const char *name;
const char *type;
const char* name;
const char* type;
QoShaderDeclType decl_type;
//TODO: array size?
// TODO: array size?
unsigned location;
unsigned component;
unsigned binding;
@ -53,12 +54,12 @@ struct QoShaderDecl {
};
struct QoShaderModuleCreateInfo {
void *pNext;
size_t spirvSize;
const void *pSpirv;
uint32_t declarationCount;
const QoShaderDecl *pDeclarations;
VkShaderStageFlagBits stage;
void* pNext;
size_t spirvSize;
const void* pSpirv;
uint32_t declarationCount;
const QoShaderDecl* pDeclarations;
VkShaderStageFlagBits stage;
};
extern ac_shader_config config;
@ -71,17 +72,17 @@ namespace aco {
struct ra_test_policy;
}
void create_program(enum amd_gfx_level gfx_level, aco::Stage stage,
unsigned wave_size=64, enum radeon_family family=CHIP_UNKNOWN);
bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
enum radeon_family family=CHIP_UNKNOWN, const char* subvariant = "",
unsigned wave_size=64);
void create_program(enum amd_gfx_level gfx_level, aco::Stage stage, unsigned wave_size = 64,
enum radeon_family family = CHIP_UNKNOWN);
bool setup_cs(const char* input_spec, enum amd_gfx_level gfx_level,
enum radeon_family family = CHIP_UNKNOWN, const char* subvariant = "",
unsigned wave_size = 64);
void finish_program(aco::Program *program);
void finish_program(aco::Program* program);
void finish_validator_test();
void finish_opt_test();
void finish_setup_reduce_temp_test();
void finish_ra_test(aco::ra_test_policy, bool lower=false);
void finish_ra_test(aco::ra_test_policy, bool lower = false);
void finish_optimizer_postRA_test();
void finish_to_hw_instr_test();
void finish_waitcnt_test();
@ -89,35 +90,35 @@ void finish_insert_nops_test();
void finish_form_hard_clause_test();
void finish_assembler_test();
void writeout(unsigned i, aco::Temp tmp=aco::Temp(0, aco::s1));
void writeout(unsigned i, aco::Temp tmp = aco::Temp(0, aco::s1));
void writeout(unsigned i, aco::Builder::Result res);
void writeout(unsigned i, aco::Operand op);
void writeout(unsigned i, aco::Operand op0, aco::Operand op1);
aco::Temp fneg(aco::Temp src, aco::Builder b=bld);
aco::Temp fabs(aco::Temp src, aco::Builder b=bld);
aco::Temp f2f32(aco::Temp src, aco::Builder b=bld);
aco::Temp f2f16(aco::Temp src, aco::Builder b=bld);
aco::Temp u2u16(aco::Temp src, aco::Builder b=bld);
aco::Temp fadd(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
aco::Temp fmul(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
aco::Temp fma(aco::Temp src0, aco::Temp src1, aco::Temp src2, aco::Builder b=bld);
aco::Temp fsat(aco::Temp src, aco::Builder b=bld);
aco::Temp fmin(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
aco::Temp fmax(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b=bld);
aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b=bld);
void emit_divergent_if_else(aco::Program* prog, aco::Builder& b, aco::Operand cond, std::function<void()> then,
std::function<void()> els);
aco::Temp fneg(aco::Temp src, aco::Builder b = bld);
aco::Temp fabs(aco::Temp src, aco::Builder b = bld);
aco::Temp f2f32(aco::Temp src, aco::Builder b = bld);
aco::Temp f2f16(aco::Temp src, aco::Builder b = bld);
aco::Temp u2u16(aco::Temp src, aco::Builder b = bld);
aco::Temp fadd(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
aco::Temp fmul(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
aco::Temp fma(aco::Temp src0, aco::Temp src1, aco::Temp src2, aco::Builder b = bld);
aco::Temp fsat(aco::Temp src, aco::Builder b = bld);
aco::Temp fmin(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
aco::Temp fmax(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b = bld);
aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b = bld);
void emit_divergent_if_else(aco::Program* prog, aco::Builder& b, aco::Operand cond,
std::function<void()> then, std::function<void()> els);
/* vulkan helpers */
VkDevice get_vk_device(enum amd_gfx_level gfx_level);
VkDevice get_vk_device(enum radeon_family family);
void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
const char *name, bool remove_encoding=false);
const char* name, bool remove_encoding = false);
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *info);
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* info);
class PipelineBuilder {
public:
@ -152,19 +153,21 @@ public:
~PipelineBuilder();
PipelineBuilder(const PipelineBuilder&) = delete;
PipelineBuilder& operator = (const PipelineBuilder&) = delete;
PipelineBuilder& operator=(const PipelineBuilder&) = delete;
void add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
uint32_t binding, VkDescriptorType type, uint32_t count=1);
void add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
VkDescriptorType type, uint32_t count = 1);
void add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate=VK_VERTEX_INPUT_RATE_VERTEX);
void add_vertex_binding(uint32_t binding, uint32_t stride,
VkVertexInputRate rate = VK_VERTEX_INPUT_RATE_VERTEX);
void add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset);
void add_resource_decls(QoShaderModuleCreateInfo *module);
void add_io_decls(QoShaderModuleCreateInfo *module);
void add_resource_decls(QoShaderModuleCreateInfo* module);
void add_io_decls(QoShaderModuleCreateInfo* module);
void add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name="main");
void add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name="main");
void add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name = "main");
void add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
const char* name = "main");
void add_vsfs(VkShaderModule vs, VkShaderModule fs);
void add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs);
void add_cs(VkShaderModule cs);
@ -174,7 +177,8 @@ public:
void create_pipeline();
void print_ir(VkShaderStageFlagBits stages, const char *name, bool remove_encoding=false);
void print_ir(VkShaderStageFlagBits stages, const char* name, bool remove_encoding = false);
private:
void create_compute_pipeline();
void create_graphics_pipeline();

@ -21,20 +21,22 @@
* IN THE SOFTWARE.
*
*/
#include "aco_ir.h"
#include <llvm-c/Target.h>
#include "framework.h"
#include <getopt.h>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <string>
#include <unistd.h>
#include <stdarg.h>
#include <llvm-c/Target.h>
#include "aco_ir.h"
#include "framework.h"
#include <vector>
static const char *help_message =
static const char* help_message =
"Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n"
"\n"
"Run ACO unit test(s). If TEST is not provided, all tests are run.\n"
@ -50,26 +52,27 @@ static const char *help_message =
" --no-check Print test output instead of checking it.\n";
std::map<std::string, TestDef> tests;
FILE *output = NULL;
FILE* output = NULL;
static TestDef current_test;
static unsigned tests_written = 0;
static FILE *checker_stdin = NULL;
static char *checker_stdin_data = NULL;
static FILE* checker_stdin = NULL;
static char* checker_stdin_data = NULL;
static size_t checker_stdin_size = 0;
static char *output_data = NULL;
static char* output_data = NULL;
static size_t output_size = 0;
static size_t output_offset = 0;
static char current_variant[64] = {0};
static std::set<std::string> *variant_filter = NULL;
static std::set<std::string>* variant_filter = NULL;
bool test_failed = false;
bool test_skipped = false;
static char fail_message[256] = {0};
void write_test()
void
write_test()
{
if (!checker_stdin) {
/* not entirely correct, but shouldn't matter */
@ -81,18 +84,18 @@ void write_test()
if (output_offset == output_size && !test_skipped && !test_failed)
return;
char *data = output_data + output_offset;
char* data = output_data + output_offset;
uint32_t size = output_size - output_offset;
fwrite("test", 1, 4, checker_stdin);
fwrite(current_test.name, 1, strlen(current_test.name)+1, checker_stdin);
fwrite(current_variant, 1, strlen(current_variant)+1, checker_stdin);
fwrite(current_test.source_file, 1, strlen(current_test.source_file)+1, checker_stdin);
fwrite(current_test.name, 1, strlen(current_test.name) + 1, checker_stdin);
fwrite(current_variant, 1, strlen(current_variant) + 1, checker_stdin);
fwrite(current_test.source_file, 1, strlen(current_test.source_file) + 1, checker_stdin);
if (test_failed || test_skipped) {
const char *res = test_failed ? "failed" : "skipped";
const char* res = test_failed ? "failed" : "skipped";
fwrite("\x01", 1, 1, checker_stdin);
fwrite(res, 1, strlen(res)+1, checker_stdin);
fwrite(fail_message, 1, strlen(fail_message)+1, checker_stdin);
fwrite(res, 1, strlen(res) + 1, checker_stdin);
fwrite(fail_message, 1, strlen(fail_message) + 1, checker_stdin);
} else {
fwrite("\x00", 1, 1, checker_stdin);
}
@ -103,7 +106,8 @@ void write_test()
output_offset += size;
}
bool set_variant(const char *name)
bool
set_variant(const char* name)
{
if (variant_filter && !variant_filter->count(name))
return false;
@ -118,7 +122,8 @@ bool set_variant(const char *name)
return true;
}
void fail_test(const char *fmt, ...)
void
fail_test(const char* fmt, ...)
{
va_list args;
va_start(args, fmt);
@ -129,7 +134,8 @@ void fail_test(const char *fmt, ...)
va_end(args);
}
void skip_test(const char *fmt, ...)
void
skip_test(const char* fmt, ...)
{
va_list args;
va_start(args, fmt);
@ -140,7 +146,8 @@ void skip_test(const char *fmt, ...)
va_end(args);
}
void run_test(TestDef def)
void
run_test(TestDef def)
{
current_test = def;
output_data = NULL;
@ -163,7 +170,8 @@ void run_test(TestDef def)
free(output_data);
}
int check_output(char **argv)
int
check_output(char** argv)
{
fflush(stdout);
fflush(stderr);
@ -183,7 +191,8 @@ int check_output(char **argv)
close(stdin_pipe[0]);
close(stdin_pipe[1]);
execlp(ACO_TEST_PYTHON_BIN, ACO_TEST_PYTHON_BIN, ACO_TEST_SOURCE_DIR "/check_output.py", NULL);
execlp(ACO_TEST_PYTHON_BIN, ACO_TEST_PYTHON_BIN, ACO_TEST_SOURCE_DIR "/check_output.py",
NULL);
fprintf(stderr, "%s: execlp() failed: %s\n", argv[0], strerror(errno));
return 99;
} else {
@ -197,7 +206,8 @@ int check_output(char **argv)
}
}
bool match_test(std::string name, std::string pattern)
bool
match_test(std::string name, std::string pattern)
{
if (name.length() < pattern.length())
return false;
@ -206,33 +216,25 @@ bool match_test(std::string name, std::string pattern)
return name == pattern;
}
int main(int argc, char **argv)
int
main(int argc, char** argv)
{
int print_help = 0;
int do_list = 0;
int do_check = 1;
const struct option opts[] = {
{ "help", no_argument, &print_help, 1 },
{ "list", no_argument, &do_list, 1 },
{ "no-check", no_argument, &do_check, 0 },
{ NULL, 0, NULL, 0 }
};
const struct option opts[] = {{"help", no_argument, &print_help, 1},
{"list", no_argument, &do_list, 1},
{"no-check", no_argument, &do_check, 0},
{NULL, 0, NULL, 0}};
int c;
while ((c = getopt_long(argc, argv, "hl", opts, NULL)) != -1) {
switch (c) {
case 'h':
print_help = 1;
break;
case 'l':
do_list = 1;
break;
case 0:
break;
case 'h': print_help = 1; break;
case 'l': do_list = 1; break;
case 0: break;
case '?':
default:
fprintf(stderr, "%s: Invalid argument\n", argv[0]);
return 99;
default: fprintf(stderr, "%s: Invalid argument\n", argv[0]); return 99;
}
}
@ -262,10 +264,10 @@ int main(int argc, char **argv)
if (do_check)
checker_stdin = open_memstream(&checker_stdin_data, &checker_stdin_size);
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTarget();
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUDisassembler();
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTarget();
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUDisassembler();
aco::init();

@ -21,11 +21,11 @@
* IN THE SOFTWARE.
*
*/
#include <llvm/Config/llvm-config.h>
#include "helpers.h"
#include "sid.h"
#include <llvm/Config/llvm-config.h>
using namespace aco;
BEGIN_TEST(assembler.s_memtime)
@ -178,7 +178,7 @@ BEGIN_TEST(assembler.long_jump.conditional_backwards)
finish_assembler_test();
END_TEST
BEGIN_TEST(assembler.long_jump.3f)
BEGIN_TEST(assembler.long_jump .3f)
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
return;
@ -354,25 +354,31 @@ BEGIN_TEST(assembler.vopc_sdwa)
//~gfx9>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 86860080
//~gfx10>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(), Operand::zero());
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(),
Operand::zero());
//~gfx9! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686ac80
//~gfx10! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686ac80
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(), Operand::zero());
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(),
Operand::zero());
//~gfx9! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686fe80
//~gfx10! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686fe80
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero());
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(),
Operand::zero());
if (i == GFX10) {
//~gfx10! v_cmpx_lt_u32_sdwa 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7da300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero());
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(),
Operand::zero());
} else {
//~gfx9! v_cmpx_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2), Operand::zero(), Operand::zero());
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2),
Operand::zero(), Operand::zero());
//~gfx9! v_cmpx_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 8686ac80
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2), Definition(exec, s2), Operand::zero(), Operand::zero());
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2),
Definition(exec, s2), Operand::zero(), Operand::zero());
}
finish_assembler_test();
@ -452,48 +458,70 @@ BEGIN_TEST(assembler.gfx11.mubuf)
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true);
//! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen = true;
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen =
true;
//! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen = true;
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen =
true;
//! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
/* Various flags */
//! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().glc = true;
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.glc = true;
//! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().dlc = true;
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.dlc = true;
//! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().slc = true;
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.slc = true;
//; if llvm_ver >= 16:
//; insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80')
//; else:
//; insert_pattern('buffer_load_b32 v42, off, s[32:35], 0 tfe ; e0500000 80282a80')
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().tfe = true;
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.tfe = true;
/* LDS */
//! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
/* Stores */
//! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80
@ -532,42 +560,62 @@ BEGIN_TEST(assembler.gfx11.mtbuf)
/* Addressing */
//>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0, false);
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0,
false);
//! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt, nfmt, 0, false);
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt,
nfmt, 0, false);
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true);
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)->mtbuf().idxen = true;
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)
->mtbuf()
.idxen = true;
//! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)->mtbuf().idxen = true;
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)
->mtbuf()
.idxen = true;
//! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84, false);
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84,
false);
/* Various flags */
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().glc = true;
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.glc = true;
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().dlc = true;
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.dlc = true;
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().slc = true;
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.slc = true;
//; if llvm_ver >= 16:
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
//; else:
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80')
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().tfe = true;
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.tfe = true;
/* Stores */
//! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0, false);
bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0,
false);
//! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true);
@ -604,7 +652,8 @@ BEGIN_TEST(assembler.gfx11.mimg)
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1);
//! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim = ac_image_2d;
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim =
ac_image_2d;
//! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1;
@ -636,14 +685,20 @@ BEGIN_TEST(assembler.gfx11.mimg)
/* NSA */
//! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1, Operand(bld.tmp(v1), PhysReg(256 + 40)))->mimg().dim = ac_image_2d;
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1,
Operand(bld.tmp(v1), PhysReg(256 + 40)))
->mimg()
.dim = ac_image_2d;
/* Stores */
//! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1);
//! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4), op_v1, op_v2)->mimg().dim = ac_image_2d;
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
op_v1, op_v2)
->mimg()
.dim = ac_image_2d;
finish_assembler_test();
END_TEST
@ -761,13 +816,19 @@ BEGIN_TEST(assembler.gfx11.vinterp)
bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0);
//! v_interp_p10_f32 v42, -v10, v20, v30 ; cd00002a 247a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[0] = true;
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
->vinterp_inreg()
.neg[0] = true;
//! v_interp_p10_f32 v42, v10, -v20, v30 ; cd00002a 447a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[1] = true;
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
->vinterp_inreg()
.neg[1] = true;
//! v_interp_p10_f32 v42, v10, v20, -v30 ; cd00002a 847a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[2] = true;
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
->vinterp_inreg()
.neg[2] = true;
//! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0] ; cd02082a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1);
@ -782,7 +843,9 @@ BEGIN_TEST(assembler.gfx11.vinterp)
bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8);
//! v_interp_p10_f32 v42, v10, v20, v30 clamp ; cd00802a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().clamp = true;
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
->vinterp_inreg()
.clamp = true;
finish_assembler_test();
END_TEST
@ -899,16 +962,22 @@ BEGIN_TEST(assembler.gfx11.vop12c_v128)
bld.vop1_dpp(aco_opcode::v_rcp_f16, dst_v128, op_v1, dpp_row_rr(1))->dpp16().abs[0] = true;
//! v_mul_f16_e64_dpp v128, -v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350080 200204fa ff1d2101
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] = true;
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] =
true;
//! v_mul_f16_e64_dpp v128, |v1|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350180 000204fa ff2d2101
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] = true;
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] =
true;
//! v_cmp_eq_f16_e64_dpp vcc, -v129, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402006a 200204fa ff1d2181
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))->dpp16().neg[0] = true;
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
->dpp16()
.neg[0] = true;
//! v_cmp_eq_f16_e64_dpp vcc, |v129|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402016a 000204fa ff2d2181
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))->dpp16().abs[0] = true;
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
->dpp16()
.abs[0] = true;
finish_assembler_test();
END_TEST

View File

@ -633,9 +633,10 @@ BEGIN_TEST(d3d11_derivs.nsa_max)
//~gfx11! v4: %_:v[0-3] = image_sample_c_b_o s8: undef, s4: undef, v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4-5] 2darray da
Instruction *instr = bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4),
Operand(s8), Operand(s4), Operand(v1), Operand(reg_v0, v6.as_linear()),
Operand(reg_v6, v1), Operand(reg_v7, v1), Operand(reg_v8, v1));
Instruction* instr =
bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4), Operand(s8), Operand(s4),
Operand(v1), Operand(reg_v0, v6.as_linear()), Operand(reg_v6, v1),
Operand(reg_v7, v1), Operand(reg_v8, v1));
instr->mimg().dim = ac_image_2darray;
instr->mimg().da = true;
instr->mimg().strict_wqm = true;

View File

@ -26,7 +26,8 @@
using namespace aco;
static void create_mubuf(Temp desc=Temp(0, s8))
static void
create_mubuf(Temp desc = Temp(0, s8))
{
Operand desc_op(desc);
desc_op.setFixed(PhysReg(0));
@ -34,13 +35,15 @@ static void create_mubuf(Temp desc=Temp(0, s8))
Operand(PhysReg(256), v1), Operand::zero(), 0, false);
}
static void create_mubuf_store()
static void
create_mubuf_store()
{
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
Operand(PhysReg(256), v1), Operand::zero(), 0, false);
}
static void create_mtbuf(Temp desc=Temp(0, s8))
static void
create_mtbuf(Temp desc = Temp(0, s8))
{
Operand desc_op(desc);
desc_op.setFixed(PhysReg(0));
@ -49,22 +52,25 @@ static void create_mtbuf(Temp desc=Temp(0, s8))
V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false);
}
static void create_flat()
static void
create_flat()
{
bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1),
Operand(PhysReg(256), v2), Operand(s2));
bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1), Operand(PhysReg(256), v2),
Operand(s2));
}
static void create_global()
static void
create_global()
{
bld.global(aco_opcode::global_load_dword, Definition(PhysReg(256), v1),
Operand(PhysReg(256), v2), Operand(s2));
}
static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
static void
create_mimg(bool nsa, Temp desc = Temp(0, s8))
{
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(
aco_opcode::image_sample, Format::MIMG, 5, 1)};
aco_ptr<MIMG_instruction> mimg{
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 5, 1)};
mimg->definitions[0] = Definition(PhysReg(256), v1);
mimg->operands[0] = Operand(desc);
mimg->operands[0].setFixed(PhysReg(0));
@ -78,13 +84,15 @@ static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
bld.insert(std::move(mimg));
}
static void create_smem()
static void
create_smem()
{
bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1), Operand(PhysReg(0), s2),
Operand::zero());
}
static void create_smem_buffer(Temp desc=Temp(0, s4))
static void
create_smem_buffer(Temp desc = Temp(0, s4))
{
Operand desc_op(desc);
desc_op.setFixed(PhysReg(0));

View File

@ -25,22 +25,25 @@
using namespace aco;
void create_mubuf(unsigned offset, PhysReg dst=PhysReg(256), PhysReg vaddr=PhysReg(256))
void
create_mubuf(unsigned offset, PhysReg dst = PhysReg(256), PhysReg vaddr = PhysReg(256))
{
bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst, v1), Operand(PhysReg(0), s4),
Operand(vaddr, v1), Operand::zero(), offset, true);
}
void create_mubuf_store(PhysReg src=PhysReg(256))
void
create_mubuf_store(PhysReg src = PhysReg(256))
{
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4),
Operand(src, v1), Operand::zero(), Operand(src, v1), 0, true);
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(src, v1),
Operand::zero(), Operand(src, v1), 0, true);
}
void create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
void
create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
{
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(
aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
aco_ptr<MIMG_instruction> mimg{
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
mimg->definitions[0] = Definition(PhysReg(256), v1);
mimg->operands[0] = Operand(PhysReg(0), s8);
mimg->operands[1] = Operand(PhysReg(0), s4);
@ -216,7 +219,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! s_waitcnt_depctr vm_vsrc(0)
//! s1: %0:m0 = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
//! p_unit_test 5
@ -224,7 +228,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! s_waitcnt_depctr vm_vsrc(0)
//! s2: %0:exec = s_mov_b64 -1
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(-1));
/* no hazard: LDS */
@ -232,7 +237,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! v1: %0:v[0] = ds_read_b32 %0:v[0], %0:m0
//! s1: %0:s[0] = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
/* no hazard: LDS with VALU in-between */
@ -241,7 +247,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! v_nop
//! s1: %0:m0 = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.vop1(aco_opcode::v_nop);
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
@ -269,7 +276,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! s_waitcnt lgkmcnt(0)
//! s1: %0:m0 = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sopp(aco_opcode::s_waitcnt, -1, 0xc07f);
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
@ -300,7 +308,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! s_waitcnt_depctr vm_vsrc(0)
//! s1: %0:m0 = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sopp(aco_opcode::s_waitcnt, -1, 0x3f70);
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
@ -932,8 +941,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
Operand::zero(), Operand(PhysReg(0), s2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@ -944,8 +953,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s1: %0:s[1] = s_mov_b32 0
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
Operand::zero(), Operand(PhysReg(0), s2));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(257), v1), Operand(PhysReg(1), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@ -957,8 +966,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
Operand::zero(), Operand(PhysReg(0), s2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@ -969,8 +978,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
Operand::zero(), Operand(PhysReg(0), s2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sopp(aco_opcode::s_waitcnt_depctr, -1, 0xfffe);
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@ -982,8 +991,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
Operand(PhysReg(2), s1), Operand::zero(), Operand(PhysReg(0), s2));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand(PhysReg(2), s1),
Operand::zero(), Operand(PhysReg(0), s2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));

View File

@ -36,15 +36,14 @@ BEGIN_TEST(insert_waitcnt.ds_ordered_count)
Operand chan_counter(PhysReg(260), v1);
Operand m(m0, s1);
Instruction *ds_instr;
Instruction* ds_instr;
//>> ds_ordered_count %0:v[0], %0:v[3], %0:m0 offset0:3072 gds storage:gds semantics:volatile
//! s_waitcnt lgkmcnt(0)
ds_instr = bld.ds(aco_opcode::ds_ordered_count, def0, gds_base, m, 3072u, 0u, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
//! ds_add_rtn_u32 %0:v[1], %0:v[3], %0:v[4], %0:m0 gds storage:gds semantics:volatile,atomic,rmw
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1,
gds_base, chan_counter, m, 0u, 0u, true);
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1, gds_base, chan_counter, m, 0u, 0u, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
//! s_waitcnt lgkmcnt(0)

View File

@ -21,19 +21,18 @@
* IN THE SOFTWARE.
*
*/
#include <llvm/Config/llvm-config.h>
#include "helpers.h"
#include "test_isel-spirv.h"
#include <llvm/Config/llvm-config.h>
using namespace aco;
BEGIN_TEST(isel.interp.simple)
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec4 in_color;
layout(location = 0) out vec4 out_color;
void main() {
out_color = in_color;
void main() { out_color = in_color;
}
);
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,

View File

@ -61,7 +61,8 @@ BEGIN_TEST(optimize.neg)
//! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1
//! p_unit_test 5, %res5
writeout(5, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
writeout(5,
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
//! v1: %res6 = v_subrev_f32 %a, %b
//! p_unit_test 6, %res6
@ -264,7 +265,8 @@ BEGIN_TEST(optimize.output_modifiers)
finish_opt_test();
END_TEST
Temp create_subbrev_co(Operand op0, Operand op1, Operand op2)
Temp
create_subbrev_co(Operand op0, Operand op1, Operand op2)
{
return bld.vop2_e64(aco_opcode::v_subbrev_co_u32, bld.def(v1), bld.def(bld.lm), op0, op1, op2);
}
@ -438,7 +440,7 @@ BEGIN_TEST(optimize.bcnt)
END_TEST
struct clamp_config {
const char *name;
const char* name;
aco_opcode min, max, med3;
Operand lb, ub;
};
@ -863,7 +865,7 @@ enum denorm_op {
denorm_fnegabs = 3,
};
static const char *denorm_op_names[] = {
static const char* denorm_op_names[] = {
"mul1",
"fneg",
"fabs",
@ -877,31 +879,27 @@ struct denorm_config {
aco_opcode dest;
};
static const char *srcdest_op_name(aco_opcode op)
static const char*
srcdest_op_name(aco_opcode op)
{
switch (op) {
case aco_opcode::v_cndmask_b32:
return "cndmask";
case aco_opcode::v_min_f32:
return "min";
case aco_opcode::v_rcp_f32:
return "rcp";
default:
return "none";
case aco_opcode::v_cndmask_b32: return "cndmask";
case aco_opcode::v_min_f32: return "min";
case aco_opcode::v_rcp_f32: return "rcp";
default: return "none";
}
}
static Temp emit_denorm_srcdest(aco_opcode op, Temp val)
static Temp
emit_denorm_srcdest(aco_opcode op, Temp val)
{
switch (op) {
case aco_opcode::v_cndmask_b32:
return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]);
case aco_opcode::v_min_f32:
return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), val);
case aco_opcode::v_rcp_f32:
return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val);
default:
return val;
case aco_opcode::v_rcp_f32: return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val);
default: return val;
}
}
@ -917,7 +915,8 @@ BEGIN_TEST(optimize.denorm_propagation)
configs.push_back({flush, op, aco_opcode::num_opcodes, dest});
}
for (aco_opcode src : {aco_opcode::v_cndmask_b32, aco_opcode::v_min_f32, aco_opcode::v_rcp_f32}) {
for (aco_opcode src :
{aco_opcode::v_cndmask_b32, aco_opcode::v_min_f32, aco_opcode::v_rcp_f32}) {
for (denorm_op op : {denorm_mul1, denorm_fneg, denorm_fabs, denorm_fnegabs})
configs.push_back({flush, op, src, aco_opcode::num_opcodes});
}
@ -925,18 +924,18 @@ BEGIN_TEST(optimize.denorm_propagation)
for (denorm_config cfg : configs) {
char subvariant[128];
sprintf(subvariant, "_%s_%s_%s_%s",
cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
sprintf(subvariant, "_%s_%s_%s_%s", cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
denorm_op_names[(int)cfg.op], srcdest_op_name(cfg.dest));
if (!setup_cs("v1 s2", (amd_gfx_level)i, CHIP_UNKNOWN, subvariant))
continue;
bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.src == aco_opcode::v_min_f32) ||
cfg.dest == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.dest == aco_opcode::v_min_f32) ||
!cfg.flush;
bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 ||
(i >= GFX9 && cfg.src == aco_opcode::v_min_f32) ||
cfg.dest == aco_opcode::v_rcp_f32 ||
(i >= GFX9 && cfg.dest == aco_opcode::v_min_f32) || !cfg.flush;
fprintf(output, "src, dest, op: %s %s %s\n",
srcdest_op_name(cfg.src), srcdest_op_name(cfg.dest), denorm_op_names[(int)cfg.op]);
fprintf(output, "src, dest, op: %s %s %s\n", srcdest_op_name(cfg.src),
srcdest_op_name(cfg.dest), denorm_op_names[(int)cfg.op]);
fprintf(output, "can_propagate: %u\n", can_propagate);
//! src, dest, op: $src $dest $op
//! can_propagate: #can_propagate
@ -976,15 +975,9 @@ BEGIN_TEST(optimize.denorm_propagation)
case denorm_mul1:
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), val);
break;
case denorm_fneg:
val = fneg(val);
break;
case denorm_fabs:
val = fabs(val);
break;
case denorm_fnegabs:
val = fneg(fabs(val));
break;
case denorm_fneg: val = fneg(val); break;
case denorm_fabs: val = fabs(val); break;
case denorm_fnegabs: val = fneg(fabs(val)); break;
}
val = emit_denorm_srcdest(cfg.dest, val);
writeout(
@ -1123,13 +1116,15 @@ BEGIN_TEST(optimize.dpp_prop)
//! v1: %res2 = v_mul_f32 0x12345678, %a
//! p_unit_test 2, %res2
Temp literal1 = bld.copy(bld.def(v1), Operand::c32(0x12345678u));
writeout(2, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
writeout(2,
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
//! v1: %literal2 = p_parallelcopy 0x12345679
//! v1: %res3 = v_mul_f32 %a, %literal row_shl:1 bound_ctrl:1
//! p_unit_test 3, %res3
Temp literal2 = bld.copy(bld.def(v1), Operand::c32(0x12345679u));
writeout(3, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
writeout(3,
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
//! v1: %b_v = p_parallelcopy %b
//! v1: %res4 = v_mul_f32 %b, %a
@ -1171,7 +1166,9 @@ BEGIN_TEST(optimize.casts)
//! v1: %res2_tmp = v_mul_f32 -1.0, %a16
//! v2b: %res2 = v_mul_f16 %res2_tmp, %a16
//! p_unit_test 2, %res2
writeout(2, fmul(u2u16(bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), bld.as_uniform(a16))), a16));
writeout(2, fmul(u2u16(bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1),
Operand::c32(0xbf800000u), bld.as_uniform(a16))),
a16));
//! v1: %res3_tmp = v_mul_f32 %a, %a
//! v2b: %res3 = v_add_f16 %res3_tmp, 0 clamp
@ -1191,7 +1188,8 @@ BEGIN_TEST(optimize.casts)
//! v2b: %res6_tmp = v_mul_f16 %a16, %a16
//! v1: %res6 = v_mul_f32 2.0, %res6_tmp
//! p_unit_test 6, %res6
writeout(6, fmul(bld.as_uniform(fmul(a16, a16)), bld.copy(bld.def(v1), Operand::c32(0x40000000))));
writeout(6,
fmul(bld.as_uniform(fmul(a16, a16)), bld.copy(bld.def(v1), Operand::c32(0x40000000))));
//! v1: %res7_tmp = v_mul_f32 %a, %a
//! v2b: %res7 = v_add_f16 %res7_tmp, %a16
@ -1211,7 +1209,8 @@ BEGIN_TEST(optimize.casts)
//! v2b: %res10_tmp = v_mul_f16 %a16, %a16
//! v1: %res10 = v_mul_f32 -1.0, %res10_tmp
//! p_unit_test 10, %res10
writeout(10, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), bld.as_uniform(fmul(a16, a16))));
writeout(10, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u),
bld.as_uniform(fmul(a16, a16))));
finish_opt_test();
END_TEST
@ -1549,7 +1548,8 @@ BEGIN_TEST(optimize.mad_mix.fma.basic)
//! v1: %res2_mul = v_fma_mix_f32 lo(%a16), %b, -0
//! v1: %res2 = v_add_f32 %res2_mul, %c *2
//! p_unit_test 2, %res2
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000), fadd(fmul(f2f32(a16), b), c)));
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000),
fadd(fmul(f2f32(a16), b), c)));
/* neg/abs modifiers */
//! v1: %res3 = v_fma_mix_f32 -lo(%a16), %b, |lo(%c16)|
@ -1730,7 +1730,8 @@ BEGIN_TEST(optimize.mad_mix.cast)
}
END_TEST
static void vop3p_constant(unsigned *idx, aco_opcode op, const char *swizzle, uint32_t val)
static void
vop3p_constant(unsigned* idx, aco_opcode op, const char* swizzle, uint32_t val)
{
uint32_t halves[2] = {val & 0xffff, val >> 16};
uint32_t expected = halves[swizzle[0] - 'x'] | (halves[swizzle[1] - 'x'] << 16);
@ -1744,7 +1745,7 @@ static void vop3p_constant(unsigned *idx, aco_opcode op, const char *swizzle, uint32_t val)
BEGIN_TEST(optimize.vop3p_constants)
for (aco_opcode op : {aco_opcode::v_pk_add_f16, aco_opcode::v_pk_add_u16}) {
for (const char *swizzle : {"xx", "yy", "xy", "yx"}) {
for (const char* swizzle : {"xx", "yy", "xy", "yx"}) {
char variant[16];
strcpy(variant, op == aco_opcode::v_pk_add_f16 ? "_f16" : "_u16");
strcat(variant, "_");

View File

@ -27,310 +27,324 @@
using namespace aco;
BEGIN_TEST(optimizer_postRA.vcmp)
PhysReg reg_v0(256);
PhysReg reg_s0(0);
PhysReg reg_s2(2);
PhysReg reg_s4(4);
PhysReg reg_v0(256);
PhysReg reg_s0(0);
PhysReg reg_s2(2);
PhysReg reg_s4(4);
//>> v1: %a:v[0] = p_startpgm
ASSERTED bool setup_ok = setup_cs("v1", GFX8);
assert(setup_ok);
//>> v1: %a:v[0] = p_startpgm
ASSERTED bool setup_ok = setup_cs("v1", GFX8);
assert(setup_ok);
auto &startpgm = bld.instructions->at(0);
assert(startpgm->opcode == aco_opcode::p_startpgm);
startpgm->definitions[0].setFixed(reg_v0);
auto& startpgm = bld.instructions->at(0);
assert(startpgm->opcode == aco_opcode::p_startpgm);
startpgm->definitions[0].setFixed(reg_v0);
Temp v_in = inputs[0];
Temp v_in = inputs[0];
{
/* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
{
/* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %e:s[2-3] = p_cbranch_z %b:vcc
//! p_unit_test 0, %e:s[2-3]
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(0, Operand(br, reg_s2));
}
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %e:s[2-3] = p_cbranch_z %b:vcc
//! p_unit_test 0, %e:s[2-3]
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(0, Operand(br, reg_s2));
}
//; del b, e
//; del b, e
{
/* When VCC is overwritten inbetween, don't optimize. */
{
/* When VCC is overwritten inbetween, don't optimize. */
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %f:vcc = s_mov_b64 0
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 1, %e:s[2-3], %f:vcc
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
}
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %f:vcc = s_mov_b64 0
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 1, %e:s[2-3], %f:vcc
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
}
//; del b, c, d, e, f
//; del b, c, d, e, f
{
/* When part of VCC is overwritten inbetween, don't optimize. */
{
/* When part of VCC is overwritten inbetween, don't optimize. */
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s1: %f:s[107] = s_mov_b32 0
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 1, %e:s[2-3], %f:vcc
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
}
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s1: %f:s[107] = s_mov_b32 0
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 1, %e:s[2-3], %f:vcc
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
}
//; del b, c, d, e, f
//; del b, c, d, e, f
{
/* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
{
/* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
//! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3]
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2));
}
//! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3]
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
Operand(vcmp, reg_s4), Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2));
}
//; del b, c, d, e
//; del b, c, d, e
{
/* When the VCC isn't written by VOPC, don't optimize */
{
/* When the VCC isn't written by VOPC, don't optimize */
//! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3]
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc),
Operand::c32(1u), Operand(reg_s4, bld.lm));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2));
}
//! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3]
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand::c32(1u),
Operand(reg_s4, bld.lm));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
Operand(salu, vcc), Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2));
}
//; del b, c, d, e, f, x
//; del b, c, d, e, f, x
{
/* When EXEC is overwritten inbetween, don't optimize. */
{
/* When EXEC is overwritten inbetween, don't optimize. */
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %f:exec = s_mov_b64 42
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 4, %e:s[2-3], %f:exec
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
}
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %f:exec = s_mov_b64 42
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 4, %e:s[2-3], %f:exec
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
}
//; del b, c, d, e, f, x
//; del b, c, d, e, f, x
finish_optimizer_postRA_test();
finish_optimizer_postRA_test();
END_TEST
BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//>> s1: %a, s2: %y, s1: %z = p_startpgm
ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
assert(setup_ok);
//>> s1: %a, s2: %y, s1: %z = p_startpgm
ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
assert(setup_ok);
PhysReg reg_s0{0};
PhysReg reg_s2{2};
PhysReg reg_s3{3};
PhysReg reg_s4{4};
PhysReg reg_s6{6};
PhysReg reg_s8{8};
PhysReg reg_s0{0};
PhysReg reg_s2{2};
PhysReg reg_s3{3};
PhysReg reg_s4{4};
PhysReg reg_s6{6};
PhysReg reg_s8{8};
Temp in_0 = inputs[0];
Temp in_1 = inputs[1];
Temp in_2 = inputs[2];
Operand op_in_0(in_0);
op_in_0.setFixed(reg_s0);
Operand op_in_1(in_1);
op_in_1.setFixed(reg_s4);
Operand op_in_2(in_2);
op_in_2.setFixed(reg_s6);
Temp in_0 = inputs[0];
Temp in_1 = inputs[1];
Temp in_2 = inputs[2];
Operand op_in_0(in_0);
op_in_0.setFixed(reg_s0);
Operand op_in_1(in_1);
op_in_1.setFixed(reg_s4);
Operand op_in_2(in_2);
op_in_2.setFixed(reg_s6);
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_nz %e:scc
//! p_unit_test 0, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(0, Operand(br, vcc));
}
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_nz %e:scc
//! p_unit_test 0, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(0, Operand(br, vcc));
}
//; del d, e, f
//; del d, e, f
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 1, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(1, Operand(br, vcc));
}
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 1, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(1, Operand(br, vcc));
}
//; del d, e, f
//; del d, e, f
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 2, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(2, Operand(br, vcc));
}
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 2, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(2, Operand(br, vcc));
}
//; del d, e, f
//; del d, e, f
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_nz %e:scc
//! p_unit_test 3, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(3, Operand(br, vcc));
}
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_nz %e:scc
//! p_unit_test 3, %f:vcc
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(3, Operand(br, vcc));
}
//; del d, e, f
//; del d, e, f
{
//! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 4, %f:vcc
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
Operand::c32(0x12345u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero(8));
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(4, Operand(br, vcc));
}
{
//! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 4, %f:vcc
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
Operand::c32(0x12345u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero(8));
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(4, Operand(br, vcc));
}
//; del d, e, f
//; del d, e, f
{
/* SCC is overwritten in between, don't optimize */
{
/* SCC is overwritten in between, don't optimize */
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
}
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
}
//; del d, e, f, g, h, x
//; del d, e, f, g, h, x
{
/* SCC is overwritten in between, optimize by pulling down */
{
/* SCC is overwritten in between, optimize by pulling down */
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
}
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
}
//; del d, e, f, g, h, x
//; del d, e, f, g, h, x
{
/* SCC is overwritten in between, optimize by pulling down */
{
/* SCC is overwritten in between, optimize by pulling down */
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s8),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
}
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s8),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
}
//; del d, e, f, g, h, x
//; del d, e, f, g, h, x
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
//! p_unit_test 6, %f:s[4]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
writeout(6, Operand(br, reg_s4));
}
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
//! p_unit_test 6, %f:s[4]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
Operand(op_in_2), bld.scc(scmp));
writeout(6, Operand(br, reg_s4));
}
//; del d, e, f
//; del d, e, f
{
/* SCC is overwritten in between, don't optimize */
{
/* SCC is overwritten in between, don't optimize */
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
//! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
//! p_unit_test 7, %f:s[4], %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
}
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
//! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
//! p_unit_test 7, %f:s[4], %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
Operand(op_in_2), bld.scc(scmp));
writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
}
//; del d, e, f, g, h, x
//; del d, e, f, g, h, x
finish_optimizer_postRA_test();
finish_optimizer_postRA_test();
END_TEST
BEGIN_TEST(optimizer_postRA.dpp)
@ -368,7 +382,8 @@ BEGIN_TEST(optimizer_postRA.dpp)
//! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
//! p_unit_test 2, %res2:v[2]
Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror);
Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2),
dpp_row_half_mirror);
writeout(2, Operand(res2, reg_v2));
/* modifiers */
@ -429,14 +444,16 @@ BEGIN_TEST(optimizer_postRA.dpp)
//! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
//! p_unit_test 8, %res8:v[2]
Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
Temp res8 =
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
writeout(8, Operand(res8, reg_v2));
//! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
//! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
//! p_unit_test 9, %res9:v[2]
Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
Temp res9 =
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
writeout(9, Operand(res9, reg_v2));
/* control flow */
@ -485,48 +502,53 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
Operand c(inputs[2], PhysReg(258)); /* buffer store address */
Operand d(inputs[3], PhysReg(259)); /* buffer store value */
Operand e(inputs[4], PhysReg(0)); /* condition */
PhysReg reg_v12(268); /* temporary register */
PhysReg reg_v12(268); /* temporary register */
Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
/* --- logical then --- */
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
//! p_logical_start
emit_divergent_if_else(
program.get(), bld, e,
[&]() -> void
{
/* --- logical then --- */
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
//! p_logical_start
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
}, [&]() -> void {
/* --- logical else --- */
//! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
},
[&]() -> void
{
/* --- logical else --- */
//! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
});
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
});
/* --- merge block --- */
//! BB6
@ -535,7 +557,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
//! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
//! p_unit_test 10, %res10:v[12]
Temp result = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
Temp result =
bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
writeout(10, Operand(result, reg_v12));
finish_optimizer_postRA_test();
@ -560,7 +583,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
Operand d(inputs[3], PhysReg(259)); /* buffer store value */
Operand e(inputs[4], PhysReg(0)); /* condition */
Operand f(inputs[5], PhysReg(2)); /* buffer store address (scalar) */
PhysReg reg_v12(268); /* temporary register */
PhysReg reg_v12(268); /* temporary register */
//! v1: %dpp_tmp:v[12] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
@ -568,44 +591,50 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
/* --- logical then --- */
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
//! p_logical_start
emit_divergent_if_else(
program.get(), bld, e,
[&]() -> void
{
/* --- logical then --- */
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
//! p_logical_start
//! v1: %addr:v[0] = p_parallelcopy %f:s[2]
Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f);
//! v1: %addr:v[0] = p_parallelcopy %f:s[2]
Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f);
//! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d, Operand::zero(), 0, true);
//! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d,
Operand::zero(), 0, true);
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
}, [&]() -> void {
/* --- logical else --- */
//! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
},
[&]() -> void
{
/* --- logical else --- */
//! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
});
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
});
/* --- merge block --- */
//! BB6
@ -613,7 +642,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
//! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
//! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1]
Temp result = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
Temp result =
bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
//! p_unit_test 10, %result:v[12]
writeout(10, Operand(result, reg_v12));
@ -631,53 +661,58 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
startpgm->definitions[2].setFixed(PhysReg(259));
startpgm->definitions[3].setFixed(PhysReg(0));
Operand a(inputs[0], PhysReg(2)); /* source for s_and */
Operand a(inputs[0], PhysReg(2)); /* source for s_and */
Operand c(inputs[1], PhysReg(258)); /* buffer store address */
Operand d(inputs[2], PhysReg(259)); /* buffer store value */
Operand e(inputs[3], PhysReg(0)); /* condition */
PhysReg reg_s8(8); /* temporary register */
PhysReg reg_s8(8); /* temporary register */
auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a,
Operand::c32(0x40018u));
Operand::c32(0x40018u));
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
/* --- logical then --- */
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
//! p_logical_start
emit_divergent_if_else(
program.get(), bld, e,
[&]() -> void
{
/* --- logical then --- */
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
//! p_logical_start
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
}, [&]() -> void {
/* --- logical else --- */
//! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
},
[&]() -> void
{
/* --- logical else --- */
//! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
});
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
});
/* --- merge block --- */
//! BB6
@ -695,7 +730,6 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
finish_optimizer_postRA_test();
END_TEST
BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
//>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[4] = p_startpgm
if (!setup_cs("s2 v1 v1 s2 s1", GFX10_3))
@ -708,59 +742,65 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
startpgm->definitions[3].setFixed(PhysReg(0));
startpgm->definitions[4].setFixed(PhysReg(4));
Operand a(inputs[0], PhysReg(2)); /* source for s_and */
Operand a(inputs[0], PhysReg(2)); /* source for s_and */
Operand c(inputs[1], PhysReg(258)); /* buffer store address */
Operand d(inputs[2], PhysReg(259)); /* buffer store value */
Operand e(inputs[3], PhysReg(0)); /* condition */
Operand f(inputs[4], PhysReg(4)); /* overwrite value */
PhysReg reg_s3(3); /* temporary register */
PhysReg reg_s8(8); /* temporary register */
PhysReg reg_s3(3); /* temporary register */
PhysReg reg_s8(8); /* temporary register */
//! s2: %tmp_salu:s[8-9], s1: %tmp_salu_scc:scc = s_and_b64 %a:s[2-3], 0x40018
auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a,
Operand::c32(0x40018u));
Operand::c32(0x40018u));
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
/* --- logical then --- */
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
//! p_logical_start
emit_divergent_if_else(
program.get(), bld, e,
[&]() -> void
{
/* --- logical then --- */
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
//! p_logical_start
//! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4]
Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f);
//! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4]
Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f);
//! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(), 0, true);
//! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(),
0, true);
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
}, [&]() -> void {
/* --- logical else --- */
//! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
},
[&]() -> void
{
/* --- logical else --- */
//! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
});
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
});
/* --- merge block --- */
//! BB6

View File

@ -35,22 +35,27 @@ BEGIN_TEST(setup_reduce_temp.divergent_if_phi)
* use_linear_vgpr(v0)
* }
* ... = phi ...
*/
//TODO: fix the RA validator to spot this
*/
// TODO: fix the RA validator to spot this
//>> s2: %_, v1: %a = p_startpgm
if (!setup_cs("s2 v1", GFX9))
return;
//>> lv1: %lv = p_start_linear_vgpr
emit_divergent_if_else(program.get(), bld, Operand(inputs[0]), [&]() -> void {
//>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
Instruction* reduce = bld.reduction(aco_opcode::p_reduce, bld.def(s1),
bld.def(bld.lm), bld.def(s1, scc), inputs[1],
Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
reduce->reduction().cluster_size = bld.lm.bytes() * 8;
}, [&]() -> void {
/* nothing */
});
emit_divergent_if_else(
program.get(), bld, Operand(inputs[0]),
[&]() -> void
{
//>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
Instruction* reduce =
bld.reduction(aco_opcode::p_reduce, bld.def(s1), bld.def(bld.lm), bld.def(s1, scc),
inputs[1], Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
reduce->reduction().cluster_size = bld.lm.bytes() * 8;
},
[&]() -> void
{
/* nothing */
});
bld.pseudo(aco_opcode::p_phi, bld.def(v1), Operand::c32(1), Operand::zero());
//>> /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
//! p_end_linear_vgpr %lv

View File

@ -37,7 +37,7 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
/* TODO: is this possible to do on GFX11? */
for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) {
for (bool pessimistic : { false, true }) {
for (bool pessimistic : {false, true}) {
const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
//>> v1: %_:v[#a] = p_startpgm
@ -45,7 +45,8 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
return;
//! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
Builder::Result tmp =
bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
//! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
//! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
@ -55,7 +56,7 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
writeout(0, result1);
writeout(1, result2);
finish_ra_test(ra_test_policy { pessimistic });
finish_ra_test(ra_test_policy{pessimistic});
}
}
END_TEST
@ -67,7 +68,8 @@ BEGIN_TEST(regalloc._32bit_partial_write)
/* ensure high 16 bits are occupied */
//! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
Temp hi =
bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
/* This test checks if this instruction uses SDWA. */
//! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
@ -168,9 +170,9 @@ BEGIN_TEST(regalloc.precolor.multiple_operands)
//! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
//! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256+0)),
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[1], PhysReg(256+2)),
Operand(inputs[2], PhysReg(256+3)));
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256 + 0)),
Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[1], PhysReg(256 + 2)),
Operand(inputs[2], PhysReg(256 + 3)));
finish_ra_test(ra_test_policy());
END_TEST
@ -182,8 +184,8 @@ BEGIN_TEST(regalloc.precolor.different_regs)
//! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
//! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256+0)),
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[0], PhysReg(256+2)));
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256 + 0)),
Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[0], PhysReg(256 + 2)));
finish_ra_test(ra_test_policy());
END_TEST
@ -256,7 +258,8 @@ BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
//! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
Temp s0_tmp = bld.tmp(s1);
Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1));
Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc),
Definition(s0_tmp.id(), PhysReg{0}, s1));
//! lv1: %tmp1:v[1] = p_unit_test
Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
@ -273,7 +276,8 @@ BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
//>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
aco_print_instr(program->gfx_level, &parallelcopy, output);
fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg());
fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc,
parallelcopy.scratch_sgpr.reg());
END_TEST
BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
@ -392,13 +396,15 @@ BEGIN_TEST(regalloc.vinterp_fp16)
//! v1: %tmp0:v[1] = v_interp_p10_f16_f32_inreg %lo:v[3][0:16], %in1:v[1], hi(%hi:v[3][16:32])
//! p_unit_test %tmp0:v[1]
Temp tmp0 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
Temp tmp0 =
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
bld.pseudo(aco_opcode::p_unit_test, tmp0);
//! v2b: %tmp1:v[0][16:32] = v_interp_p2_f16_f32_inreg %in0:v[0], %in2:v[2], %tmp0:v[1] opsel_hi
//! v1: %tmp2:v[0] = p_create_vector 0, %tmp1:v[0][16:32]
//! p_unit_test %tmp2:v[0]
Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0], inputs[2], tmp0);
Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0],
inputs[2], tmp0);
Temp tmp2 = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::zero(2), tmp1);
bld.pseudo(aco_opcode::p_unit_test, tmp2);

View File

@ -34,7 +34,8 @@ BEGIN_TEST(validate.sdwa.allow)
//>> Validation results:
//! Validation passed
SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa();
SDWA_instruction* sdwa =
&bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa();
sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true;
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]);
@ -105,7 +106,9 @@ BEGIN_TEST(validate.sdwa.vopc)
bld.vopc_sdwa(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), inputs[0], inputs[1]);
//~gfx(9|10)! SDWA VOPC clamp only supported on GFX8: s2: %_:vcc = v_cmp_eq_f32 %vgpr0, %vgpr1 clamp src0_sel:dword src1_sel:dword
bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])->sdwa().clamp = true;
bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])
->sdwa()
.clamp = true;
//! Validation failed
@ -138,11 +141,13 @@ BEGIN_TEST(validate.sdwa.vcc)
//! 3rd operand must be fixed to vcc with SDWA: v1: %_ = v_cndmask_b32 %vgpr0, %vgpr1, %_ dst_sel:dword src0_sel:dword src1_sel:dword
bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], inputs[2]);
bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], bld.vcc(inputs[2]));
bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1],
bld.vcc(inputs[2]));
//! 2nd definition must be fixed to vcc with SDWA: v1: %_, s2: %_ = v_add_co_u32 %vgpr0, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm), inputs[0], inputs[1]);
bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0], inputs[1]);
bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0],
inputs[1]);
//! Validation failed
@ -152,125 +157,127 @@ END_TEST
BEGIN_TEST(optimize.sdwa.extract)
for (unsigned i = GFX7; i <= GFX10; i++) {
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
continue;
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN,
is_signed ? "_signed" : "_unsigned"))
continue;
//; def standard_test(index, sel):
//; res = 'v1: %%res%s = v_mul_f32 %%a, %%b dst_sel:dword src0_sel:dword src1_sel:%c%s\n' % (index, 's' if variant.endswith('_signed') else 'u', sel)
//; res += 'p_unit_test %s, %%res%s' % (index, index)
//; return res
//; funcs['standard_test'] = lambda a: standard_test(*(v for v in a.split(',')))
//; def standard_test(index, sel):
//; res = 'v1: %%res%s = v_mul_f32 %%a, %%b dst_sel:dword src0_sel:dword src1_sel:%c%s\n' % (index, 's' if variant.endswith('_signed') else 'u', sel)
//; res += 'p_unit_test %s, %%res%s' % (index, index)
//; return res
//; funcs['standard_test'] = lambda a: standard_test(*(v for v in a.split(',')))
aco_opcode ext = aco_opcode::p_extract;
aco_opcode ins = aco_opcode::p_insert;
aco_opcode ext = aco_opcode::p_extract;
aco_opcode ins = aco_opcode::p_insert;
{
//~gfx[^7].*! @standard_test(0,byte0)
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
Operand::c32(is_signed));
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
{
//~gfx[^7].*! @standard_test(0,byte0)
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(8u), Operand::c32(is_signed));
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
//~gfx[^7].*! @standard_test(1,byte1)
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
//~gfx[^7].*! @standard_test(1,byte1)
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(8u), Operand::c32(is_signed));
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
//~gfx[^7].*! @standard_test(2,byte2)
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
//~gfx[^7].*! @standard_test(2,byte2)
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u),
Operand::c32(8u), Operand::c32(is_signed));
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
//~gfx[^7].*! @standard_test(3,byte3)
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
//~gfx[^7].*! @standard_test(3,byte3)
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u),
Operand::c32(8u), Operand::c32(is_signed));
writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
//~gfx[^7].*! @standard_test(4,word0)
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
Operand::c32(is_signed));
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
//~gfx[^7].*! @standard_test(4,word0)
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(16u), Operand::c32(is_signed));
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
//~gfx[^7].*! @standard_test(5,word1)
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(16u), Operand::c32(is_signed));
writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
//~gfx[^7].*! @standard_test(5,word1)
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(16u), Operand::c32(is_signed));
writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
//~gfx[^7]_unsigned! @standard_test(6,byte0)
Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
//~gfx[^7]_unsigned! @standard_test(6,byte0)
Temp bfi_byte0_b =
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
//~gfx[^7]_unsigned! @standard_test(7,word0)
Temp bfi_word0_b =
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u));
writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b));
//~gfx[^7]_unsigned! @standard_test(7,word0)
Temp bfi_word0_b =
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u));
writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b));
}
//>> p_unit_test 63
writeout(63);
{
//! v1: %tmp8 = p_insert %b, 1, 8
//! v1: %res8 = v_mul_f32 %a, %tmp8
//! p_unit_test 8, %res8
Temp bfi_byte1_b =
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u));
writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b));
/* v_cvt_f32_ubyte[0-3] can be used instead of v_cvt_f32_u32+sdwa */
//~gfx7_signed! v1: %bfe_byte0_b = p_extract %b, 0, 8, 1
//~gfx7_signed! v1: %res9 = v_cvt_f32_u32 %bfe_byte0_b
//~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0
//~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
//! p_unit_test 9, %res9
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(8u), Operand::c32(is_signed));
writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
//~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
//~gfx7_signed! v1: %res10 = v_cvt_f32_u32 %bfe_byte1_b
//~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1
//~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
//! p_unit_test 10, %res10
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(8u), Operand::c32(is_signed));
writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
//~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
//~gfx7_signed! v1: %res11 = v_cvt_f32_u32 %bfe_byte2_b
//~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2
//~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
//! p_unit_test 11, %res11
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u),
Operand::c32(8u), Operand::c32(is_signed));
writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
//~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
//~gfx7_signed! v1: %res12 = v_cvt_f32_u32 %bfe_byte3_b
//~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3
//~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
//! p_unit_test 12, %res12
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u),
Operand::c32(8u), Operand::c32(is_signed));
writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
/* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
//~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
//~gfx(9|10).*! p_unit_test 13, %res13
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(16u), Operand::c32(is_signed));
writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
//~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
//~gfx(9|10).*! p_unit_test 14, %res14
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(16u), Operand::c32(is_signed));
writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b));
}
finish_opt_test();
}
//>> p_unit_test 63
writeout(63);
{
//! v1: %tmp8 = p_insert %b, 1, 8
//! v1: %res8 = v_mul_f32 %a, %tmp8
//! p_unit_test 8, %res8
Temp bfi_byte1_b =
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u));
writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b));
/* v_cvt_f32_ubyte[0-3] can be used instead of v_cvt_f32_u32+sdwa */
//~gfx7_signed! v1: %bfe_byte0_b = p_extract %b, 0, 8, 1
//~gfx7_signed! v1: %res9 = v_cvt_f32_u32 %bfe_byte0_b
//~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0
//~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
//! p_unit_test 9, %res9
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
Operand::c32(is_signed));
writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
//~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
//~gfx7_signed! v1: %res10 = v_cvt_f32_u32 %bfe_byte1_b
//~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1
//~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
//! p_unit_test 10, %res10
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
//~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
//~gfx7_signed! v1: %res11 = v_cvt_f32_u32 %bfe_byte2_b
//~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2
//~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
//! p_unit_test 11, %res11
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
//~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
//~gfx7_signed! v1: %res12 = v_cvt_f32_u32 %bfe_byte3_b
//~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3
//~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
//! p_unit_test 12, %res12
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
Operand::c32(is_signed));
writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
/* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
//~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
//~gfx(9|10).*! p_unit_test 13, %res13
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
Operand::c32(is_signed));
writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
//~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
//~gfx(9|10).*! p_unit_test 14, %res14
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(16u), Operand::c32(is_signed));
writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b));
}
finish_opt_test();
}
}
END_TEST

View File

@ -52,8 +52,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v1_lo, v2b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v1_lo, v2b),
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
//~gfx[67]! p_unit_test 1
@ -61,9 +60,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v1),
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v2b),
Operand(v0_lo, v2b));
//~gfx[67]! p_unit_test 2
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
@ -71,8 +69,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v6b), Operand(v1_lo, v2b),
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v6b), Operand(v1_lo, v2b),
Operand(v0_lo, v2b), Operand(v2_lo, v2b));
//~gfx[67]! p_unit_test 3
@ -82,10 +79,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v2),
Operand(v1_lo, v2b), Operand(v0_lo, v2b),
Operand(v2_lo, v2b), Operand(v3_lo, v2b));
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
Operand(v0_lo, v2b), Operand(v2_lo, v2b), Operand(v3_lo, v2b));
//~gfx[67]! p_unit_test 4
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
@ -96,17 +91,14 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v2),
Operand(v1_lo, v2b), Operand(v2_lo, v2b),
Operand(v0_lo, v2b), Operand(v3_lo, v2b));
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
Operand(v2_lo, v2b), Operand(v0_lo, v2b), Operand(v3_lo, v2b));
//~gfx[67]! p_unit_test 5
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Operand(v0_lo, v1));
//~gfx[67]! p_unit_test 6
@ -114,8 +106,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v2_lo, v2b), Operand(v0_lo, v6b));
//~gfx[67]! p_unit_test 7
@ -124,10 +115,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
//~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v2_lo, v2b), Definition(v3_lo, v2b),
Operand(v0_lo, v2));
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v2_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));
//~gfx[67]! p_unit_test 8
//~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
@ -136,18 +125,15 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v2b), Definition(v2_lo, v2b),
Definition(v0_lo, v2b), Definition(v3_lo, v2b),
Operand(v0_lo, v2));
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v2_lo, v2b),
Definition(v0_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));
//~gfx[67]! p_unit_test 9
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1b), Definition(v1_lo, v1b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v1_lo, v1b),
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
//~gfx[67]! p_unit_test 10
@ -155,9 +141,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v2b),
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2b), Operand(v1_lo, v1b),
Operand(v0_lo, v1b));
//~gfx[67]! p_unit_test 11
//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
@ -166,8 +151,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
//~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v3b), Operand(v1_lo, v1b),
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v3b), Operand(v1_lo, v1b),
Operand(v0_lo, v1b), Operand(v2_lo, v1b));
//~gfx[67]! p_unit_test 12
@ -179,10 +163,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v1),
Operand(v1_lo, v1b), Operand(v0_lo, v1b),
Operand(v2_lo, v1b), Operand(v3_lo, v1b));
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v1b),
Operand(v0_lo, v1b), Operand(v2_lo, v1b), Operand(v3_lo, v1b));
//~gfx[67]! p_unit_test 13
//~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
@ -193,18 +175,16 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
//~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v1),
Operand(v0_lo, v1b), Operand(v0_lo, v1b),
Operand(v0_lo, v1b), Operand(v0_lo, v1b));
Instruction* pseudo =
bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v0_lo, v1b),
Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b));
pseudo->pseudo().scratch_sgpr = m0;
//~gfx[67]! p_unit_test 14
//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
Operand(v0_lo, v2b));
//~gfx[67]! p_unit_test 15
@ -213,10 +193,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
//~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
bld.pseudo(aco_opcode::p_split_vector,
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
Definition(v2_lo, v1b), Definition(v3_lo, v1b),
Operand(v0_lo, v1));
bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
Definition(v2_lo, v1b), Definition(v3_lo, v1b), Operand(v0_lo, v1));
//~gfx[67]! s_endpgm
@ -231,8 +209,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand(v0_hi, v2b), Operand(v0_lo, v2b));
//~gfx(8|9|11)! p_unit_test 1
@ -243,8 +220,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
//~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_lo, v2b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
Operand(v1_lo, v1), Operand(v0_lo, v2b));
//~gfx(8|9|11)! p_unit_test 2
@ -259,9 +235,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b),
Operand(v0_lo, v2b));
//~gfx(8|9|11)! p_unit_test 3
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@ -273,8 +249,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16]
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_b3, v1b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b),
Operand(v1_lo, v1), Operand(v0_b3, v1b));
//~gfx(8|9|11)! p_unit_test 4
@ -287,8 +262,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
//~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_lo, v1b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
Operand(v1_lo, v1), Operand(v0_lo, v1b));
//~gfx(8|9|11)! p_unit_test 5
@ -301,9 +275,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b),
Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b),
Operand(v0_lo, v1));
//~gfx(8|9|11)! p_unit_test 6
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@ -311,9 +285,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b),
Operand(v0_lo, v1));
//~gfx(8|9|11)! p_unit_test 7
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
@ -322,9 +296,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
//~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b),
Operand(v0_lo, v1));
//~gfx(8|9|11)! p_unit_test 8
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@ -342,8 +316,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v3b), Definition(v1_lo, v3b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
Operand(v1_lo, v3b), Operand(v0_lo, v3b));
//~gfx(8|9|11)! p_unit_test 9
@ -354,9 +327,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b),
Operand(v1_b3, v1b));
//~gfx(8|9|11)! p_unit_test 10
//~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
@ -380,8 +353,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_b1, v2b), Definition(v1_b1, v2b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b),
Operand(v1_b1, v2b), Operand(v0_b1, v2b));
//~gfx(8|9|11)! p_unit_test 11
@ -398,8 +370,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_b1, v1b), Definition(v0_b3, v1b),
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b),
Operand(v0_b3, v1b), Operand(v0_b1, v1b));
//~gfx(8|9|11)! s_endpgm
@ -535,8 +506,7 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
//~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0d
//~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
Operand::c16(0x00ff));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x00ff));
//! p_unit_test 14
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
@ -544,29 +514,25 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
//~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0xd0c0504
//~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b),
Operand::c16(0xff00));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0xff00));
//! p_unit_test 15
//~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
//~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
Operand::zero(2));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::zero(2));
//! p_unit_test 16
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
//~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050d
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
Operand::c8(0xff));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0xff));
//! p_unit_test 17
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
//~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050c
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
Operand::zero(1));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1));
//! s_endpgm
@ -589,12 +555,12 @@ BEGIN_TEST(to_hw_instr.self_intersecting_swap)
//! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
//! s_endpgm
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
//v[1:2] = v[2:3]
//v3 = v7
//v7 = v1
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1),
Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1));
// v[1:2] = v[2:3]
// v3 = v7
// v7 = v1
bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v1, v2), Definition(reg_v3, v1),
Definition(reg_v7, v1), Operand(reg_v2, v2), Operand(reg_v7, v1),
Operand(reg_v1, v1));
finish_to_hw_instr_test();
END_TEST
@ -606,98 +572,98 @@ BEGIN_TEST(to_hw_instr.extract)
PhysReg v1_lo{257};
for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
continue;
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
continue;
#define EXT(idx, size) \
bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
Operand::c32(size), Operand::c32(is_signed));
//; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
//; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
//; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
//; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
//; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)
//; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
//; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
//; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
//; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
//; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)
//>> p_unit_test 0
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
EXT(0, 8)
//! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
EXT(1, 8)
//! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
EXT(2, 8)
//! v1: %_:v[0] = @v_shr 24, %_:v[1]
EXT(3, 8)
//~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
//~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
//~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
EXT(0, 16)
//! v1: %_:v[0] = @v_shr 16, %_:v[1]
EXT(1, 16)
//>> p_unit_test 0
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
EXT(0, 8)
//! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
EXT(1, 8)
//! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
EXT(2, 8)
//! v1: %_:v[0] = @v_shr 24, %_:v[1]
EXT(3, 8)
//~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
//~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
//~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
EXT(0, 16)
//! v1: %_:v[0] = @v_shr 16, %_:v[1]
EXT(1, 16)
#undef EXT
#undef EXT
#define EXT(idx, size) \
bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \
Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));
//>> p_unit_test 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
//~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
//~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
EXT(0, 8)
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008
EXT(1, 8)
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010
EXT(2, 8)
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24
EXT(3, 8)
//~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000
//~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
//~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
EXT(0, 16)
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
EXT(1, 16)
//>> p_unit_test 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
//~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
//~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
EXT(0, 8)
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008
EXT(1, 8)
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010
EXT(2, 8)
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24
EXT(3, 8)
//~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000
//~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
//~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
EXT(0, 16)
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
EXT(1, 16)
#undef EXT
#undef EXT
#define EXT(idx, src_b) \
bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \
Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));
//>> p_unit_test 4
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
EXT(0, 0)
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
if (lvl != GFX7)
EXT(0, 2)
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
EXT(1, 0)
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
if (lvl != GFX7)
EXT(1, 2)
//>> p_unit_test 4
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
EXT(0, 0)
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
if (lvl != GFX7)
EXT(0, 2)
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
EXT(1, 0)
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
//~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
//~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
if (lvl != GFX7)
EXT(1, 2)
#undef EXT
#undef EXT
finish_to_hw_instr_test();
finish_to_hw_instr_test();
//! s_endpgm
}
//! s_endpgm
}
}
END_TEST
@ -736,7 +702,7 @@ BEGIN_TEST(to_hw_instr.insert)
//! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
INS(1, 16)
#undef INS
#undef INS
#define INS(idx, size) \
bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \
@ -759,7 +725,7 @@ BEGIN_TEST(to_hw_instr.insert)
//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16
INS(1, 16)
#undef INS
#undef INS
#define INS(idx, def_b) \
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \
@ -784,7 +750,7 @@ BEGIN_TEST(to_hw_instr.insert)
if (lvl != GFX7)
INS(1, 2)
#undef INS
#undef INS
finish_to_hw_instr_test();
@ -816,10 +782,9 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
//! lv1: %0:v[0] = v_mov_b32 %0:v[1]
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
//! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0
Instruction *instr = bld.pseudo(
aco_opcode::p_parallelcopy,
Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
Instruction* instr =
bld.pseudo(aco_opcode::p_parallelcopy, Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
instr->pseudo().scratch_sgpr = m0;
finish_to_hw_instr_test();
@ -836,10 +801,9 @@ BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
//>> p_unit_test 0
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
Instruction *instr = bld.pseudo(
aco_opcode::p_parallelcopy,
Definition(reg_v0, v1_linear), Definition(reg_v1, v1_linear),
Operand(reg_v1, v1_linear), Operand(reg_v0, v1_linear));
Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear),
Operand(reg_v0, v1_linear));
instr->pseudo().scratch_sgpr = m0;
finish_to_hw_instr_test();