aco: use array indexing for opsel/opsel_lo/opsel_hi

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21766>
This commit is contained in:
Georg Lehmann 2023-03-07 14:38:34 +01:00 committed by Marge Bot
parent a47c3f84fb
commit 828aff2a2d
4 changed files with 59 additions and 73 deletions

View File

@ -954,8 +954,8 @@ propagate_constants_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& i
/* try to fold inline constants */
VALU_instruction* vop3p = &instr->valu();
bool opsel_lo = (vop3p->opsel_lo >> i) & 1;
bool opsel_hi = (vop3p->opsel_hi >> i) & 1;
bool opsel_lo = vop3p->opsel_lo[i];
bool opsel_hi = vop3p->opsel_hi[i];
Operand const_op[2];
bool const_opsel[2] = {false, false};
@ -1026,8 +1026,8 @@ propagate_constants_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& i
opsel_hi = false;
}
vop3p->opsel_lo = opsel_lo ? (vop3p->opsel_lo | (1 << i)) : (vop3p->opsel_lo & ~(1 << i));
vop3p->opsel_hi = opsel_hi ? (vop3p->opsel_hi | (1 << i)) : (vop3p->opsel_hi & ~(1 << i));
vop3p->opsel_lo[i] = opsel_lo;
vop3p->opsel_hi[i] = opsel_hi;
}
bool
@ -1103,7 +1103,7 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
return true;
} else if (instr->isVOP3() && sel.size() == 2 &&
can_use_opsel(ctx.program->gfx_level, instr->opcode, idx) &&
!(instr->valu().opsel & (1 << idx))) {
!instr->valu().opsel[idx]) {
return true;
} else if (instr->opcode == aco_opcode::p_extract) {
SubdwordSel instrSel = parse_extract(instr.get());
@ -1162,7 +1162,7 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
mad->operands[0] = instr->operands[0];
mad->operands[1] = instr->operands[1];
mad->operands[2] = Operand::zero();
mad->valu().opsel = (sel.offset() / 2) << idx;
mad->valu().opsel[idx] = sel.offset();
instr.reset(mad);
} else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
@ -1170,7 +1170,7 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
} else if (instr->isVOP3()) {
if (sel.offset())
instr->valu().opsel |= 1 << idx;
instr->valu().opsel[idx] = true;
} else if (instr->opcode == aco_opcode::p_extract) {
SubdwordSel instrSel = parse_extract(instr.get());
@ -2235,12 +2235,12 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (op_instr[i]->isVOP3()) {
VALU_instruction& vop3 = op_instr[i]->valu();
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 ||
vop3.opsel == 2)
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] ||
vop3.opsel[0] != vop3.opsel[1])
return false;
neg[i] = vop3.neg[0];
abs[i] = vop3.abs[0];
opsel |= (vop3.opsel & 1) << i;
opsel |= vop3.opsel[0] << i;
} else if (op_instr[i]->isSDWA()) {
return false;
}
@ -2515,8 +2515,8 @@ combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (nan_test->isVOP3()) {
VALU_instruction& vop3 = nan_test->valu();
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 ||
vop3.opsel == 2)
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] ||
vop3.opsel[0] != vop3.opsel[1])
return false;
}
@ -2636,8 +2636,8 @@ match_op3_for_vop3(opt_ctx& ctx, aco_opcode op1, aco_opcode op2, Instruction* op
return false;
if (inbetween_opsel)
*inbetween_opsel = op1_vop3 ? op1_vop3->opsel & (1 << (unsigned)swap) : false;
else if (op1_vop3 && op1_vop3->opsel & (1 << (unsigned)swap))
*inbetween_opsel = op1_vop3 ? op1_vop3->opsel[swap] : false;
else if (op1_vop3 && op1_vop3->opsel[swap])
return false;
*precise = op1_instr->definitions[0].isPrecise() || op2_instr->definitions[0].isPrecise();
@ -2650,14 +2650,14 @@ match_op3_for_vop3(opt_ctx& ctx, aco_opcode op1, aco_opcode op2, Instruction* op
operands[shuffle[0]] = op1_instr->operands[!swap];
neg[shuffle[0]] = op1_vop3 ? op1_vop3->neg[!swap] : false;
abs[shuffle[0]] = op1_vop3 ? op1_vop3->abs[!swap] : false;
if (op1_vop3 && (op1_vop3->opsel & (1 << (unsigned)!swap)))
if (op1_vop3 && op1_vop3->opsel[!swap])
*opsel |= 1 << shuffle[0];
for (unsigned i = 0; i < 2; i++) {
operands[shuffle[i + 1]] = op2_instr->operands[i];
neg[shuffle[i + 1]] = op2_vop3 ? op2_vop3->neg[i] : false;
abs[shuffle[i + 1]] = op2_vop3 ? op2_vop3->abs[i] : false;
if (op2_vop3 && op2_vop3->opsel & (1 << i))
if (op2_vop3 && op2_vop3->opsel[i])
*opsel |= 1 << shuffle[i + 1];
}
@ -3733,12 +3733,10 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
}
void
propagate_swizzles(VALU_instruction* instr, uint8_t opsel_lo, uint8_t opsel_hi)
propagate_swizzles(VALU_instruction* instr, bool opsel_lo, bool opsel_hi)
{
/* propagate swizzles which apply to a result down to the instruction's operands:
* result = a.xy + b.xx -> result.yx = a.yx + b.xx */
assert((opsel_lo & 1) == opsel_lo);
assert((opsel_hi & 1) == opsel_hi);
uint8_t tmp_lo = instr->opsel_lo;
uint8_t tmp_hi = instr->opsel_hi;
uint8_t neg_lo = instr->neg_lo;
@ -3761,13 +3759,13 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
/* apply clamp */
if (instr->opcode == aco_opcode::v_pk_mul_f16 && instr->operands[1].constantEquals(0x3C00) &&
vop3p->clamp && instr->operands[0].isTemp() && ctx.uses[instr->operands[0].tempId()] == 1 &&
!((vop3p->opsel_lo | vop3p->opsel_hi) & 2)) {
!vop3p->opsel_lo[1] && !vop3p->opsel_hi[1]) {
ssa_info& info = ctx.info[instr->operands[0].tempId()];
if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
VALU_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->valu();
candidate->clamp = true;
propagate_swizzles(candidate, vop3p->opsel_lo, vop3p->opsel_hi);
propagate_swizzles(candidate, vop3p->opsel_lo[0], vop3p->opsel_hi[0]);
instr->definitions[0].swapTemp(candidate->definitions[0]);
ctx.info[candidate->definitions[0].tempId()].instr = candidate;
ctx.uses[instr->definitions[0].tempId()]--;
@ -3788,7 +3786,7 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
VALU_instruction* fneg = &info.instr->valu();
if ((fneg->opsel_lo | fneg->opsel_hi) & 2)
if (fneg->opsel_lo[1] || fneg->opsel_hi[1])
continue;
Operand ops[3];
@ -3806,14 +3804,14 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
* if 0 - pick selection from fneg->lo
* if 1 - pick selection from fneg->hi
*/
bool opsel_lo = (vop3p->opsel_lo >> i) & 1;
bool opsel_hi = (vop3p->opsel_hi >> i) & 1;
bool opsel_lo = vop3p->opsel_lo[i];
bool opsel_hi = vop3p->opsel_hi[i];
bool neg_lo = fneg->neg_lo[0] ^ fneg->neg_lo[1];
bool neg_hi = fneg->neg_hi[0] ^ fneg->neg_hi[1];
vop3p->neg_lo[i] ^= opsel_lo ? neg_hi : neg_lo;
vop3p->neg_hi[i] ^= opsel_hi ? neg_hi : neg_lo;
vop3p->opsel_lo ^= ((opsel_lo ? ~fneg->opsel_hi : (unsigned)fneg->opsel_lo) & 1) << i;
vop3p->opsel_hi ^= ((opsel_hi ? ~fneg->opsel_hi : (unsigned)fneg->opsel_lo) & 1) << i;
vop3p->opsel_lo[i] ^= opsel_lo ? !fneg->opsel_hi[0] : fneg->opsel_lo[0];
vop3p->opsel_hi[i] ^= opsel_hi ? !fneg->opsel_hi[0] : fneg->opsel_lo[0];
if (--ctx.uses[fneg->definitions[0].tempId()])
ctx.uses[fneg->operands[0].tempId()]++;
@ -3828,7 +3826,7 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
Instruction* mul_instr = nullptr;
unsigned add_op_idx = 0;
uint8_t opsel_lo = 0, opsel_hi = 0;
bool opsel_lo = false, opsel_hi = false;
uint32_t uses = UINT32_MAX;
/* find the 'best' mul instruction to combine with the add */
@ -3855,8 +3853,8 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
mul_instr = info.instr;
add_op_idx = 1 - i;
opsel_lo = (vop3p->opsel_lo >> i) & 1;
opsel_hi = (vop3p->opsel_hi >> i) & 1;
opsel_lo = vop3p->opsel_lo[i];
opsel_hi = vop3p->opsel_hi[i];
uses = ctx.uses[instr->operands[i].tempId()];
}
@ -3888,8 +3886,8 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
fma->opsel_lo = mul->opsel_lo;
fma->opsel_hi = mul->opsel_hi;
propagate_swizzles(fma.get(), opsel_lo, opsel_hi);
fma->opsel_lo |= (vop3p->opsel_lo << (2 - add_op_idx)) & 0x4;
fma->opsel_hi |= (vop3p->opsel_hi << (2 - add_op_idx)) & 0x4;
fma->opsel_lo[2] = vop3p->opsel_lo[add_op_idx];
fma->opsel_hi[2] = vop3p->opsel_hi[add_op_idx];
fma->neg_lo[2] = vop3p->neg_lo[add_op_idx];
fma->neg_hi[2] = vop3p->neg_hi[add_op_idx];
fma->neg_lo[1] = fma->neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx];
@ -3927,7 +3925,7 @@ can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return false;
if (instr->isVOP3())
return !instr->valu().omod && !(instr->valu().opsel & 0x8);
return !instr->valu().omod && !instr->valu().opsel[3];
return instr->format == Format::VOP2;
}
@ -4046,9 +4044,9 @@ combine_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
instr->operands[i].setTemp(conv->operands[0].getTemp());
if (conv->definitions[0].isPrecise())
instr->definitions[0].setPrecise(true);
instr->valu().opsel_hi ^= 1u << i;
instr->valu().opsel_hi[i] ^= true;
if (conv->isSDWA() && conv->sdwa().sel[0].offset() == 2)
instr->valu().opsel_lo |= 1u << i;
instr->valu().opsel_lo[i] = true;
bool neg = conv->valu().neg[0];
bool abs = conv->valu().abs[0];
if (!instr->valu().abs[i]) {
@ -4221,9 +4219,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
(instr->opcode == aco_opcode::v_fma_mix_f32 ||
instr->opcode == aco_opcode::v_fma_mixlo_f16) &&
!instr->valu().neg_lo[0] &&
((instr->operands[0].constantEquals(0x3f800000) && (instr->valu().opsel_hi & 0x1) == 0) ||
(instr->operands[0].constantEquals(0x3C00) && (instr->valu().opsel_hi & 0x1) &&
!(instr->valu().opsel_lo & 0x1)));
((instr->operands[0].constantEquals(0x3f800000) && !instr->valu().opsel_hi[0]) ||
(instr->operands[0].constantEquals(0x3C00) && instr->valu().opsel_hi[0] &&
!instr->valu().opsel_lo[0]));
bool mad32 = instr->opcode == aco_opcode::v_add_f32 || instr->opcode == aco_opcode::v_sub_f32 ||
instr->opcode == aco_opcode::v_subrev_f32;
bool mad16 = instr->opcode == aco_opcode::v_add_f16 || instr->opcode == aco_opcode::v_sub_f16 ||
@ -4332,8 +4330,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
VALU_instruction& valu = instr->valu();
neg[2] = valu.neg[add_op_idx];
abs[2] = valu.abs[add_op_idx];
opsel_lo |= valu.opsel_lo & (1 << add_op_idx) ? 0x4 : 0x0;
opsel_hi |= valu.opsel_hi & (1 << add_op_idx) ? 0x4 : 0x0;
opsel_lo |= valu.opsel_lo[add_op_idx] ? 0x4 : 0x0;
opsel_hi |= valu.opsel_hi[add_op_idx] ? 0x4 : 0x0;
omod = valu.omod;
clamp = valu.clamp;
/* abs of the multiplication result */
@ -5068,7 +5066,7 @@ unswizzle_vop3p_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
for (unsigned i = 0; i < instr->operands.size(); i++) {
if (!instr->operands[i].isLiteral())
continue;
unsigned new_swizzle = ((vop3p.opsel_lo >> i) & 0x1) | (((vop3p.opsel_hi >> i) & 0x1) << 1);
unsigned new_swizzle = vop3p.opsel_lo[i] | (vop3p.opsel_hi[i] << 1);
if (literal_swizzle != ~0u && new_swizzle != literal_swizzle)
return; /* Literal swizzles conflict. */
literal_swizzle = new_swizzle;
@ -5084,8 +5082,8 @@ unswizzle_vop3p_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
literal = (literal >> (16 * (literal_swizzle & 0x1)) & 0xffff) |
(literal >> (8 * (literal_swizzle & 0x2)) << 16);
instr->operands[i] = Operand::literal32(literal);
vop3p.opsel_lo &= ~(1 << i);
vop3p.opsel_hi |= (1 << i);
vop3p.opsel_lo[i] = false;
vop3p.opsel_hi[i] = true;
}
}
@ -5113,8 +5111,8 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
u_foreach_bit (i, info->fp16_mask) {
float value = uif(ctx.info[instr->operands[i].tempId()].val);
literal |= _mesa_float_to_half(value) << (second * 16);
instr->valu().opsel_lo |= second << i;
instr->valu().opsel_hi |= 1 << i;
instr->valu().opsel_lo[i] = second;
instr->valu().opsel_hi[i] = true;
second = true;
}

View File

@ -746,15 +746,15 @@ aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* ou
for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) {
abs[i] = valu.abs[i];
neg[i] = valu.neg[i];
opsel[i] = valu.opsel & (1 << i);
opsel[i] = valu.opsel[i];
}
} else if (instr->isVOP3P() && is_mad_mix) {
const VALU_instruction& vop3p = instr->valu();
for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) {
abs[i] = vop3p.neg_hi[i];
neg[i] = vop3p.neg_lo[i];
f2f32[i] = vop3p.opsel_hi & (1 << i);
opsel[i] = f2f32[i] && (vop3p.opsel_lo & (1 << i));
f2f32[i] = vop3p.opsel_hi[i];
opsel[i] = f2f32[i] && vop3p.opsel_lo[i];
}
}
for (unsigned i = 0; i < num_operands; ++i) {
@ -779,9 +779,8 @@ aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* ou
if (instr->isVOP3P() && !is_mad_mix) {
const VALU_instruction& vop3 = instr->valu();
if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {
fprintf(output, ".%c%c", vop3.opsel_lo & (1 << i) ? 'y' : 'x',
vop3.opsel_hi & (1 << i) ? 'y' : 'x');
if (vop3.opsel_lo[i] || !vop3.opsel_hi[i]) {
fprintf(output, ".%c%c", vop3.opsel_lo[i] ? 'y' : 'x', vop3.opsel_hi[i] ? 'y' : 'x');
}
if (vop3.neg_lo[i] && vop3.neg_hi[i])
fprintf(output, "*[-1,-1]");

View File

@ -541,20 +541,15 @@ add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, uns
assert(rc.bytes() <= 2);
if (instr->isVALU()) {
/* check if we can use opsel */
if (instr->format == Format::VOP3) {
if (instr->format == Format::VOP3 || instr->isVINTERP_INREG()) {
assert(byte == 2);
instr->valu().opsel |= 1 << idx;
return;
}
if (instr->isVINTERP_INREG()) {
assert(byte == 2);
instr->vinterp_inreg().opsel |= 1 << idx;
instr->valu().opsel[idx] = true;
return;
}
if (instr->isVOP3P()) {
assert(byte == 2 && !(instr->valu().opsel_lo & (1 << idx)));
instr->valu().opsel_lo |= 1 << idx;
instr->valu().opsel_hi |= 1 << idx;
assert(byte == 2 && !instr->valu().opsel_lo[idx]);
instr->valu().opsel_lo[idx] = true;
instr->valu().opsel_hi[idx] = true;
return;
}
if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) {
@ -692,15 +687,10 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r
return;
/* check if we can use opsel */
if (instr->format == Format::VOP3) {
if (instr->format == Format::VOP3 || instr->isVINTERP_INREG()) {
assert(reg.byte() == 2);
assert(can_use_opsel(gfx_level, instr->opcode, -1));
instr->valu().opsel |= (1 << 3); /* dst in high half */
return;
} else if (instr->isVINTERP_INREG()) {
assert(reg.byte() == 2);
assert(can_use_opsel(gfx_level, instr->opcode, -1));
instr->vinterp_inreg().opsel |= (1 << 3); /* dst in high half */
instr->valu().opsel[3] = true; /* dst in high half */
return;
}

View File

@ -232,11 +232,10 @@ validate_ir(Program* program)
if (i >= instr->operands.size() ||
(instr->operands[i].hasRegClass() &&
instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
check(!vop3.opsel[i], "Unexpected opsel for operand", instr.get());
}
if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
instr.get());
check(!vop3.opsel[3], "Unexpected opsel for sub-dword definition", instr.get());
} else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
instr->opcode == aco_opcode::v_fma_mix_f32) {
@ -248,7 +247,7 @@ validate_ir(Program* program)
for (unsigned i = 0; i < instr->operands.size(); i++) {
if (instr->operands[i].hasRegClass() &&
instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0,
check(!vop3p.opsel_lo[i] && !vop3p.opsel_hi[i],
"Unexpected opsel for subdword operand", instr.get());
}
check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
@ -866,8 +865,8 @@ validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& i
bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
instr->opcode == aco_opcode::v_fma_mix_f32;
return ((instr->valu().opsel_lo >> index) & 1) == (byte >> 1) &&
((instr->valu().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
return instr->valu().opsel_lo[index] == (byte >> 1) &&
instr->valu().opsel_hi[index] == (fma_mix || (byte >> 1));
}
if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
return true;