r600/sfn: Use clause local registers in RA

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24480>
This commit is contained in:
Gert Wollny 2023-08-04 09:23:35 +02:00 committed by Marge Bot
parent ebf45153d8
commit cfbd1fd413
5 changed files with 103 additions and 6 deletions

View File

@ -2029,14 +2029,14 @@ static int print_dst(struct r600_bytecode_alu *alu)
int o = 0;
unsigned sel = alu->dst.sel;
char reg_char = 'R';
if (sel > 128 - 4) { /* clause temporary gpr */
if (sel >= 128 - 4) { /* clause temporary gpr */
sel -= 128 - 4;
reg_char = 'T';
}
if (alu_writes(alu)) {
o += fprintf(stderr, "%c", reg_char);
o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0);
o += print_sel(sel, alu->dst.rel, alu->index_mode, 0);
} else {
o += fprintf(stderr, "__");
}

View File

@ -215,6 +215,7 @@ struct r600_bytecode_cf {
unsigned isa[2];
unsigned nlds_read;
unsigned nqueue_read;
unsigned clause_local_written;
};
#define FC_NONE 0

View File

@ -34,6 +34,10 @@
namespace r600 {
/* First GPR select index that is handled as an ALU clause local register. */
static const int g_clause_local_start = 124;
/* Exclusive upper bound of the clause local range: code checks
 * g_clause_local_start <= sel < g_clause_local_end. */
static const int g_clause_local_end = 126;
/* ALU op2 instructions 17:7 top three bits always zero. */
enum EAluOp {
op2_add = 0,

View File

@ -426,6 +426,11 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
}
}
if (alu.dst.sel >= g_clause_local_start && alu.dst.sel < g_clause_local_end) {
int clidx = 4 * (alu.dst.sel - g_clause_local_start) + alu.dst.chan;
m_bc->cf_last->clause_local_written |= 1 << clidx;
}
if (ai.opcode() == op1_set_cf_idx0) {
m_bc->index_loaded[0] = 1;
m_bc->index_reg[0] = -1;
@ -1211,9 +1216,9 @@ AssamblerVisitor::emit_loop_cont()
bool
AssamblerVisitor::copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write)
{
if (write && d.sel() > 124) {
R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try "
"using %d\n",
if (write && d.sel() > g_clause_local_end) {
R600_ERR("shader_from_nir: Don't support more then 124 GPRs + 2 claus "
"local, but try using %d\n",
d.sel());
m_result = false;
return false;
@ -1281,6 +1286,13 @@ AssamblerVisitor::copy_src(r600_bytecode_alu_src& src, const VirtualValue& s)
src.sel = s.sel();
src.chan = s.chan();
if (s.sel() >= g_clause_local_start && s.sel() < g_clause_local_end ) {
assert(m_bc->cf_last);
int clidx = 4 * (s.sel() - g_clause_local_start) + s.chan();
/* Ensure that the clause local register was already written */
assert(m_bc->cf_last->clause_local_written & (1 << clidx));
}
s.accept(visitor);
return visitor.m_buffer_offset;
}
@ -1294,7 +1306,7 @@ EncodeSourceVisitor::EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode
/* Sanity check for register sources: the select index must be below
 * g_clause_local_end, i.e. either one of the 124 normal GPRs or one of the
 * clause local registers in [g_clause_local_start, g_clause_local_end).
 * The former "<= 124" check is dropped because it would reject the second
 * clause local register that the range check explicitly allows. */
void
EncodeSourceVisitor::visit(const Register& value)
{
   assert(value.sel() < g_clause_local_end &&
          "Only have 124 registers + 2 clause local");
}
void

View File

@ -26,6 +26,7 @@
#include "sfn_ra.h"
#include "sfn_alu_defines.h"
#include "sfn_debug.h"
#include <cassert>
@ -229,6 +230,83 @@ scalar_allocation(LiveRangeMap& lrm, const Interference& interference)
return true;
}
/* Candidate for clause local allocation: a live range together with the
 * value used to order candidates in the priority queue. Field order matters
 * because instances are created with aggregate initialization. */
struct AluRegister {
/* Ordering key; filled with (m_end - m_start) of the live range. */
int lifetime;
/* The live range entry this candidate refers to (not owned). */
LiveRangeEntry *lre;
};
/* Deliberately inverted ordering: an entry compares "less" when its lifetime
 * is larger, so a std::priority_queue (a max-heap with respect to operator<)
 * yields the entry with the smallest lifetime first. */
static inline bool operator < (const AluRegister& lhs, const AluRegister& rhs)
{
   return rhs.lifetime < lhs.lifetime;
}
/* Queue of clause local candidates; because operator< on AluRegister is
 * inverted, top() is the candidate with the smallest lifetime. */
using AluClauseRegisters = std::priority_queue<AluRegister>;
/* Try to place scalar live ranges into the clause local registers.
 *
 * For each of the four components, every live range that
 *   - has no color yet (m_color == -1),
 *   - has a valid range (not m_start == m_end == -1),
 *   - is flagged as m_alu_clause_local, and
 *   - spans more than one position (m_end - m_start > 1)
 * is queued as a candidate. Candidates are then taken from the queue with
 * the smallest lifetime first and get the first color in
 * [g_clause_local_start, g_clause_local_end) that is not used by any
 * interfering live range of the same component.
 *
 * Live ranges that do not get a clause local register keep m_color == -1 and
 * are left for a later allocation pass.
 *
 * \param lrm           live range map whose entries are colored in place
 * \param interference  interference information, queried per component and
 *                      register index
 */
static void
scalar_clause_local_allocation (LiveRangeMap& lrm, const Interference& interference)
{
   for (int comp = 0; comp < 4; ++comp) {
      AluClauseRegisters clause_reg;
      auto& live_ranges = lrm.component(comp);

      /* Collect the candidates of this component. */
      for (auto& r : live_ranges) {

         sfn_log << SfnLog::merge << "LR: " << *r.m_register
                 << "[ " << r.m_start << ", " << r.m_end
                 << " ], AC: " << r.m_alu_clause_local
                 << " Color; " << r.m_color << "\n";

         if (r.m_color != -1)
            continue;

         if (r.m_start == -1 &&
             r.m_end == -1)
            continue;

         if (!r.m_alu_clause_local)
            continue;

         int len = r.m_end - r.m_start;
         if (len > 1) {
            clause_reg.push({len, &r});
            sfn_log << SfnLog::merge << "Consider " << *r.m_register
                    << " for clause local\n";
         }
      }

      while (!clause_reg.empty()) {
         /* Take a copy of the pointer: top() returns a reference into the
          * queue's storage, and pop() invalidates it. Binding a reference
          * here would leave it dangling (in practice it would alias the
          * next queue entry and color the wrong live range). */
         auto *r = clause_reg.top().lre;
         clause_reg.pop();

         sfn_log << SfnLog::merge << "Color " << *r->m_register << "\n";

         auto& adjecency = interference.row(comp, r->m_register->index());

         /* Pick the first clause local color not taken by a neighbour. */
         int color = g_clause_local_start;

         while (color < g_clause_local_end) {
            bool color_in_use = false;
            for (auto adj : adjecency) {
               if (live_ranges[adj].m_color == color) {
                  color_in_use = true;
                  break;
               }
            }

            if (color_in_use) {
               ++color;
               continue;
            }

            r->m_color = color;
            break;
         }

         /* No free clause local register for this candidate: stop handling
          * the remaining candidates of this component as well. */
         if (color == g_clause_local_end)
            break;
      }
   }
}
bool
register_allocation(LiveRangeMap& lrm)
{
@ -289,6 +367,8 @@ register_allocation(LiveRangeMap& lrm)
if (!group_allocation(lrm, interference, groups_sorted))
return false;
scalar_clause_local_allocation(lrm, interference);
if (!scalar_allocation(lrm, interference))
return false;