mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-27 04:04:23 +08:00
aco/insert_NOPs: implement vector-based RegCounterMap as replacement for VGPRCounterMap
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32191>
This commit is contained in:
parent
b44faa22ab
commit
fb5e5adfb3
@ -164,6 +164,73 @@ struct NOP_ctx_gfx10 {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <int Max> struct RegCounterMap {
|
||||||
|
void inc() { base++; }
|
||||||
|
void set(PhysReg reg) { update(reg, 0); }
|
||||||
|
|
||||||
|
uint8_t get(PhysReg reg)
|
||||||
|
{
|
||||||
|
if (present.test(reg.reg() & 0x7F)) {
|
||||||
|
for (entry& e : list) {
|
||||||
|
if (e.reg == reg.reg())
|
||||||
|
return MIN2(base - e.val, Max);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Max;
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset()
|
||||||
|
{
|
||||||
|
present.reset();
|
||||||
|
list.clear();
|
||||||
|
base = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void join_min(const RegCounterMap& other)
|
||||||
|
{
|
||||||
|
for (const entry& e : other.list) {
|
||||||
|
int idx = other.base - e.val;
|
||||||
|
if (idx >= Max)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
update(e.reg, idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void update(uint16_t reg, int idx)
|
||||||
|
{
|
||||||
|
int16_t val = base - idx;
|
||||||
|
for (entry& e : list) {
|
||||||
|
if (e.reg == reg) {
|
||||||
|
e.val = MAX2(e.val, val);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
list.push_back(entry{reg, val});
|
||||||
|
present.set(reg & 0x7F);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const RegCounterMap& other) const
|
||||||
|
{
|
||||||
|
/* Two maps with different bases could also be equal, but for our use case,
|
||||||
|
* i.e. checking for changes at loop headers, this is sufficient since we
|
||||||
|
* always join the predecessors into an empty map with base=0.
|
||||||
|
*/
|
||||||
|
return base == other.base && list == other.list;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct entry {
|
||||||
|
uint16_t reg;
|
||||||
|
int16_t val;
|
||||||
|
bool operator==(const entry& other) const { return reg == other.reg && val == other.val; }
|
||||||
|
};
|
||||||
|
|
||||||
|
std::bitset<128> present;
|
||||||
|
std::vector<entry> list;
|
||||||
|
int base = 0;
|
||||||
|
};
|
||||||
|
|
||||||
template <int Start, int Size, int Max> struct CounterMap {
|
template <int Start, int Size, int Max> struct CounterMap {
|
||||||
public:
|
public:
|
||||||
int base = 0;
|
int base = 0;
|
||||||
@ -263,8 +330,8 @@ struct NOP_ctx_gfx11 {
|
|||||||
std::bitset<256> vgpr_used_by_ds;
|
std::bitset<256> vgpr_used_by_ds;
|
||||||
|
|
||||||
/* VALUTransUseHazard */
|
/* VALUTransUseHazard */
|
||||||
VGPRCounterMap<15> valu_since_wr_by_trans;
|
RegCounterMap<6> valu_since_wr_by_trans;
|
||||||
VGPRCounterMap<2> trans_since_wr_by_trans;
|
RegCounterMap<2> trans_since_wr_by_trans;
|
||||||
|
|
||||||
/* VALUMaskWriteHazard */
|
/* VALUMaskWriteHazard */
|
||||||
std::bitset<128> sgpr_read_by_valu_as_lanemask;
|
std::bitset<128> sgpr_read_by_valu_as_lanemask;
|
||||||
@ -1449,8 +1516,9 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||||||
if (op.physReg().reg() < 256)
|
if (op.physReg().reg() < 256)
|
||||||
continue;
|
continue;
|
||||||
for (unsigned i = 0; i < op.size(); i++) {
|
for (unsigned i = 0; i < op.size(); i++) {
|
||||||
num_valu = std::min(num_valu, ctx.valu_since_wr_by_trans.get(op.physReg(), i));
|
PhysReg reg = op.physReg().advance(i * 4);
|
||||||
num_trans = std::min(num_trans, ctx.trans_since_wr_by_trans.get(op.physReg(), i));
|
num_valu = std::min(num_valu, ctx.valu_since_wr_by_trans.get(reg));
|
||||||
|
num_trans = std::min(num_trans, ctx.trans_since_wr_by_trans.get(reg));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (num_trans <= 1 && num_valu <= 5) {
|
if (num_trans <= 1 && num_valu <= 5) {
|
||||||
@ -1500,8 +1568,11 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||||||
|
|
||||||
if (is_trans) {
|
if (is_trans) {
|
||||||
for (Definition& def : instr->definitions) {
|
for (Definition& def : instr->definitions) {
|
||||||
ctx.valu_since_wr_by_trans.set(def.physReg(), def.bytes());
|
for (unsigned i = 0; i < def.size(); i++) {
|
||||||
ctx.trans_since_wr_by_trans.set(def.physReg(), def.bytes());
|
PhysReg reg = def.physReg().advance(i * 4);
|
||||||
|
ctx.valu_since_wr_by_trans.set(reg);
|
||||||
|
ctx.trans_since_wr_by_trans.set(reg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user