ir3/ra: Add specialized shared register RA/spilling

There are two problems with shared register allocation at the moment:

1. We weren't modelling physical edges correctly, and once we do, the
   current hack in RA for handling them won't work, which means
   live-range splitting doesn't work. I've tried various strategies, but
   none of them fixes this.
2. Spilling of shared registers to non-shared registers isn't
   implemented.

Spilling shared regs is significantly simpler than spilling non-shared
regs, because (1) spilling and unspilling are much cheaper, just a
single mov, and (2) we can swap "stack slots" (actually non-shared
regs), so all the complexity of parallel copy handling isn't necessary.
This makes it much easier to integrate RA and spilling while still using
the tree-scan framework, so that we can spill instead of splitting live
ranges. The other issue, phi nodes with physical edges, we can handle by
spilling those phis earlier. For this to work, we need to accurately
insert physical edges based on divergence analysis, which later commits
will accomplish; otherwise every phi node would involve physical edges.
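
To illustrate point (2), here is a minimal self-contained toy model
(plain C, not ir3 code; all names are invented): because the "stack
slots" are themselves registers, resolving a placement conflict is just
updating the allocator's bookkeeping of which slot a value owns, with no
copy cycles to break.

#include <stdio.h>

/* Toy model of shared-register spill "stack slots": each spilled value owns
 * one non-shared register index. Swapping two values' slots is pure
 * bookkeeping inside the allocator; no parallel-copy sequencing is needed.
 */
struct spilled_val {
   const char *name;
   int slot; /* index of the non-shared reg acting as the spill slot */
};

static void
swap_slots(struct spilled_val *a, struct spilled_val *b)
{
   int tmp = a->slot; /* no movs are emitted, only the mapping changes */
   a->slot = b->slot;
   b->slot = tmp;
}

int main(void)
{
   struct spilled_val x = {"x", 0}, y = {"y", 1};
   swap_slots(&x, &y);
   printf("%s -> slot r%d, %s -> slot r%d\n", x.name, x.slot, y.name, y.slot);
   return 0;
}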

This commit adds a shared register allocation pass which is a severely
cut-down version of RA and spilling. Everything to do with live-range
splitting is cut from RA, everything to do with parallel copy handling
is cut from spilling, and we simply spill as soon as we encounter a case
where it's necessary. This could be improved, especially the spilling
strategy, but for now it keeps the pass simple and cuts down on code
duplication. Unfortunately there's still some boilerplate shared with
regular RA, which seems unavoidable.
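
As a rough sketch of that "spill as soon as it's necessary" policy (a
self-contained toy in C, not the actual pass; all names are invented):
when a shared definition doesn't fit, one currently-live value is
evicted to a non-shared slot on the spot, instead of having its live
range split.

#include <stdio.h>

#define NUM_SHARED 4

/* shared_occupant[r] holds the id of the toy value living in shared reg r,
 * or -1 if that register is free.
 */
static int shared_occupant[NUM_SHARED] = {-1, -1, -1, -1};

static int alloc_shared(int val)
{
   for (int r = 0; r < NUM_SHARED; r++) {
      if (shared_occupant[r] < 0) {
         shared_occupant[r] = val;
         return r;
      }
   }
   /* No free register: spill immediately. In the real pass this emits a
    * single mov to a non-shared register; here we just evict r0's value.
    */
   printf("spill value %d out of shared r0\n", shared_occupant[0]);
   shared_occupant[0] = val;
   return 0;
}

int main(void)
{
   for (int v = 0; v < 6; v++)
      printf("value %d -> shared r%d\n", v, alloc_shared(v));
   return 0;
}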

The new RA requires us to redo liveness information, which is fairly
expensive, so we keep the old RA's ability to handle shared registers
and only use the new RA when it may be required: either something
potentially requiring live-range splitting (shared vectors), or shared
register pressure that exceeds the limit.
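
Concretely, the check in ir3_ra() boils down to the following (excerpted
from the ir3_ra() hunk later in this diff); note that liveness and
pressure are recomputed afterwards because the shared pass introduces
new values:

if (max_pressure.shared > limit_pressure.shared || has_shared_vectors) {
   ir3_ra_shared(v, live);

   /* Recalculate liveness and register pressure now that additional values
    * have been added.
    */
   ralloc_free(live);
   live = ir3_calc_liveness(ctx, v->ir);
   ir3_calc_pressure(v, live, &max_pressure);

   ir3_debug_print(v->ir, "AFTER: shared register allocation");
}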

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22072>
Connor Abbott 2023-02-09 13:06:30 +01:00 committed by Marge Bot
parent f977501a7c
commit fa22b0901a
9 changed files with 1687 additions and 75 deletions

View File

@ -335,7 +335,14 @@ typedef enum ir3_instruction_flags {
* before register assignment is done:
*/
IR3_INSTR_MARK = BIT(15),
IR3_INSTR_UNUSED = BIT(16),
/* Used by shared register allocation when creating spill/reload instructions
* to inform validation that this is created by RA. This also may be set on
* an instruction where a spill has been folded into it.
*/
IR3_INSTR_SHARED_SPILL = IR3_INSTR_MARK,
IR3_INSTR_UNUSED = BIT(17),
} ir3_instruction_flags;
struct ir3_instruction {

View File

@ -377,6 +377,8 @@ static void
aggressive_coalesce_split(struct ir3_liveness *live,
struct ir3_instruction *split)
{
if (!(split->dsts[0]->flags & IR3_REG_SSA))
return;
try_merge_defs(live, split->srcs[0]->def, split->dsts[0],
split->split.off * reg_elem_size(split->dsts[0]));
}
@ -409,6 +411,10 @@ create_parallel_copy(struct ir3_block *block)
if (phi->opc != OPC_META_PHI)
break;
/* Avoid phis we've already colored */
if (!(phi->dsts[0]->flags & IR3_REG_SSA))
continue;
/* Avoid undef */
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
@ -430,6 +436,8 @@ create_parallel_copy(struct ir3_block *block)
foreach_instr (phi, &succ->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
if (!(phi->dsts[0]->flags & IR3_REG_SSA))
continue;
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
continue;
@ -456,6 +464,8 @@ create_parallel_copy(struct ir3_block *block)
foreach_instr (phi, &succ->instr_list) {
if (phi->opc != OPC_META_PHI)
break;
if (!(phi->dsts[0]->flags & IR3_REG_SSA))
continue;
if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) &&
!phi->srcs[pred_idx]->def)
continue;

View File

@ -193,6 +193,8 @@ void
ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
struct ir3_reg_interval *interval)
{
assert(interval->inserted);
if (interval->parent) {
rb_tree_remove(&interval->parent->children, &interval->node);
} else {
@ -684,6 +686,8 @@ ra_pop_interval(struct ra_ctx *ctx, struct ra_file *file,
struct ra_interval *interval)
{
assert(!interval->interval.parent);
/* shared live splitting is not allowed! */
assert(!(interval->interval.reg->flags & IR3_REG_SHARED));
/* Check if we've already moved this reg before */
unsigned pcopy_index;
@ -1665,6 +1669,9 @@ handle_split(struct ra_ctx *ctx, struct ir3_instruction *instr)
struct ir3_register *dst = instr->dsts[0];
struct ir3_register *src = instr->srcs[0];
if (!(dst->flags & IR3_REG_SSA))
return;
if (dst->merge_set == NULL || src->def->merge_set != dst->merge_set) {
handle_normal_instr(ctx, instr);
return;
@ -1683,6 +1690,9 @@ handle_split(struct ra_ctx *ctx, struct ir3_instruction *instr)
static void
handle_collect(struct ra_ctx *ctx, struct ir3_instruction *instr)
{
if (!(instr->dsts[0]->flags & IR3_REG_SSA))
return;
struct ir3_merge_set *dst_set = instr->dsts[0]->merge_set;
unsigned dst_offset = instr->dsts[0]->merge_set_offset;
@ -1798,7 +1808,8 @@ handle_pcopy(struct ra_ctx *ctx, struct ir3_instruction *instr)
static void
handle_precolored_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
{
if (instr->dsts[0]->num == INVALID_REG)
if (instr->dsts[0]->num == INVALID_REG ||
!(instr->dsts[0]->flags & IR3_REG_SSA))
return;
struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
@ -1829,6 +1840,9 @@ handle_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
static void
assign_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
{
if (!(instr->dsts[0]->flags & IR3_REG_SSA))
return;
struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
@ -1973,6 +1987,9 @@ handle_live_out(struct ra_ctx *ctx, struct ir3_register *def)
static void
handle_phi(struct ra_ctx *ctx, struct ir3_register *def)
{
if (!(def->flags & IR3_REG_SSA))
return;
struct ra_file *file = ra_get_file(ctx, def);
struct ra_interval *interval = &ctx->intervals[def->name];
@ -1999,6 +2016,9 @@ handle_phi(struct ra_ctx *ctx, struct ir3_register *def)
static void
assign_phi(struct ra_ctx *ctx, struct ir3_instruction *phi)
{
if (!(phi->dsts[0]->flags & IR3_REG_SSA))
return;
struct ra_file *file = ra_get_file(ctx, phi->dsts[0]);
struct ra_interval *interval = &ctx->intervals[phi->dsts[0]->name];
assert(!interval->interval.parent);
@ -2085,15 +2105,8 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
{
physreg_t physreg = ra_interval_get_physreg(interval);
bool shared = interval->interval.reg->flags & IR3_REG_SHARED;
struct ir3_block **predecessors =
shared ? ctx->block->physical_predecessors : ctx->block->predecessors;
unsigned predecessors_count = shared
? ctx->block->physical_predecessors_count
: ctx->block->predecessors_count;
for (unsigned i = 0; i < predecessors_count; i++) {
struct ir3_block *pred = predecessors[i];
for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
struct ir3_block *pred = ctx->block->predecessors[i];
struct ra_block_state *pred_state = &ctx->blocks[pred->index];
if (!pred_state->visited)
@ -2101,28 +2114,8 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg);
if (pred_reg != physreg) {
assert(!(interval->interval.reg->flags & IR3_REG_SHARED));
insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg);
/* This is a bit tricky, but when visiting the destination of a
* physical-only edge, we have two predecessors (the if and the
* header block) and both have multiple successors. We pick the
* register for all live-ins from the normal edge, which should
* guarantee that there's no need for shuffling things around in
* the normal predecessor as long as there are no phi nodes, but
* we still may need to insert fixup code in the physical
* predecessor (i.e. the last block of the if) and that has
* another successor (the block after the if) so we need to update
* the renames state for when we process the other successor. This
* crucially depends on the other successor getting processed
* after this.
*
* For normal (non-physical) edges we disallow critical edges so
* that hacks like this aren't necessary.
*/
if (!pred_state->renames)
pred_state->renames = _mesa_pointer_hash_table_create(ctx);
_mesa_hash_table_insert(pred_state->renames, interval->interval.reg,
(void *)(uintptr_t)physreg);
}
}
}
@ -2561,6 +2554,18 @@ ir3_ra(struct ir3_shader_variant *v)
ir3_merge_regs(live, v->ir);
bool has_shared_vectors = false;
foreach_block (block, &v->ir->block_list) {
foreach_instr (instr, &block->instr_list) {
ra_foreach_dst (dst, instr) {
if ((dst->flags & IR3_REG_SHARED) && reg_elems(dst) > 1) {
has_shared_vectors = true;
break;
}
}
}
}
struct ir3_pressure max_pressure;
ir3_calc_pressure(v, live, &max_pressure);
d("max pressure:");
@ -2590,10 +2595,17 @@ ir3_ra(struct ir3_shader_variant *v)
if (ir3_shader_debug & IR3_DBG_SPILLALL)
calc_min_limit_pressure(v, live, &limit_pressure);
if (max_pressure.shared > limit_pressure.shared) {
/* TODO shared reg -> normal reg spilling */
d("shared max pressure exceeded!");
goto fail;
if (max_pressure.shared > limit_pressure.shared || has_shared_vectors) {
ir3_ra_shared(v, live);
/* Recalculate liveness and register pressure now that additional values
* have been added.
*/
ralloc_free(live);
live = ir3_calc_liveness(ctx, v->ir);
ir3_calc_pressure(v, live, &max_pressure);
ir3_debug_print(v->ir, "AFTER: shared register allocation");
}
bool spilled = false;
@ -2629,7 +2641,7 @@ ir3_ra(struct ir3_shader_variant *v)
foreach_block (block, &v->ir->block_list)
handle_block(ctx, block);
ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count);
ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count, false);
/* Strip array-ness and SSA-ness at the end, because various helpers still
* need to work even on definitions that have already been assigned. For

View File

@ -168,8 +168,10 @@ bool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
bool ir3_lower_spill(struct ir3 *ir);
void ir3_ra_shared(struct ir3_shader_variant *v, struct ir3_liveness *live);
void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
unsigned half_size, unsigned block_count);
unsigned half_size, unsigned block_count, bool shared_ra);
void ir3_lower_copies(struct ir3_shader_variant *v);

View File

@ -92,13 +92,25 @@ struct reaching_state {
struct ra_val_ctx {
struct ir3_instruction *current_instr;
/* The current state of the dataflow analysis for the instruction we're
* processing.
*/
struct reaching_state reaching;
/* The state at the end of each basic block. */
struct reaching_state *block_reaching;
unsigned block_count;
/* When validating shared RA, we have to take spill/reload instructions into
* account. This saves an array of reg_state for the source of each spill
* instruction, to be restored at the corresponding reload(s).
*/
struct hash_table *spill_reaching;
unsigned full_size, half_size;
bool merged_regs;
bool shared_ra;
bool failed;
};
@ -130,6 +142,28 @@ get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
return ctx->half_size;
}
static struct reg_state *
get_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
{
struct hash_entry *entry = _mesa_hash_table_search(ctx->spill_reaching, dst);
if (entry)
return entry->data;
else
return NULL;
}
static struct reg_state *
get_or_create_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
{
struct reg_state *state = get_spill_state(ctx, dst);
if (state)
return state;
state = rzalloc_array(ctx, struct reg_state, reg_size(dst));
_mesa_hash_table_insert(ctx->spill_reaching, dst, state);
return state;
}
/* Validate simple things, like the registers being in-bounds. This way we
* don't have to worry about out-of-bounds accesses later.
*/
@ -139,6 +173,8 @@ validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
ctx->current_instr = instr;
ra_foreach_dst (dst, instr) {
if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED))
continue;
unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
if (dst->tied)
@ -146,6 +182,8 @@ validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
}
ra_foreach_src (src, instr) {
if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
continue;
unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
validate_assert(ctx, src_max <= get_file_size(ctx, src));
}
@ -219,6 +257,24 @@ static void
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
ra_foreach_dst (dst, instr) {
/* Process destinations from scalar ALU instructions that were demoted to
* normal ALU instructions. For these we must treat the instruction as a
* spill of itself and set the propagate state to itself. See
* try_demote_instructions().
*/
if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
if (instr->flags & IR3_INSTR_SHARED_SPILL) {
struct reg_state *state = get_or_create_spill_state(ctx, dst);
for (unsigned i = 0; i < reg_size(dst); i++) {
state[i] = (struct reg_state){
.def = dst,
.offset = i,
};
}
}
continue;
}
struct file_state *file = ra_val_get_file(ctx, dst);
physreg_t physreg = ra_reg_get_physreg(dst);
for (unsigned i = 0; i < reg_size(dst); i++) {
@ -239,6 +295,16 @@ propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
physreg_t src_physreg = ra_reg_get_physreg(src);
struct file_state *file = ra_val_get_file(ctx, dst);
if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
struct reg_state *src_state = get_spill_state(ctx, src->def);
if (src_state) {
struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
memcpy(dst_state, &src_state[split->split.off * reg_elem_size(src)],
reg_size(dst) * sizeof(struct reg_state));
}
return;
}
unsigned offset = split->split.off * reg_elem_size(src);
for (unsigned i = 0; i < reg_elem_size(src); i++) {
file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
@ -249,30 +315,50 @@ static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
struct ir3_register *dst = collect->dsts[0];
physreg_t dst_physreg = ra_reg_get_physreg(dst);
struct file_state *file = ra_val_get_file(ctx, dst);
unsigned size = reg_size(dst);
struct reg_state srcs[size];
for (unsigned i = 0; i < collect->srcs_count; i++) {
struct ir3_register *src = collect->srcs[i];
unsigned dst_offset = i * reg_elem_size(dst);
for (unsigned j = 0; j < reg_elem_size(dst); j++) {
if (!ra_reg_is_src(src)) {
srcs[dst_offset + j] = (struct reg_state){
.def = dst,
.offset = dst_offset + j,
};
} else {
physreg_t src_physreg = ra_reg_get_physreg(src);
srcs[dst_offset + j] = file->regs[src_physreg + j];
if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
struct reg_state *dst_state = NULL;
for (unsigned i = 0; i < collect->srcs_count; i++) {
struct ir3_register *src = collect->srcs[i];
unsigned dst_offset = i * reg_elem_size(dst);
if (ra_reg_is_src(src)) {
struct reg_state *src_state = get_spill_state(ctx, src->def);
if (src_state) {
if (!dst_state)
dst_state = get_or_create_spill_state(ctx, dst);
memcpy(&dst_state[dst_offset], src_state,
reg_size(src) * sizeof(struct reg_state));
}
}
}
}
} else {
struct file_state *file = ra_val_get_file(ctx, dst);
physreg_t dst_physreg = ra_reg_get_physreg(dst);
struct reg_state srcs[size];
for (unsigned i = 0; i < size; i++)
file->regs[dst_physreg + i] = srcs[i];
for (unsigned i = 0; i < collect->srcs_count; i++) {
struct ir3_register *src = collect->srcs[i];
unsigned dst_offset = i * reg_elem_size(dst);
for (unsigned j = 0; j < reg_elem_size(dst); j++) {
if (!ra_reg_is_src(src)) {
srcs[dst_offset + j] = (struct reg_state){
.def = dst,
.offset = dst_offset + j,
};
} else {
physreg_t src_physreg = ra_reg_get_physreg(src);
srcs[dst_offset + j] = file->regs[src_physreg + j];
}
}
}
for (unsigned i = 0; i < size; i++)
file->regs[dst_physreg + i] = srcs[i];
}
}
static void
@ -291,15 +377,25 @@ propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
struct ir3_register *src = pcopy->srcs[i];
struct file_state *file = ra_val_get_file(ctx, dst);
for (unsigned j = 0; j < reg_size(dst); j++) {
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
srcs[offset + j] = (struct reg_state){
.def = dst,
.offset = j,
};
} else {
physreg_t src_physreg = ra_reg_get_physreg(src);
srcs[offset + j] = file->regs[src_physreg + j];
if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
if (ra_reg_is_src(src)) {
struct reg_state *src_state = get_spill_state(ctx, src->def);
if (src_state) {
struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
memcpy(dst_state, src_state, reg_size(dst) * sizeof(struct reg_state));
}
}
} else {
for (unsigned j = 0; j < reg_size(dst); j++) {
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
srcs[offset + j] = (struct reg_state){
.def = dst,
.offset = j,
};
} else {
physreg_t src_physreg = ra_reg_get_physreg(src);
srcs[offset + j] = file->regs[src_physreg + j];
}
}
}
@ -310,6 +406,12 @@ propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
offset = 0;
for (unsigned i = 0; i < pcopy->dsts_count; i++) {
struct ir3_register *dst = pcopy->dsts[i];
if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
offset += reg_size(dst);
continue;
}
physreg_t dst_physreg = ra_reg_get_physreg(dst);
struct file_state *file = ra_val_get_file(ctx, dst);
@ -321,6 +423,23 @@ propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
assert(offset == size);
}
static void
propagate_spill(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
if (instr->srcs[0]->flags & IR3_REG_SHARED) { /* spill */
struct reg_state *state = get_or_create_spill_state(ctx, instr->dsts[0]);
physreg_t src_physreg = ra_reg_get_physreg(instr->srcs[0]);
memcpy(state, &ctx->reaching.shared.regs[src_physreg],
reg_size(instr->srcs[0]) * sizeof(struct reg_state));
} else { /* reload */
struct reg_state *state = get_spill_state(ctx, instr->srcs[0]->def);
assert(state);
physreg_t dst_physreg = ra_reg_get_physreg(instr->dsts[0]);
memcpy(&ctx->reaching.shared.regs[dst_physreg], state,
reg_size(instr->dsts[0]) * sizeof(struct reg_state));
}
}
static void
propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
@ -330,6 +449,13 @@ propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
propagate_collect(ctx, instr);
else if (instr->opc == OPC_META_PARALLEL_COPY)
propagate_parallelcopy(ctx, instr);
else if (ctx->shared_ra && instr->opc == OPC_MOV &&
/* Moves from immed/const with IR3_INSTR_SHARED_SPILL were demoted
* from scalar ALU, see try_demote_instruction().
*/
!(instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST)) &&
(instr->flags & IR3_INSTR_SHARED_SPILL))
propagate_spill(ctx, instr);
else
propagate_normal_instr(ctx, instr);
}
@ -439,6 +565,8 @@ static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
struct ir3_register *src)
{
if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
return;
struct file_state *file = ra_val_get_file(ctx, src);
physreg_t physreg = ra_reg_get_physreg(src);
for (unsigned i = 0; i < reg_size(src); i++) {
@ -541,7 +669,7 @@ check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
unsigned half_size, unsigned block_count)
unsigned half_size, unsigned block_count, bool shared_ra)
{
#ifdef NDEBUG
#define VALIDATE 0
@ -557,6 +685,9 @@ ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
ctx->full_size = full_size;
ctx->half_size = half_size;
ctx->block_count = block_count;
ctx->shared_ra = shared_ra;
if (ctx->shared_ra)
ctx->spill_reaching = _mesa_pointer_hash_table_create(ctx);
foreach_block (block, &v->ir->block_list) {
foreach_instr (instr, &block->instr_list) {

File diff suppressed because it is too large

View File

@ -1193,20 +1193,23 @@ is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
static void
handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
{
foreach_dst (dst, pcopy) {
ra_foreach_dst (dst, pcopy) {
struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
ra_spill_interval_init(dst_interval, dst);
}
foreach_src_n (src, i, pcopy) {
d("processing src %u", i);
struct ir3_register *dst = pcopy->dsts[i];
if (!(dst->flags & IR3_REG_SSA))
continue;
d("processing src %u", i);
/* Skip the intermediate copy for cases where the source is merged with
* the destination. Crucially this means that we also don't reload/spill
* it if it's been spilled, because it shares the same spill slot.
*/
if (src->def && src->def->merge_set &&
if ((src->flags & IR3_REG_SSA) && src->def->merge_set &&
src->def->merge_set == dst->merge_set &&
src->def->merge_set_offset == dst->merge_set_offset) {
struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
@ -1221,7 +1224,7 @@ handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
dst_interval->cant_spill = false;
dst_interval->dst = src_interval->dst;
}
} else if (src->def) {
} else if (src->flags & IR3_REG_SSA) {
struct ra_spill_interval *temp_interval =
create_temp_interval(ctx, dst);
struct ir3_register *temp = temp_interval->interval.reg;
@ -1251,15 +1254,17 @@ handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
foreach_src_n (src, i, pcopy) {
struct ir3_register *dst = pcopy->dsts[i];
if (!(dst->flags & IR3_REG_SSA))
continue;
if (src->def && src->def->merge_set &&
if ((src->flags & IR3_REG_SSA) && src->def->merge_set &&
src->def->merge_set == dst->merge_set &&
src->def->merge_set_offset == dst->merge_set_offset)
continue;
struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
if (!src->def) {
if (!(src->flags & IR3_REG_SSA)) {
dst_interval->cant_spill = true;
ra_spill_ctx_insert(ctx, dst_interval);
limit(ctx, pcopy);
@ -1292,6 +1297,9 @@ handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
static void
handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
if (!(instr->dsts[0]->flags & IR3_REG_SSA))
return;
init_dst(ctx, instr->dsts[0]);
insert_dst(ctx, instr->dsts[0]);
finish_dst(ctx, instr->dsts[0]);
@ -1300,6 +1308,9 @@ handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
static void
remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
if (!(instr->dsts[0]->flags & IR3_REG_SSA))
return;
if (instr->opc == OPC_META_TEX_PREFETCH) {
ra_foreach_src (src, instr)
remove_src(ctx, instr, src);
@ -1623,6 +1634,9 @@ static void
rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
struct ir3_block *block)
{
if (!(phi->dsts[0]->flags & IR3_REG_SSA))
return;
if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
phi->flags |= IR3_INSTR_UNUSED;
return;
@ -1977,8 +1991,25 @@ cleanup_dead(struct ir3 *ir)
{
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (instr->flags & IR3_INSTR_UNUSED)
list_delinit(&instr->node);
if (instr->flags & IR3_INSTR_UNUSED) {
if (instr->opc == OPC_META_PARALLEL_COPY) {
/* There may be non-SSA shared copies, we need to preserve these.
*/
for (unsigned i = 0; i < instr->dsts_count;) {
if (instr->dsts[i]->flags & IR3_REG_SSA) {
instr->dsts[i] = instr->dsts[--instr->dsts_count];
instr->srcs[i] = instr->srcs[--instr->srcs_count];
} else {
i++;
}
}
if (instr->dsts_count == 0)
list_delinit(&instr->node);
} else {
list_delinit(&instr->node);
}
}
}
}
}

View File

@ -84,6 +84,9 @@ validate_src(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
validate_assert(ctx, src->wrmask == reg->wrmask);
validate_assert(ctx, reg_class_flags(src) == reg_class_flags(reg));
if (src->flags & IR3_REG_CONST)
validate_assert(ctx, !(src->flags & IR3_REG_SHARED));
if (reg->tied) {
validate_assert(ctx, reg->tied->tied == reg);
bool found = false;

View File

@ -112,6 +112,7 @@ libfreedreno_ir3_files = files(
'ir3_sched.c',
'ir3_shader.c',
'ir3_shader.h',
'ir3_shared_ra.c',
'ir3_spill.c',
'ir3_validate.c',
)