mirror of
https://github.com/qemu/qemu.git
synced 2024-11-24 11:23:43 +08:00
tcg: Use per-temp state data in liveness
This avoids having to allocate external memory for each temporary.

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
1807f4c400
commit
b83eabeac0
225
tcg/tcg.c
225
tcg/tcg.c
@ -1615,42 +1615,54 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
|
||||
|
||||
/* liveness analysis: end of function: all temps are dead, and globals
|
||||
should be in memory. */
|
||||
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
|
||||
static void tcg_la_func_end(TCGContext *s)
|
||||
{
|
||||
memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
|
||||
memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
|
||||
int ng = s->nb_globals;
|
||||
int nt = s->nb_temps;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ng; ++i) {
|
||||
s->temps[i].state = TS_DEAD | TS_MEM;
|
||||
}
|
||||
for (i = ng; i < nt; ++i) {
|
||||
s->temps[i].state = TS_DEAD;
|
||||
}
|
||||
}
|
||||
|
||||
/* liveness analysis: end of basic block: all temps are dead, globals
|
||||
and local temps should be in memory. */
|
||||
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
|
||||
static void tcg_la_bb_end(TCGContext *s)
|
||||
{
|
||||
int i, n;
|
||||
int ng = s->nb_globals;
|
||||
int nt = s->nb_temps;
|
||||
int i;
|
||||
|
||||
tcg_la_func_end(s, temp_state);
|
||||
for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
|
||||
if (s->temps[i].temp_local) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
}
|
||||
for (i = 0; i < ng; ++i) {
|
||||
s->temps[i].state = TS_DEAD | TS_MEM;
|
||||
}
|
||||
for (i = ng; i < nt; ++i) {
|
||||
s->temps[i].state = (s->temps[i].temp_local
|
||||
? TS_DEAD | TS_MEM
|
||||
: TS_DEAD);
|
||||
}
|
||||
}
|
||||
|
||||
/* Liveness analysis : update the opc_arg_life array to tell if a
|
||||
given input argument is dead. Instructions updating dead
|
||||
temporaries are removed. */
|
||||
static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
static void liveness_pass_1(TCGContext *s)
|
||||
{
|
||||
int nb_globals = s->nb_globals;
|
||||
int oi, oi_prev;
|
||||
|
||||
tcg_la_func_end(s, temp_state);
|
||||
tcg_la_func_end(s);
|
||||
|
||||
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
|
||||
int i, nb_iargs, nb_oargs;
|
||||
TCGOpcode opc_new, opc_new2;
|
||||
bool have_opc_new2;
|
||||
TCGLifeData arg_life = 0;
|
||||
TCGArg arg;
|
||||
TCGTemp *arg_ts;
|
||||
|
||||
TCGOp * const op = &s->gen_op_buf[oi];
|
||||
TCGOpcode opc = op->opc;
|
||||
@ -1670,8 +1682,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
/* pure functions can be removed if their result is unused */
|
||||
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = op->args[i];
|
||||
if (temp_state[arg] != TS_DEAD) {
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
if (arg_ts->state != TS_DEAD) {
|
||||
goto do_not_remove_call;
|
||||
}
|
||||
}
|
||||
@ -1681,41 +1693,41 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
|
||||
/* output args are dead */
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = op->args[i];
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
if (arg_ts->state & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
if (temp_state[arg] & TS_MEM) {
|
||||
if (arg_ts->state & TS_MEM) {
|
||||
arg_life |= SYNC_ARG << i;
|
||||
}
|
||||
temp_state[arg] = TS_DEAD;
|
||||
arg_ts->state = TS_DEAD;
|
||||
}
|
||||
|
||||
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
|
||||
TCG_CALL_NO_READ_GLOBALS))) {
|
||||
/* globals should go back to memory */
|
||||
memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
|
||||
for (i = 0; i < nb_globals; i++) {
|
||||
s->temps[i].state = TS_DEAD | TS_MEM;
|
||||
}
|
||||
} else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
||||
/* globals should be synced to memory */
|
||||
for (i = 0; i < nb_globals; i++) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
s->temps[i].state |= TS_MEM;
|
||||
}
|
||||
}
|
||||
|
||||
/* record arguments that die in this helper */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = op->args[i];
|
||||
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
if (arg_ts && arg_ts->state & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
}
|
||||
/* input arguments are live for preceding opcodes */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = op->args[i];
|
||||
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||
temp_state[arg] &= ~TS_DEAD;
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
if (arg_ts) {
|
||||
arg_ts->state &= ~TS_DEAD;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1725,7 +1737,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
break;
|
||||
case INDEX_op_discard:
|
||||
/* mark the temporary as dead */
|
||||
temp_state[op->args[0]] = TS_DEAD;
|
||||
arg_temp(op->args[0])->state = TS_DEAD;
|
||||
break;
|
||||
|
||||
case INDEX_op_add2_i32:
|
||||
@ -1746,8 +1758,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
the low part. The result can be optimized to a simple
|
||||
add or sub. This happens often for x86_64 guest when the
|
||||
cpu mode is set to 32 bit. */
|
||||
if (temp_state[op->args[1]] == TS_DEAD) {
|
||||
if (temp_state[op->args[0]] == TS_DEAD) {
|
||||
if (arg_temp(op->args[1])->state == TS_DEAD) {
|
||||
if (arg_temp(op->args[0])->state == TS_DEAD) {
|
||||
goto do_remove;
|
||||
}
|
||||
/* Replace the opcode and adjust the args in place,
|
||||
@ -1784,8 +1796,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
do_mul2:
|
||||
nb_iargs = 2;
|
||||
nb_oargs = 2;
|
||||
if (temp_state[op->args[1]] == TS_DEAD) {
|
||||
if (temp_state[op->args[0]] == TS_DEAD) {
|
||||
if (arg_temp(op->args[1])->state == TS_DEAD) {
|
||||
if (arg_temp(op->args[0])->state == TS_DEAD) {
|
||||
/* Both parts of the operation are dead. */
|
||||
goto do_remove;
|
||||
}
|
||||
@ -1793,7 +1805,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
op->opc = opc = opc_new;
|
||||
op->args[1] = op->args[2];
|
||||
op->args[2] = op->args[3];
|
||||
} else if (temp_state[op->args[0]] == TS_DEAD && have_opc_new2) {
|
||||
} else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
|
||||
/* The low part of the operation is dead; generate the high. */
|
||||
op->opc = opc = opc_new2;
|
||||
op->args[0] = op->args[1];
|
||||
@ -1816,7 +1828,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
implies side effects */
|
||||
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
if (temp_state[op->args[i]] != TS_DEAD) {
|
||||
if (arg_temp(op->args[i])->state != TS_DEAD) {
|
||||
goto do_not_remove;
|
||||
}
|
||||
}
|
||||
@ -1826,36 +1838,36 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
do_not_remove:
|
||||
/* output args are dead */
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = op->args[i];
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
if (arg_ts->state & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
if (temp_state[arg] & TS_MEM) {
|
||||
if (arg_ts->state & TS_MEM) {
|
||||
arg_life |= SYNC_ARG << i;
|
||||
}
|
||||
temp_state[arg] = TS_DEAD;
|
||||
arg_ts->state = TS_DEAD;
|
||||
}
|
||||
|
||||
/* if end of basic block, update */
|
||||
if (def->flags & TCG_OPF_BB_END) {
|
||||
tcg_la_bb_end(s, temp_state);
|
||||
tcg_la_bb_end(s);
|
||||
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
|
||||
/* globals should be synced to memory */
|
||||
for (i = 0; i < nb_globals; i++) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
s->temps[i].state |= TS_MEM;
|
||||
}
|
||||
}
|
||||
|
||||
/* record arguments that die in this opcode */
|
||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||
arg = op->args[i];
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
if (arg_ts->state & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
}
|
||||
/* input arguments are live for preceding opcodes */
|
||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||
temp_state[op->args[i]] &= ~TS_DEAD;
|
||||
arg_temp(op->args[i])->state &= ~TS_DEAD;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -1865,16 +1877,12 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
}
|
||||
|
||||
/* Liveness analysis: Convert indirect regs to direct temporaries. */
|
||||
static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
|
||||
static bool liveness_pass_2(TCGContext *s)
|
||||
{
|
||||
int nb_globals = s->nb_globals;
|
||||
int16_t *dir_temps;
|
||||
int i, oi, oi_next;
|
||||
int nb_temps, i, oi, oi_next;
|
||||
bool changes = false;
|
||||
|
||||
dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
|
||||
memset(dir_temps, 0, nb_globals * sizeof(int16_t));
|
||||
|
||||
/* Create a temporary for each indirect global. */
|
||||
for (i = 0; i < nb_globals; ++i) {
|
||||
TCGTemp *its = &s->temps[i];
|
||||
@ -1882,11 +1890,18 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
|
||||
TCGTemp *dts = tcg_temp_alloc(s);
|
||||
dts->type = its->type;
|
||||
dts->base_type = its->base_type;
|
||||
dir_temps[i] = temp_idx(s, dts);
|
||||
its->state_ptr = dts;
|
||||
} else {
|
||||
its->state_ptr = NULL;
|
||||
}
|
||||
/* All globals begin dead. */
|
||||
its->state = TS_DEAD;
|
||||
}
|
||||
for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
|
||||
TCGTemp *its = &s->temps[i];
|
||||
its->state_ptr = NULL;
|
||||
its->state = TS_DEAD;
|
||||
}
|
||||
|
||||
memset(temp_state, TS_DEAD, nb_globals);
|
||||
|
||||
for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
|
||||
TCGOp *op = &s->gen_op_buf[oi];
|
||||
@ -1894,7 +1909,7 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
|
||||
const TCGOpDef *def = &tcg_op_defs[opc];
|
||||
TCGLifeData arg_life = op->life;
|
||||
int nb_iargs, nb_oargs, call_flags;
|
||||
TCGArg arg, dir;
|
||||
TCGTemp *arg_ts, *dir_ts;
|
||||
|
||||
oi_next = op->next;
|
||||
|
||||
@ -1922,23 +1937,21 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
|
||||
|
||||
/* Make sure that input arguments are available. */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = op->args[i];
|
||||
/* Note this unsigned test catches TCG_CALL_DUMMY_ARG too. */
|
||||
if (arg < nb_globals) {
|
||||
dir = dir_temps[arg];
|
||||
if (dir != 0 && temp_state[arg] == TS_DEAD) {
|
||||
TCGTemp *its = arg_temp(arg);
|
||||
TCGOpcode lopc = (its->type == TCG_TYPE_I32
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
if (arg_ts) {
|
||||
dir_ts = arg_ts->state_ptr;
|
||||
if (dir_ts && arg_ts->state == TS_DEAD) {
|
||||
TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
|
||||
? INDEX_op_ld_i32
|
||||
: INDEX_op_ld_i64);
|
||||
TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
|
||||
|
||||
lop->args[0] = dir;
|
||||
lop->args[1] = temp_arg(its->mem_base);
|
||||
lop->args[2] = its->mem_offset;
|
||||
lop->args[0] = temp_arg(dir_ts);
|
||||
lop->args[1] = temp_arg(arg_ts->mem_base);
|
||||
lop->args[2] = arg_ts->mem_offset;
|
||||
|
||||
/* Loaded, but synced with memory. */
|
||||
temp_state[arg] = TS_MEM;
|
||||
arg_ts->state = TS_MEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1947,14 +1960,14 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
|
||||
No action is required except keeping each temp's state up to date
|
||||
so that we reload when needed. */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = op->args[i];
|
||||
if (arg < nb_globals) {
|
||||
dir = dir_temps[arg];
|
||||
if (dir != 0) {
|
||||
op->args[i] = dir;
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
if (arg_ts) {
|
||||
dir_ts = arg_ts->state_ptr;
|
||||
if (dir_ts) {
|
||||
op->args[i] = temp_arg(dir_ts);
|
||||
changes = true;
|
||||
if (IS_DEAD_ARG(i)) {
|
||||
temp_state[arg] = TS_DEAD;
|
||||
arg_ts->state = TS_DEAD;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1968,51 +1981,49 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
|
||||
for (i = 0; i < nb_globals; ++i) {
|
||||
/* Liveness should see that globals are synced back,
|
||||
that is, either TS_DEAD or TS_MEM. */
|
||||
tcg_debug_assert(dir_temps[i] == 0
|
||||
|| temp_state[i] != 0);
|
||||
arg_ts = &s->temps[i];
|
||||
tcg_debug_assert(arg_ts->state_ptr == 0
|
||||
|| arg_ts->state != 0);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < nb_globals; ++i) {
|
||||
/* Liveness should see that globals are saved back,
|
||||
that is, TS_DEAD, waiting to be reloaded. */
|
||||
tcg_debug_assert(dir_temps[i] == 0
|
||||
|| temp_state[i] == TS_DEAD);
|
||||
arg_ts = &s->temps[i];
|
||||
tcg_debug_assert(arg_ts->state_ptr == 0
|
||||
|| arg_ts->state == TS_DEAD);
|
||||
}
|
||||
}
|
||||
|
||||
/* Outputs become available. */
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = op->args[i];
|
||||
if (arg >= nb_globals) {
|
||||
arg_ts = arg_temp(op->args[i]);
|
||||
dir_ts = arg_ts->state_ptr;
|
||||
if (!dir_ts) {
|
||||
continue;
|
||||
}
|
||||
dir = dir_temps[arg];
|
||||
if (dir == 0) {
|
||||
continue;
|
||||
}
|
||||
op->args[i] = dir;
|
||||
op->args[i] = temp_arg(dir_ts);
|
||||
changes = true;
|
||||
|
||||
/* The output is now live and modified. */
|
||||
temp_state[arg] = 0;
|
||||
arg_ts->state = 0;
|
||||
|
||||
/* Sync outputs upon their last write. */
|
||||
if (NEED_SYNC_ARG(i)) {
|
||||
TCGTemp *its = arg_temp(arg);
|
||||
TCGOpcode sopc = (its->type == TCG_TYPE_I32
|
||||
TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
|
||||
? INDEX_op_st_i32
|
||||
: INDEX_op_st_i64);
|
||||
TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
|
||||
|
||||
sop->args[0] = dir;
|
||||
sop->args[1] = temp_arg(its->mem_base);
|
||||
sop->args[2] = its->mem_offset;
|
||||
sop->args[0] = temp_arg(dir_ts);
|
||||
sop->args[1] = temp_arg(arg_ts->mem_base);
|
||||
sop->args[2] = arg_ts->mem_offset;
|
||||
|
||||
temp_state[arg] = TS_MEM;
|
||||
arg_ts->state = TS_MEM;
|
||||
}
|
||||
/* Drop outputs that are dead. */
|
||||
if (IS_DEAD_ARG(i)) {
|
||||
temp_state[arg] = TS_DEAD;
|
||||
arg_ts->state = TS_DEAD;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2781,27 +2792,23 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
|
||||
s->la_time -= profile_getclock();
|
||||
#endif
|
||||
|
||||
{
|
||||
uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
|
||||
liveness_pass_1(s);
|
||||
|
||||
liveness_pass_1(s, temp_state);
|
||||
|
||||
if (s->nb_indirects > 0) {
|
||||
if (s->nb_indirects > 0) {
|
||||
#ifdef DEBUG_DISAS
|
||||
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
|
||||
&& qemu_log_in_addr_range(tb->pc))) {
|
||||
qemu_log_lock();
|
||||
qemu_log("OP before indirect lowering:\n");
|
||||
tcg_dump_ops(s);
|
||||
qemu_log("\n");
|
||||
qemu_log_unlock();
|
||||
}
|
||||
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
|
||||
&& qemu_log_in_addr_range(tb->pc))) {
|
||||
qemu_log_lock();
|
||||
qemu_log("OP before indirect lowering:\n");
|
||||
tcg_dump_ops(s);
|
||||
qemu_log("\n");
|
||||
qemu_log_unlock();
|
||||
}
|
||||
#endif
|
||||
/* Replace indirect temps with direct temps. */
|
||||
if (liveness_pass_2(s, temp_state)) {
|
||||
/* If changes were made, re-run liveness. */
|
||||
liveness_pass_1(s, temp_state);
|
||||
}
|
||||
/* Replace indirect temps with direct temps. */
|
||||
if (liveness_pass_2(s)) {
|
||||
/* If changes were made, re-run liveness. */
|
||||
liveness_pass_1(s);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -592,6 +592,12 @@ typedef struct TCGTemp {
|
||||
struct TCGTemp *mem_base;
|
||||
intptr_t mem_offset;
|
||||
const char *name;
|
||||
|
||||
/* Pass-specific information that can be stored for a temporary.
|
||||
One word worth of integer data, and one pointer to data
|
||||
allocated separately. */
|
||||
uintptr_t state;
|
||||
void *state_ptr;
|
||||
} TCGTemp;
|
||||
|
||||
typedef struct TCGContext TCGContext;
|
||||
|
Loading…
Reference in New Issue
Block a user