mirror of
https://github.com/qemu/qemu.git
synced 2024-12-12 21:23:36 +08:00
tcg/ppc: Reorg goto_tb implementation
The old ppc64 implementation replaces 2 or 4 insns, which leaves a race condition in which a thread could be stopped at a PC in the middle of the sequence, and when restarted does not see the complete address computation and branches to nowhere. The new implementation replaces only one insn, swapping between b <dest> and mtctr r31 falling through to a general-case indirect branch. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
d59d83a1c3
commit
20b6643324
@ -1854,104 +1854,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
|
||||
tcg_out32(s, insn);
|
||||
}
|
||||
|
||||
/*
 * Pack two 32-bit insns into one 64-bit word in host memory order,
 * so that i1 occupies the lower address when the word is stored.
 */
static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2)
{
    /* On a big-endian host the first insn is the high half; on a
       little-endian host it is the low half. */
    uint64_t first = HOST_BIG_ENDIAN ? i1 : i2;
    uint64_t second = HOST_BIG_ENDIAN ? i2 : i1;

    return first << 32 | second;
}
|
||||
|
||||
/*
 * Atomically replace the two insns at rx/rw with i0 followed by i1.
 * rx is the executable view, rw the writable view of the same code.
 * Only usable on a 64-bit host, where a single aligned 8-byte store
 * updates both insns at once.
 */
static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw,
                                  tcg_insn_unit i0, tcg_insn_unit i1)
{
#if TCG_TARGET_REG_BITS == 64
    qatomic_set((uint64_t *)rw, make_pair(i0, i1));
    flush_idcache_range(rx, rw, 8);
#else
    /* 32-bit hosts must never reach this; fail at build time if so. */
    qemu_build_not_reached();
#endif
}
|
||||
|
||||
/*
 * Atomically replace the four insns at rx/rw with i0..i3, using the
 * ISA 2.07 stq instruction to perform one 16-byte store.
 * NOTE(review): relies on stq writing the even register of the pair to
 * the lower address; the p[] indexing below orders the halves so that
 * i0/i1 land first regardless of host endianness — confirm against the
 * Power ISA description of stq.
 */
static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw,
                                  tcg_insn_unit i0, tcg_insn_unit i1,
                                  tcg_insn_unit i2, tcg_insn_unit i3)
{
    uint64_t p[2];

    /* On BE: p[0] = i0/i1, p[1] = i2/i3.  On LE the pairs swap so the
       memory image is identical after the store. */
    p[!HOST_BIG_ENDIAN] = make_pair(i0, i1);
    p[HOST_BIG_ENDIAN] = make_pair(i2, i3);

    /*
     * There's no convenient way to get the compiler to allocate a pair
     * of registers at an even index, so copy into r6/r7 and clobber.
     */
    asm("mr %%r6, %1\n\t"
        "mr %%r7, %2\n\t"
        "stq %%r6, %0"
        : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7");
    flush_idcache_range(rx, rw, 16);
}
|
||||
|
||||
/*
 * Patch the direct-jump slot of TB 'tb', exit 'n', to branch to the
 * current jmp_target_addr[n].  jmp_rx is the executable view of the
 * insns being patched, jmp_rw the writable view of the same bytes.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    tcg_insn_unit i0, i1, i2, i3;
    uintptr_t addr = tb->jmp_target_addr[n];
    /* Displacement of the target from the start of this TB's code. */
    intptr_t tb_diff = addr - (uintptr_t)tb->tc.ptr;
    /* Branch displacement, relative to the insn after the first slot insn. */
    intptr_t br_diff = addr - (jmp_rx + 4);
    intptr_t lo, hi;

    /* 32-bit host: the slot is a single direct branch insn. */
    if (TCG_TARGET_REG_BITS == 32) {
        intptr_t diff = addr - jmp_rx;
        tcg_debug_assert(in_range_b(diff));
        qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
        flush_idcache_range(jmp_rx, jmp_rw, 4);
        return;
    }

    /*
     * For 16-bit displacements, we can use a single add + branch.
     * This happens quite often.
     */
    if (tb_diff == (int16_t)tb_diff) {
        i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
        i1 = B | (br_diff & 0x3fffffc);
        ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
        return;
    }

    /* Split tb_diff for an addis/addi pair; lo is sign-extended, so hi
       compensates (the assert checks the recombination is exact). */
    lo = (int16_t)tb_diff;
    hi = (int32_t)(tb_diff - lo);
    assert(tb_diff == hi + lo);
    i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
    i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);

    /*
     * Without stq from 2.07, we can only update two insns,
     * and those must be the ones that load the target address.
     */
    if (!have_isa_2_07) {
        ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
        return;
    }

    /*
     * For 26-bit displacements, we can use a direct branch.
     * Otherwise we still need the indirect branch, which we
     * must restore after a potential direct branch write.
     */
    br_diff -= 4;  /* the branch is now the third insn in the slot */
    if (in_range_b(br_diff)) {
        i2 = B | (br_diff & 0x3fffffc);
        i3 = NOP;
    } else {
        i2 = MTSPR | RS(TCG_REG_TB) | CTR;
        i3 = BCCTR | BO_ALWAYS;
    }
    ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3);
}
|
||||
|
||||
static void tcg_out_call_int(TCGContext *s, int lk,
|
||||
const tcg_insn_unit *target)
|
||||
{
|
||||
@ -2625,30 +2527,56 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
|
||||
|
||||
/*
 * Emit the goto_tb slot for TB exit 'which': a patchable sequence that
 * tb_target_set_jmp_target will later rewrite, followed by the reset
 * point used when the TB is unlinked.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /* Direct jump. */
    if (TCG_TARGET_REG_BITS == 64) {
        /* Ensure the next insns are 8 or 16-byte aligned. */
        while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) {
            tcg_out32(s, NOP);
        }
        /* Address of the slot holding this exit's target address. */
        uintptr_t ptr = get_jmp_target_addr(s, which);

        if (USE_REG_TB) {
            /* Load the target address TB-relative through TCG_REG_TB. */
            ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
            tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);

            /* Direct branch will be patched by tb_target_set_jmp_target. */
            set_jmp_insn_offset(s, which);
            tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
            tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
            tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);

            /* When branch is out of range, fall through to indirect. */
            tcg_out32(s, BCCTR | BO_ALWAYS);

            /* For the unlinked case, need to reset TCG_REG_TB. */
            set_jmp_reset_offset(s, which);
            tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
                             -tcg_current_code_size(s));
        } else {
            /* Direct branch will be patched by tb_target_set_jmp_target. */
            set_jmp_insn_offset(s, which);
            tcg_out32(s, NOP);

            /* When branch is out of range, fall through to indirect:
               materialize the slot address and load the target from it. */
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
            tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
            tcg_out32(s, BCCTR | BO_ALWAYS);
            set_jmp_reset_offset(s, which);
            /* NOTE(review): this branch is only reached when !USE_REG_TB,
               so the guard below appears unreachable — looks like a
               diff/merge artifact; confirm against upstream. */
            if (USE_REG_TB) {
                /* For the unlinked case, need to reset TCG_REG_TB. */
                tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
                                 -tcg_current_code_size(s));
            }
        }
    } else {
        /* 32-bit host: a single patchable direct branch. */
        set_jmp_insn_offset(s, which);
        tcg_out32(s, B);
        set_jmp_reset_offset(s, which);
    }
}
|
||||
|
||||
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
|
||||
uintptr_t jmp_rx, uintptr_t jmp_rw)
|
||||
{
|
||||
uintptr_t addr = tb->jmp_target_addr[n];
|
||||
intptr_t diff = addr - jmp_rx;
|
||||
tcg_insn_unit insn;
|
||||
|
||||
if (in_range_b(diff)) {
|
||||
insn = B | (diff & 0x3fffffc);
|
||||
} else if (USE_REG_TB) {
|
||||
insn = MTSPR | RS(TCG_REG_TB) | CTR;
|
||||
} else {
|
||||
insn = NOP;
|
||||
}
|
||||
|
||||
qatomic_set((uint32_t *)jmp_rw, insn);
|
||||
flush_idcache_range(jmp_rx, jmp_rw, 4);
|
||||
}
|
||||
|
||||
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
const TCGArg args[TCG_MAX_OP_ARGS],
|
||||
const int const_args[TCG_MAX_OP_ARGS])
|
||||
|
@ -27,11 +27,10 @@
|
||||
|
||||
/* Host register width and code-buffer limit, per sub-architecture. */
#ifdef _ARCH_PPC64
# define TCG_TARGET_REG_BITS 64
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#else
# define TCG_TARGET_REG_BITS 32
# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB)
#endif
/* NOTE(review): this redefines MAX_CODE_GEN_BUFFER_SIZE without an
   #undef, conflicting with the block above — looks like a diff/merge
   artifact where the per-arch definitions were meant to be removed;
   confirm only one definition should remain. */
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

#define TCG_TARGET_NB_REGS 64
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
||||
|
Loading…
Reference in New Issue
Block a user