Collected tcg backend patches

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJWKATkAAoJEK0ScMxN0CebB8UIANdZAig2T1sls/ymYzzPN3HN
 FSUiekwxJIHD8Oq16Kz3Luj1yNnUyPqM0bE3sqVcmR/+xkdp1PwBQmklIG/P8DaC
 5FZErrZyccsRhMY9L2kRf1+NaQr8zOzZU2rNPLQmTifK80S+El3kHyBymeBZFODT
 TVAfdV5eeqQXK//7njR7JjjREolOnhLGi03zoTj6pEOIxxGGCPL9e6euruS8Eo82
 0oEy043rEOOLW3r+eLPmsOce43SejhKTdxnXGcToO8aQEknPoMHlbZVLj65KnLw1
 HB5HBooSMmZS+Y25rGzPKciuRqSDlwUD0VT7GllAMwoOH3TA8uvXjyKrzsbuRYY=
 =G59h
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20151021' into staging

Collected tcg backend patches

# gpg: Signature made Wed 21 Oct 2015 22:34:28 BST using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-tcg-20151021:
  cpu-exec: Add "nochain" debug flag
  tcg/mips: Support r6 SEL{NE, EQ}Z instead of MOVN/MOVZ
  tcg/mips: Support r6 multiply/divide encodings
  tcg/mips: Support r6 JR encoding
  tcg/mips: Add use_mips32r6_instructions definition
  disas/mips: Add R6 jr/jr.hb to disassembler
  tcg-opc.h: Simplify insn_start def
  tcg/ppc: Prefer mask over andi.
  tcg/ppc: Revise goto_tb implementation
  tcg/ppc: Adjust exit_tb for change in prologue placement

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2015-10-22 18:01:53 +01:00
commit 6a6739de51
9 changed files with 158 additions and 46 deletions

View File

@ -477,7 +477,8 @@ int cpu_exec(CPUState *cpu)
/* see if we can patch the calling TB. When the TB
spans two pages, we cannot safely do a direct
jump. */
if (next_tb != 0 && tb->page_addr[1] == -1) {
if (next_tb != 0 && tb->page_addr[1] == -1
&& !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
next_tb & TB_EXIT_MASK, tb);
}

View File

@ -2420,9 +2420,11 @@ const struct mips_opcode mips_builtin_opcodes[] =
{"hibernate","", 0x42000023, 0xffffffff, 0, 0, V1 },
{"ins", "t,r,+A,+B", 0x7c000004, 0xfc00003f, WR_t|RD_s, 0, I33 },
{"jr", "s", 0x00000008, 0xfc1fffff, UBD|RD_s, 0, I1 },
{"jr", "s", 0x00000009, 0xfc1fffff, UBD|RD_s, 0, I32R6 }, /* jalr */
/* jr.hb is officially MIPS{32,64}R2, but it works on R1 as jr with
the same hazard barrier effect. */
{"jr.hb", "s", 0x00000408, 0xfc1fffff, UBD|RD_s, 0, I32 },
{"jr.hb", "s", 0x00000409, 0xfc1fffff, UBD|RD_s, 0, I32R6 }, /* jalr.hb */
{"j", "s", 0x00000008, 0xfc1fffff, UBD|RD_s, 0, I1 }, /* jr */
/* SVR4 PIC code requires special handling for j, so it must be a
macro. */

View File

@ -41,6 +41,7 @@ static inline bool qemu_log_enabled(void)
#define LOG_UNIMP (1 << 10)
#define LOG_GUEST_ERROR (1 << 11)
#define CPU_LOG_MMU (1 << 12)
#define CPU_LOG_TB_NOCHAIN (1 << 13)
/* Returns true if a bit is set in the current loglevel mask
*/

View File

@ -119,6 +119,9 @@ const QEMULogItem qemu_log_items[] = {
{ LOG_GUEST_ERROR, "guest_errors",
"log when the guest OS does something invalid (eg accessing a\n"
"non-existent register)" },
{ CPU_LOG_TB_NOCHAIN, "nochain",
"do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
"complete traces" },
{ 0, NULL, NULL },
};

View File

@ -288,16 +288,24 @@ typedef enum {
OPC_SRLV = OPC_SPECIAL | 0x06,
OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06,
OPC_SRAV = OPC_SPECIAL | 0x07,
OPC_JR = OPC_SPECIAL | 0x08,
OPC_JR_R5 = OPC_SPECIAL | 0x08,
OPC_JALR = OPC_SPECIAL | 0x09,
OPC_MOVZ = OPC_SPECIAL | 0x0A,
OPC_MOVN = OPC_SPECIAL | 0x0B,
OPC_MFHI = OPC_SPECIAL | 0x10,
OPC_MFLO = OPC_SPECIAL | 0x12,
OPC_MULT = OPC_SPECIAL | 0x18,
OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18,
OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18,
OPC_MULTU = OPC_SPECIAL | 0x19,
OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19,
OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19,
OPC_DIV = OPC_SPECIAL | 0x1A,
OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A,
OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A,
OPC_DIVU = OPC_SPECIAL | 0x1B,
OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B,
OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B,
OPC_ADDU = OPC_SPECIAL | 0x21,
OPC_SUBU = OPC_SPECIAL | 0x23,
OPC_AND = OPC_SPECIAL | 0x24,
@ -306,13 +314,15 @@ typedef enum {
OPC_NOR = OPC_SPECIAL | 0x27,
OPC_SLT = OPC_SPECIAL | 0x2A,
OPC_SLTU = OPC_SPECIAL | 0x2B,
OPC_SELEQZ = OPC_SPECIAL | 0x35,
OPC_SELNEZ = OPC_SPECIAL | 0x37,
OPC_REGIMM = 0x01 << 26,
OPC_BLTZ = OPC_REGIMM | (0x00 << 16),
OPC_BGEZ = OPC_REGIMM | (0x01 << 16),
OPC_SPECIAL2 = 0x1c << 26,
OPC_MUL = OPC_SPECIAL2 | 0x002,
OPC_MUL_R5 = OPC_SPECIAL2 | 0x002,
OPC_SPECIAL3 = 0x1f << 26,
OPC_EXT = OPC_SPECIAL3 | 0x000,
@ -320,6 +330,15 @@ typedef enum {
OPC_WSBH = OPC_SPECIAL3 | 0x0a0,
OPC_SEB = OPC_SPECIAL3 | 0x420,
OPC_SEH = OPC_SPECIAL3 | 0x620,
/* MIPS r6 doesn't have JR, JALR should be used instead */
OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
/*
* MIPS r6 replaces MUL with an alternative encoding which is
* backwards-compatible at the assembly level.
*/
OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5,
} MIPSInsn;
/*
@ -841,13 +860,20 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
}
static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
TCGReg c1, TCGReg c2, TCGReg v)
TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2)
{
MIPSInsn m_opc = OPC_MOVN;
bool eqz = false;
/* If one of the values is zero, put it last to match SEL*Z instructions */
if (use_mips32r6_instructions && v1 == 0) {
v1 = v2;
v2 = 0;
cond = tcg_invert_cond(cond);
}
switch (cond) {
case TCG_COND_EQ:
m_opc = OPC_MOVZ;
eqz = true;
/* FALLTHRU */
case TCG_COND_NE:
if (c2 != 0) {
@ -860,14 +886,32 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
/* Minimize code size by preferring a compare not requiring INV. */
if (mips_cmp_map[cond] & MIPS_CMP_INV) {
cond = tcg_invert_cond(cond);
m_opc = OPC_MOVZ;
eqz = true;
}
tcg_out_setcond(s, cond, TCG_TMP0, c1, c2);
c1 = TCG_TMP0;
break;
}
tcg_out_opc_reg(s, m_opc, ret, v, c1);
if (use_mips32r6_instructions) {
MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ;
MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ;
if (v2 != 0) {
tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1);
}
tcg_out_opc_reg(s, m_opc_t, ret, v1, c1);
if (v2 != 0) {
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
}
} else {
MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN;
tcg_out_opc_reg(s, m_opc, ret, v1, c1);
/* This should be guaranteed via constraints */
tcg_debug_assert(v2 == ret);
}
}
static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
@ -1445,21 +1489,45 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
i1 = OPC_MULT, i2 = OPC_MFLO;
goto do_hilo1;
case INDEX_op_mulsh_i32:
if (use_mips32r6_instructions) {
tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2);
break;
}
i1 = OPC_MULT, i2 = OPC_MFHI;
goto do_hilo1;
case INDEX_op_muluh_i32:
if (use_mips32r6_instructions) {
tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2);
break;
}
i1 = OPC_MULTU, i2 = OPC_MFHI;
goto do_hilo1;
case INDEX_op_div_i32:
if (use_mips32r6_instructions) {
tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2);
break;
}
i1 = OPC_DIV, i2 = OPC_MFLO;
goto do_hilo1;
case INDEX_op_divu_i32:
if (use_mips32r6_instructions) {
tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2);
break;
}
i1 = OPC_DIVU, i2 = OPC_MFLO;
goto do_hilo1;
case INDEX_op_rem_i32:
if (use_mips32r6_instructions) {
tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2);
break;
}
i1 = OPC_DIV, i2 = OPC_MFHI;
goto do_hilo1;
case INDEX_op_remu_i32:
if (use_mips32r6_instructions) {
tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2);
break;
}
i1 = OPC_DIVU, i2 = OPC_MFHI;
do_hilo1:
tcg_out_opc_reg(s, i1, 0, a1, a2);
@ -1536,7 +1604,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_movcond_i32:
tcg_out_movcond(s, args[5], a0, a1, a2, args[3]);
tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]);
break;
case INDEX_op_setcond_i32:
@ -1592,8 +1660,10 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_add_i32, { "r", "rZ", "rJ" } },
{ INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
#if !use_mips32r6_instructions
{ INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } },
{ INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } },
#endif
{ INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_muluh_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_div_i32, { "r", "rZ", "rZ" } },
@ -1623,7 +1693,11 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_deposit_i32, { "r", "0", "rZ" } },
{ INDEX_op_brcond_i32, { "rZ", "rZ" } },
#if use_mips32r6_instructions
{ INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
#else
{ INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
#endif
{ INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },

View File

@ -96,6 +96,13 @@ extern bool use_mips32_instructions;
extern bool use_mips32r2_instructions;
#endif
/* MIPS32R6 instruction set detection */
#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
#define use_mips32r6_instructions 1
#else
#define use_mips32r6_instructions 0
#endif
/* optional instructions */
#define TCG_TARGET_HAS_div_i32 1
#define TCG_TARGET_HAS_rem_i32 1
@ -105,8 +112,8 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions)
#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
#define TCG_TARGET_HAS_muluh_i32 1
#define TCG_TARGET_HAS_mulsh_i32 1

View File

@ -700,14 +700,14 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
int mb, me;
if ((c & 0xffff) == c) {
if (mask_operand(c, &mb, &me)) {
tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
} else if ((c & 0xffff) == c) {
tcg_out32(s, ANDI | SAI(src, dst, c));
return;
} else if ((c & 0xffff0000) == c) {
tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
return;
} else if (mask_operand(c, &mb, &me)) {
tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
} else {
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
@ -719,18 +719,18 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
int mb, me;
assert(TCG_TARGET_REG_BITS == 64);
if ((c & 0xffff) == c) {
tcg_out32(s, ANDI | SAI(src, dst, c));
return;
} else if ((c & 0xffff0000) == c) {
tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
return;
} else if (mask64_operand(c, &mb, &me)) {
if (mask64_operand(c, &mb, &me)) {
if (mb == 0) {
tcg_out_rld(s, RLDICR, dst, src, 0, me);
} else {
tcg_out_rld(s, RLDICL, dst, src, 0, mb);
}
} else if ((c & 0xffff) == c) {
tcg_out32(s, ANDI | SAI(src, dst, c));
return;
} else if ((c & 0xffff0000) == c) {
tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
return;
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
@ -1239,11 +1239,36 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
TCGContext s;
tcg_insn_unit i1, i2;
uint64_t pair;
intptr_t diff = addr - jmp_addr;
s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
if (in_range_b(diff)) {
i1 = B | (diff & 0x3fffffc);
i2 = NOP;
} else if (USE_REG_RA) {
intptr_t lo, hi;
diff = addr - (uintptr_t)tb_ret_addr;
lo = (int16_t)diff;
hi = (int32_t)(diff - lo);
assert(diff == hi + lo);
i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
} else {
assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
}
#ifdef HOST_WORDS_BIGENDIAN
pair = (uint64_t)i1 << 32 | i2;
#else
pair = (uint64_t)i2 << 32 | i1;
#endif
/* ??? __atomic_store_8, presuming there's some way to do that
for 32-bit, otherwise this is good enough for 64-bit. */
*(uint64_t *)jmp_addr = pair;
flush_icache_range(jmp_addr, jmp_addr + 8);
}
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
@ -1855,12 +1880,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
if (USE_REG_RA) {
ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
/* If we can use a direct branch, otherwise use the value in RA.
Note that the direct branch is always forward. If it's in
range now, it'll still be in range after the movi. Don't
bother about the 20 bytes where the test here fails but it
would succeed below. */
if (!in_range_b(disp)) {
/* Use a direct branch if we can, otherwise use the value in RA.
Note that the direct branch is always backward, thus we need
to account for the possibility of 5 insns from the movi. */
if (!in_range_b(disp - 20)) {
tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
tcg_out32(s, BCCTR | BO_ALWAYS);
@ -1871,14 +1894,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_b(s, 0, tb_ret_addr);
break;
case INDEX_op_goto_tb:
if (s->tb_jmp_offset) {
/* Direct jump method. */
s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
s->code_ptr += 7;
} else {
/* Indirect jump method. */
tcg_abort();
tcg_debug_assert(s->tb_jmp_offset);
/* Direct jump. Ensure the next insns are 8-byte aligned. */
if ((uintptr_t)s->code_ptr & 7) {
tcg_out32(s, NOP);
}
s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
/* To be replaced by either a branch+nop or a load into TMP1. */
s->code_ptr += 2;
tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
tcg_out32(s, BCCTR | BO_ALWAYS);
s->tb_next_offset[args[0]] = tcg_current_code_size(s);
break;
case INDEX_op_br:

View File

@ -173,18 +173,15 @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
/* QEMU specific */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
DEF(insn_start, 0, 0, 2 * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
#else
DEF(insn_start, 0, 0, TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
#endif
DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
TCG_OPF_NOT_PRESENT)
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,

View File

@ -468,6 +468,8 @@ static inline PageDesc *page_find(tb_page_addr_t index)
# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
#elif defined(__sparc__)
# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
#elif defined(__powerpc64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
#elif defined(__aarch64__)
# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024)
#elif defined(__arm__)