From 0953225588ee30de2e92485331ad1bb3d7c7d089 Mon Sep 17 00:00:00 2001 From: Bastian Koppelmann Date: Mon, 19 Jan 2015 15:43:07 +0000 Subject: [PATCH] target-tricore: Add instructions of RRR opcode format Add microcode generator function gen_cond_sub. Add helper functions: * ixmax/ixmin: search for the max/min value and its related index in a vector of 16-bit values. * pack: pack two data registers into an IEEE-754 single precision floating point format number. * dvadj: divide-adjust the result after dvstep instructions. * dvstep: divide a reg by a divisor, producing 8 bits of quotient at a time. OPCM_32_RRR_FLOAT -> OPCM_32_RRR_DIVIDE Signed-off-by: Bastian Koppelmann Reviewed-by: Richard Henderson --- target-tricore/helper.h | 8 ++ target-tricore/op_helper.c | 160 +++++++++++++++++++++++++++++++ target-tricore/translate.c | 150 +++++++++++++++++++++++++++++ target-tricore/tricore-opcodes.h | 2 +- 4 files changed, 319 insertions(+), 1 deletion(-) diff --git a/target-tricore/helper.h b/target-tricore/helper.h index 068dc7b422..7405feee88 100644 --- a/target-tricore/helper.h +++ b/target-tricore/helper.h @@ -60,10 +60,14 @@ DEF_HELPER_FLAGS_2(max_b, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_2(max_bu, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_2(max_h, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_2(max_hu, TCG_CALL_NO_RWG_SE, i32, i32, i32) +DEF_HELPER_FLAGS_2(ixmax, TCG_CALL_NO_RWG_SE, i64, i64, i32) +DEF_HELPER_FLAGS_2(ixmax_u, TCG_CALL_NO_RWG_SE, i64, i64, i32) DEF_HELPER_FLAGS_2(min_b, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_2(min_bu, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_2(min_h, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_2(min_hu, TCG_CALL_NO_RWG_SE, i32, i32, i32) +DEF_HELPER_FLAGS_2(ixmin, TCG_CALL_NO_RWG_SE, i64, i64, i32) +DEF_HELPER_FLAGS_2(ixmin_u, TCG_CALL_NO_RWG_SE, i64, i64, i32) /* count leading ... 
*/ DEF_HELPER_FLAGS_1(clo, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(clo_h, TCG_CALL_NO_RWG_SE, i32, i32) @@ -81,12 +85,16 @@ DEF_HELPER_FLAGS_2(bmerge, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_1(bsplit, TCG_CALL_NO_RWG_SE, i64, i32) DEF_HELPER_FLAGS_1(parity, TCG_CALL_NO_RWG_SE, i32, i32) /* float */ +DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32, i32) DEF_HELPER_1(unpack, i64, i32) /* dvinit */ DEF_HELPER_3(dvinit_b_13, i64, env, i32, i32) DEF_HELPER_3(dvinit_b_131, i64, env, i32, i32) DEF_HELPER_3(dvinit_h_13, i64, env, i32, i32) DEF_HELPER_3(dvinit_h_131, i64, env, i32, i32) +DEF_HELPER_FLAGS_2(dvadj, TCG_CALL_NO_RWG_SE, i64, i64, i32) +DEF_HELPER_FLAGS_2(dvstep, TCG_CALL_NO_RWG_SE, i64, i64, i32) +DEF_HELPER_FLAGS_2(dvstep_u, TCG_CALL_NO_RWG_SE, i64, i64, i32) /* mulh */ DEF_HELPER_FLAGS_5(mul_h, TCG_CALL_NO_RWG_SE, i64, i32, i32, i32, i32, i32) DEF_HELPER_FLAGS_5(mulm_h, TCG_CALL_NO_RWG_SE, i64, i32, i32, i32, i32, i32) diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c index 254135ed9e..ed26b302b0 100644 --- a/target-tricore/op_helper.c +++ b/target-tricore/op_helper.c @@ -883,6 +883,50 @@ uint32_t helper_##name ##_hu(target_ulong r1, target_ulong r2)\ \ return ret; \ } \ + \ +uint64_t helper_ix##name(uint64_t r1, uint32_t r2) \ +{ \ + int64_t r2l, r2h, r1hl; \ + uint64_t ret = 0; \ + \ + ret = ((r1 + 2) & 0xffff); \ + r2l = sextract64(r2, 0, 16); \ + r2h = sextract64(r2, 16, 16); \ + r1hl = sextract64(r1, 32, 16); \ + \ + if ((r2l op ## = r2h) && (r2l op r1hl)) { \ + ret |= (r2l & 0xffff) << 32; \ + ret |= extract64(r1, 0, 16) << 16; \ + } else if ((r2h op r2l) && (r2h op r1hl)) { \ + ret |= extract64(r2, 16, 16) << 32; \ + ret |= extract64(r1 + 1, 0, 16) << 16; \ + } else { \ + ret |= r1 & 0xffffffff0000ull; \ + } \ + return ret; \ +} \ + \ +uint64_t helper_ix##name ##_u(uint64_t r1, uint32_t r2) \ +{ \ + int64_t r2l, r2h, r1hl; \ + uint64_t ret = 0; \ + \ + ret = ((r1 + 2) & 0xffff); \ + r2l = 
extract64(r2, 0, 16); \ + r2h = extract64(r2, 16, 16); \ + r1hl = extract64(r1, 32, 16); \ + \ + if ((r2l op ## = r2h) && (r2l op r1hl)) { \ + ret |= (r2l & 0xffff) << 32; \ + ret |= extract64(r1, 0, 16) << 16; \ + } else if ((r2h op r2l) && (r2h op r1hl)) { \ + ret |= extract64(r2, 16, 16) << 32; \ + ret |= extract64(r1 + 1, 0, 16) << 16; \ + } else { \ + ret |= r1 & 0xffffffff0000ull; \ + } \ + return ret; \ +} EXTREMA_H_B(max, >) EXTREMA_H_B(min, <) @@ -1116,6 +1160,48 @@ uint32_t helper_parity(target_ulong r1) return ret; } +uint32_t helper_pack(uint32_t carry, uint32_t r1_low, uint32_t r1_high, + target_ulong r2) +{ + uint32_t ret; + int32_t fp_exp, fp_frac, temp_exp, fp_exp_frac; + int32_t int_exp = r1_high; + int32_t int_mant = r1_low; + uint32_t flag_rnd = (int_mant & (1 << 7)) && ( + (int_mant & (1 << 8)) || + (int_mant & 0x7f) || + (carry != 0)); + if (((int_mant & (1<<31)) == 0) && (int_exp == 255)) { + fp_exp = 255; + fp_frac = extract32(int_mant, 8, 23); + } else if ((int_mant & (1<<31)) && (int_exp >= 127)) { + fp_exp = 255; + fp_frac = 0; + } else if ((int_mant & (1<<31)) && (int_exp <= -128)) { + fp_exp = 0; + fp_frac = 0; + } else if (int_mant == 0) { + fp_exp = 0; + fp_frac = 0; + } else { + if (((int_mant & (1 << 31)) == 0)) { + temp_exp = 0; + } else { + temp_exp = int_exp + 128; + } + fp_exp_frac = (((temp_exp & 0xff) << 23) | + extract32(int_mant, 8, 23)) + + flag_rnd; + fp_exp = extract32(fp_exp_frac, 23, 8); + fp_frac = extract32(fp_exp_frac, 0, 23); + } + ret = r2 & (1 << 31); + ret = ret + (fp_exp << 23); + ret = ret + (fp_frac & 0x7fffff); + + return ret; +} + uint64_t helper_unpack(target_ulong arg1) { int32_t fp_exp = extract32(arg1, 23, 8); @@ -1244,6 +1330,80 @@ uint64_t helper_dvinit_h_131(CPUTriCoreState *env, uint32_t r1, uint32_t r2) return ret; } +uint64_t helper_dvadj(uint64_t r1, uint32_t r2) +{ + int32_t x_sign = (r1 >> 63); + int32_t q_sign = x_sign ^ (r2 >> 31); + int32_t eq_pos = x_sign & ((r1 >> 32) == r2); + int32_t 
eq_neg = x_sign & ((r1 >> 32) == -r2); + uint32_t quotient; + uint64_t ret, remainder; + + if ((q_sign & ~eq_neg) | eq_pos) { + quotient = (r1 + 1) & 0xffffffff; + } else { + quotient = r1 & 0xffffffff; + } + + if (eq_pos | eq_neg) { + remainder = 0; + } else { + remainder = (r1 & 0xffffffff00000000ull); + } + ret = remainder|quotient; + return ret; +} + +uint64_t helper_dvstep(uint64_t r1, uint32_t r2) +{ + int32_t dividend_sign = extract64(r1, 63, 1); + int32_t divisor_sign = extract32(r2, 31, 1); + int32_t quotient_sign = (dividend_sign != divisor_sign); + int32_t addend, dividend_quotient, remainder; + int32_t i, temp; + + if (quotient_sign) { + addend = r2; + } else { + addend = -r2; + } + dividend_quotient = (int32_t)r1; + remainder = (int32_t)(r1 >> 32); + + for (i = 0; i < 8; i++) { + remainder = (remainder << 1) | extract32(dividend_quotient, 31, 1); + dividend_quotient <<= 1; + temp = remainder + addend; + if ((temp < 0) == dividend_sign) { + remainder = temp; + } + if (((temp < 0) == dividend_sign)) { + dividend_quotient = dividend_quotient | !quotient_sign; + } else { + dividend_quotient = dividend_quotient | quotient_sign; + } + } + return ((uint64_t)remainder << 32) | (uint32_t)dividend_quotient; +} + +uint64_t helper_dvstep_u(uint64_t r1, uint32_t r2) +{ + int32_t dividend_quotient = extract64(r1, 0, 32); + int64_t remainder = extract64(r1, 32, 32); + int32_t i; + int64_t temp; + for (i = 0; i < 8; i++) { + remainder = (remainder << 1) | extract32(dividend_quotient, 31, 1); + dividend_quotient <<= 1; + temp = (remainder & 0xffffffff) - r2; + if (temp >= 0) { + remainder = temp; + } + dividend_quotient = dividend_quotient | !(temp < 0); + } + return ((uint64_t)remainder << 32) | (uint32_t)dividend_quotient; +} + uint64_t helper_mul_h(uint32_t arg00, uint32_t arg01, uint32_t arg10, uint32_t arg11, uint32_t n) { diff --git a/target-tricore/translate.c b/target-tricore/translate.c index 8943a39f94..a73b7000b4 100644 --- a/target-tricore/translate.c +++ 
b/target-tricore/translate.c @@ -182,6 +182,18 @@ void tricore_cpu_dump_state(CPUState *cs, FILE *f, tcg_temp_free(arg11); \ } while (0) +#define GEN_HELPER_RRR(name, rl, rh, al1, ah1, arg2) do { \ + TCGv_i64 ret = tcg_temp_new_i64(); \ + TCGv_i64 arg1 = tcg_temp_new_i64(); \ + \ + tcg_gen_concat_i32_i64(arg1, al1, ah1); \ + gen_helper_##name(ret, arg1, arg2); \ + tcg_gen_extr_i64_i32(rl, rh, ret); \ + \ + tcg_temp_free_i64(ret); \ + tcg_temp_free_i64(arg1); \ +} while (0) + #define EA_ABS_FORMAT(con) (((con & 0x3C000) << 14) + (con & 0x3FFF)) #define EA_B_ABSOLUT(con) (((offset & 0xf00000) << 8) | \ ((offset & 0x0fffff) << 1)) @@ -820,6 +832,45 @@ static inline void gen_subc_CC(TCGv ret, TCGv r1, TCGv r2) tcg_temp_free(temp); } +static inline void gen_cond_sub(TCGCond cond, TCGv r1, TCGv r2, TCGv r3, + TCGv r4) +{ + TCGv temp = tcg_temp_new(); + TCGv temp2 = tcg_temp_new(); + TCGv result = tcg_temp_new(); + TCGv mask = tcg_temp_new(); + TCGv t0 = tcg_const_i32(0); + + /* create mask for sticky bits */ + tcg_gen_setcond_tl(cond, mask, r4, t0); + tcg_gen_shli_tl(mask, mask, 31); + + tcg_gen_sub_tl(result, r1, r2); + /* Calc PSW_V */ + tcg_gen_xor_tl(temp, result, r1); + tcg_gen_xor_tl(temp2, r1, r2); + tcg_gen_and_tl(temp, temp, temp2); + tcg_gen_movcond_tl(cond, cpu_PSW_V, r4, t0, temp, cpu_PSW_V); + /* Set PSW_SV */ + tcg_gen_and_tl(temp, temp, mask); + tcg_gen_or_tl(cpu_PSW_SV, temp, cpu_PSW_SV); + /* calc AV bit */ + tcg_gen_add_tl(temp, result, result); + tcg_gen_xor_tl(temp, temp, result); + tcg_gen_movcond_tl(cond, cpu_PSW_AV, r4, t0, temp, cpu_PSW_AV); + /* calc SAV bit */ + tcg_gen_and_tl(temp, temp, mask); + tcg_gen_or_tl(cpu_PSW_SAV, temp, cpu_PSW_SAV); + /* write back result */ + tcg_gen_movcond_tl(cond, r3, r4, t0, result, r1); + + tcg_temp_free(t0); + tcg_temp_free(temp); + tcg_temp_free(temp2); + tcg_temp_free(result); + tcg_temp_free(mask); +} + static inline void gen_abs(TCGv ret, TCGv r1) { TCGv temp = tcg_temp_new(); @@ -5042,6 +5093,99 @@ static 
void decode_rrpw_extract_insert(CPUTriCoreState *env, DisasContext *ctx) } } +/* RRR format */ +static void decode_rrr_cond_select(CPUTriCoreState *env, DisasContext *ctx) +{ + uint32_t op2; + int r1, r2, r3, r4; + TCGv temp; + + op2 = MASK_OP_RRR_OP2(ctx->opcode); + r1 = MASK_OP_RRR_S1(ctx->opcode); + r2 = MASK_OP_RRR_S2(ctx->opcode); + r3 = MASK_OP_RRR_S3(ctx->opcode); + r4 = MASK_OP_RRR_D(ctx->opcode); + + switch (op2) { + case OPC2_32_RRR_CADD: + gen_cond_add(TCG_COND_NE, cpu_gpr_d[r1], cpu_gpr_d[r2], + cpu_gpr_d[r4], cpu_gpr_d[r3]); + break; + case OPC2_32_RRR_CADDN: + gen_cond_add(TCG_COND_EQ, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r4], + cpu_gpr_d[r3]); + break; + case OPC2_32_RRR_CSUB: + gen_cond_sub(TCG_COND_NE, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r4], + cpu_gpr_d[r3]); + break; + case OPC2_32_RRR_CSUBN: + gen_cond_sub(TCG_COND_EQ, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r4], + cpu_gpr_d[r3]); + break; + case OPC2_32_RRR_SEL: + temp = tcg_const_i32(0); + tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, + cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_temp_free(temp); + break; + case OPC2_32_RRR_SELN: + temp = tcg_const_i32(0); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, + cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_temp_free(temp); + break; + } +} + +static void decode_rrr_divide(CPUTriCoreState *env, DisasContext *ctx) +{ + uint32_t op2; + + int r1, r2, r3, r4; + + op2 = MASK_OP_RRR_OP2(ctx->opcode); + r1 = MASK_OP_RRR_S1(ctx->opcode); + r2 = MASK_OP_RRR_S2(ctx->opcode); + r3 = MASK_OP_RRR_S3(ctx->opcode); + r4 = MASK_OP_RRR_D(ctx->opcode); + + switch (op2) { + case OPC2_32_RRR_DVADJ: + GEN_HELPER_RRR(dvadj, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + break; + case OPC2_32_RRR_DVSTEP: + GEN_HELPER_RRR(dvstep, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + break; + case OPC2_32_RRR_DVSTEP_U: + GEN_HELPER_RRR(dvstep_u, cpu_gpr_d[r4], 
cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + break; + case OPC2_32_RRR_IXMAX: + GEN_HELPER_RRR(ixmax, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + break; + case OPC2_32_RRR_IXMAX_U: + GEN_HELPER_RRR(ixmax_u, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + break; + case OPC2_32_RRR_IXMIN: + GEN_HELPER_RRR(ixmin, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + break; + case OPC2_32_RRR_IXMIN_U: + GEN_HELPER_RRR(ixmin_u, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + break; + case OPC2_32_RRR_PACK: + gen_helper_pack(cpu_gpr_d[r4], cpu_PSW_C, cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1]); + break; + } +} + static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx) { int op1; @@ -5325,6 +5469,12 @@ static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx) tcg_temp_free(temp); } break; +/* RRR Format */ + case OPCM_32_RRR_COND_SELECT: + decode_rrr_cond_select(env, ctx); + break; + case OPCM_32_RRR_DIVIDE: + decode_rrr_divide(env, ctx); } } diff --git a/target-tricore/tricore-opcodes.h b/target-tricore/tricore-opcodes.h index 82bd161645..baf537f160 100644 --- a/target-tricore/tricore-opcodes.h +++ b/target-tricore/tricore-opcodes.h @@ -516,7 +516,7 @@ enum { OPC1_32_RRPW_DEXTR = 0x77, /* RRR Format */ OPCM_32_RRR_COND_SELECT = 0x2b, - OPCM_32_RRR_FLOAT = 0x6b, + OPCM_32_RRR_DIVIDE = 0x6b, /* RRR1 Format */ OPCM_32_RRR1_MADD = 0x83, OPCM_32_RRR1_MADDQ_H = 0x43,