mirror of
https://github.com/qemu/qemu.git
synced 2024-11-25 03:43:37 +08:00
target-arm: Use standard FPSCR for Neon half-precision operations
The Neon half-precision conversion operations (VCVT.F16.F32 and VCVT.F32.F16) use ARM standard floating-point arithmetic, unlike the VFP versions (VCVTB and VCVTT).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
parent
fb91678d2c
commit
2d981da77d
@ -2623,9 +2623,8 @@ VFP_CONV_FIX(ul, s, float32, uint32, u)
|
|||||||
#undef VFP_CONV_FIX
|
#undef VFP_CONV_FIX
|
||||||
|
|
||||||
/* Half precision conversions. */
|
/* Half precision conversions. */
|
||||||
float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
|
static float32 do_fcvt_f16_to_f32(uint32_t a, CPUState *env, float_status *s)
|
||||||
{
|
{
|
||||||
float_status *s = &env->vfp.fp_status;
|
|
||||||
int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
|
int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
|
||||||
float32 r = float16_to_float32(make_float16(a), ieee, s);
|
float32 r = float16_to_float32(make_float16(a), ieee, s);
|
||||||
if (ieee) {
|
if (ieee) {
|
||||||
@ -2634,9 +2633,8 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
|
static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)
|
||||||
{
|
{
|
||||||
float_status *s = &env->vfp.fp_status;
|
|
||||||
int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
|
int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
|
||||||
float16 r = float32_to_float16(a, ieee, s);
|
float16 r = float32_to_float16(a, ieee, s);
|
||||||
if (ieee) {
|
if (ieee) {
|
||||||
@ -2645,6 +2643,26 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
|
|||||||
return float16_val(r);
|
return float16_val(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
|
||||||
|
{
|
||||||
|
return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t HELPER(neon_fcvt_f32_to_f16)(float32 a, CPUState *env)
|
||||||
|
{
|
||||||
|
return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status);
|
||||||
|
}
|
||||||
|
|
||||||
|
float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
|
||||||
|
{
|
||||||
|
return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
|
||||||
|
{
|
||||||
|
return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
|
||||||
|
}
|
||||||
|
|
||||||
float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
|
float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
|
||||||
{
|
{
|
||||||
float_status *s = &env->vfp.fp_status;
|
float_status *s = &env->vfp.fp_status;
|
||||||
|
@ -129,6 +129,8 @@ DEF_HELPER_3(vfp_ultod, f64, f64, i32, env)
|
|||||||
|
|
||||||
DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
|
DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
|
||||||
DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
|
DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
|
||||||
|
DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
|
||||||
|
DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
|
||||||
|
|
||||||
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
|
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
|
||||||
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
|
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
|
||||||
|
@ -5535,17 +5535,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
|
|||||||
tmp = new_tmp();
|
tmp = new_tmp();
|
||||||
tmp2 = new_tmp();
|
tmp2 = new_tmp();
|
||||||
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
|
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
|
||||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
|
gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
|
||||||
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
|
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
|
||||||
gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
|
gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
|
||||||
tcg_gen_shli_i32(tmp2, tmp2, 16);
|
tcg_gen_shli_i32(tmp2, tmp2, 16);
|
||||||
tcg_gen_or_i32(tmp2, tmp2, tmp);
|
tcg_gen_or_i32(tmp2, tmp2, tmp);
|
||||||
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
|
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
|
||||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
|
gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
|
||||||
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
|
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
|
||||||
neon_store_reg(rd, 0, tmp2);
|
neon_store_reg(rd, 0, tmp2);
|
||||||
tmp2 = new_tmp();
|
tmp2 = new_tmp();
|
||||||
gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
|
gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
|
||||||
tcg_gen_shli_i32(tmp2, tmp2, 16);
|
tcg_gen_shli_i32(tmp2, tmp2, 16);
|
||||||
tcg_gen_or_i32(tmp2, tmp2, tmp);
|
tcg_gen_or_i32(tmp2, tmp2, tmp);
|
||||||
neon_store_reg(rd, 1, tmp2);
|
neon_store_reg(rd, 1, tmp2);
|
||||||
@ -5558,17 +5558,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
|
|||||||
tmp = neon_load_reg(rm, 0);
|
tmp = neon_load_reg(rm, 0);
|
||||||
tmp2 = neon_load_reg(rm, 1);
|
tmp2 = neon_load_reg(rm, 1);
|
||||||
tcg_gen_ext16u_i32(tmp3, tmp);
|
tcg_gen_ext16u_i32(tmp3, tmp);
|
||||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
||||||
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
|
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
|
||||||
tcg_gen_shri_i32(tmp3, tmp, 16);
|
tcg_gen_shri_i32(tmp3, tmp, 16);
|
||||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
||||||
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
|
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
|
||||||
dead_tmp(tmp);
|
dead_tmp(tmp);
|
||||||
tcg_gen_ext16u_i32(tmp3, tmp2);
|
tcg_gen_ext16u_i32(tmp3, tmp2);
|
||||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
||||||
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
|
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
|
||||||
tcg_gen_shri_i32(tmp3, tmp2, 16);
|
tcg_gen_shri_i32(tmp3, tmp2, 16);
|
||||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
||||||
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
|
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
|
||||||
dead_tmp(tmp2);
|
dead_tmp(tmp2);
|
||||||
dead_tmp(tmp3);
|
dead_tmp(tmp3);
|
||||||
|
Loading…
Reference in New Issue
Block a user