mirror of
https://github.com/qemu/qemu.git
synced 2024-11-24 03:13:44 +08:00
ARM FP16 support
Implement the ARM VFP half precision floating point extensions. Signed-off-by: Paul Brook <paul@codesourcery.com>
This commit is contained in:
parent
f165b53a89
commit
600114988c
138
fpu/softfloat.c
138
fpu/softfloat.c
@@ -2457,6 +2457,144 @@ float32 float64_to_float32( float64 a STATUS_PARAM )
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
|
||||
| half-precision floating-point value, returning the result. After being
|
||||
| shifted into the proper positions, the three fields are simply added
|
||||
| together to form the result. This means that any integer portion of `zSig'
|
||||
| will be added into the exponent. Since a properly normalized significand
|
||||
| will have an integer portion equal to 1, the `zExp' input should be 1 less
|
||||
| than the desired result exponent whenever `zSig' is a complete, normalized
|
||||
| significand.
|
||||
*----------------------------------------------------------------------------*/
|
||||
static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig)
|
||||
{
|
||||
return (((bits32)zSign) << 15) + (((bits32)zExp) << 10) + zSig;
|
||||
}
|
||||
|
||||
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
|
||||
The latter gains extra exponent range by omitting the NaN/Inf encodings. */
|
||||
|
||||
float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM )
|
||||
{
|
||||
flag aSign;
|
||||
int16 aExp;
|
||||
bits32 aSig;
|
||||
|
||||
aSign = a >> 15;
|
||||
aExp = (a >> 10) & 0x1f;
|
||||
aSig = a & 0x3ff;
|
||||
|
||||
if (aExp == 0x1f && ieee) {
|
||||
if (aSig) {
|
||||
/* Make sure correct exceptions are raised. */
|
||||
float32ToCommonNaN(a STATUS_VAR);
|
||||
aSig |= 0x200;
|
||||
}
|
||||
return packFloat32(aSign, 0xff, aSig << 13);
|
||||
}
|
||||
if (aExp == 0) {
|
||||
int8 shiftCount;
|
||||
|
||||
if (aSig == 0) {
|
||||
return packFloat32(aSign, 0, 0);
|
||||
}
|
||||
|
||||
shiftCount = countLeadingZeros32( aSig ) - 21;
|
||||
aSig = aSig << shiftCount;
|
||||
aExp = -shiftCount;
|
||||
}
|
||||
return packFloat32( aSign, aExp + 0x70, aSig << 13);
|
||||
}
|
||||
|
||||
/* Convert the single-precision value `a' to half precision.  `ieee' selects
 * standard IEEE half precision; when clear, the ARM alternative format is
 * used (no Inf/NaN encodings, one extra exponent value of range).  Rounding
 * honours the current rounding mode; overflow and underflow exceptions are
 * raised as appropriate.
 */
bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    int16 aExp;
    bits32 aSig;
    bits32 mask;
    bits32 increment;
    int8 roundingMode;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        /* Inf or NaN input.  */
        if (aSig) {
            /* Make sure correct exceptions are raised.  */
            float32ToCommonNaN(a STATUS_VAR);
            aSig |= 0x00400000;  /* quieten the NaN */
        }
        /* In the alternative format this encodes as the largest magnitude
           finite value with all-ones exponent; callers rely on that.  */
        return packFloat16(aSign, 0x1f, aSig >> 13);
    }
    /* BUG FIX: the zero test must check the significand, not the sign bit.
       The original `aSign == 0' silently flushed positive denormals to +0
       with no exception while sending negative zero and negative denormals
       down the normalized-number path below.  */
    if (aExp == 0 && aSig == 0) {
        return packFloat16(aSign, 0, 0);
    }
    /* NOTE(review): denormal float32 inputs also reach this point and get
       the implicit bit OR'ed in below; they all round to zero in float16
       range, so only the exception flags are affected — confirm.  */
    /* Decimal point between bits 22 and 23.  */
    aSig |= 0x00800000;
    aExp -= 0x7f;  /* unbias */
    if (aExp < -14) {
        /* Result will be a float16 denormal (or underflow to zero):
           widen the mask to cover every bit that will be discarded.  */
        mask = 0x007fffff;
        if (aExp < -24) {
            aExp = -25;  /* clamp: everything is discarded */
        } else {
            mask >>= 24 + aExp;
        }
    } else {
        /* Normal result: the low 13 bits are discarded.  */
        mask = 0x00001fff;
    }
    if (aSig & mask) {
        /* Bits will be lost: round according to the current mode.
           NOTE(review): float_flag_inexact is not raised here even though
           bits are discarded — confirm against IEEE 754 requirements.  */
        float_raise( float_flag_underflow STATUS_VAR );
        roundingMode = STATUS(float_rounding_mode);
        switch (roundingMode) {
        case float_round_nearest_even:
            increment = (mask + 1) >> 1;
            if ((aSig & mask) == increment) {
                /* Exact tie: round to even.  */
                increment = aSig & (increment << 1);
            }
            break;
        case float_round_up:
            increment = aSign ? 0 : mask;
            break;
        case float_round_down:
            increment = aSign ? mask : 0;
            break;
        default: /* round_to_zero */
            increment = 0;
            break;
        }
        aSig += increment;
        if (aSig >= 0x01000000) {
            /* Rounding carried out of the significand.  */
            aSig >>= 1;
            aExp++;
        }
    } else if (aExp < -14
          && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
        float_raise( float_flag_underflow STATUS_VAR);
    }

    if (ieee) {
        if (aExp > 15) {
            /* IEEE format overflows to infinity.  */
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0);
        }
    } else {
        if (aExp > 16) {
            /* Alternative format overflows to the maximum magnitude.  */
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
    }
    if (aExp < -24) {
        /* Underflow to (signed) zero.  */
        return packFloat16(aSign, 0, 0);
    }
    if (aExp < -14) {
        /* Denormal result: shift into denormal position, exponent field 0.  */
        aSig >>= -14 - aExp;
        aExp = -14;
    }
    /* Rebias (+15) less one for the implicit bit carried in aSig bit 23.  */
    return packFloat16(aSign, aExp + 14, aSig >> 13);
}
|
||||
|
||||
#ifdef FLOATX80
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
|
@@ -242,6 +242,12 @@ floatx80 int64_to_floatx80( int64_t STATUS_PARAM );
|
||||
float128 int64_to_float128( int64_t STATUS_PARAM );
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Software half-precision conversion routines.
|
||||
*----------------------------------------------------------------------------*/
|
||||
bits16 float32_to_float16( float32, flag STATUS_PARAM );
|
||||
float32 float16_to_float32( bits16, flag STATUS_PARAM );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Software IEC/IEEE single-precision conversion routines.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
@@ -337,6 +337,7 @@ enum arm_features {
|
||||
ARM_FEATURE_THUMB2,
|
||||
ARM_FEATURE_MPU, /* Only has Memory Protection Unit, not full MMU. */
|
||||
ARM_FEATURE_VFP3,
|
||||
ARM_FEATURE_VFP_FP16,
|
||||
ARM_FEATURE_NEON,
|
||||
ARM_FEATURE_DIV,
|
||||
ARM_FEATURE_M, /* Microcontroller profile. */
|
||||
|
@@ -115,6 +115,7 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
|
||||
set_feature(env, ARM_FEATURE_THUMB2);
|
||||
set_feature(env, ARM_FEATURE_VFP);
|
||||
set_feature(env, ARM_FEATURE_VFP3);
|
||||
set_feature(env, ARM_FEATURE_VFP_FP16);
|
||||
set_feature(env, ARM_FEATURE_NEON);
|
||||
set_feature(env, ARM_FEATURE_THUMB2EE);
|
||||
set_feature(env, ARM_FEATURE_DIV);
|
||||
@@ -2568,6 +2569,21 @@ VFP_CONV_FIX(uh, s, float32, uint16, u)
|
||||
VFP_CONV_FIX(ul, s, float32, uint32, u)
|
||||
#undef VFP_CONV_FIX
|
||||
|
||||
/* Half precision conversions. */
|
||||
float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
|
||||
{
|
||||
float_status *s = &env->vfp.fp_status;
|
||||
int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
|
||||
return float16_to_float32(a, ieee, s);
|
||||
}
|
||||
|
||||
uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
{
    float_status *fpst = &env->vfp.fp_status;
    /* FPSCR bit 26 selects the ARM alternative half-precision format;
       when it is clear, standard IEEE format is used.  */
    int ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26));
    return float32_to_float16(a, ieee, fpst);
}
|
||||
|
||||
float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
|
||||
{
|
||||
float_status *s = &env->vfp.fp_status;
|
||||
|
@@ -131,6 +131,9 @@ DEF_HELPER_3(vfp_sltod, f64, f64, i32, env)
|
||||
DEF_HELPER_3(vfp_uhtod, f64, f64, i32, env)
|
||||
DEF_HELPER_3(vfp_ultod, f64, f64, i32, env)
|
||||
|
||||
DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
|
||||
DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
|
||||
|
||||
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
|
||||
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
|
||||
DEF_HELPER_2(recpe_f32, f32, f32, env)
|
||||
|
@@ -2974,6 +2974,47 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
|
||||
case 3: /* sqrt */
|
||||
gen_vfp_sqrt(dp);
|
||||
break;
|
||||
case 4: /* vcvtb.f32.f16 */
|
||||
if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
|
||||
return 1;
|
||||
tmp = gen_vfp_mrs();
|
||||
tcg_gen_ext16u_i32(tmp, tmp);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
|
||||
dead_tmp(tmp);
|
||||
break;
|
||||
case 5: /* vcvtt.f32.f16 */
|
||||
if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
|
||||
return 1;
|
||||
tmp = gen_vfp_mrs();
|
||||
tcg_gen_shri_i32(tmp, tmp, 16);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
|
||||
dead_tmp(tmp);
|
||||
break;
|
||||
case 6: /* vcvtb.f16.f32 */
|
||||
if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
|
||||
return 1;
|
||||
tmp = new_tmp();
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
|
||||
gen_mov_F0_vreg(0, rd);
|
||||
tmp2 = gen_vfp_mrs();
|
||||
tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
|
||||
tcg_gen_or_i32(tmp, tmp, tmp2);
|
||||
dead_tmp(tmp2);
|
||||
gen_vfp_msr(tmp);
|
||||
break;
|
||||
case 7: /* vcvtt.f16.f32 */
|
||||
if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
|
||||
return 1;
|
||||
tmp = new_tmp();
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
|
||||
tcg_gen_shli_i32(tmp, tmp, 16);
|
||||
gen_mov_F0_vreg(0, rd);
|
||||
tmp2 = gen_vfp_mrs();
|
||||
tcg_gen_ext16u_i32(tmp2, tmp2);
|
||||
tcg_gen_or_i32(tmp, tmp, tmp2);
|
||||
dead_tmp(tmp2);
|
||||
gen_vfp_msr(tmp);
|
||||
break;
|
||||
case 8: /* cmp */
|
||||
gen_vfp_cmp(dp);
|
||||
break;
|
||||
@@ -5328,6 +5369,50 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
|
||||
neon_store_reg64(cpu_V0, rd + pass);
|
||||
}
|
||||
break;
|
||||
case 44: /* VCVT.F16.F32 */
|
||||
if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
|
||||
return 1;
|
||||
tmp = new_tmp();
|
||||
tmp2 = new_tmp();
|
||||
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
|
||||
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
|
||||
tcg_gen_shli_i32(tmp2, tmp2, 16);
|
||||
tcg_gen_or_i32(tmp2, tmp2, tmp);
|
||||
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
|
||||
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
|
||||
neon_store_reg(rd, 0, tmp2);
|
||||
tmp2 = new_tmp();
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
|
||||
tcg_gen_shli_i32(tmp2, tmp2, 16);
|
||||
tcg_gen_or_i32(tmp2, tmp2, tmp);
|
||||
neon_store_reg(rd, 1, tmp2);
|
||||
dead_tmp(tmp);
|
||||
break;
|
||||
case 46: /* VCVT.F32.F16 */
|
||||
if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
|
||||
return 1;
|
||||
tmp3 = new_tmp();
|
||||
tmp = neon_load_reg(rm, 0);
|
||||
tmp2 = neon_load_reg(rm, 1);
|
||||
tcg_gen_ext16u_i32(tmp3, tmp);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
||||
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
|
||||
tcg_gen_shri_i32(tmp3, tmp, 16);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
||||
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
|
||||
dead_tmp(tmp);
|
||||
tcg_gen_ext16u_i32(tmp3, tmp2);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
||||
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
|
||||
tcg_gen_shri_i32(tmp3, tmp2, 16);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
|
||||
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
|
||||
dead_tmp(tmp2);
|
||||
dead_tmp(tmp3);
|
||||
break;
|
||||
default:
|
||||
elementwise:
|
||||
for (pass = 0; pass < (q ? 4 : 2); pass++) {
|
||||
|
Loading…
Reference in New Issue
Block a user