From 4e624edaebec9312be7b63096bbe5b2fe76f3613 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:23 +0100 Subject: [PATCH] target-arm: add support for v8 VMULL.P64 instruction Add support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication instruction in the A32/T32 instruction sets; this is part of the v8 Crypto Extensions. To do this we have to move the neon_pmull_64_{lo,hi} helpers from helper-a64.c into neon_helper.c so they can be used by the AArch32 translator. Inspired-by: Ard Biesheuvel Signed-off-by: Peter Maydell Message-id: 1401386724-26529-4-git-send-email-peter.maydell@linaro.org --- linux-user/elfload.c | 1 + target-arm/cpu.c | 1 + target-arm/cpu.h | 1 + target-arm/helper-a64.c | 30 ------------------------------ target-arm/helper-a64.h | 2 -- target-arm/helper.h | 3 +++ target-arm/neon_helper.c | 30 ++++++++++++++++++++++++++++++ target-arm/translate.c | 26 +++++++++++++++++++++++++- 8 files changed, 61 insertions(+), 33 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 9bda262419..3241fec035 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -468,6 +468,7 @@ static uint32_t get_elf_hwcap2(void) uint32_t hwcaps = 0; GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES); + GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL); GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1); GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2); GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32); diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 753f6cb1da..fb9c12d81e 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -319,6 +319,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) set_feature(env, ARM_FEATURE_V8_AES); set_feature(env, ARM_FEATURE_V8_SHA1); set_feature(env, ARM_FEATURE_V8_SHA256); + set_feature(env, ARM_FEATURE_V8_PMULL); } if (arm_feature(env, ARM_FEATURE_V7)) { set_feature(env, ARM_FEATURE_VAPA); diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 14d5f21e7a..79e7f82515 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -637,6 +637,7 @@ enum arm_features { ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */ }; static inline int arm_feature(CPUARMState *env, int feature) diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index cccda74113..f0d27229d4 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -186,36 +186,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, return result; } -/* Helper function for 64 bit polynomial multiply case: - * perform PolynomialMult(op1, op2) and return either the top or - * bottom half of the 128 bit result. - */ -uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) -{ - int bitnum; - uint64_t res = 0; - - for (bitnum = 0; bitnum < 64; bitnum++) { - if (op1 & (1ULL << bitnum)) { - res ^= op2 << bitnum; - } - } - return res; -} -uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) -{ - int bitnum; - uint64_t res = 0; - - /* bit 0 of op1 can't influence the high 64 bits at all */ - for (bitnum = 1; bitnum < 64; bitnum++) { - if (op1 & (1ULL << bitnum)) { - res ^= op2 >> (64 - bitnum); - } - } - return res; -} - /* 64bit/double versions of the neon float compare functions */ uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) { diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index 3f05bedcca..8de7536ff7 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32) -DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr) DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr) DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) diff --git a/target-arm/helper.h b/target-arm/helper.h index 113b09d35e..0ef8fcac17 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -525,6 +525,9 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_2(dc_zva, void, env, i64) +DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #endif diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index 492e500700..47d13e908c 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm) env->vfp.regs[rm] = make_float64(m0); env->vfp.regs[rd] = make_float64(d0); } + +/* Helper function for 64 bit polynomial multiply case: + * perform PolynomialMult(op1, op2) and return either the top or + * bottom half of the 128 bit result. + */ +uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + for (bitnum = 0; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 << bitnum; + } + } + return res; +} +uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + /* bit 0 of op1 can't influence the high 64 bits at all */ + for (bitnum = 1; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 >> (64 - bitnum); + } + } + return res; +} diff --git a/target-arm/translate.c b/target-arm/translate.c index 7124606723..41c3fc77b0 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -5977,7 +5977,7 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins {0, 0, 0, 9}, /* VQDMLSL */ {0, 0, 0, 0}, /* Integer VMULL */ {0, 0, 0, 1}, /* VQDMULL */ - {0, 0, 0, 15}, /* Polynomial VMULL */ + {0, 0, 0, 0xa}, /* Polynomial VMULL */ {0, 0, 0, 7}, /* Reserved: always UNDEF */ }; @@ -5996,6 +5996,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins return 1; } + /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply) + * outside the loop below as it only performs a single pass. + */ + if (op == 14 && size == 2) { + TCGv_i64 tcg_rn, tcg_rm, tcg_rd; + + if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) { + return 1; + } + tcg_rn = tcg_temp_new_i64(); + tcg_rm = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + neon_load_reg64(tcg_rn, rn); + neon_load_reg64(tcg_rm, rm); + gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm); + neon_store_reg64(tcg_rd, rd); + gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm); + neon_store_reg64(tcg_rd, rd + 1); + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rm); + tcg_temp_free_i64(tcg_rd); + return 0; + } + /* Avoid overlapping operands. Wide source operands are always aligned so will never overlap with wide destinations in problematic ways. */