aboutsummaryrefslogtreecommitdiff
path: root/target/arm/vec_helper.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-08-28 19:33:43 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-09-01 11:38:13 +0100
commit40fde72dda2da8d55b820fa6c5efd85814be2023 (patch)
tree05bd081acd01f3666077f489273146a2e13dd3e1 /target/arm/vec_helper.c
parentac8c62c4e5a3f24e6d47f52ec1bfb20994caefa5 (diff)
target/arm: Implement fp16 for Neon VRSQRTS
Convert the Neon VRSQRTS insn to using a gvec helper, and use this to implement the fp16 case. As with VRECPS, we adjust the phrasing of the new implementation slightly so that the fp32 version parallels the fp16 one. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200828183354.27913-35-peter.maydell@linaro.org
Diffstat (limited to 'target/arm/vec_helper.c')
-rw-r--r--target/arm/vec_helper.c30
1 files changed, 30 insertions, 0 deletions
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 0111a23024..e5bb5e395c 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -825,6 +825,33 @@ static float32 float32_recps_nf(float32 op1, float32 op2, float_status *stat)
return float32_sub(float32_two, float32_mul(op1, op2, stat), stat);
}
+/* Reciprocal square-root step. AArch32 non-fused semantics. */
+static float16 float16_rsqrts_nf(float16 op1, float16 op2, float_status *stat)
+{
+ op1 = float16_squash_input_denormal(op1, stat);
+ op2 = float16_squash_input_denormal(op2, stat);
+
+ if ((float16_is_infinity(op1) && float16_is_zero(op2)) ||
+ (float16_is_infinity(op2) && float16_is_zero(op1))) {
+ return float16_one_point_five;
+ }
+ op1 = float16_sub(float16_three, float16_mul(op1, op2, stat), stat);
+ return float16_div(op1, float16_two, stat);
+}
+
+static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat)
+{
+ op1 = float32_squash_input_denormal(op1, stat);
+ op2 = float32_squash_input_denormal(op2, stat);
+
+ if ((float32_is_infinity(op1) && float32_is_zero(op2)) ||
+ (float32_is_infinity(op2) && float32_is_zero(op1))) {
+ return float32_one_point_five;
+ }
+ op1 = float32_sub(float32_three, float32_mul(op1, op2, stat), stat);
+ return float32_div(op1, float32_two, stat);
+}
+
#define DO_3OP(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
@@ -885,6 +912,9 @@ DO_3OP(gvec_fminnum_s, float32_minnum, float32)
DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16)
DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32)
+DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
+DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)
+
#ifdef TARGET_AARCH64
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)