author     Peter Maydell <peter.maydell@linaro.org>  2020-08-28 19:33:39 +0100
committer  Peter Maydell <peter.maydell@linaro.org>  2020-09-01 11:31:48 +0100
commit     e5adc70665ecaf4009c2fb8d66775ea718a85abd
tree       766718f65a0757cef7643adc46b548adaa36eef4 /target/arm/vec_helper.c
parent     e22705bb941d82d6c2a09e8b2031084326902be3
target/arm: Implement fp16 for Neon VMLA, VMLS operations
Convert the Neon floating-point VMLA and VMLS insns over to using a
gvec helper, and use this to implement the fp16 case.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-31-peter.maydell@linaro.org
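As a rough illustration of how the translator side hooks into the new
helper (the matching translate-neon.c.inc changes from this series are
not shown on this page), a caller could emit the invocation with QEMU's
tcg_gen_gvec_3_ptr(), passing the fp16 float_status pointer through to
the helper's stat argument. emit_vmla_fp16() and the dofs/nofs/mofs
offsets below are hypothetical stand-ins for the real decode plumbing;
tcg_gen_gvec_3_ptr(), fpstatus_ptr() and FPST_STD_F16 are existing QEMU
interfaces:

    /*
     * Illustration only: emitting a call to the new gvec helper.
     * The offset arguments are placeholders for the real Neon
     * register offsets computed by the decoder.
     */
    static void emit_vmla_fp16(uint32_t dofs, uint32_t nofs, uint32_t mofs,
                               uint32_t oprsz, uint32_t maxsz)
    {
        /* Neon fp16 insns use the "standard" fp16 float_status. */
        TCGv_ptr fpst = fpstatus_ptr(FPST_STD_F16);

        /* The helper receives this pointer as its void *stat argument. */
        tcg_gen_gvec_3_ptr(dofs, nofs, mofs, fpst, oprsz, maxsz, 0,
                           gen_helper_gvec_fmla_h);
        tcg_temp_free_ptr(fpst);
    }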
Diffstat (limited to 'target/arm/vec_helper.c')
-rw-r--r--  target/arm/vec_helper.c | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+), 0 deletions(-)
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index f551f86d5a..5da5969c1c 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -842,6 +842,48 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
 #endif
 #undef DO_3OP
+/* Non-fused multiply-add (unlike float16_muladd etc, which are fused) */
+static float16 float16_muladd_nf(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_add(dest, float16_mul(op1, op2, stat), stat);
+}
+
+static float32 float32_muladd_nf(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_add(dest, float32_mul(op1, op2, stat), stat);
+}
+
+static float16 float16_mulsub_nf(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_sub(dest, float16_mul(op1, op2, stat), stat);
+}
+
+static float32 float32_mulsub_nf(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_sub(dest, float32_mul(op1, op2, stat), stat);
+}
+
+#define DO_MULADD(NAME, FUNC, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(d[i], n[i], m[i], stat); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MULADD(gvec_fmla_h, float16_muladd_nf, float16)
+DO_MULADD(gvec_fmla_s, float32_muladd_nf, float32)
+
+DO_MULADD(gvec_fmls_h, float16_mulsub_nf, float16)
+DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32)
+
 /* For the indexed ops, SVE applies the index per 128-bit vector segment.
  * For AdvSIMD, there is of course only one such vector segment.
  */
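One point worth spelling out about the helpers added above: they
deliberately round twice. The architectural Neon VMLA/VMLS round the
product before the accumulate, whereas float16_muladd() and friends are
true fused multiply-adds with a single final rounding (the behaviour
VFMA/VFMS need). A minimal sketch of the distinction, using the same
softfloat calls as the patch; vmla_vs_vfma() is an illustrative name,
not part of the patch:

    #include "fpu/softfloat.h"

    /*
     * Sketch: non-fused vs fused multiply-add on the same operands.
     * The two results can differ in the last bit, which is why VMLA
     * cannot simply reuse the fused float16_muladd() helper.
     */
    static float16 vmla_vs_vfma(float16 dest, float16 op1, float16 op2,
                                float_status *stat)
    {
        /* Non-fused: round the product, then round the sum (VMLA). */
        float16 nf = float16_add(dest, float16_mul(op1, op2, stat), stat);

        /* Fused: one rounding of (op1 * op2) + dest (VFMA, not VMLA). */
        float16 fused = float16_muladd(op1, op2, dest, 0, stat);

        /* Often equal, but not always; return the non-fused result. */
        (void)fused;
        return nf;
    }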