aboutsummaryrefslogtreecommitdiff
path: root/gcc/testsuite
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/testsuite')
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c73
-rw-r--r--gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c81
-rw-r--r--gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c9
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/vdot-compile.c56
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/vdot-exec.c55
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h16
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/vect-dot-s8.c12
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/vect-dot-u8.c12
-rw-r--r--gcc/testsuite/lib/target-supports.exp87
13 files changed, 430 insertions, 1 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
index dc4f52019d5..ac674749b6f 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
@@ -1,4 +1,7 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */
+/* { dg-add-options arm_v8_2a_dotprod_neon } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
index f3cc6c78c25..d020f643bb8 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
@@ -1,4 +1,7 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */
+/* { dg-add-options arm_v8_2a_dotprod_neon } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c
new file mode 100644
index 00000000000..b7378adf8ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c
@@ -0,0 +1,73 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */
+
+#include <arm_neon.h>
+
+/* Unsigned Dot Product instructions. */
+
+uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y)
+{
+ return vdot_u32 (r, x, y);
+}
+
+uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y)
+{
+ return vdotq_u32 (r, x, y);
+}
+
+uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y)
+{
+ return vdot_lane_u32 (r, x, y, 0);
+}
+
+uint32x2_t ufoo_laneq (uint32x2_t r, uint8x8_t x, uint8x16_t y)
+{
+ return vdot_laneq_u32 (r, x, y, 0);
+}
+
+uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y)
+{
+ return vdotq_lane_u32 (r, x, y, 0);
+}
+
+uint32x4_t ufooq_laneq (uint32x4_t r, uint8x16_t x, uint8x16_t y)
+{
+ return vdotq_laneq_u32 (r, x, y, 0);
+}
+
+/* Signed Dot Product instructions. */
+
+int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y)
+{
+ return vdot_s32 (r, x, y);
+}
+
+int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y)
+{
+ return vdotq_s32 (r, x, y);
+}
+
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y)
+{
+ return vdot_lane_s32 (r, x, y, 0);
+}
+
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, int8x16_t y)
+{
+ return vdot_laneq_s32 (r, x, y, 0);
+}
+
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y)
+{
+ return vdotq_lane_s32 (r, x, y, 0);
+}
+
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, int8x16_t y)
+{
+ return vdotq_laneq_s32 (r, x, y, 0);
+}
+
+/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.8b} 2 } } */
+/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */
+/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 2 } } */
+/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c
new file mode 100644
index 00000000000..3e7cd6c2fc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c
@@ -0,0 +1,81 @@
+/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */
+/* { dg-do run { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */
+
+#include <arm_neon.h>
+
+extern void abort();
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define ORDER(x, y) y
+#else
+# define ORDER(x, y) x - y
+#endif
+
+#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2
+#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ }
+#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \
+ ARR(f, x, t1, r1); \
+ ARR(f, y, t2, r2); \
+ t3 f##_##r = {0}; \
+ f##_##r = f (f##_##r, f##_##x, f##_##y); \
+ if (f##_##r[0] != n1 || f##_##r[1] != n2) \
+ abort ();
+
+#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \
+ ARR(f, x, t1, r1); \
+ ARR(f, y, t2, r2); \
+ t3 f##_##rx = {0}; \
+ f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \
+ if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \
+ abort (); \
+ t3 f##_##rx1 = {0}; \
+ f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \
+ if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \
+ abort ();
+
+#define Px(n1,n2,n3,n4) P(n1,n2),P(n3,n4)
+#define TEST_LANEQ(t1, t2, t3, f, r1, r2, n1, n2, n3, n4, n5, n6, n7, n8) \
+ ARR(f, x, t1, r1); \
+ ARR(f, y, t2, r2); \
+ t3 f##_##rx = {0}; \
+ f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (3, 0)); \
+ if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \
+ abort (); \
+ t3 f##_##rx1 = {0}; \
+ f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (3, 1)); \
+ if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \
+ abort (); \
+ t3 f##_##rx2 = {0}; \
+ f##_##rx2 = f (f##_##rx2, f##_##x, f##_##y, ORDER (3, 2)); \
+ if (f##_##rx2[0] != n5 || f##_##rx2[1] != n6) \
+ abort (); \
+ t3 f##_##rx3 = {0}; \
+ f##_##rx3 = f (f##_##rx3, f##_##x, f##_##y, ORDER (3, 3)); \
+ if (f##_##rx3[0] != n7 || f##_##rx3[1] != n8) \
+ abort ();
+
+int
+main()
+{
+ TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24);
+ TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24);
+
+ TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24);
+ TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24);
+
+ TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
+ TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
+
+ TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
+ TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
+
+ TEST_LANEQ (uint8x8_t, uint8x16_t, uint32x2_t, vdot_laneq_u32, P(1,2), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32);
+ TEST_LANEQ (int8x8_t, int8x16_t, int32x2_t, vdot_laneq_s32, P(1,2), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32);
+
+ TEST_LANEQ (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_laneq_u32, Px(1,2,2,1), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32);
+ TEST_LANEQ (int8x16_t, int8x16_t, int32x4_t, vdotq_laneq_s32, Px(1,2,2,1), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h
new file mode 100644
index 00000000000..90b00aff95c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h
@@ -0,0 +1,15 @@
+TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+__attribute__ ((noinline)) int
+foo1(int len) {
+ int i;
+ TYPE int result = 0;
+ TYPE short prod;
+
+ for (i=0; i<len; i++) {
+ prod = X[i] * Y[i];
+ result += prod;
+ }
+ return result;
+} \ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c
new file mode 100644
index 00000000000..57b5ef82f85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */
+
+#define N 64
+#define TYPE signed
+
+#include "vect-dot-qi.h"
+
+/* { dg-final { scan-assembler-times {sdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c
new file mode 100644
index 00000000000..b2cef318500
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */
+
+#define N 64
+#define TYPE unsigned
+
+#include "vect-dot-qi.h"
+
+/* { dg-final { scan-assembler-times {udot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-compile.c b/gcc/testsuite/gcc.target/arm/simd/vdot-compile.c
new file mode 100644
index 00000000000..b3bd3bf00e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vdot-compile.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */
+/* { dg-add-options arm_v8_2a_dotprod_neon } */
+
+#include <arm_neon.h>
+
+/* Unsigned Dot Product instructions. */
+
+uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y)
+{
+ return vdot_u32 (r, x, y);
+}
+
+uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y)
+{
+ return vdotq_u32 (r, x, y);
+}
+
+uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y)
+{
+ return vdot_lane_u32 (r, x, y, 0);
+}
+
+uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y)
+{
+ return vdotq_lane_u32 (r, x, y, 0);
+}
+
+/* Signed Dot Product instructions. */
+
+int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y)
+{
+ return vdot_s32 (r, x, y);
+}
+
+int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y)
+{
+ return vdotq_s32 (r, x, y);
+}
+
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y)
+{
+ return vdot_lane_s32 (r, x, y, 0);
+}
+
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y)
+{
+ return vdotq_lane_s32 (r, x, y, 0);
+}
+
+/* { dg-final { scan-assembler-times {v[us]dot\.[us]8\td[0-9]+, d[0-9]+, d[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {v[us]dot\.[us]8\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {v[us]dot\.[us]8\td[0-9]+, d[0-9]+, d[0-9]+\[#?[0-9]\]} 2 } } */
+/* { dg-final { scan-assembler-times {v[us]dot\.[us]8\tq[0-9]+, q[0-9]+, d[0-9]+\[#?[0-9]\]} 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c
new file mode 100644
index 00000000000..054f4703394
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */
+/* { dg-add-options arm_v8_2a_dotprod_neon } */
+
+#include <arm_neon.h>
+
+extern void abort();
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define ORDER(x, y) y
+#else
+# define ORDER(x, y) x - y
+#endif
+
+#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2
+#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ }
+#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \
+ ARR(f, x, t1, r1); \
+ ARR(f, y, t2, r2); \
+ t3 f##_##r = {0}; \
+ f##_##r = f (f##_##r, f##_##x, f##_##y); \
+ if (f##_##r[0] != n1 || f##_##r[1] != n2) \
+ abort ();
+
+#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \
+ ARR(f, x, t1, r1); \
+ ARR(f, y, t2, r2); \
+ t3 f##_##rx = {0}; \
+ f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \
+ if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \
+ abort (); \
+ t3 f##_##rx1 = {0}; \
+ f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \
+ if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \
+ abort (); \
+
+int
+main()
+{
+ TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24);
+ TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24);
+
+ TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24);
+ TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24);
+
+ TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
+
+ TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
+
+ TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
+ TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h b/gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h
new file mode 100644
index 00000000000..ea5c212419d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h
@@ -0,0 +1,16 @@
+TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+__attribute__ ((noinline)) int
+foo1(int len) {
+ int i;
+ TYPE int result = 0;
+ TYPE short prod;
+
+ for (i=0; i<len; i++) {
+ prod = X[i] * Y[i];
+ result += prod;
+ }
+ return result;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/simd/vect-dot-s8.c b/gcc/testsuite/gcc.target/arm/simd/vect-dot-s8.c
new file mode 100644
index 00000000000..d0b9d8ae71a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vect-dot-s8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */
+/* { dg-add-options arm_v8_2a_dotprod_neon } */
+
+#define N 64
+#define TYPE signed
+
+#include "vect-dot-qi.h"
+
+/* { dg-final { scan-assembler-times {vsdot\.s8\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/simd/vect-dot-u8.c b/gcc/testsuite/gcc.target/arm/simd/vect-dot-u8.c
new file mode 100644
index 00000000000..957be3c9886
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vect-dot-u8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */
+/* { dg-add-options arm_v8_2a_dotprod_neon } */
+
+#define N 64
+#define TYPE unsigned
+
+#include "vect-dot-qi.h"
+
+/* { dg-final { scan-assembler-times {vudot\.u8\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 677230c1406..290704dabea 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4156,6 +4156,51 @@ proc check_effective_target_arm_v8_2a_fp16_neon_ok { } {
check_effective_target_arm_v8_2a_fp16_neon_ok_nocache]
}
+# Return 1 if the target supports ARMv8.2 Adv.SIMD Dot Product
+# instructions, 0 otherwise. The test is valid for ARM and for AArch64.
+# Record the command line options needed.
+
+proc check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } {
+ global et_arm_v8_2a_dotprod_neon_flags
+ set et_arm_v8_2a_dotprod_neon_flags ""
+
+ if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } {
+ return 0;
+ }
+
+ # Iterate through sets of options to find the compiler flags that
+ # need to be added to the -march option.
+ foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" "-mfloat-abi=hard -mfpu=neon-fp-armv8"} {
+ if { [check_no_compiler_messages_nocache \
+ arm_v8_2a_dotprod_neon_ok object {
+ #if !defined (__ARM_FEATURE_DOTPROD)
+ #error "__ARM_FEATURE_DOTPROD not defined"
+ #else
+ /* Make sure including arm_neon.h is OK. */
+ #include <arm_neon.h>
+ #endif
+ } "$flags -march=armv8.2-a+dotprod"] } {
+ set et_arm_v8_2a_dotprod_neon_flags "$flags -march=armv8.2-a+dotprod"
+ return 1
+ }
+ }
+
+ return 0;
+}
+
+proc check_effective_target_arm_v8_2a_dotprod_neon_ok { } {
+ return [check_cached_effective_target arm_v8_2a_dotprod_neon_ok \
+ check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache]
+}
+
+proc add_options_for_arm_v8_2a_dotprod_neon { flags } {
+ if { ! [check_effective_target_arm_v8_2a_dotprod_neon_ok] } {
+ return "$flags"
+ }
+ global et_arm_v8_2a_dotprod_neon_flags
+ return "$flags $et_arm_v8_2a_dotprod_neon_flags"
+}
+
# Return 1 if the target supports executing ARMv8 NEON instructions, 0
# otherwise.
@@ -4293,6 +4338,42 @@ proc check_effective_target_arm_v8_2a_fp16_neon_hw { } {
} [add_options_for_arm_v8_2a_fp16_neon ""]]
}
+# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.2
+# with the Dot Product extension, 0 otherwise. The test is valid for ARM and for
+# AArch64.
+
+proc check_effective_target_arm_v8_2a_dotprod_neon_hw { } {
+ if { ![check_effective_target_arm_v8_2a_dotprod_neon_ok] } {
+ return 0;
+ }
+ return [check_runtime arm_v8_2a_dotprod_neon_hw_available {
+ #include "arm_neon.h"
+ int
+ main (void)
+ {
+
+ uint32x2_t results = {0,0};
+ uint8x8_t a = {1,1,1,1,2,2,2,2};
+ uint8x8_t b = {2,2,2,2,3,3,3,3};
+
+ #ifdef __ARM_ARCH_ISA_A64
+ asm ("udot %0.2s, %1.8b, %2.8b"
+ : "=w"(results)
+ : "w"(a), "w"(b)
+ : /* No clobbers. */);
+
+ #else
+ asm ("vudot.u8 %P0, %P1, %P2"
+ : "=w"(results)
+ : "w"(a), "w"(b)
+ : /* No clobbers. */);
+ #endif
+
+ return (results[0] == 8 && results[1] == 24) ? 1 : 0;
+ }
+ } [add_options_for_arm_v8_2a_dotprod_neon ""]]
+}
+
# Return 1 if this is a ARM target with NEON enabled.
proc check_effective_target_arm_neon { } {
@@ -5563,6 +5644,8 @@ proc check_effective_target_vect_sdot_qi { } {
} else {
set et_vect_sdot_qi_saved($et_index) 0
if { [istarget ia64-*-*]
+ || [istarget aarch64*-*-*]
+ || [istarget arm*-*-*]
|| ([istarget mips*-*-*]
&& [et-is-effective-target mips_msa]) } {
set et_vect_udot_qi_saved 1
@@ -5587,6 +5670,8 @@ proc check_effective_target_vect_udot_qi { } {
} else {
set et_vect_udot_qi_saved($et_index) 0
if { [istarget powerpc*-*-*]
+ || [istarget aarch64*-*-*]
+ || [istarget arm*-*-*]
|| [istarget ia64-*-*]
|| ([istarget mips*-*-*]
&& [et-is-effective-target mips_msa]) } {
@@ -7885,7 +7970,7 @@ proc check_effective_target_aarch64_tiny { } {
# Create functions to check that the AArch64 assembler supports the
# various architecture extensions via the .arch_extension pseudo-op.
-foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse"} {
+foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod"} {
eval [string map [list FUNC $aarch64_ext] {
proc check_effective_target_aarch64_asm_FUNC_ok { } {
if { [istarget aarch64*-*-*] } {