diff options
Diffstat (limited to 'gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics')
5 files changed, 187 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c new file mode 100644 index 00000000000..b7378adf8ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c @@ -0,0 +1,73 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#include <arm_neon.h> + +/* Unsigned Dot Product instructions. */ + +uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_u32 (r, x, y); +} + +uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_u32 (r, x, y); +} + +uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_lane_u32 (r, x, y, 0); +} + +uint32x2_t ufoo_laneq (uint32x2_t r, uint8x8_t x, uint8x16_t y) +{ + return vdot_laneq_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y) +{ + return vdotq_lane_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_laneq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_laneq_u32 (r, x, y, 0); +} + +/* Signed Dot Product instructions. */ + +int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_s32 (r, x, y); +} + +int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_s32 (r, x, y); +} + +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_lane_s32 (r, x, y, 0); +} + +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, int8x16_t y) +{ + return vdot_laneq_s32 (r, x, y, 0); +} + +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y) +{ + return vdotq_lane_s32 (r, x, y, 0); +} + +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_laneq_s32 (r, x, y, 0); +} + +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.8b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c new file mode 100644 index 00000000000..3e7cd6c2fc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c @@ -0,0 +1,81 @@ +/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */ +/* { dg-do run { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */ + +#include <arm_neon.h> + +extern void abort(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define ORDER(x, y) y +#else +# define ORDER(x, y) x - y +#endif + +#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2 +#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ } +#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##r = {0}; \ + f##_##r = f (f##_##r, f##_##x, f##_##y); \ + if (f##_##r[0] != n1 || f##_##r[1] != n2) \ + abort (); + +#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); + +#define Px(n1,n2,n3,n4) P(n1,n2),P(n3,n4) +#define TEST_LANEQ(t1, t2, t3, f, r1, r2, n1, n2, n3, n4, n5, n6, n7, n8) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (3, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (3, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); \ + t3 f##_##rx2 = {0}; \ + f##_##rx2 = f (f##_##rx2, f##_##x, f##_##y, ORDER (3, 2)); \ + if (f##_##rx2[0] != n5 || f##_##rx2[1] != n6) \ + abort (); \ + t3 f##_##rx3 = {0}; \ + f##_##rx3 = f (f##_##rx3, f##_##x, f##_##y, ORDER (3, 3)); \ + if (f##_##rx3[0] != n7 || f##_##rx3[1] != n8) \ + abort (); + +int +main() +{ + TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24); + + TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24); + + TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANEQ (uint8x8_t, uint8x16_t, uint32x2_t, vdot_laneq_u32, P(1,2), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x8_t, int8x16_t, int32x2_t, vdot_laneq_s32, P(1,2), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + TEST_LANEQ (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_laneq_u32, Px(1,2,2,1), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x16_t, int8x16_t, int32x4_t, vdotq_laneq_s32, Px(1,2,2,1), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h new file mode 100644 index 00000000000..90b00aff95c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h @@ -0,0 +1,15 @@ +TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) int +foo1(int len) { + int i; + TYPE int result = 0; + TYPE short prod; + + for (i=0; i<len; i++) { + prod = X[i] * Y[i]; + result += prod; + } + return result; +}
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c new file mode 100644 index 00000000000..57b5ef82f85 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#define N 64 +#define TYPE signed + +#include "vect-dot-qi.h" + +/* { dg-final { scan-assembler-times {sdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c new file mode 100644 index 00000000000..b2cef318500 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#define N 64 +#define TYPE unsigned + +#include "vect-dot-qi.h" + +/* { dg-final { scan-assembler-times {udot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */ |