From f7cd0a49635b07f902e4ce8f5323b894f0baef20 Mon Sep 17 00:00:00 2001 From: Maxim Kuvyrkov Date: Tue, 16 Jan 2018 09:35:54 +0000 Subject: gcc/ Backport from trunk r253780. 2017-10-16 Tamar Christina * config/arm/arm.h (TARGET_DOTPROD): New. * config/arm/arm.c (arm_arch_dotprod): New. (arm_option_reconfigure_globals): Add arm_arch_dotprod. * config/arm/arm-c.c (__ARM_FEATURE_DOTPROD): New. * config/arm/arm-cpus.in (armv8.2-a): Enabled +dotprod. (feature dotprod, group dotprod, ALL_SIMD_INTERNAL): New. (ALL_FPU_INTERNAL): Use ALL_SIMD_INTERNAL. * config/arm/t-multilib (v8_2_a_simd_variants): Add dotprod. * doc/invoke.texi (armv8.2-a): Document dotprod gcc/ Backport from trunk r253781. 2017-10-16 Tamar Christina * config/arm/arm-builtins.c (arm_unsigned_uternop_qualifiers): New. (UTERNOP_QUALIFIERS, arm_umac_lane_qualifiers, UMAC_LANE_QUALIFIERS): New. * config/arm/arm_neon_builtins.def (sdot, udot, sdot_lane, udot_lane): new. * config/arm/iterators.md (DOTPROD, VSI2QI, vsi2qi): New. (UNSPEC_DOT_S, UNSPEC_DOT_U, opsuffix): New. * config/arm/neon.md (neon_dot): New. (neon_dot_lane, dot_prod): New. * config/arm/types.md (neon_dot, neon_dot_q): New. * config/arm/unspecs.md (sup): Add UNSPEC_DOT_S, UNSPEC_DOT_U. gcc/ Backport from trunk r253782. 2017-10-16 Tamar Christina * config/aarch64/aarch64.h (AARCH64_FL_DOTPROD): New. (AARCH64_ISA_DOTPROD, TARGET_DOTPROD): New. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add TARGET_DOTPROD. * config/aarch64/aarch64-option-extensions.def (dotprod): New. * config/aarch64/aarch64-cores.def (cortex-a55, cortex-a75): Enable TARGET_DOTPROD. (cortex-a75.cortex-a55): Likewise. * doc/invoke.texi (aarch64-feature-modifiers): Document dotprod. gcc/ Backport from trunk r253783. 2017-10-16 Tamar Christina * config/aarch64/aarch64-builtins.c (aarch64_types_quadopu_lane_qualifiers): New. (TYPES_QUADOPU_LANE): New. * config/aarch64/aarch64-simd.md (aarch64_dot): New. (dot_prod, aarch64_dot_lane): New. (aarch64_dot_laneq): New. * config/aarch64/aarch64-simd-builtins.def (sdot, udot): New. (sdot_lane, udot_lane, sdot_laneq, udot_laneq): New. * config/aarch64/iterators.md (sur): Add UNSPEC_SDOT, UNSPEC_UDOT. (Vdottype, DOTPROD): New. (sur): Add SDOT and UDOT. gcc/ Backport from trunk r253784. 2017-10-16 Tamar Christina * config/aarch64/arm_neon.h (vdot_u32, vdotq_u32, vdot_s32, vdotq_s32): New. (vdot_lane_u32, vdot_laneq_u32, vdotq_lane_u32, vdotq_laneq_u32): New. (vdot_lane_s32, vdot_laneq_s32, vdotq_lane_s32, vdotq_laneq_s32): New. gcc/testsuite/ Backport from trunk r253784. 2017-10-16 Tamar Christina * gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h: New. * gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c: New. * gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c: New. * gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c: New. gcc/testsuite/ Backport from trunk r254097. 2017-10-26 Tamar Christina * gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h: New. * gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c: New. * gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c: New. * gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c: New. gcc/testsuite/ Backport from trunk r254100. 2017-10-26 Tamar Christina * lib/target-supports.exp (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): New. (check_effective_target_arm_v8_2a_dotprod_neon_ok): New. (add_options_for_arm_v8_2a_dotprod_neon): New. (check_effective_target_arm_v8_2a_dotprod_neon_hw): New. (check_effective_target_vect_sdot_qi): Add ARM && AArch64. (check_effective_target_vect_udot_qi): Likewise. * gcc.target/arm/simd/vdot-exec.c: New. * gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New. * gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon. gcc/testsuite/ Backport from trunk r254101. 2017-10-26 Tamar Christina * gcc.dg/vect/vect-reduc-dot-s8a.c (dg-additional-options, dg-require-effective-target): Add +dotprod. * gcc.dg/vect/vect-reduc-dot-u8a.c (dg-additional-options, dg-require-effective-target): Add +dotprod. gcc/ Backport from trunk r254775. 2017-11-15 Tamar Christina * config/arm/arm.h (TARGET_DOTPROD): Add arm_arch8_2. gcc/ Backport from trunk r255064. 2017-11-22 Tamar Christina * config/arm/arm_neon.h (vdot_u32, vdotq_u32) (vdot_s32, vdotq_s32): New. (vdot_lane_u32, vdotq_lane_u32): New. (vdot_lane_s32, vdotq_lane_s32): New. gcc/testsuite/ Backport from trunk r255064. 2017-11-22 Tamar Christina * gcc.target/arm/simd/vdot-compile.c: New. * gcc.target/arm/simd/vect-dot-qi.h: New. * gcc.target/arm/simd/vect-dot-s8.c: New. * gcc.target/arm/simd/vect-dot-u8.c: New gcc/ Backport from trunk r255126. 2017-11-24 Christophe Lyon * config/arm/arm_neon.h: Fix pragma GCC push_options before vdot_u32. Change-Id: I469f803e36949046f8e339cfd9d1556170ce4a90 --- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c | 3 + gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c | 3 + .../aarch64/advsimd-intrinsics/vdot-compile.c | 73 ++++++++++++++++++ .../aarch64/advsimd-intrinsics/vdot-exec.c | 81 ++++++++++++++++++++ .../aarch64/advsimd-intrinsics/vect-dot-qi.h | 15 ++++ .../aarch64/advsimd-intrinsics/vect-dot-s8.c | 9 +++ .../aarch64/advsimd-intrinsics/vect-dot-u8.c | 9 +++ gcc/testsuite/gcc.target/arm/simd/vdot-compile.c | 56 ++++++++++++++ gcc/testsuite/gcc.target/arm/simd/vdot-exec.c | 55 ++++++++++++++ gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h | 16 ++++ gcc/testsuite/gcc.target/arm/simd/vect-dot-s8.c | 12 +++ gcc/testsuite/gcc.target/arm/simd/vect-dot-u8.c | 12 +++ gcc/testsuite/lib/target-supports.exp | 87 +++++++++++++++++++++- 13 files changed, 430 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vdot-compile.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vdot-exec.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h create mode 100644 gcc/testsuite/gcc.target/arm/simd/vect-dot-s8.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vect-dot-u8.c (limited to 'gcc/testsuite') diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c index dc4f52019d5..ac674749b6f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c @@ -1,4 +1,7 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ #include #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c index f3cc6c78c25..d020f643bb8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c @@ -1,4 +1,7 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ #include #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c new file mode 100644 index 00000000000..b7378adf8ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c @@ -0,0 +1,73 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#include + +/* Unsigned Dot Product instructions. */ + +uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_u32 (r, x, y); +} + +uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_u32 (r, x, y); +} + +uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_lane_u32 (r, x, y, 0); +} + +uint32x2_t ufoo_laneq (uint32x2_t r, uint8x8_t x, uint8x16_t y) +{ + return vdot_laneq_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y) +{ + return vdotq_lane_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_laneq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_laneq_u32 (r, x, y, 0); +} + +/* Signed Dot Product instructions. */ + +int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_s32 (r, x, y); +} + +int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_s32 (r, x, y); +} + +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_lane_s32 (r, x, y, 0); +} + +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, int8x16_t y) +{ + return vdot_laneq_s32 (r, x, y, 0); +} + +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y) +{ + return vdotq_lane_s32 (r, x, y, 0); +} + +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_laneq_s32 (r, x, y, 0); +} + +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.8b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c new file mode 100644 index 00000000000..3e7cd6c2fc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c @@ -0,0 +1,81 @@ +/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */ +/* { dg-do run { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */ + +#include + +extern void abort(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define ORDER(x, y) y +#else +# define ORDER(x, y) x - y +#endif + +#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2 +#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ } +#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##r = {0}; \ + f##_##r = f (f##_##r, f##_##x, f##_##y); \ + if (f##_##r[0] != n1 || f##_##r[1] != n2) \ + abort (); + +#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); + +#define Px(n1,n2,n3,n4) P(n1,n2),P(n3,n4) +#define TEST_LANEQ(t1, t2, t3, f, r1, r2, n1, n2, n3, n4, n5, n6, n7, n8) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (3, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (3, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); \ + t3 f##_##rx2 = {0}; \ + f##_##rx2 = f (f##_##rx2, f##_##x, f##_##y, ORDER (3, 2)); \ + if (f##_##rx2[0] != n5 || f##_##rx2[1] != n6) \ + abort (); \ + t3 f##_##rx3 = {0}; \ + f##_##rx3 = f (f##_##rx3, f##_##x, f##_##y, ORDER (3, 3)); \ + if (f##_##rx3[0] != n7 || f##_##rx3[1] != n8) \ + abort (); + +int +main() +{ + TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24); + + TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24); + + TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANEQ (uint8x8_t, uint8x16_t, uint32x2_t, vdot_laneq_u32, P(1,2), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x8_t, int8x16_t, int32x2_t, vdot_laneq_s32, P(1,2), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + TEST_LANEQ (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_laneq_u32, Px(1,2,2,1), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x16_t, int8x16_t, int32x4_t, vdotq_laneq_s32, Px(1,2,2,1), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h new file mode 100644 index 00000000000..90b00aff95c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h @@ -0,0 +1,15 @@ +TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) int +foo1(int len) { + int i; + TYPE int result = 0; + TYPE short prod; + + for (i=0; i + +/* Unsigned Dot Product instructions. */ + +uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_u32 (r, x, y); +} + +uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_u32 (r, x, y); +} + +uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_lane_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y) +{ + return vdotq_lane_u32 (r, x, y, 0); +} + +/* Signed Dot Product instructions. */ + +int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_s32 (r, x, y); +} + +int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_s32 (r, x, y); +} + +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_lane_s32 (r, x, y, 0); +} + +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y) +{ + return vdotq_lane_s32 (r, x, y, 0); +} + +/* { dg-final { scan-assembler-times {v[us]dot\.[us]8\td[0-9]+, d[0-9]+, d[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {v[us]dot\.[us]8\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {v[us]dot\.[us]8\td[0-9]+, d[0-9]+, d[0-9]+\[#?[0-9]\]} 2 } } */ +/* { dg-final { scan-assembler-times {v[us]dot\.[us]8\tq[0-9]+, q[0-9]+, d[0-9]+\[#?[0-9]\]} 2 } } */ + diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c new file mode 100644 index 00000000000..054f4703394 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3" } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ + +#include + +extern void abort(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define ORDER(x, y) y +#else +# define ORDER(x, y) x - y +#endif + +#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2 +#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ } +#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##r = {0}; \ + f##_##r = f (f##_##r, f##_##x, f##_##y); \ + if (f##_##r[0] != n1 || f##_##r[1] != n2) \ + abort (); + +#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); \ + +int +main() +{ + TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24); + + TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24); + + TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + + TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h b/gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h new file mode 100644 index 00000000000..ea5c212419d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vect-dot-qi.h @@ -0,0 +1,16 @@ +TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) int +foo1(int len) { + int i; + TYPE int result = 0; + TYPE short prod; + + for (i=0; i + #endif + } "$flags -march=armv8.2-a+dotprod"] } { + set et_arm_v8_2a_dotprod_neon_flags "$flags -march=armv8.2-a+dotprod" + return 1 + } + } + + return 0; +} + +proc check_effective_target_arm_v8_2a_dotprod_neon_ok { } { + return [check_cached_effective_target arm_v8_2a_dotprod_neon_ok \ + check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache] +} + +proc add_options_for_arm_v8_2a_dotprod_neon { flags } { + if { ! [check_effective_target_arm_v8_2a_dotprod_neon_ok] } { + return "$flags" + } + global et_arm_v8_2a_dotprod_neon_flags + return "$flags $et_arm_v8_2a_dotprod_neon_flags" +} + # Return 1 if the target supports executing ARMv8 NEON instructions, 0 # otherwise. @@ -4293,6 +4338,42 @@ proc check_effective_target_arm_v8_2a_fp16_neon_hw { } { } [add_options_for_arm_v8_2a_fp16_neon ""]] } +# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.2 +# with the Dot Product extension, 0 otherwise. The test is valid for ARM and for +# AArch64. + +proc check_effective_target_arm_v8_2a_dotprod_neon_hw { } { + if { ![check_effective_target_arm_v8_2a_dotprod_neon_ok] } { + return 0; + } + return [check_runtime arm_v8_2a_dotprod_neon_hw_available { + #include "arm_neon.h" + int + main (void) + { + + uint32x2_t results = {0,0}; + uint8x8_t a = {1,1,1,1,2,2,2,2}; + uint8x8_t b = {2,2,2,2,3,3,3,3}; + + #ifdef __ARM_ARCH_ISA_A64 + asm ("udot %0.2s, %1.8b, %2.8b" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + + #else + asm ("vudot.u8 %P0, %P1, %P2" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + #endif + + return (results[0] == 8 && results[1] == 24) ? 1 : 0; + } + } [add_options_for_arm_v8_2a_dotprod_neon ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { @@ -5563,6 +5644,8 @@ proc check_effective_target_vect_sdot_qi { } { } else { set et_vect_sdot_qi_saved($et_index) 0 if { [istarget ia64-*-*] + || [istarget aarch64*-*-*] + || [istarget arm*-*-*] || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) } { set et_vect_udot_qi_saved 1 @@ -5587,6 +5670,8 @@ proc check_effective_target_vect_udot_qi { } { } else { set et_vect_udot_qi_saved($et_index) 0 if { [istarget powerpc*-*-*] + || [istarget aarch64*-*-*] + || [istarget arm*-*-*] || [istarget ia64-*-*] || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) } { @@ -7885,7 +7970,7 @@ proc check_effective_target_aarch64_tiny { } { # Create functions to check that the AArch64 assembler supports the # various architecture extensions via the .arch_extension pseudo-op. -foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse"} { +foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod"} { eval [string map [list FUNC $aarch64_ext] { proc check_effective_target_aarch64_asm_FUNC_ok { } { if { [istarget aarch64*-*-*] } { -- cgit v1.2.3