diff options
author | Yvan Roux <yvan.roux@linaro.org> | 2015-12-10 12:58:54 +0100 |
---|---|---|
committer | Linaro Code Review <review@review.linaro.org> | 2015-12-31 11:28:08 +0000 |
commit | dad84c9a5ef6b373364319202be3f01036ea3fa3 (patch) | |
tree | 5a495507553b3657aa72defac9f301d8d0f059fe | |
parent | 41fa3f1b335b8274596f2db5c20162b3f30aa54a (diff) |
gcc/
Backport from trunk r230953.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64.h (AARCH64_ISA_RDMA): New.
(TARGET_SIMD_RDMA): New.
gcc/
Backport from trunk r230959.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-simd.md
(aarch64_sqmovun<mode>): Fix some white-space.
(aarch64_<sur>qmovun<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>): New.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): New.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): New.
* config/aarch64/iterators.md (UNSPEC_SQRDMLAH): New.
(UNSPEC_SQRDMLSH): New.
(SQRDMLH_AS): New.
(rdma_as): New.
gcc/
Backport from trunk r230962.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-simd-builtins.def
(sqrdmlah, sqrdmlsh): New.
(sqrdmlah_lane, sqrdmlsh_lane): New.
(sqrdmlah_laneq, sqrdmlsh_laneq): New.
gcc/
Backport from trunk r230966.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-simd-builtins.def:
Add missing changes from r230962.
gcc/
Backport from trunk r230969.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add
ARM_FEATURE_QRDMX.
gcc/testsuite/
Backport from trunk r230970.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* lib/target-supports.exp (add_options_for_arm_v8_1a_neon): New.
(check_effective_target_arm_arch_FUNC_ok)
(add_options_for_arm_arch_FUNC)
(check_effective_target_arm_arch_FUNC_multilib): Add "armv8.1-a"
to the list to be generated.
(check_effective_target_arm_v8_1a_neon_ok_nocache): New.
(check_effective_target_arm_v8_1a_neon_ok): New.
(check_effective_target_arm_v8_1a_neon_hw): New.
gcc/
Backport from trunk r230971.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New.
(vqrdmlahq_s16, vqrdmlahq_s32): New.
(vqrdmlsh_s16, vqrdmlsh_s32): New.
(vqrdmlshq_s16, vqrdmlshq_s32): New.
gcc/testsuite/
Backport from trunk r230971.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc: New file,
support code for vqrdml{as}h tests.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c: New.
gcc/
Backport from trunk r230972.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/arm_neon.h
(vqrdmlah_laneq_s16, vqrdmlah_laneq_s32): New.
(vqrdmlahq_laneq_s16, vqrdmlahq_laneq_s32): New.
(vqrdmlsh_laneq_s16, vqrdmlsh_laneq_s32): New.
(vqrdmlshq_laneq_s16, vqrdmlshq_laneq_s32): New.
(vqrdmlah_lane_s16, vqrdmlah_lane_s32): New.
(vqrdmlahq_lane_s16, vqrdmlahq_lane_s32): New.
(vqrdmlahh_s16, vqrdmlahh_lane_s16, vqrdmlahh_laneq_s16): New.
(vqrdmlahs_s32, vqrdmlahs_lane_s32, vqrdmlahs_laneq_s32): New.
(vqrdmlsh_lane_s16, vqrdmlsh_lane_s32): New.
(vqrdmlshq_lane_s16, vqrdmlshq_lane_s32): New.
(vqrdmlshh_s16, vqrdmlshh_lane_s16, vqrdmlshh_laneq_s16): New.
(vqrdmlshs_s32, vqrdmlshs_lane_s32, vqrdmlshs_laneq_s32): New.
gcc/testsuite/
Backport from trunk r230972.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc: New file,
support code for vqrdml{as}h_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c: New.
gcc/
Backport from trunk r231400.
2015-12-08 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-option-extensions.def: Remove
AARCH64_FL_RDMA from "fp" and "simd". Remove "pan", "lor",
"rdma".
* config/aarch64/aarch64.h (AARCH64_FL_PAN): Remove.
(AARCH64_FL_LOR): Remove.
(AARCH64_FL_RDMA): Remove.
(AARCH64_FL_V8_1): New.
(AARCH64_FL_FOR_ARCH8_1): Replace AARCH64_FL_PAN, AARCH64_FL_LOR
and AARCH64_FL_RDMA with AARCH64_FL_V8_1.
(AARCH64_ISA_RDMA): Replace AARCH64_FL_RDMA with AARCH64_FL_V8_1.
* doc/invoke.texi (AArch64 -march): Rewrite initial paragraph and
section on -march=native. Group descriptions of permitted
architecture names together. Expand description of
-march=armv8.1-a.
(AArch64 -mtune): Slightly rework section on -march=native.
(AArch64 -mcpu): Slightly rework section on -march=native.
(AArch64 Feature Modifiers): Remove "pan", "lor" and "rdma".
State that -march=armv8.1-a enables "crc" and "lse".
Change-Id: I635bd74c33bc7b54b7f4a19970ea45a0ce72ff66
-rw-r--r-- | gcc/config/aarch64/aarch64-c.c | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-option-extensions.def | 9 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 14 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 94 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.h | 11 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 221 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 6 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 47 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc | 138 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc | 154 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c | 57 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c | 57 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c | 61 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c | 61 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 57 |
15 files changed, 951 insertions, 37 deletions
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c index 303025f77f0..ad95c78b989 100644 --- a/gcc/config/aarch64/aarch64-c.c +++ b/gcc/config/aarch64/aarch64-c.c @@ -126,6 +126,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_ILP32, "__ILP32__", pfile); aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile); + aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile); } /* Implement TARGET_CPU_CPP_BUILTINS. */ diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index b261a0f7c3c..4f1d53515a9 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -34,11 +34,10 @@ should contain a whitespace-separated list of the strings in 'Features' that are required. Their order is not important. */ -AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp") -AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd") +AARCH64_OPT_EXTENSION ("fp", AARCH64_FL_FP, + AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp") +AARCH64_OPT_EXTENSION ("simd", AARCH64_FL_FPSIMD, + AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd") AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2") AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32") AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, AARCH64_FL_LSE, "lse") -AARCH64_OPT_EXTENSION("pan", AARCH64_FL_PAN, AARCH64_FL_PAN, "pan") -AARCH64_OPT_EXTENSION("lor", AARCH64_FL_LOR, AARCH64_FL_LOR, "lor") -AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA | AARCH64_FL_FPSIMD, AARCH64_FL_RDMA, "rdma") diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 654e963fa53..4aadbf37ed8 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ 
b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -412,3 +412,17 @@ /* Implemented by aarch64_tbx4v8qi. */ VAR1 (TERNOP, tbx4, 0, v8qi) + + /* Builtins for ARMv8.1 Adv.SIMD instructions. */ + + /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>. */ + BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0) + BUILTIN_VSDQ_HSI (TERNOP, sqrdmlsh, 0) + + /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>. */ + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_lane, 0) + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_lane, 0) + + /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>. */ + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0) + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 555f5afb469..7acf7222ccd 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2880,7 +2880,7 @@ "TARGET_SIMD" "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" [(set_attr "type" "neon_sat_shift_imm_narrow_q")] - ) +) ;; sqmovn and uqmovn @@ -2891,7 +2891,7 @@ "TARGET_SIMD" "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" [(set_attr "type" "neon_sat_shift_imm_narrow_q")] - ) +) ;; <su>q<absneg> @@ -2979,6 +2979,96 @@ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) +;; sqrdml[as]h. + +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>" + [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") + (unspec:VSDQ_HSI + [(match_operand:VSDQ_HSI 1 "register_operand" "0") + (match_operand:VSDQ_HSI 2 "register_operand" "w") + (match_operand:VSDQ_HSI 3 "register_operand" "w")] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" + [(set_attr "type" "neon_sat_mla_<Vetype>_long")] +) + +;; sqrdml[as]h_lane. 
+ +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "0") + (match_operand:VDQHS 2 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" + [(set (match_operand:SD_HSI 0 "register_operand" "=w") + (unspec:SD_HSI + [(match_operand:SD_HSI 1 "register_operand" "0") + (match_operand:SD_HSI 2 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +;; sqrdml[as]h_laneq. 
+ +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "0") + (match_operand:VDQHS 2 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + return + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>" + [(set (match_operand:SD_HSI 0 "register_operand" "=w") + (unspec:SD_HSI + [(match_operand:SD_HSI 1 "register_operand" "0") + (match_operand:SD_HSI 2 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + return + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + ;; vqdml[sa]l (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>" diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index be4a0954faa..76bcca59eab 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -136,9 +136,7 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ /* ARMv8.1 architecture extensions. */ #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ -#define AARCH64_FL_PAN (1 << 5) /* Has Privileged Access Never. */ -#define AARCH64_FL_LOR (1 << 6) /* Has Limited Ordering regions. */ -#define AARCH64_FL_RDMA (1 << 7) /* Has ARMv8.1 Adv.SIMD. */ +#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1 extensions. 
*/ /* Has FP and SIMD. */ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) @@ -149,8 +147,7 @@ extern unsigned aarch64_architecture_version; /* Architecture flags that effect instruction selection. */ #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) #define AARCH64_FL_FOR_ARCH8_1 \ - (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_PAN \ - | AARCH64_FL_LOR | AARCH64_FL_RDMA) + (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_V8_1) /* Macros to test ISA flags. */ @@ -159,6 +156,7 @@ extern unsigned aarch64_architecture_version; #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) +#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1) /* Crypto is an optional extension to AdvSIMD. */ #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) @@ -183,6 +181,9 @@ extern unsigned aarch64_architecture_version; ((aarch64_fix_a53_err835769 == 2) \ ? TARGET_FIX_ERR_A53_835769_DEFAULT : aarch64_fix_a53_err835769) +/* ARMv8.1 Adv.SIMD support. */ +#define TARGET_SIMD_RDMA (TARGET_SIMD && AARCH64_ISA_RDMA) + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index e1863488b73..b5189612684 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -11746,6 +11746,227 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); } +/* ARMv8.1 instrinsics. 
*/ +#pragma GCC push_options +#pragma GCC target ("arch=armv8.1-a") + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ 
((__always_inline__)) +vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return 
__builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c) +{ + return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c) +{ + return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ 
+ return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c) +{ + return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c) +{ + return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const 
int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d); +} +#pragma GCC pop_options + #pragma GCC push_options #pragma GCC target ("+nothing+crypto") /* vaes */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index fe7ca398c61..0d897bfb4bd 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -303,6 +303,8 @@ UNSPEC_PMULL2 ; Used in aarch64-simd.md. UNSPEC_REV_REGLIST ; Used in aarch64-simd.md. UNSPEC_VEC_SHR ; Used in aarch64-simd.md. + UNSPEC_SQRDMLAH ; Used in aarch64-simd.md. + UNSPEC_SQRDMLSH ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------ @@ -955,6 +957,8 @@ UNSPEC_SQSHRN UNSPEC_UQSHRN UNSPEC_SQRSHRN UNSPEC_UQRSHRN]) +(define_int_iterator SQRDMLH_AS [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH]) + (define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 UNSPEC_TRN1 UNSPEC_TRN2 UNSPEC_UZP1 UNSPEC_UZP2]) @@ -1129,3 +1133,5 @@ (UNSPEC_SHA1M "m")]) (define_int_attr sha256_op [(UNSPEC_SHA256H "") (UNSPEC_SHA256H2 "2")]) + +(define_int_attr rdma_as [(UNSPEC_SQRDMLAH "a") (UNSPEC_SQRDMLSH "s")]) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index fe04af2d705..7a765318658 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12332,23 +12332,27 @@ corresponding flag to the linker. @item -march=@var{name} @opindex march -Specify the name of the target architecture, optionally suffixed by one or +Specify the name of the target architecture and, optionally, one or more feature modifiers. This option has the form @option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}. -The permissible values for @var{arch} are @samp{armv8-a} or -@samp{armv8.1-a}. +The permissible values for @var{arch} are @samp{armv8-a}, +@samp{armv8.1-a} or @var{native}. 
-For the permissible values for @var{feature}, see the sub-section on -@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu} +The value @samp{armv8.1-a} implies @samp{armv8-a} and enables compiler +support for the ARMv8.1 architecture extension. In particular, it +enables the @samp{+crc} and @samp{+lse} features. + +The value @samp{native} is available on native AArch64 GNU/Linux and +causes the compiler to pick the architecture of the host system. This +option has no effect if the compiler is unable to recognize the +architecture of the host system, + +The permissible values for @var{feature} are listed in the sub-section +on @ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu} Feature Modifiers}. Where conflicting feature modifiers are specified, the right-most feature is used. -Additionally on native AArch64 GNU/Linux systems the value -@samp{native} is available. This option causes the compiler to pick the -architecture of the host system. If the compiler is unable to recognize the -architecture of the host system this option has no effect. - GCC uses @var{name} to determine what kind of instructions it can emit when generating assembly code. If @option{-march} is specified without either of @option{-mtune} or @option{-mcpu} also being @@ -12369,8 +12373,8 @@ option are: @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}. Additionally on native AArch64 GNU/Linux systems the value @samp{native} is available. This option causes the compiler to pick the architecture of and tune the performance of the code for the -processor of the host system. If the compiler is unable to recognize -the processor of the host system this option has no effect. +processor of the host system. This option has no effect if the +compiler is unable to recognize the architecture of the host system. 
Where none of @option{-mtune=}, @option{-mcpu=} or @option{-march=} are specified, the code is tuned to perform well across a range @@ -12392,9 +12396,9 @@ specified, the right-most feature is used. Additionally on native AArch64 GNU/Linux systems the value @samp{native} is available. This option causes the compiler to tune -the performance of the code for the processor of the host system. If -the compiler is unable to recognize the processor of the host system -this option has no effect. +the performance of the code for the processor of the host system. +This option has no effect if the compiler is unable to recognize the +architecture of the host system. GCC uses @var{name} to determine what kind of instructions it can emit when generating assembly code (as if by @option{-march}) and to determine @@ -12430,7 +12434,8 @@ the following and their inverses @option{no@var{feature}}: @table @samp @item crc -Enable CRC extension. +Enable CRC extension. This is on by default for +@option{-march=armv8.1-a}. @item crypto Enable Crypto extension. This also enables Advanced SIMD and floating-point instructions. @@ -12442,14 +12447,8 @@ Enable Advanced SIMD instructions. This also enables floating-point instructions. This is on by default for all possible values for options @option{-march} and @option{-mcpu}. @item lse -Enable Large System Extension instructions. -@item pan -Enable Privileged Access Never support. -@item lor -Enable Limited Ordering Regions support. -@item rdma -Enable ARMv8.1 Advanced SIMD instructions. This implies Advanced SIMD -is enabled. +Enable Large System Extension instructions. This is on by default for +@option{-march=armv8.1-a}. 
@end table diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc new file mode 100644 index 00000000000..a504ca68d46 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc @@ -0,0 +1,138 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1 (NAME) + +void FNNAME (INSN) (void) +{ + /* vector_res = vqrdmlah (vector, vector2, vector3, vector4), + then store the result. */ +#define TEST_VQRDMLAH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \ + VECT_VAR (vector_res, T1, W, N) = \ + INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N), \ + VECT_VAR (vector2, T1, W, N), \ + VECT_VAR (vector3, T1, W, N)); \ + vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \ + VECT_VAR (vector_res, T1, W, N)); \ + CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, \ + EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxiliary macros are necessary to expand INSN. 
*/ +#define TEST_VQRDMLAH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLAH2 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQRDMLAH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLAH1 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE (vector, int, 16, 4); + DECL_VARIABLE (vector, int, 32, 2); + DECL_VARIABLE (vector, int, 16, 8); + DECL_VARIABLE (vector, int, 32, 4); + + DECL_VARIABLE (vector_res, int, 16, 4); + DECL_VARIABLE (vector_res, int, 32, 2); + DECL_VARIABLE (vector_res, int, 16, 8); + DECL_VARIABLE (vector_res, int, 32, 4); + + DECL_VARIABLE (vector2, int, 16, 4); + DECL_VARIABLE (vector2, int, 32, 2); + DECL_VARIABLE (vector2, int, 16, 8); + DECL_VARIABLE (vector2, int, 32, 4); + + DECL_VARIABLE (vector3, int, 16, 4); + DECL_VARIABLE (vector3, int, 32, 2); + DECL_VARIABLE (vector3, int, 16, 8); + DECL_VARIABLE (vector3, int, 32, 4); + + clean_results (); + + VLOAD (vector, buffer, , int, s, 16, 4); + VLOAD (vector, buffer, , int, s, 32, 2); + VLOAD (vector, buffer, q, int, s, 16, 8); + VLOAD (vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2. */ + VDUP (vector2, , int, s, 16, 4, 0x5555); + VDUP (vector2, , int, s, 32, 2, 0xBB); + VDUP (vector2, q, int, s, 16, 8, 0xBB); + VDUP (vector2, q, int, s, 32, 4, 0x22); + + /* Initialize vector3. 
*/ + VDUP (vector3, , int, s, 16, 4, 0x5555); + VDUP (vector3, , int, s, 32, 2, 0xBB); + VDUP (vector3, q, int, s, 16, 8, 0x33); + VDUP (vector3, q, int, s, 32, 4, 0x22); + +#define CMT "" + TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat, CMT); + TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat, CMT); + TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat, CMT); + TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat, CMT); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + + /* Now use input values such that the multiplication causes + saturation. */ +#define TEST_MSG_MUL " (check mul cumulative saturation)" + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + VDUP (vector2, , int, s, 16, 4, 0x8000); + VDUP (vector2, , int, s, 32, 2, 0x80000000); + VDUP (vector2, q, int, s, 16, 8, 0x8000); + VDUP (vector2, q, int, s, 32, 4, 0x80000000); + VDUP (vector3, , int, s, 16, 4, 0x8000); + VDUP (vector3, , int, s, 32, 2, 0x80000000); + VDUP (vector3, q, int, s, 16, 8, 0x8000); + VDUP (vector3, q, int, s, 32, 4, 0x80000000); + + TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); + TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL); + TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL); + TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); + + /* Use input values where rounding produces a 
result equal to the + saturation value, but does not set the saturation flag. */ +#define TEST_MSG_ROUND " (check rounding)" + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + VDUP (vector2, , int, s, 16, 4, 0x8001); + VDUP (vector2, , int, s, 32, 2, 0x80000001); + VDUP (vector2, q, int, s, 16, 8, 0x8001); + VDUP (vector2, q, int, s, 32, 4, 0x80000001); + VDUP (vector3, , int, s, 16, 4, 0x8001); + VDUP (vector3, , int, s, 32, 2, 0x80000001); + VDUP (vector3, q, int, s, 16, 8, 0x8001); + VDUP (vector3, q, int, s, 32, 4, 0x80000001); + + TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_round, \ + TEST_MSG_ROUND); + TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_round, \ + TEST_MSG_ROUND); + TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_round, \ + TEST_MSG_ROUND); + TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_round, \ + TEST_MSG_ROUND); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); +} + +int +main (void) +{ + FNNAME (INSN) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc new file mode 100644 index 00000000000..a8555029496 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc @@ -0,0 +1,154 @@ +#define FNNAME1(NAME) exec_ ## NAME ## _lane +#define FNNAME(NAME) FNNAME1 (NAME) + +void FNNAME (INSN) (void) +{ + /* vector_res = vqrdmlXh_lane (vector, vector2, vector3, lane), + then store the result. 
*/ +#define TEST_VQRDMLXH_LANE2(INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \ + VECT_VAR (vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W (VECT_VAR (vector, T1, W, N), \ + VECT_VAR (vector2, T1, W, N), \ + VECT_VAR (vector3, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \ + VECT_VAR (vector_res, T1, W, N)); \ + CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxiliary macros are necessary to expand INSN. */ +#define TEST_VQRDMLXH_LANE1(INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLXH_LANE2 (INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQRDMLXH_LANE(Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLXH_LANE1 (INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) + + + DECL_VARIABLE (vector, int, 16, 4); + DECL_VARIABLE (vector, int, 32, 2); + DECL_VARIABLE (vector, int, 16, 8); + DECL_VARIABLE (vector, int, 32, 4); + + DECL_VARIABLE (vector_res, int, 16, 4); + DECL_VARIABLE (vector_res, int, 32, 2); + DECL_VARIABLE (vector_res, int, 16, 8); + DECL_VARIABLE (vector_res, int, 32, 4); + + DECL_VARIABLE (vector2, int, 16, 4); + DECL_VARIABLE (vector2, int, 32, 2); + DECL_VARIABLE (vector2, int, 16, 8); + DECL_VARIABLE (vector2, int, 32, 4); + + DECL_VARIABLE (vector3, int, 16, 4); + DECL_VARIABLE (vector3, int, 32, 2); + DECL_VARIABLE (vector3, int, 16, 8); + DECL_VARIABLE (vector3, int, 32, 4); + + clean_results (); + + VLOAD (vector, buffer, , int, s, 16, 4); + VLOAD (vector, buffer, , int, s, 32, 2); + + VLOAD (vector, buffer, q, int, s, 16, 8); + VLOAD (vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2. */ + VDUP (vector2, , int, s, 16, 4, 0x5555); + VDUP (vector2, , int, s, 32, 2, 0xBB); + VDUP (vector2, q, int, s, 16, 8, 0xBB); + VDUP (vector2, q, int, s, 32, 4, 0x22); + + /* Initialize vector3. 
*/ + VDUP (vector3, , int, s, 16, 4, 0x5555); + VDUP (vector3, , int, s, 32, 2, 0xBB); + VDUP (vector3, q, int, s, 16, 8, 0x33); + VDUP (vector3, q, int, s, 32, 4, 0x22); + + /* Choose lane arbitrarily. */ +#define CMT "" + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + + /* Now use input values such that the multiplication causes + saturation. */ +#define TEST_MSG_MUL " (check mul cumulative saturation)" + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + + VDUP (vector2, , int, s, 16, 4, 0x8000); + VDUP (vector2, , int, s, 32, 2, 0x80000000); + VDUP (vector2, q, int, s, 16, 8, 0x8000); + VDUP (vector2, q, int, s, 32, 4, 0x80000000); + + VDUP (vector3, , int, s, 16, 4, 0x8000); + VDUP (vector3, , int, s, 32, 2, 0x80000000); + VDUP (vector3, q, int, s, 16, 8, 0x8000); + VDUP (vector3, q, int, s, 32, 4, 0x80000000); + + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul, + TEST_MSG_MUL); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, 
TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); + + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + + VDUP (vector2, , int, s, 16, 4, 0x8001); + VDUP (vector2, , int, s, 32, 2, 0x80000001); + VDUP (vector2, q, int, s, 16, 8, 0x8001); + VDUP (vector2, q, int, s, 32, 4, 0x80000001); + + VDUP (vector3, , int, s, 16, 4, 0x8001); + VDUP (vector3, , int, s, 32, 2, 0x80000001); + VDUP (vector3, q, int, s, 16, 8, 0x8001); + VDUP (vector3, q, int, s, 32, 4, 0x80000001); + + /* Use input values where rounding produces a result equal to the + saturation value, but does not set the saturation flag. */ +#define TEST_MSG_ROUND " (check rounding)" + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round, + TEST_MSG_ROUND); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); +} + +int +main (void) +{ + FNNAME (INSN) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c new file mode 100644 index 00000000000..148d94c6196 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c @@ -0,0 +1,57 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include 
"compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. */ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0; + +/* Expected results when rounding should not cause saturation. 
*/ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe, + 0xfffffffe, 0xfffffffe }; + +#define INSN vqrdmlah +#define TEST_MSG "VQRDMLAH" + +#include "vqrdmlXh.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c new file mode 100644 index 00000000000..ed43e010539 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c @@ -0,0 +1,57 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0x006d, 0x006e, 0x006f, 0x0070, + 0x0071, 0x0072, 0x0073, 0x0074 }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. 
*/ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0; + +/* Expected results when rounding should not cause saturation. 
*/ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe, + 0xfffffffe, 0xfffffffe }; + +#define INSN vqrdmlah +#define TEST_MSG "VQRDMLAH_LANE" + +#include "vqrdmlXh_lane.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c new file mode 100644 index 00000000000..91c3b34806d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c @@ -0,0 +1,61 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. 
*/ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1; + +/* Expected results when rounding should not cause saturation. 
*/ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +#define INSN vqrdmlsh +#define TEST_MSG "VQRDMLSH" + +#include "vqrdmlXh.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c new file mode 100644 index 00000000000..6010b4209e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c @@ -0,0 +1,61 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xff73, 0xff74, 0xff75, 0xff76, + 0xff77, 0xff78, 0xff79, 0xff7a }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. 
*/ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1; + +/* Expected results when rounding should not cause saturation. */ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +#define INSN vqrdmlsh +#define TEST_MSG "VQRDMLSH_LANE" + +#include "vqrdmlXh_lane.inc" diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0510d880ee7..7d237a2ac01 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2666,6 +2666,16 @@ proc add_options_for_arm_v8_neon { flags } { return "$flags $et_arm_v8_neon_flags -march=armv8-a" } +# Add the options needed for ARMv8.1 Adv.SIMD. 
+ +proc add_options_for_arm_v8_1a_neon { flags } { + if { [istarget aarch64*-*-*] } { + return "$flags -march=armv8.1-a" + } else { + return "$flags" + } +} + proc add_options_for_arm_crc { flags } { + if { ! [check_effective_target_arm_crc_ok] } { + return "$flags" + } @@ -2950,7 +2960,8 @@ foreach { armfunc armflag armdef } { v4 "-march=armv4 -marm" __ARM_ARCH_4__ v7r "-march=armv7-r" __ARM_ARCH_7R__ v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__ v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__ - v8a "-march=armv8-a" __ARM_ARCH_8A__ } { + v8a "-march=armv8-a" __ARM_ARCH_8A__ + v8_1a "-march=armv8.1-a" __ARM_ARCH_8A__ } { eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { proc check_effective_target_arm_arch_FUNC_ok { } { if { [ string match "*-marm*" "FLAG" ] && @@ -3107,6 +3118,25 @@ proc check_effective_target_arm_neonv2_hw { } { } [add_options_for_arm_neonv2 ""]] } +# Return 1 if the target supports the ARMv8.1 Adv.SIMD extension, 0 +# otherwise. The test is valid for AArch64. + +proc check_effective_target_arm_v8_1a_neon_ok_nocache { } { + if { ![istarget aarch64*-*-*] } { + return 0 + } + return [check_no_compiler_messages_nocache arm_v8_1a_neon_ok assembly { + #if !defined (__ARM_FEATURE_QRDMX) + #error "__ARM_FEATURE_QRDMX not defined" + #endif + } [add_options_for_arm_v8_1a_neon ""]] +} + +proc check_effective_target_arm_v8_1a_neon_ok { } { + return [check_cached_effective_target arm_v8_1a_neon_ok \ + check_effective_target_arm_v8_1a_neon_ok_nocache] +} + # Return 1 if the target supports executing ARMv8 NEON instructions, 0 # otherwise. @@ -3125,6 +3155,31 @@ proc check_effective_target_arm_v8_neon_hw { } { } [add_options_for_arm_v8_neon ""]] } +# Return 1 if the target supports executing the ARMv8.1 Adv.SIMD extension, 0 +# otherwise. The test is valid for AArch64. 
+ +proc check_effective_target_arm_v8_1a_neon_hw { } { + if { ![check_effective_target_arm_v8_1a_neon_ok] } { + return 0; + } + return [check_runtime_nocache arm_v8_1a_neon_hw_available { + int + main (void) + { + __Int32x2_t a = {0, 1}; + __Int32x2_t b = {0, 2}; + __Int32x2_t result; + + asm ("sqrdmlah %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers. */); + + return result[0]; + } + } [add_options_for_arm_v8_1a_neon ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { |