author     Yvan Roux <yvan.roux@linaro.org>    2015-12-10 12:58:54 +0100
committer  Linaro Code Review <review@review.linaro.org>    2015-12-31 11:28:08 +0000
commit     dad84c9a5ef6b373364319202be3f01036ea3fa3 (patch)
tree       5a495507553b3657aa72defac9f301d8d0f059fe
parent     41fa3f1b335b8274596f2db5c20162b3f30aa54a (diff)
gcc/
Backport from trunk r230953.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* config/aarch64/aarch64.h (AARCH64_ISA_RDMA): New.
(TARGET_SIMD_RDMA): New.

gcc/
Backport from trunk r230959.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* config/aarch64/aarch64-simd.md (aarch64_sqmovun<mode>): Fix some
white-space.
(aarch64_<sur>qmovun<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>): New.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): New.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): New.
* config/aarch64/iterators.md (UNSPEC_SQRDMLAH): New.
(UNSPEC_SQRDMLSH): New.
(SQRDMLH_AS): New.
(rdma_as): New.

gcc/
Backport from trunk r230962.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* config/aarch64/aarch64-simd-builtins.def (sqrdmlah, sqrdmlsh): New.
(sqrdmlah_lane, sqrdmlsh_lane): New.
(sqrdmlah_laneq, sqrdmlsh_laneq): New.

gcc/
Backport from trunk r230966.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* config/aarch64/aarch64-simd-builtins.def: Add missing changes
from r230962.

gcc/
Backport from trunk r230969.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add
ARM_FEATURE_QRDMX.

gcc/testsuite/
Backport from trunk r230970.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* lib/target-supports.exp (add_options_for_arm_v8_1a_neon): New.
(check_effective_target_arm_arch_FUNC_ok)
(add_options_for_arm_arch_FUNC)
(check_effective_target_arm_arch_FUNC_multilib): Add "armv8.1-a"
to the list to be generated.
(check_effective_target_arm_v8_1a_neon_ok_nocache): New.
(check_effective_target_arm_v8_1a_neon_ok): New.
(check_effective_target_arm_v8_1a_neon_hw): New.

gcc/
Backport from trunk r230971.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* gcc/config/aarch64/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New.
(vqrdmlahq_s16, vqrdmlahq_s32): New.
(vqrdmlsh_s16, vqrdmlsh_s32): New.
(vqrdmlshq_s16, vqrdmlshq_s32): New.

gcc/testsuite/
Backport from trunk r230971.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc: New file,
support code for vqrdml{as}h tests.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c: New.

gcc/
Backport from trunk r230972.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* gcc/config/aarch64/arm_neon.h (vqrdmlah_laneq_s16,
vqrdmlah_laneq_s32): New.
(vqrdmlahq_laneq_s16, vqrdmlahq_laneq_s32): New.
(vqrdmlsh_lane_s16, vqrdmlsh_lane_s32): New.
(vqrdmlshq_laneq_s16, vqrdmlshq_laneq_s32): New.
(vqrdmlah_lane_s16, vqrdmlah_lane_s32): New.
(vqrdmlahq_lane_s16, vqrdmlahq_lane_s32): New.
(vqrdmlahh_s16, vqrdmlahh_lane_s16, vqrdmlahh_laneq_s16): New.
(vqrdmlahs_s32, vqrdmlahs_lane_s32, vqrdmlahs_laneq_s32): New.
(vqrdmlsh_lane_s16, vqrdmlsh_lane_s32): New.
(vqrdmlshq_lane_s16, vqrdmlshq_lane_s32): New.
(vqrdmlshh_s16, vqrdmlshh_lane_s16, vqrdmlshh_laneq_s16): New.
(vqrdmlshs_s32, vqrdmlshs_lane_s32, vqrdmlshs_laneq_s32): New.

gcc/testsuite/
Backport from trunk r230972.
2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>

* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc: New
file, support code for vqrdml{as}h_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c: New.

gcc/
Backport from trunk r231400.
2015-12-08  Matthew Wahab  <matthew.wahab@arm.com>

* config/aarch64/aarch64-option-extensions.def: Remove
AARCH64_FL_RDMA from "fp" and "simd".  Remove "pan", "lor", "rdma".
* config/aarch64/aarch64.h (AARCH64_FL_PAN): Remove.
(AARCH64_FL_LOR): Remove.
(AARCH64_FL_RDMA): Remove.
(AARCH64_FL_V8_1): New.
(AARCH64_FL_FOR_ARCH8_1): Replace AARCH64_FL_PAN, AARCH64_FL_LOR
and AARCH64_FL_RDMA with AARCH64_FL_V8_1.
(AARCH64_ISA_RDMA): Replace AARCH64_FL_RDMA with AARCH64_FL_V8_1.
* doc/invoke.texi (AArch64 -march): Rewrite initial paragraph and
section on -march=native.  Group descriptions of permitted
architecture names together.  Expand description of
-march=armv8.1-a.
(AArch64 -mtune): Slightly rework section on -march=native.
(AArch64 -mcpu): Slightly rework section on -march=native.
(AArch64 Feature Modifiers): Remove "pan", "lor" and "rdma".
State that -march=armv8.1-a enables "crc" and "lse".

Change-Id: I635bd74c33bc7b54b7f4a19970ea45a0ce72ff66
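For orientation, a minimal usage sketch of the intrinsics this series adds; an editor's illustration assuming a compiler carrying this backport and a target accepting -march=armv8.1-a (the function names are invented). The per-lane arithmetic in the comment paraphrases the ARMv8.1 SQRDMLAH definition: rounding doubling multiply-accumulate returning the saturated high half.

#include <arm_neon.h>

/* Assumed build: gcc -march=armv8.1-a -c example.c
   Per 16-bit lane: r = sat (acc + ((2 * b * c + (1 << 15)) >> 16)).  */
int16x4_t
mla_high (int16x4_t acc, int16x4_t b, int16x4_t c)
{
  return vqrdmlah_s16 (acc, b, c);   /* multiply-accumulate variant */
}

int16x4_t
mls_high (int16x4_t acc, int16x4_t b, int16x4_t c)
{
  return vqrdmlsh_s16 (acc, b, c);   /* multiply-subtract variant */
}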
-rw-r--r--  gcc/config/aarch64/aarch64-c.c | 1
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def | 9
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def | 14
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 94
-rw-r--r--  gcc/config/aarch64/aarch64.h | 11
-rw-r--r--  gcc/config/aarch64/arm_neon.h | 221
-rw-r--r--  gcc/config/aarch64/iterators.md | 6
-rw-r--r--  gcc/doc/invoke.texi | 47
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc | 138
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc | 154
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c | 57
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c | 57
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c | 61
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c | 61
-rw-r--r--  gcc/testsuite/lib/target-supports.exp | 57
15 files changed, 951 insertions, 37 deletions
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 303025f77f0..ad95c78b989 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -126,6 +126,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_ILP32, "__ILP32__", pfile);
aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile);
+ aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile);
}
/* Implement TARGET_CPU_CPP_BUILTINS. */
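The new preprocessor macro gives user code a portable feature test. A hedged sketch of the intended use (the fallback branch is a placeholder, not part of this patch):

#if defined (__ARM_FEATURE_QRDMX)
#include <arm_neon.h>
static inline int16x8_t
mla_high_q (int16x8_t a, int16x8_t b, int16x8_t c)
{
  /* Hardware sqrdmlah path, available under -march=armv8.1-a.  */
  return vqrdmlahq_s16 (a, b, c);
}
#else
/* A scalar or pre-v8.1 fallback would go here.  */
#endif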
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index b261a0f7c3c..4f1d53515a9 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -34,11 +34,10 @@
should contain a whitespace-separated list of the strings in 'Features'
that are required. Their order is not important. */
-AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp")
-AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd")
+AARCH64_OPT_EXTENSION ("fp", AARCH64_FL_FP,
+ AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp")
+AARCH64_OPT_EXTENSION ("simd", AARCH64_FL_FPSIMD,
+ AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd")
AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2")
AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32")
AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, AARCH64_FL_LSE, "lse")
-AARCH64_OPT_EXTENSION("pan", AARCH64_FL_PAN, AARCH64_FL_PAN, "pan")
-AARCH64_OPT_EXTENSION("lor", AARCH64_FL_LOR, AARCH64_FL_LOR, "lor")
-AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA | AARCH64_FL_FPSIMD, AARCH64_FL_RDMA, "rdma")
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 654e963fa53..4aadbf37ed8 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -412,3 +412,17 @@
/* Implemented by aarch64_tbx4v8qi. */
VAR1 (TERNOP, tbx4, 0, v8qi)
+
+ /* Builtins for ARMv8.1 Adv.SIMD instructions. */
+
+ /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>. */
+ BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0)
+ BUILTIN_VSDQ_HSI (TERNOP, sqrdmlsh, 0)
+
+ /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>. */
+ BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_lane, 0)
+ BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_lane, 0)
+
+ /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>. */
+ BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0)
+ BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 555f5afb469..7acf7222ccd 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2880,7 +2880,7 @@
"TARGET_SIMD"
"sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
- )
+)
;; sqmovn and uqmovn
@@ -2891,7 +2891,7 @@
"TARGET_SIMD"
"<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
- )
+)
;; <su>q<absneg>
@@ -2979,6 +2979,96 @@
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
+;; sqrdml[as]h.
+
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
+ [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
+ (unspec:VSDQ_HSI
+ [(match_operand:VSDQ_HSI 1 "register_operand" "0")
+ (match_operand:VSDQ_HSI 2 "register_operand" "w")
+ (match_operand:VSDQ_HSI 3 "register_operand" "w")]
+ SQRDMLH_AS))]
+ "TARGET_SIMD_RDMA"
+ "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
+ [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
+)
+
+;; sqrdml[as]h_lane.
+
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
+ [(set (match_operand:VDQHS 0 "register_operand" "=w")
+ (unspec:VDQHS
+ [(match_operand:VDQHS 1 "register_operand" "0")
+ (match_operand:VDQHS 2 "register_operand" "w")
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 3 "register_operand" "w")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
+ SQRDMLH_AS))]
+ "TARGET_SIMD_RDMA"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ return
+ "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
+ [(set (match_operand:SD_HSI 0 "register_operand" "=w")
+ (unspec:SD_HSI
+ [(match_operand:SD_HSI 1 "register_operand" "0")
+ (match_operand:SD_HSI 2 "register_operand" "w")
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 3 "register_operand" "w")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
+ SQRDMLH_AS))]
+ "TARGET_SIMD_RDMA"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ return
+ "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+;; sqrdml[as]h_laneq.
+
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
+ [(set (match_operand:VDQHS 0 "register_operand" "=w")
+ (unspec:VDQHS
+ [(match_operand:VDQHS 1 "register_operand" "0")
+ (match_operand:VDQHS 2 "register_operand" "w")
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "w")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
+ SQRDMLH_AS))]
+ "TARGET_SIMD_RDMA"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ return
+ "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
+ [(set (match_operand:SD_HSI 0 "register_operand" "=w")
+ (unspec:SD_HSI
+ [(match_operand:SD_HSI 1 "register_operand" "0")
+ (match_operand:SD_HSI 2 "register_operand" "w")
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "w")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
+ SQRDMLH_AS))]
+ "TARGET_SIMD_RDMA"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ return
+ "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
;; vqdml[sa]l
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index be4a0954faa..76bcca59eab 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -136,9 +136,7 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_FL_CRC (1 << 3) /* Has CRC. */
/* ARMv8.1 architecture extensions. */
#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */
-#define AARCH64_FL_PAN (1 << 5) /* Has Privileged Access Never. */
-#define AARCH64_FL_LOR (1 << 6) /* Has Limited Ordering regions. */
-#define AARCH64_FL_RDMA (1 << 7) /* Has ARMv8.1 Adv.SIMD. */
+#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1 extensions. */
/* Has FP and SIMD. */
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
@@ -149,8 +147,7 @@ extern unsigned aarch64_architecture_version;
/* Architecture flags that effect instruction selection. */
#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
#define AARCH64_FL_FOR_ARCH8_1 \
- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_PAN \
- | AARCH64_FL_LOR | AARCH64_FL_RDMA)
+ (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_V8_1)
/* Macros to test ISA flags. */
@@ -159,6 +156,7 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP)
#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
+#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1)
/* Crypto is an optional extension to AdvSIMD. */
#define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
@@ -183,6 +181,9 @@ extern unsigned aarch64_architecture_version;
((aarch64_fix_a53_err835769 == 2) \
? TARGET_FIX_ERR_A53_835769_DEFAULT : aarch64_fix_a53_err835769)
+/* ARMv8.1 Adv.SIMD support. */
+#define TARGET_SIMD_RDMA (TARGET_SIMD && AARCH64_ISA_RDMA)
+
/* Standard register usage. */
/* 31 64-bit general purpose registers R0-R30:
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e1863488b73..b5189612684 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11746,6 +11746,227 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
}
+/* ARMv8.1 intrinsics. */
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.1-a")
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c)
+{
+ return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c)
+{
+ return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c)
+{
+ return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c)
+{
+ return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d);
+}
+#pragma GCC pop_options
+
#pragma GCC push_options
#pragma GCC target ("+nothing+crypto")
/* vaes */
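A hedged sketch exercising the scalar and laneq forms declared above (assumes -march=armv8.1-a; the intrinsic names are exactly those added by this hunk, the wrapper names are invented):

#include <arm_neon.h>

int16_t
scalar_mla (int16_t acc, int16_t b, int16_t c)
{
  /* Single-lane form of sqrdmlah.  */
  return vqrdmlahh_s16 (acc, b, c);
}

int32_t
scalar_mls_lane (int32_t acc, int32_t b, int32x4_t v)
{
  /* Subtract variant, selecting lane 2 of a 128-bit vector.  */
  return vqrdmlshs_laneq_s32 (acc, b, v, 2);
}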
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index fe7ca398c61..0d897bfb4bd 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -303,6 +303,8 @@
UNSPEC_PMULL2 ; Used in aarch64-simd.md.
UNSPEC_REV_REGLIST ; Used in aarch64-simd.md.
UNSPEC_VEC_SHR ; Used in aarch64-simd.md.
+ UNSPEC_SQRDMLAH ; Used in aarch64-simd.md.
+ UNSPEC_SQRDMLSH ; Used in aarch64-simd.md.
])
;; ------------------------------------------------------------------
@@ -955,6 +957,8 @@
UNSPEC_SQSHRN UNSPEC_UQSHRN
UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
+(define_int_iterator SQRDMLH_AS [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH])
+
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
UNSPEC_TRN1 UNSPEC_TRN2
UNSPEC_UZP1 UNSPEC_UZP2])
@@ -1129,3 +1133,5 @@
(UNSPEC_SHA1M "m")])
(define_int_attr sha256_op [(UNSPEC_SHA256H "") (UNSPEC_SHA256H2 "2")])
+
+(define_int_attr rdma_as [(UNSPEC_SQRDMLAH "a") (UNSPEC_SQRDMLSH "s")])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fe04af2d705..7a765318658 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12332,23 +12332,27 @@ corresponding flag to the linker.
@item -march=@var{name}
@opindex march
-Specify the name of the target architecture, optionally suffixed by one or
+Specify the name of the target architecture and, optionally, one or
more feature modifiers. This option has the form
@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}.
-The permissible values for @var{arch} are @samp{armv8-a} or
-@samp{armv8.1-a}.
+The permissible values for @var{arch} are @samp{armv8-a},
+@samp{armv8.1-a} or @samp{native}.
-For the permissible values for @var{feature}, see the sub-section on
-@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu}
+The value @samp{armv8.1-a} implies @samp{armv8-a} and enables compiler
+support for the ARMv8.1 architecture extension. In particular, it
+enables the @samp{+crc} and @samp{+lse} features.
+
+The value @samp{native} is available on native AArch64 GNU/Linux and
+causes the compiler to pick the architecture of the host system. This
+option has no effect if the compiler is unable to recognize the
+architecture of the host system.
+
+The permissible values for @var{feature} are listed in the sub-section
+on @ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu}
Feature Modifiers}. Where conflicting feature modifiers are
specified, the right-most feature is used.
-Additionally on native AArch64 GNU/Linux systems the value
-@samp{native} is available. This option causes the compiler to pick the
-architecture of the host system. If the compiler is unable to recognize the
-architecture of the host system this option has no effect.
-
GCC uses @var{name} to determine what kind of instructions it can emit
when generating assembly code. If @option{-march} is specified
without either of @option{-mtune} or @option{-mcpu} also being
@@ -12369,8 +12373,8 @@ option are: @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}.
Additionally on native AArch64 GNU/Linux systems the value
@samp{native} is available. This option causes the compiler to pick
the architecture of and tune the performance of the code for the
-processor of the host system. If the compiler is unable to recognize
-the processor of the host system this option has no effect.
+processor of the host system. This option has no effect if the
+compiler is unable to recognize the architecture of the host system.
Where none of @option{-mtune=}, @option{-mcpu=} or @option{-march=}
are specified, the code is tuned to perform well across a range
@@ -12392,9 +12396,9 @@ specified, the right-most feature is used.
Additionally on native AArch64 GNU/Linux systems the value
@samp{native} is available. This option causes the compiler to tune
-the performance of the code for the processor of the host system. If
-the compiler is unable to recognize the processor of the host system
-this option has no effect.
+the performance of the code for the processor of the host system.
+This option has no effect if the compiler is unable to recognize the
+architecture of the host system.
GCC uses @var{name} to determine what kind of instructions it can emit when
generating assembly code (as if by @option{-march}) and to determine
@@ -12430,7 +12434,8 @@ the following and their inverses @option{no@var{feature}}:
@table @samp
@item crc
-Enable CRC extension.
+Enable CRC extension. This is on by default for
+@option{-march=armv8.1-a}.
@item crypto
Enable Crypto extension. This also enables Advanced SIMD and floating-point
instructions.
@@ -12442,14 +12447,8 @@ Enable Advanced SIMD instructions. This also enables floating-point
instructions. This is on by default for all possible values for options
@option{-march} and @option{-mcpu}.
@item lse
-Enable Large System Extension instructions.
-@item pan
-Enable Privileged Access Never support.
-@item lor
-Enable Limited Ordering Regions support.
-@item rdma
-Enable ARMv8.1 Advanced SIMD instructions. This implies Advanced SIMD
-is enabled.
+Enable Large System Extension instructions. This is on by default for
+@option{-march=armv8.1-a}.
@end table
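A hedged compile-time probe of the documented behaviour; the invocation in the comment is an assumption based on the text above, and __ARM_FEATURE_CRC32 is the macro GCC defines when the crc extension is active.

/* Assumed invocation: gcc -march=armv8.1-a -c probe.c  */
#if !defined (__ARM_FEATURE_CRC32)
#error "armv8.1-a is documented to enable crc by default"
#endif
#if !defined (__ARM_FEATURE_QRDMX)
#error "armv8.1-a should also provide the v8.1 Adv.SIMD instructions"
#endif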
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc
new file mode 100644
index 00000000000..a504ca68d46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc
@@ -0,0 +1,138 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1 (NAME)
+
+void FNNAME (INSN) (void)
+{
+ /* vector_res = vqrdmlah (vector, vector2, vector3),
+ then store the result. */
+#define TEST_VQRDMLAH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+ Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \
+ VECT_VAR (vector_res, T1, W, N) = \
+ INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N), \
+ VECT_VAR (vector2, T1, W, N), \
+ VECT_VAR (vector3, T1, W, N)); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vector_res, T1, W, N)); \
+ CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, \
+ EXPECTED_CUMULATIVE_SAT, CMT)
+
+ /* Two auxiliary macros are necessary to expand INSN. */
+#define TEST_VQRDMLAH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+ TEST_VQRDMLAH2 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQRDMLAH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+ TEST_VQRDMLAH1 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+ DECL_VARIABLE (vector, int, 16, 4);
+ DECL_VARIABLE (vector, int, 32, 2);
+ DECL_VARIABLE (vector, int, 16, 8);
+ DECL_VARIABLE (vector, int, 32, 4);
+
+ DECL_VARIABLE (vector_res, int, 16, 4);
+ DECL_VARIABLE (vector_res, int, 32, 2);
+ DECL_VARIABLE (vector_res, int, 16, 8);
+ DECL_VARIABLE (vector_res, int, 32, 4);
+
+ DECL_VARIABLE (vector2, int, 16, 4);
+ DECL_VARIABLE (vector2, int, 32, 2);
+ DECL_VARIABLE (vector2, int, 16, 8);
+ DECL_VARIABLE (vector2, int, 32, 4);
+
+ DECL_VARIABLE (vector3, int, 16, 4);
+ DECL_VARIABLE (vector3, int, 32, 2);
+ DECL_VARIABLE (vector3, int, 16, 8);
+ DECL_VARIABLE (vector3, int, 32, 4);
+
+ clean_results ();
+
+ VLOAD (vector, buffer, , int, s, 16, 4);
+ VLOAD (vector, buffer, , int, s, 32, 2);
+ VLOAD (vector, buffer, q, int, s, 16, 8);
+ VLOAD (vector, buffer, q, int, s, 32, 4);
+
+ /* Initialize vector2. */
+ VDUP (vector2, , int, s, 16, 4, 0x5555);
+ VDUP (vector2, , int, s, 32, 2, 0xBB);
+ VDUP (vector2, q, int, s, 16, 8, 0xBB);
+ VDUP (vector2, q, int, s, 32, 4, 0x22);
+
+ /* Initialize vector3. */
+ VDUP (vector3, , int, s, 16, 4, 0x5555);
+ VDUP (vector3, , int, s, 32, 2, 0xBB);
+ VDUP (vector3, q, int, s, 16, 8, 0x33);
+ VDUP (vector3, q, int, s, 32, 4, 0x22);
+
+#define CMT ""
+ TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat, CMT);
+ TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat, CMT);
+ TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat, CMT);
+ TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat, CMT);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
+
+ /* Now use input values such that the multiplication causes
+ saturation. */
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
+ VDUP (vector, , int, s, 16, 4, 0x8000);
+ VDUP (vector, , int, s, 32, 2, 0x80000000);
+ VDUP (vector, q, int, s, 16, 8, 0x8000);
+ VDUP (vector, q, int, s, 32, 4, 0x80000000);
+ VDUP (vector2, , int, s, 16, 4, 0x8000);
+ VDUP (vector2, , int, s, 32, 2, 0x80000000);
+ VDUP (vector2, q, int, s, 16, 8, 0x8000);
+ VDUP (vector2, q, int, s, 32, 4, 0x80000000);
+ VDUP (vector3, , int, s, 16, 4, 0x8000);
+ VDUP (vector3, , int, s, 32, 2, 0x80000000);
+ VDUP (vector3, q, int, s, 16, 8, 0x8000);
+ VDUP (vector3, q, int, s, 32, 4, 0x80000000);
+
+ TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
+ TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL);
+ TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL);
+ TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
+
+ /* Use input values where rounding produces a result equal to the
+ saturation value, but does not set the saturation flag. */
+#define TEST_MSG_ROUND " (check rounding)"
+ VDUP (vector, , int, s, 16, 4, 0x8000);
+ VDUP (vector, , int, s, 32, 2, 0x80000000);
+ VDUP (vector, q, int, s, 16, 8, 0x8000);
+ VDUP (vector, q, int, s, 32, 4, 0x80000000);
+ VDUP (vector2, , int, s, 16, 4, 0x8001);
+ VDUP (vector2, , int, s, 32, 2, 0x80000001);
+ VDUP (vector2, q, int, s, 16, 8, 0x8001);
+ VDUP (vector2, q, int, s, 32, 4, 0x80000001);
+ VDUP (vector3, , int, s, 16, 4, 0x8001);
+ VDUP (vector3, , int, s, 32, 2, 0x80000001);
+ VDUP (vector3, q, int, s, 16, 8, 0x8001);
+ VDUP (vector3, q, int, s, 32, 4, 0x80000001);
+
+ TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_round, \
+ TEST_MSG_ROUND);
+ TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_round, \
+ TEST_MSG_ROUND);
+ TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_round, \
+ TEST_MSG_ROUND);
+ TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_round, \
+ TEST_MSG_ROUND);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
+}
+
+int
+main (void)
+{
+ FNNAME (INSN) ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc
new file mode 100644
index 00000000000..a8555029496
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc
@@ -0,0 +1,154 @@
+#define FNNAME1(NAME) exec_ ## NAME ## _lane
+#define FNNAME(NAME) FNNAME1 (NAME)
+
+void FNNAME (INSN) (void)
+{
+ /* vector_res = vqrdmlXh_lane (vector, vector2, vector3, lane),
+ then store the result. */
+#define TEST_VQRDMLXH_LANE2(INSN, Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT) \
+ Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \
+ VECT_VAR (vector_res, T1, W, N) = \
+ INSN##Q##_lane_##T2##W (VECT_VAR (vector, T1, W, N), \
+ VECT_VAR (vector2, T1, W, N), \
+ VECT_VAR (vector3, T1, W, N2), \
+ L); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vector_res, T1, W, N)); \
+ CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+ /* Two auxiliary macros are necessary to expand INSN. */
+#define TEST_VQRDMLXH_LANE1(INSN, Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT) \
+ TEST_VQRDMLXH_LANE2 (INSN, Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQRDMLXH_LANE(Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT) \
+ TEST_VQRDMLXH_LANE1 (INSN, Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT)
+
+
+ DECL_VARIABLE (vector, int, 16, 4);
+ DECL_VARIABLE (vector, int, 32, 2);
+ DECL_VARIABLE (vector, int, 16, 8);
+ DECL_VARIABLE (vector, int, 32, 4);
+
+ DECL_VARIABLE (vector_res, int, 16, 4);
+ DECL_VARIABLE (vector_res, int, 32, 2);
+ DECL_VARIABLE (vector_res, int, 16, 8);
+ DECL_VARIABLE (vector_res, int, 32, 4);
+
+ DECL_VARIABLE (vector2, int, 16, 4);
+ DECL_VARIABLE (vector2, int, 32, 2);
+ DECL_VARIABLE (vector2, int, 16, 8);
+ DECL_VARIABLE (vector2, int, 32, 4);
+
+ DECL_VARIABLE (vector3, int, 16, 4);
+ DECL_VARIABLE (vector3, int, 32, 2);
+ DECL_VARIABLE (vector3, int, 16, 8);
+ DECL_VARIABLE (vector3, int, 32, 4);
+
+ clean_results ();
+
+ VLOAD (vector, buffer, , int, s, 16, 4);
+ VLOAD (vector, buffer, , int, s, 32, 2);
+
+ VLOAD (vector, buffer, q, int, s, 16, 8);
+ VLOAD (vector, buffer, q, int, s, 32, 4);
+
+ /* Initialize vector2. */
+ VDUP (vector2, , int, s, 16, 4, 0x5555);
+ VDUP (vector2, , int, s, 32, 2, 0xBB);
+ VDUP (vector2, q, int, s, 16, 8, 0xBB);
+ VDUP (vector2, q, int, s, 32, 4, 0x22);
+
+ /* Initialize vector3. */
+ VDUP (vector3, , int, s, 16, 4, 0x5555);
+ VDUP (vector3, , int, s, 32, 2, 0xBB);
+ VDUP (vector3, q, int, s, 16, 8, 0x33);
+ VDUP (vector3, q, int, s, 32, 4, 0x22);
+
+ /* Choose lane arbitrarily. */
+#define CMT ""
+ TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT);
+ TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT);
+ TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT);
+ TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
+
+ /* Now use input values such that the multiplication causes
+ saturation. */
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
+ VDUP (vector, , int, s, 16, 4, 0x8000);
+ VDUP (vector, , int, s, 32, 2, 0x80000000);
+ VDUP (vector, q, int, s, 16, 8, 0x8000);
+ VDUP (vector, q, int, s, 32, 4, 0x80000000);
+
+ VDUP (vector2, , int, s, 16, 4, 0x8000);
+ VDUP (vector2, , int, s, 32, 2, 0x80000000);
+ VDUP (vector2, q, int, s, 16, 8, 0x8000);
+ VDUP (vector2, q, int, s, 32, 4, 0x80000000);
+
+ VDUP (vector3, , int, s, 16, 4, 0x8000);
+ VDUP (vector3, , int, s, 32, 2, 0x80000000);
+ VDUP (vector3, q, int, s, 16, 8, 0x8000);
+ VDUP (vector3, q, int, s, 32, 4, 0x80000000);
+
+ TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul,
+ TEST_MSG_MUL);
+ TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul,
+ TEST_MSG_MUL);
+ TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul,
+ TEST_MSG_MUL);
+ TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul,
+ TEST_MSG_MUL);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
+
+ VDUP (vector, , int, s, 16, 4, 0x8000);
+ VDUP (vector, , int, s, 32, 2, 0x80000000);
+ VDUP (vector, q, int, s, 16, 8, 0x8000);
+ VDUP (vector, q, int, s, 32, 4, 0x80000000);
+
+ VDUP (vector2, , int, s, 16, 4, 0x8001);
+ VDUP (vector2, , int, s, 32, 2, 0x80000001);
+ VDUP (vector2, q, int, s, 16, 8, 0x8001);
+ VDUP (vector2, q, int, s, 32, 4, 0x80000001);
+
+ VDUP (vector3, , int, s, 16, 4, 0x8001);
+ VDUP (vector3, , int, s, 32, 2, 0x80000001);
+ VDUP (vector3, q, int, s, 16, 8, 0x8001);
+ VDUP (vector3, q, int, s, 32, 4, 0x80000001);
+
+ /* Use input values where rounding produces a result equal to the
+ saturation value, but does not set the saturation flag. */
+#define TEST_MSG_ROUND " (check rounding)"
+ TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_round,
+ TEST_MSG_ROUND);
+ TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_round,
+ TEST_MSG_ROUND);
+ TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round,
+ TEST_MSG_ROUND);
+ TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round,
+ TEST_MSG_ROUND);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
+}
+
+int
+main (void)
+{
+ FNNAME (INSN) ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c
new file mode 100644
index 00000000000..148d94c6196
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c
@@ -0,0 +1,57 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag. */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results. */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+ 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+ saturates. */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0;
+
+/* Expected results when multiplication saturates. */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+/* Expected values of cumulative_saturation flag when rounding
+ should not cause saturation. */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0;
+
+/* Expected results when rounding should not cause saturation. */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe,
+ 0xfffffffe, 0xfffffffe };
+
+#define INSN vqrdmlah
+#define TEST_MSG "VQRDMLAH"
+
+#include "vqrdmlXh.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c
new file mode 100644
index 00000000000..ed43e010539
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c
@@ -0,0 +1,57 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag. */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results. */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0x006d, 0x006e, 0x006f, 0x0070,
+ 0x0071, 0x0072, 0x0073, 0x0074 };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+ saturates. */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0;
+
+/* Expected results when multiplication saturates. */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+/* Expected values of cumulative_saturation flag when rounding
+ should not cause saturation. */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0;
+
+/* Expected results when rounding should not cause saturation. */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe,
+ 0xfffffffe, 0xfffffffe };
+
+#define INSN vqrdmlah
+#define TEST_MSG "VQRDMLAH_LANE"
+
+#include "vqrdmlXh_lane.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c
new file mode 100644
index 00000000000..91c3b34806d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c
@@ -0,0 +1,61 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag. */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results. */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+ 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+ saturates. */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1;
+
+/* Expected results when multiplication saturates. */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000,
+ 0x80000000, 0x80000000 };
+
+/* Expected values of cumulative_saturation flag when rounding
+ should not cause saturation. */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1;
+
+/* Expected results when rounding should not cause saturation. */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000,
+ 0x80000000, 0x80000000 };
+
+#define INSN vqrdmlsh
+#define TEST_MSG "VQRDMLSH"
+
+#include "vqrdmlXh.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c
new file mode 100644
index 00000000000..6010b4209e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c
@@ -0,0 +1,61 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag. */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results. */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xff73, 0xff74, 0xff75, 0xff76,
+ 0xff77, 0xff78, 0xff79, 0xff7a };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+ saturates. */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1;
+
+/* Expected results when multiplication saturates. */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000,
+ 0x80000000, 0x80000000 };
+
+/* Expected values of cumulative_saturation flag when rounding
+ should not cause saturation. */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1;
+
+/* Expected results when rounding should not cause saturation. */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000,
+ 0x80000000, 0x80000000 };
+
+#define INSN vqrdmlsh
+#define TEST_MSG "VQRDMLSH_LANE"
+
+#include "vqrdmlXh_lane.inc"
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 0510d880ee7..7d237a2ac01 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2666,6 +2666,16 @@ proc add_options_for_arm_v8_neon { flags } {
return "$flags $et_arm_v8_neon_flags -march=armv8-a"
}
+# Add the options needed for ARMv8.1 Adv.SIMD.
+
+proc add_options_for_arm_v8_1a_neon { flags } {
+ if { [istarget aarch64*-*-*] } {
+ return "$flags -march=armv8.1-a"
+ } else {
+ return "$flags"
+ }
+}
+
proc add_options_for_arm_crc { flags } {
if { ! [check_effective_target_arm_crc_ok] } {
return "$flags"
@@ -2950,7 +2960,8 @@ foreach { armfunc armflag armdef } { v4 "-march=armv4 -marm" __ARM_ARCH_4__
v7r "-march=armv7-r" __ARM_ARCH_7R__
v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__
v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__
- v8a "-march=armv8-a" __ARM_ARCH_8A__ } {
+ v8a "-march=armv8-a" __ARM_ARCH_8A__
+ v8_1a "-march=armv8.1a" __ARM_ARCH_8A__ } {
eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
proc check_effective_target_arm_arch_FUNC_ok { } {
if { [ string match "*-marm*" "FLAG" ] &&
@@ -3107,6 +3118,25 @@ proc check_effective_target_arm_neonv2_hw { } {
} [add_options_for_arm_neonv2 ""]]
}
+# Return 1 if the target supports the ARMv8.1 Adv.SIMD extension, 0
+# otherwise. The test is valid for AArch64.
+
+proc check_effective_target_arm_v8_1a_neon_ok_nocache { } {
+ if { ![istarget aarch64*-*-*] } {
+ return 0
+ }
+ return [check_no_compiler_messages_nocache arm_v8_1a_neon_ok assembly {
+ #if !defined (__ARM_FEATURE_QRDMX)
+ #error "__ARM_FEATURE_QRDMX not defined"
+ #endif
+ } [add_options_for_arm_v8_1a_neon ""]]
+}
+
+proc check_effective_target_arm_v8_1a_neon_ok { } {
+ return [check_cached_effective_target arm_v8_1a_neon_ok \
+ check_effective_target_arm_v8_1a_neon_ok_nocache]
+}
+
# Return 1 if the target supports executing ARMv8 NEON instructions, 0
# otherwise.
@@ -3125,6 +3155,31 @@ proc check_effective_target_arm_v8_neon_hw { } {
} [add_options_for_arm_v8_neon ""]]
}
+# Return 1 if the target supports executing the ARMv8.1 Adv.SIMD extension, 0
+# otherwise. The test is valid for AArch64.
+
+proc check_effective_target_arm_v8_1a_neon_hw { } {
+ if { ![check_effective_target_arm_v8_1a_neon_ok] } {
+ return 0;
+ }
+ return [check_runtime_nocache arm_v8_1a_neon_hw_available {
+ int
+ main (void)
+ {
+ __Int32x2_t a = {0, 1};
+ __Int32x2_t b = {0, 2};
+ __Int32x2_t result;
+
+ asm ("sqrdmlah %0.2s, %1.2s, %2.2s"
+ : "=w"(result)
+ : "w"(a), "w"(b)
+ : /* No clobbers. */);
+
+ return result[0];
+ }
+ } [add_options_for_arm_v8_1a_neon ""]]
+}
+
# Return 1 if this is a ARM target with NEON enabled.
proc check_effective_target_arm_neon { } {
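For completeness, a hedged sketch of how a further execution test would opt into the new helpers; the directives mirror those of vqrdmlah.c above, and the expected value follows the SQRDMLAH arithmetic (1 + round ((2*1*1) >> 16) == 1).

/* { dg-require-effective-target arm_v8_1a_neon_hw } */
/* { dg-add-options arm_v8_1a_neon } */

#include <arm_neon.h>

int
main (void)
{
  int16x4_t one = vdup_n_s16 (1);
  int16x4_t r = vqrdmlah_s16 (one, one, one);
  /* (2*1*1) >> 16 rounds to 0, so each lane stays 1.  */
  return vget_lane_s16 (r, 0) == 1 ? 0 : 1;
}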