diff options
author | Yvan Roux <yvan.roux@linaro.org> | 2015-12-10 12:58:54 +0100 |
---|---|---|
committer | Linaro Code Review <review@review.linaro.org> | 2015-12-31 11:28:08 +0000 |
commit | dad84c9a5ef6b373364319202be3f01036ea3fa3 (patch) | |
tree | 5a495507553b3657aa72defac9f301d8d0f059fe | |
parent | 41fa3f1b335b8274596f2db5c20162b3f30aa54a (diff) |
gcc/
Backport from trunk r230953.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64.h (AARCH64_ISA_RDMA): New.
(TARGET_SIMD_RDMA): New.
gcc/
Backport from trunk r230959.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-simd.md
(aarch64_sqmovun<mode>): Fix some white-space.
(aarch64_<sur>qmovun<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>): New.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): New.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): New.
* config/aarch64/iterators.md (UNSPEC_SQRDMLAH): New.
(UNSPEC_SQRDMLSH): New.
(SQRDMLH_AS): New.
(rdma_as): New.
gcc/
Backport from trunk r230962.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-simd-builtins.def
(sqrdmlah, sqrdmlsh): New.
(sqrdmlah_lane, sqrdmlsh_lane): New.
(sqrdmlah_laneq, sqrdmlsh_laneq): New.
gcc/
Backport from trunk r230966.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-simd-builtins.def:
Add missing changes from r230962.
gcc/
Backport from trunk r230969.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add
ARM_FEATURE_QRDMX.
gcc/testsuite/
Backport from trunk r230970.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* lib/target-supports.exp (add_options_for_arm_v8_1a_neon): New.
(check_effective_target_arm_arch_FUNC_ok)
(add_options_for_arm_arch_FUNC)
(check_effective_target_arm_arch_FUNC_multilib): Add "armv8.1-a"
to the list to be generated.
(check_effective_target_arm_v8_1a_neon_ok_nocache): New.
(check_effective_target_arm_v8_1a_neon_ok): New.
(check_effective_target_arm_v8_1a_neon_hw): New.
gcc/
Backport from trunk r230971.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New.
(vqrdmlahq_s16, vqrdmlahq_s32): New.
(vqrdmlsh_s16, vqrdmlsh_s32): New.
(vqrdmlshq_s16, vqrdmlshq_s32): New.
gcc/testsuite/
Backport from trunk r230971.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc: New file,
support code for vqrdml{as}h tests.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c: New.
gcc/
Backport from trunk r230972.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/arm_neon.h
(vqrdmlah_laneq_s16, vqrdmlah_laneq_s32): New.
(vqrdmlahq_laneq_s16, vqrdmlahq_laneq_s32): New.
(vqrdmlsh_laneq_s16, vqrdmlsh_laneq_s32): New.
(vqrdmlshq_laneq_s16, vqrdmlshq_laneq_s32): New.
(vqrdmlah_lane_s16, vqrdmlah_lane_s32): New.
(vqrdmlahq_lane_s16, vqrdmlahq_lane_s32): New.
(vqrdmlahh_s16, vqrdmlahh_lane_s16, vqrdmlahh_laneq_s16): New.
(vqrdmlahs_s32, vqrdmlahs_lane_s32, vqrdmlahs_laneq_s32): New.
(vqrdmlsh_lane_s16, vqrdmlsh_lane_s32): New.
(vqrdmlshq_lane_s16, vqrdmlshq_lane_s32): New.
(vqrdmlshh_s16, vqrdmlshh_lane_s16, vqrdmlshh_laneq_s16): New.
(vqrdmlshs_s32, vqrdmlshs_lane_s32, vqrdmlshs_laneq_s32): New.
gcc/testsuite/
Backport from trunk r230972.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc: New file,
support code for vqrdml{as}h_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c: New.
gcc/
Backport from trunk r231400.
2015-12-08 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-option-extensions.def: Remove
AARCH64_FL_RDMA from "fp" and "simd". Remove "pan", "lor",
"rdma".
* config/aarch64/aarch64.h (AARCH64_FL_PAN): Remove.
(AARCH64_FL_LOR): Remove.
(AARCH64_FL_RDMA): Remove.
(AARCH64_FL_V8_1): New.
(AARCH64_FL_FOR_ARCH8_1): Replace AARCH64_FL_PAN, AARCH64_FL_LOR
and AARCH64_FL_RDMA with AARCH64_FL_V8_1.
(AARCH64_ISA_RDMA): Replace AARCH64_FL_RDMA with AARCH64_FL_V8_1.
* doc/invoke.texi (AArch64 -march): Rewrite initial paragraph and
section on -march=native. Group descriptions of permitted
architecture names together. Expand description of
-march=armv8.1-a.
(AArch64 -mtune): Slightly rework section on -march=native.
(AArch64 -mcpu): Slightly rework section on -march=native.
(AArch64 Feature Modifiers): Remove "pan", "lor" and "rdma".
State that -march=armv8.1-a enables "crc" and "lse".
Change-Id: I635bd74c33bc7b54b7f4a19970ea45a0ce72ff66
-rw-r--r-- | gcc/config/aarch64/aarch64-c.c | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-option-extensions.def | 9 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 14 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 94 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.h | 11 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 221 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 6 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 47 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc | 138 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc | 154 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c | 57 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c | 57 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c | 61 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c | 61 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 57 |
15 files changed, 951 insertions, 37 deletions
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c index 303025f77f0..ad95c78b989 100644 --- a/gcc/config/aarch64/aarch64-c.c +++ b/gcc/config/aarch64/aarch64-c.c @@ -126,6 +126,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_ILP32, "__ILP32__", pfile); aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile); + aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile); } /* Implement TARGET_CPU_CPP_BUILTINS. */ diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index b261a0f7c3c..4f1d53515a9 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -34,11 +34,10 @@ should contain a whitespace-separated list of the strings in 'Features' that are required. Their order is not important. */ -AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp") -AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd") +AARCH64_OPT_EXTENSION ("fp", AARCH64_FL_FP, + AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp") +AARCH64_OPT_EXTENSION ("simd", AARCH64_FL_FPSIMD, + AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd") AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2") AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32") AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, AARCH64_FL_LSE, "lse") -AARCH64_OPT_EXTENSION("pan", AARCH64_FL_PAN, AARCH64_FL_PAN, "pan") -AARCH64_OPT_EXTENSION("lor", AARCH64_FL_LOR, AARCH64_FL_LOR, "lor") -AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA | AARCH64_FL_FPSIMD, AARCH64_FL_RDMA, "rdma") diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 654e963fa53..4aadbf37ed8 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ 
b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -412,3 +412,17 @@ /* Implemented by aarch64_tbx4v8qi. */ VAR1 (TERNOP, tbx4, 0, v8qi) + + /* Builtins for ARMv8.1 Adv.SIMD instructions. */ + + /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>. */ + BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0) + BUILTIN_VSDQ_HSI (TERNOP, sqrdmlsh, 0) + + /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>. */ + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_lane, 0) + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_lane, 0) + + /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>. */ + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0) + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 555f5afb469..7acf7222ccd 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2880,7 +2880,7 @@ "TARGET_SIMD" "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" [(set_attr "type" "neon_sat_shift_imm_narrow_q")] - ) +) ;; sqmovn and uqmovn @@ -2891,7 +2891,7 @@ "TARGET_SIMD" "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" [(set_attr "type" "neon_sat_shift_imm_narrow_q")] - ) +) ;; <su>q<absneg> @@ -2979,6 +2979,96 @@ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) +;; sqrdml[as]h. + +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>" + [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") + (unspec:VSDQ_HSI + [(match_operand:VSDQ_HSI 1 "register_operand" "0") + (match_operand:VSDQ_HSI 2 "register_operand" "w") + (match_operand:VSDQ_HSI 3 "register_operand" "w")] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" + [(set_attr "type" "neon_sat_mla_<Vetype>_long")] +) + +;; sqrdml[as]h_lane. 
+ +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "0") + (match_operand:VDQHS 2 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" + [(set (match_operand:SD_HSI 0 "register_operand" "=w") + (unspec:SD_HSI + [(match_operand:SD_HSI 1 "register_operand" "0") + (match_operand:SD_HSI 2 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +;; sqrdml[as]h_laneq. 
+ +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "0") + (match_operand:VDQHS 2 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + return + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>" + [(set (match_operand:SD_HSI 0 "register_operand" "=w") + (unspec:SD_HSI + [(match_operand:SD_HSI 1 "register_operand" "0") + (match_operand:SD_HSI 2 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] + SQRDMLH_AS))] + "TARGET_SIMD_RDMA" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + return + "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + ;; vqdml[sa]l (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>" diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index be4a0954faa..76bcca59eab 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -136,9 +136,7 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ /* ARMv8.1 architecture extensions. */ #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ -#define AARCH64_FL_PAN (1 << 5) /* Has Privileged Access Never. */ -#define AARCH64_FL_LOR (1 << 6) /* Has Limited Ordering regions. */ -#define AARCH64_FL_RDMA (1 << 7) /* Has ARMv8.1 Adv.SIMD. */ +#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1 extensions. 
*/ /* Has FP and SIMD. */ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) @@ -149,8 +147,7 @@ extern unsigned aarch64_architecture_version; /* Architecture flags that effect instruction selection. */ #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) #define AARCH64_FL_FOR_ARCH8_1 \ - (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_PAN \ - | AARCH64_FL_LOR | AARCH64_FL_RDMA) + (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_V8_1) /* Macros to test ISA flags. */ @@ -159,6 +156,7 @@ extern unsigned aarch64_architecture_version; #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) +#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1) /* Crypto is an optional extension to AdvSIMD. */ #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) @@ -183,6 +181,9 @@ extern unsigned aarch64_architecture_version; ((aarch64_fix_a53_err835769 == 2) \ ? TARGET_FIX_ERR_A53_835769_DEFAULT : aarch64_fix_a53_err835769) +/* ARMv8.1 Adv.SIMD support. */ +#define TARGET_SIMD_RDMA (TARGET_SIMD && AARCH64_ISA_RDMA) + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index e1863488b73..b5189612684 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -11746,6 +11746,227 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); } +/* ARMv8.1 instrinsics. 
*/ +#pragma GCC push_options +#pragma GCC target ("arch=armv8.1-a") + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ 
((__always_inline__)) +vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return 
__builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c) +{ + return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c) +{ + return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ 
+ return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c) +{ + return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c) +{ + return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const 
int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d); +} +#pragma GCC pop_options + #pragma GCC push_options #pragma GCC target ("+nothing+crypto") /* vaes */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index fe7ca398c61..0d897bfb4bd 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -303,6 +303,8 @@ UNSPEC_PMULL2 ; Used in aarch64-simd.md. UNSPEC_REV_REGLIST ; Used in aarch64-simd.md. UNSPEC_VEC_SHR ; Used in aarch64-simd.md. + UNSPEC_SQRDMLAH ; Used in aarch64-simd.md. + UNSPEC_SQRDMLSH ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------ @@ -955,6 +957,8 @@ UNSPEC_SQSHRN UNSPEC_UQSHRN UNSPEC_SQRSHRN UNSPEC_UQRSHRN]) +(define_int_iterator SQRDMLH_AS [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH]) + (define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 UNSPEC_TRN1 UNSPEC_TRN2 UNSPEC_UZP1 UNSPEC_UZP2]) @@ -1129,3 +1133,5 @@ (UNSPEC_SHA1M "m")]) (define_int_attr sha256_op [(UNSPEC_SHA256H "") (UNSPEC_SHA256H2 "2")]) + +(define_int_attr rdma_as [(UNSPEC_SQRDMLAH "a") (UNSPEC_SQRDMLSH "s")]) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index fe04af2d705..7a765318658 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12332,23 +12332,27 @@ corresponding flag to the linker. @item -march=@var{name} @opindex march -Specify the name of the target architecture, optionally suffixed by one or +Specify the name of the target architecture and, optionally, one or more feature modifiers. This option has the form @option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}. -The permissible values for @var{arch} are @samp{armv8-a} or -@samp{armv8.1-a}. +The permissible values for @var{arch} are @samp{armv8-a}, +@samp{armv8.1-a} or @var{native}. 
-For the permissible values for @var{feature}, see the sub-section on -@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu} +The value @samp{armv8.1-a} implies @samp{armv8-a} and enables compiler +support for the ARMv8.1 architecture extension. In particular, it +enables the @samp{+crc} and @samp{+lse} features. + +The value @samp{native} is available on native AArch64 GNU/Linux and +causes the compiler to pick the architecture of the host system. This +option has no effect if the compiler is unable to recognize the +architecture of the host system, + +The permissible values for @var{feature} are listed in the sub-section +on @ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu} Feature Modifiers}. Where conflicting feature modifiers are specified, the right-most feature is used. -Additionally on native AArch64 GNU/Linux systems the value -@samp{native} is available. This option causes the compiler to pick the -architecture of the host system. If the compiler is unable to recognize the -architecture of the host system this option has no effect. - GCC uses @var{name} to determine what kind of instructions it can emit when generating assembly code. If @option{-march} is specified without either of @option{-mtune} or @option{-mcpu} also being @@ -12369,8 +12373,8 @@ option are: @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}. Additionally on native AArch64 GNU/Linux systems the value @samp{native} is available. This option causes the compiler to pick the architecture of and tune the performance of the code for the -processor of the host system. If the compiler is unable to recognize -the processor of the host system this option has no effect. +processor of the host system. This option has no effect if the +compiler is unable to recognize the architecture of the host system. 
Where none of @option{-mtune=}, @option{-mcpu=} or @option{-march=} are specified, the code is tuned to perform well across a range @@ -12392,9 +12396,9 @@ specified, the right-most feature is used. Additionally on native AArch64 GNU/Linux systems the value @samp{native} is available. This option causes the compiler to tune -the performance of the code for the processor of the host system. If -the compiler is unable to recognize the processor of the host system -this option has no effect. +the performance of the code for the processor of the host system. +This option has no effect if the compiler is unable to recognize the +architecture of the host system. GCC uses @var{name} to determine what kind of instructions it can emit when generating assembly code (as if by @option{-march}) and to determine @@ -12430,7 +12434,8 @@ the following and their inverses @option{no@var{feature}}: @table @samp @item crc -Enable CRC extension. +Enable CRC extension. This is on by default for +@option{-march=armv8.1-a}. @item crypto Enable Crypto extension. This also enables Advanced SIMD and floating-point instructions. @@ -12442,14 +12447,8 @@ Enable Advanced SIMD instructions. This also enables floating-point instructions. This is on by default for all possible values for options @option{-march} and @option{-mcpu}. @item lse -Enable Large System Extension instructions. -@item pan -Enable Privileged Access Never support. -@item lor -Enable Limited Ordering Regions support. -@item rdma -Enable ARMv8.1 Advanced SIMD instructions. This implies Advanced SIMD -is enabled. +Enable Large System Extension instructions. This is on by default for +@option{-march=armv8.1-a}. 
@end table diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc new file mode 100644 index 00000000000..a504ca68d46 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc @@ -0,0 +1,138 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1 (NAME) + +void FNNAME (INSN) (void) +{ + /* vector_res = vqrdmlah (vector, vector2, vector3, vector4), + then store the result. */ +#define TEST_VQRDMLAH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \ + VECT_VAR (vector_res, T1, W, N) = \ + INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N), \ + VECT_VAR (vector2, T1, W, N), \ + VECT_VAR (vector3, T1, W, N)); \ + vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \ + VECT_VAR (vector_res, T1, W, N)); \ + CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, \ + EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxiliary macros are necessary to expand INSN. 
*/ +#define TEST_VQRDMLAH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLAH2 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQRDMLAH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLAH1 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE (vector, int, 16, 4); + DECL_VARIABLE (vector, int, 32, 2); + DECL_VARIABLE (vector, int, 16, 8); + DECL_VARIABLE (vector, int, 32, 4); + + DECL_VARIABLE (vector_res, int, 16, 4); + DECL_VARIABLE (vector_res, int, 32, 2); + DECL_VARIABLE (vector_res, int, 16, 8); + DECL_VARIABLE (vector_res, int, 32, 4); + + DECL_VARIABLE (vector2, int, 16, 4); + DECL_VARIABLE (vector2, int, 32, 2); + DECL_VARIABLE (vector2, int, 16, 8); + DECL_VARIABLE (vector2, int, 32, 4); + + DECL_VARIABLE (vector3, int, 16, 4); + DECL_VARIABLE (vector3, int, 32, 2); + DECL_VARIABLE (vector3, int, 16, 8); + DECL_VARIABLE (vector3, int, 32, 4); + + clean_results (); + + VLOAD (vector, buffer, , int, s, 16, 4); + VLOAD (vector, buffer, , int, s, 32, 2); + VLOAD (vector, buffer, q, int, s, 16, 8); + VLOAD (vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2. */ + VDUP (vector2, , int, s, 16, 4, 0x5555); + VDUP (vector2, , int, s, 32, 2, 0xBB); + VDUP (vector2, q, int, s, 16, 8, 0xBB); + VDUP (vector2, q, int, s, 32, 4, 0x22); + + /* Initialize vector3. 
*/ + VDUP (vector3, , int, s, 16, 4, 0x5555); + VDUP (vector3, , int, s, 32, 2, 0xBB); + VDUP (vector3, q, int, s, 16, 8, 0x33); + VDUP (vector3, q, int, s, 32, 4, 0x22); + +#define CMT "" + TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat, CMT); + TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat, CMT); + TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat, CMT); + TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat, CMT); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + + /* Now use input values such that the multiplication causes + saturation. */ +#define TEST_MSG_MUL " (check mul cumulative saturation)" + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + VDUP (vector2, , int, s, 16, 4, 0x8000); + VDUP (vector2, , int, s, 32, 2, 0x80000000); + VDUP (vector2, q, int, s, 16, 8, 0x8000); + VDUP (vector2, q, int, s, 32, 4, 0x80000000); + VDUP (vector3, , int, s, 16, 4, 0x8000); + VDUP (vector3, , int, s, 32, 2, 0x80000000); + VDUP (vector3, q, int, s, 16, 8, 0x8000); + VDUP (vector3, q, int, s, 32, 4, 0x80000000); + + TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); + TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL); + TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL); + TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); + + /* Use input values where rounding produces a 
result equal to the + saturation value, but does not set the saturation flag. */ +#define TEST_MSG_ROUND " (check rounding)" + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + VDUP (vector2, , int, s, 16, 4, 0x8001); + VDUP (vector2, , int, s, 32, 2, 0x80000001); + VDUP (vector2, q, int, s, 16, 8, 0x8001); + VDUP (vector2, q, int, s, 32, 4, 0x80000001); + VDUP (vector3, , int, s, 16, 4, 0x8001); + VDUP (vector3, , int, s, 32, 2, 0x80000001); + VDUP (vector3, q, int, s, 16, 8, 0x8001); + VDUP (vector3, q, int, s, 32, 4, 0x80000001); + + TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_round, \ + TEST_MSG_ROUND); + TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_round, \ + TEST_MSG_ROUND); + TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_round, \ + TEST_MSG_ROUND); + TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_round, \ + TEST_MSG_ROUND); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); +} + +int +main (void) +{ + FNNAME (INSN) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc new file mode 100644 index 00000000000..a8555029496 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc @@ -0,0 +1,154 @@ +#define FNNAME1(NAME) exec_ ## NAME ## _lane +#define FNNAME(NAME) FNNAME1 (NAME) + +void FNNAME (INSN) (void) +{ + /* vector_res = vqrdmlXh_lane (vector, vector2, vector3, lane), + then store the result. 
*/ +#define TEST_VQRDMLXH_LANE2(INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \ + VECT_VAR (vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W (VECT_VAR (vector, T1, W, N), \ + VECT_VAR (vector2, T1, W, N), \ + VECT_VAR (vector3, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \ + VECT_VAR (vector_res, T1, W, N)); \ + CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxiliary macros are necessary to expand INSN. */ +#define TEST_VQRDMLXH_LANE1(INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLXH_LANE2 (INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQRDMLXH_LANE(Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLXH_LANE1 (INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) + + + DECL_VARIABLE (vector, int, 16, 4); + DECL_VARIABLE (vector, int, 32, 2); + DECL_VARIABLE (vector, int, 16, 8); + DECL_VARIABLE (vector, int, 32, 4); + + DECL_VARIABLE (vector_res, int, 16, 4); + DECL_VARIABLE (vector_res, int, 32, 2); + DECL_VARIABLE (vector_res, int, 16, 8); + DECL_VARIABLE (vector_res, int, 32, 4); + + DECL_VARIABLE (vector2, int, 16, 4); + DECL_VARIABLE (vector2, int, 32, 2); + DECL_VARIABLE (vector2, int, 16, 8); + DECL_VARIABLE (vector2, int, 32, 4); + + DECL_VARIABLE (vector3, int, 16, 4); + DECL_VARIABLE (vector3, int, 32, 2); + DECL_VARIABLE (vector3, int, 16, 8); + DECL_VARIABLE (vector3, int, 32, 4); + + clean_results (); + + VLOAD (vector, buffer, , int, s, 16, 4); + VLOAD (vector, buffer, , int, s, 32, 2); + + VLOAD (vector, buffer, q, int, s, 16, 8); + VLOAD (vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2. */ + VDUP (vector2, , int, s, 16, 4, 0x5555); + VDUP (vector2, , int, s, 32, 2, 0xBB); + VDUP (vector2, q, int, s, 16, 8, 0xBB); + VDUP (vector2, q, int, s, 32, 4, 0x22); + + /* Initialize vector3. 
*/ + VDUP (vector3, , int, s, 16, 4, 0x5555); + VDUP (vector3, , int, s, 32, 2, 0xBB); + VDUP (vector3, q, int, s, 16, 8, 0x33); + VDUP (vector3, q, int, s, 32, 4, 0x22); + + /* Choose lane arbitrarily. */ +#define CMT "" + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + + /* Now use input values such that the multiplication causes + saturation. */ +#define TEST_MSG_MUL " (check mul cumulative saturation)" + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + + VDUP (vector2, , int, s, 16, 4, 0x8000); + VDUP (vector2, , int, s, 32, 2, 0x80000000); + VDUP (vector2, q, int, s, 16, 8, 0x8000); + VDUP (vector2, q, int, s, 32, 4, 0x80000000); + + VDUP (vector3, , int, s, 16, 4, 0x8000); + VDUP (vector3, , int, s, 32, 2, 0x80000000); + VDUP (vector3, q, int, s, 16, 8, 0x8000); + VDUP (vector3, q, int, s, 32, 4, 0x80000000); + + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul, + TEST_MSG_MUL); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, 
TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); + + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + + VDUP (vector2, , int, s, 16, 4, 0x8001); + VDUP (vector2, , int, s, 32, 2, 0x80000001); + VDUP (vector2, q, int, s, 16, 8, 0x8001); + VDUP (vector2, q, int, s, 32, 4, 0x80000001); + + VDUP (vector3, , int, s, 16, 4, 0x8001); + VDUP (vector3, , int, s, 32, 2, 0x80000001); + VDUP (vector3, q, int, s, 16, 8, 0x8001); + VDUP (vector3, q, int, s, 32, 4, 0x80000001); + + /* Use input values where rounding produces a result equal to the + saturation value, but does not set the saturation flag. */ +#define TEST_MSG_ROUND " (check rounding)" + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round, + TEST_MSG_ROUND); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); +} + +int +main (void) +{ + FNNAME (INSN) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c new file mode 100644 index 00000000000..148d94c6196 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c @@ -0,0 +1,57 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include 
"compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. */ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0; + +/* Expected results when rounding should not cause saturation. 
*/ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe, + 0xfffffffe, 0xfffffffe }; + +#define INSN vqrdmlah +#define TEST_MSG "VQRDMLAH" + +#include "vqrdmlXh.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c new file mode 100644 index 00000000000..ed43e010539 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c @@ -0,0 +1,57 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0x006d, 0x006e, 0x006f, 0x0070, + 0x0071, 0x0072, 0x0073, 0x0074 }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. 
*/ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0; + +/* Expected results when rounding should not cause saturation. 
*/ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe, + 0xfffffffe, 0xfffffffe }; + +#define INSN vqrdmlah +#define TEST_MSG "VQRDMLAH_LANE" + +#include "vqrdmlXh_lane.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c new file mode 100644 index 00000000000..91c3b34806d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c @@ -0,0 +1,61 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. 
*/ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1; + +/* Expected results when rounding should not cause saturation. 
*/ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +#define INSN vqrdmlsh +#define TEST_MSG "VQRDMLSH" + +#include "vqrdmlXh.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c new file mode 100644 index 00000000000..6010b4209e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c @@ -0,0 +1,61 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xff73, 0xff74, 0xff75, 0xff76, + 0xff77, 0xff78, 0xff79, 0xff7a }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. 
*/ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1; + +/* Expected results when rounding should not cause saturation. */ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +#define INSN vqrdmlsh +#define TEST_MSG "VQRDMLSH_LANE" + +#include "vqrdmlXh_lane.inc" diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0510d880ee7..7d237a2ac01 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2666,6 +2666,16 @@ proc add_options_for_arm_v8_neon { flags } { return "$flags $et_arm_v8_neon_flags -march=armv8-a" } +# Add the options needed for ARMv8.1 Adv.SIMD. 
+ +proc add_options_for_arm_v8_1a_neon { flags } { + if { [istarget aarch64*-*-*] } { + return "$flags -march=armv8.1-a" + } else { + return "$flags" + } +} + proc add_options_for_arm_crc { flags } { + if { ! [check_effective_target_arm_crc_ok] } { + return "$flags" + } @@ -2950,7 +2960,8 @@ foreach { armfunc armflag armdef } { v4 "-march=armv4 -marm" __ARM_ARCH_4__ v7r "-march=armv7-r" __ARM_ARCH_7R__ v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__ v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__ - v8a "-march=armv8-a" __ARM_ARCH_8A__ } { + v8a "-march=armv8-a" __ARM_ARCH_8A__ + v8_1a "-march=armv8.1-a" __ARM_ARCH_8A__ } { eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { proc check_effective_target_arm_arch_FUNC_ok { } { if { [ string match "*-marm*" "FLAG" ] && @@ -3107,6 +3118,25 @@ proc check_effective_target_arm_neonv2_hw { } { } [add_options_for_arm_neonv2 ""]] } +# Return 1 if the target supports the ARMv8.1 Adv.SIMD extension, 0 +# otherwise. The test is valid for AArch64. + +proc check_effective_target_arm_v8_1a_neon_ok_nocache { } { + if { ![istarget aarch64*-*-*] } { + return 0 + } + return [check_no_compiler_messages_nocache arm_v8_1a_neon_ok assembly { + #if !defined (__ARM_FEATURE_QRDMX) + #error "__ARM_FEATURE_QRDMX not defined" + #endif + } [add_options_for_arm_v8_1a_neon ""]] +} + +proc check_effective_target_arm_v8_1a_neon_ok { } { + return [check_cached_effective_target arm_v8_1a_neon_ok \ + check_effective_target_arm_v8_1a_neon_ok_nocache] +} + # Return 1 if the target supports executing ARMv8 NEON instructions, 0 # otherwise. @@ -3125,6 +3155,31 @@ proc check_effective_target_arm_v8_neon_hw { } { } [add_options_for_arm_v8_neon ""]] } +# Return 1 if the target supports executing the ARMv8.1 Adv.SIMD extension, 0 +# otherwise. The test is valid for AArch64. 
+ +proc check_effective_target_arm_v8_1a_neon_hw { } { + if { ![check_effective_target_arm_v8_1a_neon_ok] } { + return 0; + } + return [check_runtime_nocache arm_v8_1a_neon_hw_available { + int + main (void) + { + __Int32x2_t a = {0, 1}; + __Int32x2_t b = {0, 2}; + __Int32x2_t result; + + asm ("sqrdmlah %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers. */); + + return result[0]; + } + } [add_options_for_arm_v8_1a_neon ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { |