diff options
author | Yvan Roux <yvan.roux@linaro.org> | 2013-05-14 11:19:11 +0200 |
---|---|---|
committer | Yvan Roux <yvan.roux@linaro.org> | 2013-05-14 11:19:11 +0200 |
commit | 5ea5e3a42fd60d479d9e1d432b85bdfe107e2fdb (patch) | |
tree | de3d75006de15e378d35038de2d1269d9f33401c | |
parent | 2a8b4bf9b046554efca8267486a77b0bc27abbe0 (diff) | |
parent | 0c6015add23907b02c7e8e3c1fb5e4c5b78a92b1 (diff) |
Merge from FSF arm/aarch64-4.7-branch r198254..r198689.
23 files changed, 578 insertions, 256 deletions
diff --git a/ChangeLog.linaro b/ChangeLog.linaro index 8d6933c6652..a109ac233f2 100644 --- a/ChangeLog.linaro +++ b/ChangeLog.linaro @@ -1,5 +1,39 @@ 2013-05-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org> + Merge from FSF arm/aarch64-4.7-branch r198254..r198689. + + Backport /work/sources/gcc-bzr/arm-aarch64-4.7 r198254: + [Backport AArch64] Fix vld1<q>_* asm constraints in arm_neon.h + + gcc/ + * config/aarch64/arm_neon.h (vld1<q>_lane*): Fix constraints. + (vld1<q>_dup_<sufp><8, 16, 32, 64>): Likewise. + (vld1<q>_<sufp><8, 16, 32, 64>): Likewise. + + Backport /work/sources/gcc-bzr/arm-aarch64-4.7 r198452: + gcc/ + Backported from mainline. + * config/aarch64/aarch64-simd.md (aarch64_vcond_internal): Fix + floating-point vector comparisons against 0. + + gcc/testsuite/ + Backported from mainline. + * gcc.target/aarch64/vect-fcm.x: Add check for zero forms of + inverse operands. + * gcc.target/aarch64/vect-fcm-eq-d.c: Check that new zero form + loop is vectorized. + * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise. + * gcc.target/aarch64/vect-fcm-ge-d.c: Check that new zero form + loop is vectorized and that the correct instruction is generated. + * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise. + * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise. + * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise. + + Backport /work/sources/gcc-bzr/arm-aarch64-4.7 r198689: + Backport of AArch64 -fomit-frame-pointer fix + +2013-05-08 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org> + Merge from FSF GCC 4.7.3 (svn branches/gcc-4_7-branch 198701). 
2013-04-08 Yvan Roux <yvan.roux@linaro.org> diff --git a/gcc/ChangeLog.aarch64 b/gcc/ChangeLog.aarch64 index d6c870e5575..d502bbb4a1b 100644 --- a/gcc/ChangeLog.aarch64 +++ b/gcc/ChangeLog.aarch64 @@ -1,3 +1,28 @@ +2013-05-07 Ian Bolton <ian.bolton@arm.com> + + Backport from mainline + 2013-03-28 Ian Bolton <ian.bolton@arm.com> + + * config/aarch64/aarch64.md (aarch64_can_eliminate): Keep frame + record only when desired or required. + +2013-04-30 James Greenhalgh <james.greenhalgh@arm.com> + + Backport from mainline. + 2013-04-11 James Greenhalgh <james.greenhalgh@arm.com> + + * config/aarch64/aarch64-simd.md (aarch64_vcond_internal): Fix + floating-point vector comparisons against 0. + +2013-04-24 James Greenhalgh <james.greenhalgh@arm.com> + + Backport from mainline. + 2013-04-24 James Greenhalgh <james.greenhalgh@arm.com> + + * config/aarch64/arm_neon.h (vld1<q>_lane*): Fix constraints. + (vld1<q>_dup_<sufp><8, 16, 32, 64>): Likewise. + (vld1<q>_<sufp><8, 16, 32, 64>): Likewise. + 2013-03-01 James Greenhalgh <james.greenhalgh@arm.com> * config/aarch64/aarch64.c: diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 9cbbed5e362..443a9a98233 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1586,6 +1586,7 @@ "TARGET_SIMD" { int inverse = 0; + int use_zero_form = 0; int swap_bsl_operands = 0; rtx mask = gen_reg_rtx (<V_cmp_result>mode); rtx tmp = gen_reg_rtx (<V_cmp_result>mode); @@ -1596,12 +1597,16 @@ switch (GET_CODE (operands[3])) { case GE: + case GT: case LE: + case LT: case EQ: - if (!REG_P (operands[5]) - && (operands[5] != CONST0_RTX (<MODE>mode))) - operands[5] = force_reg (<MODE>mode, operands[5]); - break; + if (operands[5] == CONST0_RTX (<MODE>mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. 
*/ default: if (!REG_P (operands[5])) operands[5] = force_reg (<MODE>mode, operands[5]); @@ -1652,7 +1657,26 @@ a GT b -> a GT b a LE b -> b GE a a LT b -> b GT a - a EQ b -> a EQ b */ + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. */ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_aarch64_cmlt<mode>; + break; + case LE: + base_comparison = gen_aarch64_cmle<mode>; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. */ + break; + } + } if (!inverse) emit_insn (base_comparison (mask, operands[4], operands[5])); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 5bc6f843357..2a1ada29dc9 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3865,14 +3865,21 @@ aarch64_can_eliminate (const int from, const int to) } else { - /* If we decided that we didn't need a frame pointer but then used - LR in the function, then we do need a frame pointer after all, so - prevent this elimination to ensure a frame pointer is used. */ - + /* If we decided that we didn't need a leaf frame pointer but then used + LR in the function, then we'll want a frame pointer after all, so + prevent this elimination to ensure a frame pointer is used. + + NOTE: the original value of flag_omit_frame_pointer gets trashed + IFF flag_omit_leaf_frame_pointer is true, so we check the value + of faked_omit_frame_pointer here (which is true when we always + wish to keep non-leaf frame pointers but only wish to keep leaf frame + pointers when LR is clobbered). 
*/ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM - && df_regs_ever_live_p (LR_REGNUM)) + && df_regs_ever_live_p (LR_REGNUM) + && faked_omit_frame_pointer) return false; } + return true; } diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index ca474033e1d..669217e2762 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -8390,10 +8390,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_dup_f32 (const float32_t * a) { float32x2_t result; - __asm__ ("ld1r {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8401,10 +8401,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) vld1_dup_f64 (const float64_t * a) { float64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8412,10 +8412,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vld1_dup_p8 (const poly8_t * a) { poly8x8_t result; - __asm__ ("ld1r {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8423,10 +8423,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vld1_dup_p16 (const poly16_t * a) { poly16x4_t result; - __asm__ ("ld1r {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8434,10 +8434,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vld1_dup_s8 (const int8_t * a) { int8x8_t result; - __asm__ ("ld1r {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : 
"Utv"(*a) + : /* No clobbers */); return result; } @@ -8445,10 +8445,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vld1_dup_s16 (const int16_t * a) { int16x4_t result; - __asm__ ("ld1r {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8456,10 +8456,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vld1_dup_s32 (const int32_t * a) { int32x2_t result; - __asm__ ("ld1r {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8467,10 +8467,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vld1_dup_s64 (const int64_t * a) { int64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8478,10 +8478,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vld1_dup_u8 (const uint8_t * a) { uint8x8_t result; - __asm__ ("ld1r {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8489,10 +8489,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vld1_dup_u16 (const uint16_t * a) { uint16x4_t result; - __asm__ ("ld1r {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8500,10 +8500,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vld1_dup_u32 (const uint32_t * a) { uint32x2_t result; - __asm__ ("ld1r {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No 
clobbers */); return result; } @@ -8511,10 +8511,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vld1_dup_u64 (const uint64_t * a) { uint64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8522,10 +8522,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_f32 (const float32_t * a) { float32x2_t result; - __asm__ ("ld1 {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2s}, %1" + : "=w"(result) + : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8533,10 +8533,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) vld1_f64 (const float64_t * a) { float64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8546,9 +8546,9 @@ vld1_f64 (const float64_t * a) float32x2_t b_ = (b); \ const float32_t * a_ = (a); \ float32x2_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8559,9 +8559,9 @@ vld1_f64 (const float64_t * a) float64x1_t b_ = (b); \ const float64_t * a_ = (a); \ float64x1_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8572,9 +8572,9 @@ vld1_f64 (const float64_t * a) poly8x8_t b_ = (b); \ const poly8_t * a_ = (a); \ poly8x8_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers 
*/); \ result; \ }) @@ -8585,9 +8585,9 @@ vld1_f64 (const float64_t * a) poly16x4_t b_ = (b); \ const poly16_t * a_ = (a); \ poly16x4_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8598,9 +8598,9 @@ vld1_f64 (const float64_t * a) int8x8_t b_ = (b); \ const int8_t * a_ = (a); \ int8x8_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8611,9 +8611,9 @@ vld1_f64 (const float64_t * a) int16x4_t b_ = (b); \ const int16_t * a_ = (a); \ int16x4_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8624,9 +8624,9 @@ vld1_f64 (const float64_t * a) int32x2_t b_ = (b); \ const int32_t * a_ = (a); \ int32x2_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8637,9 +8637,9 @@ vld1_f64 (const float64_t * a) int64x1_t b_ = (b); \ const int64_t * a_ = (a); \ int64x1_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8650,9 +8650,9 @@ vld1_f64 (const float64_t * a) uint8x8_t b_ = (b); \ const uint8_t * a_ = (a); \ uint8x8_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8663,9 +8663,9 @@ vld1_f64 (const float64_t * a) uint16x4_t b_ = (b); \ const uint16_t * a_ = (a); \ 
uint16x4_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8676,9 +8676,9 @@ vld1_f64 (const float64_t * a) uint32x2_t b_ = (b); \ const uint32_t * a_ = (a); \ uint32x2_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8689,9 +8689,9 @@ vld1_f64 (const float64_t * a) uint64x1_t b_ = (b); \ const uint64_t * a_ = (a); \ uint64x1_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8700,10 +8700,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vld1_p8 (const poly8_t * a) { poly8x8_t result; - __asm__ ("ld1 {%0.8b}, [%1]" - : "=w"(result) - : "r"(a) - : /* No clobbers */); + __asm__ ("ld1 {%0.8b}, %1" + : "=w"(result) + : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8711,10 +8711,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vld1_p16 (const poly16_t * a) { poly16x4_t result; - __asm__ ("ld1 {%0.4h}, [%1]" - : "=w"(result) - : "r"(a) - : /* No clobbers */); + __asm__ ("ld1 {%0.4h}, %1" + : "=w"(result) + : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8722,10 +8722,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vld1_s8 (const int8_t * a) { int8x8_t result; - __asm__ ("ld1 {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8b}, %1" + : "=w"(result) + : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8733,10 +8733,10 @@ __extension__ 
static __inline int16x4_t __attribute__ ((__always_inline__)) vld1_s16 (const int16_t * a) { int16x4_t result; - __asm__ ("ld1 {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4h}, %1" + : "=w"(result) + : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8744,10 +8744,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vld1_s32 (const int32_t * a) { int32x2_t result; - __asm__ ("ld1 {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2s}, %1" + : "=w"(result) + : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8755,10 +8755,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vld1_s64 (const int64_t * a) { int64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8766,10 +8766,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vld1_u8 (const uint8_t * a) { uint8x8_t result; - __asm__ ("ld1 {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8b}, %1" + : "=w"(result) + : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8777,10 +8777,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vld1_u16 (const uint16_t * a) { uint16x4_t result; - __asm__ ("ld1 {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4h}, %1" + : "=w"(result) + : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8788,10 +8788,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vld1_u32 (const uint32_t * a) { uint32x2_t result; - __asm__ ("ld1 {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ 
("ld1 {%0.2s}, %1" + : "=w"(result) + : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8799,10 +8799,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vld1_u64 (const uint64_t * a) { uint64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8810,10 +8810,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_dup_f32 (const float32_t * a) { float32x4_t result; - __asm__ ("ld1r {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8821,10 +8821,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vld1q_dup_f64 (const float64_t * a) { float64x2_t result; - __asm__ ("ld1r {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8832,10 +8832,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vld1q_dup_p8 (const poly8_t * a) { poly8x16_t result; - __asm__ ("ld1r {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8843,10 +8843,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vld1q_dup_p16 (const poly16_t * a) { poly16x8_t result; - __asm__ ("ld1r {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8854,10 +8854,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vld1q_dup_s8 (const int8_t * a) { int8x16_t result; - __asm__ ("ld1r {%0.16b},[%1]" - : 
"=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8865,10 +8865,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vld1q_dup_s16 (const int16_t * a) { int16x8_t result; - __asm__ ("ld1r {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8876,10 +8876,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vld1q_dup_s32 (const int32_t * a) { int32x4_t result; - __asm__ ("ld1r {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8887,10 +8887,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vld1q_dup_s64 (const int64_t * a) { int64x2_t result; - __asm__ ("ld1r {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8898,10 +8898,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vld1q_dup_u8 (const uint8_t * a) { uint8x16_t result; - __asm__ ("ld1r {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8909,10 +8909,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vld1q_dup_u16 (const uint16_t * a) { uint16x8_t result; - __asm__ ("ld1r {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8920,10 +8920,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vld1q_dup_u32 (const uint32_t * a) { uint32x4_t result; - __asm__ ("ld1r {%0.4s},[%1]" - : 
"=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8931,10 +8931,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vld1q_dup_u64 (const uint64_t * a) { uint64x2_t result; - __asm__ ("ld1r {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -8942,10 +8942,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_f32 (const float32_t * a) { float32x4_t result; - __asm__ ("ld1 {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4s}, %1" + : "=w"(result) + : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8953,10 +8953,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vld1q_f64 (const float64_t * a) { float64x2_t result; - __asm__ ("ld1 {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2d}, %1" + : "=w"(result) + : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8966,9 +8966,9 @@ vld1q_f64 (const float64_t * a) float32x4_t b_ = (b); \ const float32_t * a_ = (a); \ float32x4_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8979,9 +8979,9 @@ vld1q_f64 (const float64_t * a) float64x2_t b_ = (b); \ const float64_t * a_ = (a); \ float64x2_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -8992,9 +8992,9 @@ vld1q_f64 (const float64_t * a) poly8x16_t b_ = (b); \ const poly8_t * a_ = (a); \ poly8x16_t result; \ - 
__asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9005,9 +9005,9 @@ vld1q_f64 (const float64_t * a) poly16x8_t b_ = (b); \ const poly16_t * a_ = (a); \ poly16x8_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9018,9 +9018,9 @@ vld1q_f64 (const float64_t * a) int8x16_t b_ = (b); \ const int8_t * a_ = (a); \ int8x16_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9031,9 +9031,9 @@ vld1q_f64 (const float64_t * a) int16x8_t b_ = (b); \ const int16_t * a_ = (a); \ int16x8_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9044,9 +9044,9 @@ vld1q_f64 (const float64_t * a) int32x4_t b_ = (b); \ const int32_t * a_ = (a); \ int32x4_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9057,9 +9057,9 @@ vld1q_f64 (const float64_t * a) int64x2_t b_ = (b); \ const int64_t * a_ = (a); \ int64x2_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9070,9 +9070,9 @@ vld1q_f64 (const float64_t * a) uint8x16_t b_ = (b); \ const uint8_t * a_ = (a); \ uint8x16_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : 
"i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9083,9 +9083,9 @@ vld1q_f64 (const float64_t * a) uint16x8_t b_ = (b); \ const uint16_t * a_ = (a); \ uint16x8_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9096,9 +9096,9 @@ vld1q_f64 (const float64_t * a) uint32x4_t b_ = (b); \ const uint32_t * a_ = (a); \ uint32x4_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9109,9 +9109,9 @@ vld1q_f64 (const float64_t * a) uint64x2_t b_ = (b); \ const uint64_t * a_ = (a); \ uint64x2_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -9120,10 +9120,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vld1q_p8 (const poly8_t * a) { poly8x16_t result; - __asm__ ("ld1 {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.16b}, %1" + : "=w"(result) + : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9131,10 +9131,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vld1q_p16 (const poly16_t * a) { poly16x8_t result; - __asm__ ("ld1 {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8h}, %1" + : "=w"(result) + : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9142,10 +9142,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vld1q_s8 (const int8_t * a) { int8x16_t result; - __asm__ ("ld1 {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 
{%0.16b}, %1" + : "=w"(result) + : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9153,10 +9153,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vld1q_s16 (const int16_t * a) { int16x8_t result; - __asm__ ("ld1 {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8h}, %1" + : "=w"(result) + : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9164,10 +9164,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vld1q_s32 (const int32_t * a) { int32x4_t result; - __asm__ ("ld1 {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4s}, %1" + : "=w"(result) + : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9175,10 +9175,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vld1q_s64 (const int64_t * a) { int64x2_t result; - __asm__ ("ld1 {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2d}, %1" + : "=w"(result) + : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9186,10 +9186,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vld1q_u8 (const uint8_t * a) { uint8x16_t result; - __asm__ ("ld1 {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.16b}, %1" + : "=w"(result) + : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9197,10 +9197,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vld1q_u16 (const uint16_t * a) { uint16x8_t result; - __asm__ ("ld1 {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8h}, %1" + : "=w"(result) + : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9208,10 
+9208,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vld1q_u32 (const uint32_t * a) { uint32x4_t result; - __asm__ ("ld1 {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4s}, %1" + : "=w"(result) + : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -9219,10 +9219,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vld1q_u64 (const uint64_t * a) { uint64x2_t result; - __asm__ ("ld1 {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2d}, %1" + : "=w"(result) + : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;})) + : /* No clobbers */); return result; } diff --git a/gcc/testsuite/ChangeLog.aarch64 b/gcc/testsuite/ChangeLog.aarch64 index 65358e15e28..14abda6ed26 100644 --- a/gcc/testsuite/ChangeLog.aarch64 +++ b/gcc/testsuite/ChangeLog.aarch64 @@ -1,3 +1,49 @@ +2013-05-07 Ian Bolton <ian.bolton@arm.com> + + Backport from mainline (fix to botched commit) + 2013-04-04 Tejas Belagod <tejas.belagod@arm.com> + + * gcc.target/aarch64/inc/asm-adder-clobber-lr.c: Remove duplication. + * gcc.target/aarch64/inc/asm-adder-no-clobber-lr.c: Likewise. + * gcc.target/aarch64/test-framepointer-1.c: Likewise. + * gcc.target/aarch64/test-framepointer-2.c: Likewise. + * gcc.target/aarch64/test-framepointer-3.c: Likewise. + * gcc.target/aarch64/test-framepointer-4.c: Likewise. + * gcc.target/aarch64/test-framepointer-5.c: Likewise. + * gcc.target/aarch64/test-framepointer-6.c: Likewise. + * gcc.target/aarch64/test-framepointer-7.c: Likewise. + * gcc.target/aarch64/test-framepointer-8.c: Likewise. + + Backport from mainline + 2013-03-28 Ian Bolton <ian.bolton@arm.com> + + * gcc.target/aarch64/inc/asm-adder-clobber-lr.c: New test. + * gcc.target/aarch64/inc/asm-adder-no-clobber-lr.c: Likewise. + * gcc.target/aarch64/test-framepointer-1.c: Likewise. + * gcc.target/aarch64/test-framepointer-2.c: Likewise. 
+ * gcc.target/aarch64/test-framepointer-3.c: Likewise. + * gcc.target/aarch64/test-framepointer-4.c: Likewise. + * gcc.target/aarch64/test-framepointer-5.c: Likewise. + * gcc.target/aarch64/test-framepointer-6.c: Likewise. + * gcc.target/aarch64/test-framepointer-7.c: Likewise. + * gcc.target/aarch64/test-framepointer-8.c: Likewise. + +2013-04-30 James Greenhalgh <james.greenhalgh@arm.com> + + Backported from mainline. + 2013-04-11 James Greenhalgh <james.greenhalgh@arm.com> + + * gcc.target/aarch64/vect-fcm.x: Add check for zero forms of + inverse operands. + * gcc.target/aarch64/vect-fcm-eq-d.c: Check that new zero form + loop is vectorized. + * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise. + * gcc.target/aarch64/vect-fcm-ge-d.c: Check that new zero form + loop is vectorized and that the correct instruction is generated. + * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise. + * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise. + * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise. + 2013-02-13 James Greenhalgh <james.greenhalgh@arm.com> Backport from mainline. 
diff --git a/gcc/testsuite/gcc.target/aarch64/asm-adder-clobber-lr.c b/gcc/testsuite/gcc.target/aarch64/asm-adder-clobber-lr.c new file mode 100644 index 00000000000..540c79b0135 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/asm-adder-clobber-lr.c @@ -0,0 +1,23 @@ +extern void abort (void); + +int +adder (int a, int b) +{ + int result; + __asm__ ("add %w0,%w1,%w2" : "=r"(result) : "r"(a), "r"(b) : "x30"); + return result; +} + +int +main (int argc, char** argv) +{ + int i; + int total = argc; + for (i = 0; i < 20; i++) + total = adder (total, i); + + if (total != (190 + argc)) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/asm-adder-no-clobber-lr.c b/gcc/testsuite/gcc.target/aarch64/asm-adder-no-clobber-lr.c new file mode 100644 index 00000000000..2543d50e78f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/asm-adder-no-clobber-lr.c @@ -0,0 +1,23 @@ +extern void abort (void); + +int +adder (int a, int b) +{ + int result; + __asm__ ("add %w0,%w1,%w2" : "=r"(result) : "r"(a), "r"(b) : ); + return result; +} + +int +main (int argc, char** argv) +{ + int i; + int total = argc; + for (i = 0; i < 20; i++) + total = adder (total, i); + + if (total != (190 + argc)) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/test-framepointer-1.c b/gcc/testsuite/gcc.target/aarch64/test-framepointer-1.c new file mode 100644 index 00000000000..e44ca6d4cd8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/test-framepointer-1.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-inline --save-temps" } */ + +#include "asm-adder-no-clobber-lr.c" + +/* omit-frame-pointer is FALSE. + omit-leaf-frame-pointer is FALSE. + LR is not being clobbered in the leaf. + + With no frame pointer omissions, we expect a frame record + for main and the leaf. */ + +/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 
2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/test-framepointer-2.c b/gcc/testsuite/gcc.target/aarch64/test-framepointer-2.c new file mode 100644 index 00000000000..40e483526b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/test-framepointer-2.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer -mno-omit-leaf-frame-pointer -fno-inline --save-temps" } */ + +#include "asm-adder-no-clobber-lr.c" + +/* omit-frame-pointer is TRUE. + omit-leaf-frame-pointer is false, but irrelevant due to omit-frame-pointer. + LR is not being clobbered in the leaf. + + Since we asked to have no frame pointers anywhere, we expect no frame + record in main or the leaf. */ + +/* { dg-final { scan-assembler-not "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/test-framepointer-3.c b/gcc/testsuite/gcc.target/aarch64/test-framepointer-3.c new file mode 100644 index 00000000000..98cb2e0b62b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/test-framepointer-3.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-inline --save-temps" } */ + +#include "asm-adder-no-clobber-lr.c" + +/* omit-frame-pointer is TRUE. + omit-leaf-frame-pointer is true, but irrelevant due to omit-frame-pointer. + LR is not being clobbered in the leaf. + + Since we asked to have no frame pointers anywhere, we expect no frame + record in main or the leaf. */ + +/* { dg-final { scan-assembler-not "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 
} } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/test-framepointer-4.c b/gcc/testsuite/gcc.target/aarch64/test-framepointer-4.c new file mode 100644 index 00000000000..4143a7a9cc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/test-framepointer-4.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-omit-frame-pointer -momit-leaf-frame-pointer -fno-inline --save-temps" } */ + +#include "asm-adder-no-clobber-lr.c" + +/* omit-frame-pointer is FALSE. + omit-leaf-frame-pointer is TRUE. + LR is not being clobbered in the leaf. + + Unless we are removing all frame records, it's OK to remove the frame + record for a leaf where LR is not clobbered. Therefore, we expect a + frame record only in main. */ + +/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/test-framepointer-5.c b/gcc/testsuite/gcc.target/aarch64/test-framepointer-5.c new file mode 100644 index 00000000000..c22bdc304a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/test-framepointer-5.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-inline --save-temps" } */ + +#include "asm-adder-clobber-lr.c" + +/* omit-frame-pointer is FALSE. + omit-leaf-frame-pointer is FALSE. + LR is being clobbered in the leaf. + + With no frame pointer omissions, we expect a frame record for main + and the leaf. */ + +/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 
2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/test-framepointer-6.c b/gcc/testsuite/gcc.target/aarch64/test-framepointer-6.c new file mode 100644 index 00000000000..e08ee43e5ec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/test-framepointer-6.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer -mno-omit-leaf-frame-pointer -fno-inline --save-temps" } */ + +#include "asm-adder-clobber-lr.c" + +/* omit-frame-pointer is TRUE. + omit-leaf-frame-pointer is false, but irrelevant due to omit-frame-pointer. + LR is being clobbered in the leaf. + + Since we asked to have no frame pointers anywhere, we expect no frame + record in main or the leaf. */ + +/* { dg-final { scan-assembler-not "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/test-framepointer-7.c b/gcc/testsuite/gcc.target/aarch64/test-framepointer-7.c new file mode 100644 index 00000000000..e8f7cabe77f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/test-framepointer-7.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-inline --save-temps" } */ + +#include "asm-adder-clobber-lr.c" + +/* omit-frame-pointer is TRUE. + omit-leaf-frame-pointer is true, but irrelevant due to omit-frame-pointer. + LR is being clobbered in the leaf. + + Since we asked to have no frame pointers anywhere, we expect no frame + record in main or the leaf. */ + +/* { dg-final { scan-assembler-not "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 
} } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/test-framepointer-8.c b/gcc/testsuite/gcc.target/aarch64/test-framepointer-8.c new file mode 100644 index 00000000000..c09b687597b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/test-framepointer-8.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-omit-frame-pointer -momit-leaf-frame-pointer -fno-inline --save-temps" } */ + +#include "asm-adder-clobber-lr.c" + +/* omit-frame-pointer is FALSE. + omit-leaf-frame-pointer is TRUE. + LR is being clobbered in the leaf. + + Unless we are removing all frame records (which we aren't), it's + not OK to remove the frame record for a leaf where LR is clobbered. + Therefore, we expect a frame record in main and leaf. */ + +/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c index b6fb5ae8798..19ecd63b1d1 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c @@ -7,7 +7,7 @@ #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c index 283d34fb80a..30be5adf529 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c @@ -7,7 +7,7 @@ #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { 
scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c index 868e1f8e60d..b922833be8c 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c @@ -7,8 +7,9 @@ #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ +/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c index e3258f3641e..04d3533ffe3 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c @@ -7,8 +7,9 @@ #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ +/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c 
b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c index ed8b4521591..421a04acfb4 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c @@ -7,8 +7,9 @@ #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ +/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c index e90a87581d3..cdeab14e0ed 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c @@ -7,8 +7,9 @@ #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ +/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x index 7e51bef0ce2..803861b0293 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x @@ -40,6 +40,15 @@ foobar (FTYPE *in1, FTYPE *in2, FTYPE *output) output[i] = (in1[i] OP 0.0) ? 
4.0 : 2.0; } +void +foobarbar (FTYPE *in1, FTYPE *in2, FTYPE *output) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0; +} + int main (int argc, char **argv) { @@ -51,6 +60,11 @@ main (int argc, char **argv) for (i = 0; i < N; i++) if (out1[i] != out2[i]) abort (); + foobar (input1, input2, out1); + foobarbar (input1, input2, out2); + for (i = 0; i < N; i++) + if (out1[i] == out2[i]) + abort (); return 0; } |