aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Gretton-Dann <matthew.gretton-dann@linaro.org>2012-10-09 10:06:03 +0100
committerMatthew Gretton-Dann <matthew.gretton-dann@linaro.org>2012-10-09 10:06:03 +0100
commitf751525a8b1d3f620c528faaa417146ee61b0d5c (patch)
tree2c50d8d57cfcecb503d0d9d4ca5a72663d4bd359
parent2c39415cbb77437858f8de547d13be3b608316f8 (diff)
parentc197f39ed0e00482c5631c74d2bf8f675de05b0d (diff)
Merge from fsf gcc arm/aarch64-4.7-branch
(svn branches/arm/aarch64-4.7-branch 192093).
-rw-r--r--ChangeLog.linaro9
-rw-r--r--gcc/ChangeLog.aarch6426
-rw-r--r--gcc/config/aarch64/arm_neon.h51
-rw-r--r--gcc/reload.c286
4 files changed, 208 insertions, 164 deletions
diff --git a/ChangeLog.linaro b/ChangeLog.linaro
index 11ee2416ea4..47a8ab14923 100644
--- a/ChangeLog.linaro
+++ b/ChangeLog.linaro
@@ -1,7 +1,12 @@
+2012-10-05 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
+
+ Merge from fsf gcc arm/aarch64-4.7-branch
+ (svn branches/arm/aarch64-4.7-branch 192093).
+
2012-10-03 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
- Merge from fsf gcc arm/aarch64-4_7-branch
- (svn branches/arm/aarch64-4_7-branch 191926).
+ Merge from fsf gcc arm/aarch64-4.7-branch
+ (svn branches/arm/aarch64-4.7-branch 191926).
2012-10-02 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
diff --git a/gcc/ChangeLog.aarch64 b/gcc/ChangeLog.aarch64
index 00b93a66489..36b9d0bfcc0 100644
--- a/gcc/ChangeLog.aarch64
+++ b/gcc/ChangeLog.aarch64
@@ -1,3 +1,29 @@
+2012-10-04 Tejas Belagod <tejas.belagod@arm.com>
+
+ * config/aarch64/arm_neon.h: Rename vqml<as>l<sh>_* to
+ vqdml<as>l<sh>_*.
+
+2012-10-04 Tejas Belagod <tejas.belagod@arm.com>
+
+ * config/aarch64/arm_neon.h (vfma_n_f32, vfmaq_n_f32, vfmaq_n_f64): New.
+
+2012-10-04 Tejas Belagod <tejas.belagod@arm.com>
+
+ * config/aarch64/arm_neon.h (vbslq_f64): Fix parameter type.
+
+2012-10-02 Tejas Belagod <tejas.belagod@arm.com>
+ Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
+
+ * reload.c (find_reloads_subreg_address): Remove FORCE_REPLACE
+ parameter. Always replace normal subreg with memory reference
+ whenever possible. Return NULL otherwise.
+ (find_reloads_toplev): Always call find_reloads_subreg_address
+ for subregs of registers equivalent to a memory location.
+ Only recurse further if find_reloads_subreg_address fails.
+ (find_reloads_address_1): Only call find_reloads_subreg_address
+ for subregs of registers equivalent to a memory location.
+ Properly handle failure of find_reloads_subreg_address.
+
2012-10-01 Ian Bolton <ian.bolton@arm.com>
Richard Henderson <rth@redhat.com>
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index a092dfff94e..6f4480e2288 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4975,7 +4975,7 @@ vbslq_f32 (uint32x4_t a, float32x4_t b, float32x4_t c)
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vbslq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
+vbslq_f64 (uint64x2_t a, float64x2_t b, float64x2_t c)
{
float64x2_t result;
__asm__ ("bsl %0.16b, %2.16b, %3.16b"
@@ -7887,6 +7887,39 @@ vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
})
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
+{
+ float32x2_t result;
+ __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
+ : "=w"(result)
+ : "0"(a), "w"(b), "w"(c)
+ : /* No clobbers */);
+ return result;
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
+{
+ float32x4_t result;
+ __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
+ : "=w"(result)
+ : "0"(a), "w"(b), "w"(c)
+ : /* No clobbers */);
+ return result;
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
+{
+ float64x2_t result;
+ __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
+ : "=w"(result)
+ : "0"(a), "w"(b), "w"(c)
+ : /* No clobbers */);
+ return result;
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
{
float32x2_t result;
@@ -14084,7 +14117,7 @@ vqdmulhq_n_s32 (int32x4_t a, int32_t b)
return result;
}
-#define vqmlalh_lane_s16(a, b, c) \
+#define vqdmlalh_lane_s16(a, b, c) \
__extension__ \
({ \
int16x8_t b_ = (b); \
@@ -14098,7 +14131,7 @@ vqdmulhq_n_s32 (int32x4_t a, int32_t b)
})
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
-vqmlalh_s16 (int16_t a, int16_t b)
+vqdmlalh_s16 (int16_t a, int16_t b)
{
int32_t result;
__asm__ ("sqdmlal %s0,%h1,%h2"
@@ -14108,7 +14141,7 @@ vqmlalh_s16 (int16_t a, int16_t b)
return result;
}
-#define vqmlals_lane_s32(a, b, c) \
+#define vqdmlals_lane_s32(a, b, c) \
__extension__ \
({ \
int32x4_t b_ = (b); \
@@ -14122,7 +14155,7 @@ vqmlalh_s16 (int16_t a, int16_t b)
})
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vqmlals_s32 (int32_t a, int32_t b)
+vqdmlals_s32 (int32_t a, int32_t b)
{
int64_t result;
__asm__ ("sqdmlal %d0,%s1,%s2"
@@ -14132,7 +14165,7 @@ vqmlals_s32 (int32_t a, int32_t b)
return result;
}
-#define vqmlslh_lane_s16(a, b, c) \
+#define vqdmlslh_lane_s16(a, b, c) \
__extension__ \
({ \
int16x8_t b_ = (b); \
@@ -14146,7 +14179,7 @@ vqmlals_s32 (int32_t a, int32_t b)
})
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
-vqmlslh_s16 (int16_t a, int16_t b)
+vqdmlslh_s16 (int16_t a, int16_t b)
{
int32_t result;
__asm__ ("sqdmlsl %s0,%h1,%h2"
@@ -14156,7 +14189,7 @@ vqmlslh_s16 (int16_t a, int16_t b)
return result;
}
-#define vqmlsls_lane_s32(a, b, c) \
+#define vqdmlsls_lane_s32(a, b, c) \
__extension__ \
({ \
int32x4_t b_ = (b); \
@@ -14170,7 +14203,7 @@ vqmlslh_s16 (int16_t a, int16_t b)
})
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vqmlsls_s32 (int32_t a, int32_t b)
+vqdmlsls_s32 (int32_t a, int32_t b)
{
int64_t result;
__asm__ ("sqdmlsl %d0,%s1,%s2"
diff --git a/gcc/reload.c b/gcc/reload.c
index 8420c808073..a46241980bc 100644
--- a/gcc/reload.c
+++ b/gcc/reload.c
@@ -283,7 +283,7 @@ static int find_reloads_address_1 (enum machine_mode, addr_space_t, rtx, int,
static void find_reloads_address_part (rtx, rtx *, enum reg_class,
enum machine_mode, int,
enum reload_type, int);
-static rtx find_reloads_subreg_address (rtx, int, int, enum reload_type,
+static rtx find_reloads_subreg_address (rtx, int, enum reload_type,
int, rtx, int *);
static void copy_replacements_1 (rtx *, rtx *, int);
static int find_inc_amount (rtx, rtx);
@@ -4745,31 +4745,19 @@ find_reloads_toplev (rtx x, int opnum, enum reload_type type,
}
/* If the subreg contains a reg that will be converted to a mem,
- convert the subreg to a narrower memref now.
- Otherwise, we would get (subreg (mem ...) ...),
- which would force reload of the mem.
-
- We also need to do this if there is an equivalent MEM that is
- not offsettable. In that case, alter_subreg would produce an
- invalid address on big-endian machines.
-
- For machines that extend byte loads, we must not reload using
- a wider mode if we have a paradoxical SUBREG. find_reloads will
- force a reload in that case. So we should not do anything here. */
+ attempt to convert the whole subreg to a (narrower or wider)
+ memory reference instead. If this succeeds, we're done --
+ otherwise fall through to check whether the inner reg still
+ needs address reloads anyway. */
if (regno >= FIRST_PSEUDO_REGISTER
-#ifdef LOAD_EXTEND_OP
- && !paradoxical_subreg_p (x)
-#endif
- && (reg_equiv_address (regno) != 0
- || (reg_equiv_mem (regno) != 0
- && (! strict_memory_address_addr_space_p
- (GET_MODE (x), XEXP (reg_equiv_mem (regno), 0),
- MEM_ADDR_SPACE (reg_equiv_mem (regno)))
- || ! offsettable_memref_p (reg_equiv_mem (regno))
- || num_not_at_initial_offset))))
- x = find_reloads_subreg_address (x, 1, opnum, type, ind_levels,
- insn, address_reloaded);
+ && reg_equiv_memory_loc (regno) != 0)
+ {
+ tem = find_reloads_subreg_address (x, opnum, type, ind_levels,
+ insn, address_reloaded);
+ if (tem)
+ return tem;
+ }
}
for (copied = 0, i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
@@ -6007,12 +5995,31 @@ find_reloads_address_1 (enum machine_mode mode, addr_space_t as,
if (ira_reg_class_max_nregs [rclass][GET_MODE (SUBREG_REG (x))]
> reg_class_size[(int) rclass])
{
- x = find_reloads_subreg_address (x, 0, opnum,
- ADDR_TYPE (type),
- ind_levels, insn, NULL);
- push_reload (x, NULL_RTX, loc, (rtx*) 0, rclass,
- GET_MODE (x), VOIDmode, 0, 0, opnum, type);
- return 1;
+ /* If the inner register will be replaced by a memory
+ reference, we can do this only if we can replace the
+ whole subreg by a (narrower) memory reference. If
+ this is not possible, fall through and reload just
+ the inner register (including address reloads). */
+ if (reg_equiv_memory_loc (REGNO (SUBREG_REG (x))) != 0)
+ {
+ rtx tem = find_reloads_subreg_address (x, opnum,
+ ADDR_TYPE (type),
+ ind_levels, insn,
+ NULL);
+ if (tem)
+ {
+ push_reload (tem, NULL_RTX, loc, (rtx*) 0, rclass,
+ GET_MODE (tem), VOIDmode, 0, 0,
+ opnum, type);
+ return 1;
+ }
+ }
+ else
+ {
+ push_reload (x, NULL_RTX, loc, (rtx*) 0, rclass,
+ GET_MODE (x), VOIDmode, 0, 0, opnum, type);
+ return 1;
+ }
}
}
}
@@ -6089,17 +6096,12 @@ find_reloads_address_part (rtx x, rtx *loc, enum reg_class rclass,
}
/* X, a subreg of a pseudo, is a part of an address that needs to be
- reloaded.
-
- If the pseudo is equivalent to a memory location that cannot be directly
- addressed, make the necessary address reloads.
+ reloaded, and the pseudo is equivalent to a memory location.
- If address reloads have been necessary, or if the address is changed
- by register elimination, return the rtx of the memory location;
- otherwise, return X.
-
- If FORCE_REPLACE is nonzero, unconditionally replace the subreg with the
- memory location.
+ Attempt to replace the whole subreg by a (possibly narrower or wider)
+ memory reference. If this is possible, return this new memory
+ reference, and push all required address reloads. Otherwise,
+ return NULL.
OPNUM and TYPE identify the purpose of the reload.
@@ -6111,130 +6113,108 @@ find_reloads_address_part (rtx x, rtx *loc, enum reg_class rclass,
stack slots. */
static rtx
-find_reloads_subreg_address (rtx x, int force_replace, int opnum,
- enum reload_type type, int ind_levels, rtx insn,
- int *address_reloaded)
+find_reloads_subreg_address (rtx x, int opnum, enum reload_type type,
+ int ind_levels, rtx insn, int *address_reloaded)
{
+ enum machine_mode outer_mode = GET_MODE (x);
+ enum machine_mode inner_mode = GET_MODE (SUBREG_REG (x));
+ unsigned outer_size = GET_MODE_SIZE (outer_mode);
+ unsigned inner_size = GET_MODE_SIZE (inner_mode);
int regno = REGNO (SUBREG_REG (x));
int reloaded = 0;
+ rtx tem, orig;
+ int offset;
- if (reg_equiv_memory_loc (regno))
- {
- /* If the address is not directly addressable, or if the address is not
- offsettable, then it must be replaced. */
- if (! force_replace
- && (reg_equiv_address (regno)
- || ! offsettable_memref_p (reg_equiv_mem (regno))))
- force_replace = 1;
-
- if (force_replace || num_not_at_initial_offset)
- {
- rtx tem = make_memloc (SUBREG_REG (x), regno);
+ gcc_assert (reg_equiv_memory_loc (regno) != 0);
- /* If the address changes because of register elimination, then
- it must be replaced. */
- if (force_replace
- || ! rtx_equal_p (tem, reg_equiv_mem (regno)))
- {
- unsigned outer_size = GET_MODE_SIZE (GET_MODE (x));
- unsigned inner_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)));
- int offset;
- rtx orig = tem;
-
- /* For big-endian paradoxical subregs, SUBREG_BYTE does not
- hold the correct (negative) byte offset. */
- if (BYTES_BIG_ENDIAN && outer_size > inner_size)
- offset = inner_size - outer_size;
- else
- offset = SUBREG_BYTE (x);
-
- XEXP (tem, 0) = plus_constant (XEXP (tem, 0), offset);
- PUT_MODE (tem, GET_MODE (x));
- if (MEM_OFFSET_KNOWN_P (tem))
- set_mem_offset (tem, MEM_OFFSET (tem) + offset);
- if (MEM_SIZE_KNOWN_P (tem)
- && MEM_SIZE (tem) != (HOST_WIDE_INT) outer_size)
- set_mem_size (tem, outer_size);
-
- /* If this was a paradoxical subreg that we replaced, the
- resulting memory must be sufficiently aligned to allow
- us to widen the mode of the memory. */
- if (outer_size > inner_size)
- {
- rtx base;
+ /* We cannot replace the subreg with a modified memory reference if:
- base = XEXP (tem, 0);
- if (GET_CODE (base) == PLUS)
- {
- if (CONST_INT_P (XEXP (base, 1))
- && INTVAL (XEXP (base, 1)) % outer_size != 0)
- return x;
- base = XEXP (base, 0);
- }
- if (!REG_P (base)
- || (REGNO_POINTER_ALIGN (REGNO (base))
- < outer_size * BITS_PER_UNIT))
- return x;
- }
+ - we have a paradoxical subreg that implicitly acts as a zero or
+ sign extension operation due to LOAD_EXTEND_OP;
- reloaded = find_reloads_address (GET_MODE (tem), &tem,
- XEXP (tem, 0), &XEXP (tem, 0),
- opnum, type, ind_levels, insn);
- /* ??? Do we need to handle nonzero offsets somehow? */
- if (!offset && !rtx_equal_p (tem, orig))
- push_reg_equiv_alt_mem (regno, tem);
-
- /* For some processors an address may be valid in the
- original mode but not in a smaller mode. For
- example, ARM accepts a scaled index register in
- SImode but not in HImode. Note that this is only
- a problem if the address in reg_equiv_mem is already
- invalid in the new mode; other cases would be fixed
- by find_reloads_address as usual.
-
- ??? We attempt to handle such cases here by doing an
- additional reload of the full address after the
- usual processing by find_reloads_address. Note that
- this may not work in the general case, but it seems
- to cover the cases where this situation currently
- occurs. A more general fix might be to reload the
- *value* instead of the address, but this would not
- be expected by the callers of this routine as-is.
-
- If find_reloads_address already completed replaced
- the address, there is nothing further to do. */
- if (reloaded == 0
- && reg_equiv_mem (regno) != 0
- && !strict_memory_address_addr_space_p
- (GET_MODE (x), XEXP (reg_equiv_mem (regno), 0),
- MEM_ADDR_SPACE (reg_equiv_mem (regno))))
- {
- push_reload (XEXP (tem, 0), NULL_RTX, &XEXP (tem, 0), (rtx*) 0,
- base_reg_class (GET_MODE (tem),
- MEM_ADDR_SPACE (tem),
- MEM, SCRATCH),
- GET_MODE (XEXP (tem, 0)), VOIDmode, 0, 0,
- opnum, type);
- reloaded = 1;
- }
- /* If this is not a toplevel operand, find_reloads doesn't see
- this substitution. We have to emit a USE of the pseudo so
- that delete_output_reload can see it. */
- if (replace_reloads && recog_data.operand[opnum] != x)
- /* We mark the USE with QImode so that we recognize it
- as one that can be safely deleted at the end of
- reload. */
- PUT_MODE (emit_insn_before (gen_rtx_USE (VOIDmode,
- SUBREG_REG (x)),
- insn), QImode);
- x = tem;
- }
- }
+ - we have a subreg that is implicitly supposed to act on the full
+ register due to WORD_REGISTER_OPERATIONS (see also eliminate_regs);
+
+ - the address of the equivalent memory location is mode-dependent; or
+
+ - we have a paradoxical subreg and the resulting memory is not
+ sufficiently aligned to allow access in the wider mode.
+
+ In addition, we choose not to perform the replacement for *any*
+ paradoxical subreg, even if it were possible in principle. This
+ is to avoid generating wider memory references than necessary.
+
+ This corresponds to how previous versions of reload used to handle
+ paradoxical subregs where no address reload was required. */
+
+ if (paradoxical_subreg_p (x))
+ return NULL;
+
+#ifdef WORD_REGISTER_OPERATIONS
+ if (outer_size < inner_size
+ && ((outer_size - 1) / UNITS_PER_WORD
+ == (inner_size - 1) / UNITS_PER_WORD))
+ return NULL;
+#endif
+
+ /* Since we don't attempt to handle paradoxical subregs, we can just
+ call into simplify_subreg, which will handle all remaining checks
+ for us. */
+ orig = make_memloc (SUBREG_REG (x), regno);
+ offset = SUBREG_BYTE (x);
+ tem = simplify_subreg (outer_mode, orig, inner_mode, offset);
+ if (!tem || !MEM_P (tem))
+ return NULL;
+
+ /* Now push all required address reloads, if any. */
+ reloaded = find_reloads_address (GET_MODE (tem), &tem,
+ XEXP (tem, 0), &XEXP (tem, 0),
+ opnum, type, ind_levels, insn);
+ /* ??? Do we need to handle nonzero offsets somehow? */
+ if (!offset && !rtx_equal_p (tem, orig))
+ push_reg_equiv_alt_mem (regno, tem);
+
+ /* For some processors an address may be valid in the original mode but
+ not in a smaller mode. For example, ARM accepts a scaled index register
+ in SImode but not in HImode. Note that this is only a problem if the
+ address in reg_equiv_mem is already invalid in the new mode; other
+ cases would be fixed by find_reloads_address as usual.
+
+ ??? We attempt to handle such cases here by doing an additional reload
+ of the full address after the usual processing by find_reloads_address.
+ Note that this may not work in the general case, but it seems to cover
+ the cases where this situation currently occurs. A more general fix
+ might be to reload the *value* instead of the address, but this would
+ not be expected by the callers of this routine as-is.
+
+ If find_reloads_address already completely replaced the address, there
+ is nothing further to do. */
+ if (reloaded == 0
+ && reg_equiv_mem (regno) != 0
+ && !strict_memory_address_addr_space_p
+ (GET_MODE (x), XEXP (reg_equiv_mem (regno), 0),
+ MEM_ADDR_SPACE (reg_equiv_mem (regno))))
+ {
+ push_reload (XEXP (tem, 0), NULL_RTX, &XEXP (tem, 0), (rtx*) 0,
+ base_reg_class (GET_MODE (tem), MEM_ADDR_SPACE (tem),
+ MEM, SCRATCH),
+ GET_MODE (XEXP (tem, 0)), VOIDmode, 0, 0, opnum, type);
+ reloaded = 1;
}
+
+ /* If this is not a toplevel operand, find_reloads doesn't see this
+ substitution. We have to emit a USE of the pseudo so that
+ delete_output_reload can see it. */
+ if (replace_reloads && recog_data.operand[opnum] != x)
+ /* We mark the USE with QImode so that we recognize it as one that
+ can be safely deleted at the end of reload. */
+ PUT_MODE (emit_insn_before (gen_rtx_USE (VOIDmode, SUBREG_REG (x)), insn),
+ QImode);
+
if (address_reloaded)
*address_reloaded = reloaded;
- return x;
+ return tem;
}
/* Substitute into the current INSN the registers into which we have reloaded