author    collison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>  2015-02-10 02:23:40 +0000
committer collison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>  2015-02-10 02:23:40 +0000
commit    ddb01135e55ee8b994905998e9ed546787c45a59 (patch)
tree      71eb6d21121fa1876fde3059b4a414095d2e1203 /gcc/config/aarch64
parent    6a3e62e859aeb73dfbf20b264ea70f54750a57de (diff)
2015-02-09  Michael Collison  <michael.collison@linaro.org>

    Backport from trunk r216779.
    2014-10-28  Alan Lawrence  <alan.lawrence@arm.com>

    * expr.c (expand_expr_real_2): Remove code handling VEC_LSHIFT_EXPR.
    * fold-const.c (const_binop): Likewise.
    * cfgexpand.c (expand_debug_expr): Likewise.
    * tree-inline.c (estimate_operator_cost): Likewise.
    * tree-vect-generic.c (expand_vector_operations_1): Likewise.
    * optabs.c (optab_for_tree_code): Likewise.
    (expand_vec_shift_expr): Likewise, update comment.
    * tree.def: Delete VEC_LSHIFT_EXPR, remove comment.
    * optabs.h (expand_vec_shift_expr): Remove comment re. VEC_LSHIFT_EXPR.
    * optabs.def: Remove vec_shl_optab.
    * doc/md.texi: Remove references to vec_shr_m.

2015-02-09  Michael Collison  <michael.collison@linaro.org>

    Backport from trunk r216742.
    2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>

    * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define again.
    * config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin):
    Restore, enable for bigendian, update to use __builtin..._scal...

2015-02-09  Michael Collison  <michael.collison@linaro.org>

    Backport from trunk r216741.
    2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>

    * config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
    reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
    (reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
    reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
    * config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_<mode>): Rename
    VDQV_S variant to...
    (reduc_<maxmin_uns>_internal<mode>): ...this.
    (reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
    (reduc_<maxmin_uns>_scal_<mode>): New (*2).
    (reduc_<maxmin_uns>_v2si): Combine with below, renaming...
    (reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
    (reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
    * config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
    vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
    vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
    vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
    vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
    vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
    vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
    __builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.

2015-02-09  Michael Collison  <michael.collison@linaro.org>

    Backport from trunk r216738.
    2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>

    * config/aarch64/aarch64-simd-builtins.def (reduc_splus_<mode>/VDQF,
    reduc_uplus_<mode>/VDQF, reduc_splus_v4sf): Remove.
    (reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
    * config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
    (reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
    (reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
    (aarch64_reduc_plus_internal<mode>): ...this.
    (reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
    (aarch64_reduc_plus_internalv2si): ...this.
    (reduc_splus_<mode>/V2F): Rename to...
    (aarch64_reduc_plus_internal<mode>): ...this.
    * config/aarch64/iterators.md (UNSPEC_SADDV, UNSPEC_UADDV, SUADDV):
    Remove.
    (UNSPEC_ADDV): New.
    (sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
    * config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32,
    vaddv_u8, vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32,
    vaddvq_s64, vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32,
    vaddvq_f32, vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_...
    to __builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.

2015-02-09  Michael Collison  <michael.collison@linaro.org>

    Backport from trunk r216737.
    2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>

    PR tree-optimization/61114
    * doc/md.texi (Standard Names): Add reduc_(plus,[us](min|max))|scal
    optabs, and note in reduc_[us](plus|min|max) to prefer the former.
    * expr.c (expand_expr_real_2): Use reduc_..._scal if available, fall
    back to old reduc_... BIT_FIELD_REF only if not.
    * optabs.c (optab_for_tree_code): for REDUC_(MAX,MIN,PLUS)_EXPR,
    return the reduce-to-scalar (reduc_..._scal) optab.
    (scalar_reduc_to_vector): New.
    * optabs.def (reduc_smax_scal_optab, reduc_smin_scal_optab,
    reduc_plus_scal_optab, reduc_umax_scal_optab, reduc_umin_scal_optab):
    New.
    * optabs.h (scalar_reduc_to_vector): Declare.
    * tree-vect-loop.c (vectorizable_reduction): Look for optabs reducing
    to either scalar or vector.

2015-02-09  Michael Collison  <michael.collison@linaro.org>

    Backport from trunk r216736.
    2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>

    PR tree-optimization/61114
    * expr.c (expand_expr_real_2): For REDUC_{MIN,MAX,PLUS}_EXPR, add
    extract_bit_field around optab result.
    * fold-const.c (fold_unary_loc): For REDUC_{MIN,MAX,PLUS}_EXPR,
    produce scalar not vector.
    * tree-cfg.c (verify_gimple_assign_unary): Check result vs operand
    type for REDUC_{MIN,MAX,PLUS}_EXPR.
    * tree-vect-loop.c (vect_analyze_loop): Update comment.
    (vect_create_epilog_for_reduction): For direct vector reduction, use
    result of tree code directly without extract_bit_field.
    * tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Update
    comment.

2015-02-09  Michael Collison  <michael.collison@linaro.org>

    Backport from trunk r216734.
    2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>

    * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Comment out.
    * config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin):
    Remove using preprocessor directives.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@220562 138bc75d-0d04-0410-961f-82ee72b054a4
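
The user-visible effect of the series, as a caller-side sketch (illustrative only, not part of the patch): the across-lanes reduction intrinsics in arm_neon.h now return a scalar directly, backed by the new reduc_..._scal builtins, instead of returning a vector that had to be passed through vget_lane/vgetq_lane.

/* Illustrative only -- not part of the patch.  */
#include <arm_neon.h>

int32_t sum_lanes (int32x4_t v)
{
  /* Previously implemented as
     vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (v), 0);
     now a single scalar-producing builtin call.  */
  return vaddvq_s32 (v);
}

uint8_t max_lane (uint8x8_t v)
{
  /* Unsigned max across lanes; per the arm_neon.h hunk below this now
     calls __builtin_aarch64_reduc_umax_scal_v8qi_uu directly.  */
  return vmaxv_u8 (v);
}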
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c          26
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def   21
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md            102
-rw-r--r--  gcc/config/aarch64/arm_neon.h                 158
-rw-r--r--  gcc/config/aarch64/iterators.md                 6
5 files changed, 152 insertions, 161 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 2e3b3a36037..89b705e899c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1236,19 +1236,6 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
tree fndecl;
gimple new_stmt = NULL;
- /* The operations folded below are reduction operations. These are
- defined to leave their result in the 0'th element (from the perspective
- of GCC). The architectural instruction we are folding will leave the
- result in the 0'th element (from the perspective of the architecture).
- For big-endian systems, these perspectives are not aligned.
-
- It is therefore wrong to perform this fold on big-endian. There
- are some tricks we could play with shuffling, but the mid-end is
- inconsistent in the way it treats reduction operations, so we will
- end up in difficulty. Until we fix the ambiguity - just bail out. */
- if (BYTES_BIG_ENDIAN)
- return false;
-
if (call)
{
fndecl = gimple_call_fndecl (stmt);
@@ -1260,23 +1247,28 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
? gimple_call_arg_ptr (stmt, 0)
: &error_mark_node);
+ /* We use gimple's REDUC_(PLUS|MIN|MAX)_EXPRs for float, signed int
+ and unsigned int; it will distinguish according to the types of
+ the arguments to the __builtin. */
switch (fcode)
{
- BUILTIN_VALL (UNOP, reduc_splus_, 10)
- new_stmt = gimple_build_assign_with_ops (
+ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
+ new_stmt = gimple_build_assign_with_ops (
REDUC_PLUS_EXPR,
gimple_call_lhs (stmt),
args[0],
NULL_TREE);
break;
- BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
+ BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
+ BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
new_stmt = gimple_build_assign_with_ops (
REDUC_MAX_EXPR,
gimple_call_lhs (stmt),
args[0],
NULL_TREE);
break;
- BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
+ BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
+ BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
new_stmt = gimple_build_assign_with_ops (
REDUC_MIN_EXPR,
gimple_call_lhs (stmt),
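
The hunk above re-enables the GIMPLE fold (now also on big-endian) and routes both the signed and the new unsigned scalar-reduction builtins to the same REDUC_(PLUS|MIN|MAX)_EXPR tree codes. A caller-side sketch (illustrative, not from the patch) of why one case can serve both:

/* Illustrative only.  Both of these fold to REDUC_MAX_EXPR at GIMPLE
   level; the signed vs. unsigned behaviour comes from the argument
   type of the underlying builtin, not from the tree code, which is
   why reduc_smax_scal_ and reduc_umax_scal_ share one case in
   aarch64_gimple_fold_builtin.  */
#include <arm_neon.h>

int32_t  smax_example (int32x4_t  v) { return vmaxvq_s32 (v); }
uint16_t umax_example (uint16x8_t v) { return vmaxvq_u16 (v); }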
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index fe969a054d2..c08b4ee42bc 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -241,17 +241,16 @@
BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0)
- /* Implemented by reduc_<sur>plus_<mode>. */
- BUILTIN_VALL (UNOP, reduc_splus_, 10)
- BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
-
- /* Implemented by reduc_<maxmin_uns>_<mode>. */
- BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
- BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
- BUILTIN_VDQ_BHSI (UNOP, reduc_umax_, 10)
- BUILTIN_VDQ_BHSI (UNOP, reduc_umin_, 10)
- BUILTIN_VDQF (UNOP, reduc_smax_nan_, 10)
- BUILTIN_VDQF (UNOP, reduc_smin_nan_, 10)
+ /* Implemented by aarch64_reduc_plus_<mode>. */
+ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
+
+ /* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */
+ BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
+ BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
+ BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
+ BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
+ BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
+ BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
/* Implemented by <maxmin><mode>3.
smax variants map to fmaxnm,
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 566f3db2421..137519fee51 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1760,25 +1760,52 @@
;; 'across lanes' add.
-(define_insn "reduc_<sur>plus_<mode>"
+(define_expand "reduc_plus_scal_<mode>"
+ [(match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")]
+ UNSPEC_ADDV)]
+ "TARGET_SIMD"
+ {
+ rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+ rtx scratch = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
+ emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
+ DONE;
+ }
+)
+
+(define_expand "reduc_plus_scal_<mode>"
+ [(match_operand:<VEL> 0 "register_operand" "=w")
+ (match_operand:V2F 1 "register_operand" "w")]
+ "TARGET_SIMD"
+ {
+ rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+ rtx scratch = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
+ emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_reduc_plus_internal<mode>"
[(set (match_operand:VDQV 0 "register_operand" "=w")
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
- SUADDV))]
+ UNSPEC_ADDV))]
"TARGET_SIMD"
"add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_reduc_add<q>")]
)
-(define_insn "reduc_<sur>plus_v2si"
+(define_insn "aarch64_reduc_plus_internalv2si"
[(set (match_operand:V2SI 0 "register_operand" "=w")
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
- SUADDV))]
+ UNSPEC_ADDV))]
"TARGET_SIMD"
"addp\\t%0.2s, %1.2s, %1.2s"
[(set_attr "type" "neon_reduc_add")]
)
-(define_insn "reduc_splus_<mode>"
+(define_insn "aarch64_reduc_plus_internal<mode>"
[(set (match_operand:V2F 0 "register_operand" "=w")
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
UNSPEC_FADDV))]
@@ -1796,14 +1823,17 @@
[(set_attr "type" "neon_fp_reduc_add_s_q")]
)
-(define_expand "reduc_splus_v4sf"
- [(set (match_operand:V4SF 0 "register_operand")
+(define_expand "reduc_plus_scal_v4sf"
+ [(set (match_operand:SF 0 "register_operand")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
UNSPEC_FADDV))]
"TARGET_SIMD"
{
- emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1]));
- emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0]));
+ rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
+ rtx scratch = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
+ emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
+ emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
DONE;
})
@@ -1817,7 +1847,40 @@
;; 'across lanes' max and min ops.
-(define_insn "reduc_<maxmin_uns>_<mode>"
+;; Template for outputting a scalar, so we can create __builtins which can be
+;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
+ FMAXMINV)]
+ "TARGET_SIMD"
+ {
+ rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+ rtx scratch = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
+ operands[1]));
+ emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
+ DONE;
+ }
+)
+
+;; Likewise for integer cases, signed and unsigned.
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
+ MAXMINV)]
+ "TARGET_SIMD"
+ {
+ rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+ rtx scratch = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
+ operands[1]));
+ emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
[(set (match_operand:VDQV_S 0 "register_operand" "=w")
(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
MAXMINV))]
@@ -1826,7 +1889,7 @@
[(set_attr "type" "neon_reduc_minmax<q>")]
)
-(define_insn "reduc_<maxmin_uns>_v2si"
+(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
[(set (match_operand:V2SI 0 "register_operand" "=w")
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
MAXMINV))]
@@ -1835,24 +1898,15 @@
[(set_attr "type" "neon_reduc_minmax")]
)
-(define_insn "reduc_<maxmin_uns>_<mode>"
- [(set (match_operand:V2F 0 "register_operand" "=w")
- (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
+(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
FMAXMINV))]
"TARGET_SIMD"
- "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>"
+ "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
)
-(define_insn "reduc_<maxmin_uns>_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
- FMAXMINV))]
- "TARGET_SIMD"
- "<maxmin_uns_op>v\\t%s0, %1.4s"
- [(set_attr "type" "neon_fp_reduc_minmax_s_q")]
-)
-
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
;; allocation.
;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
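
The new define_expands above all compute the reduction into a scratch vector and then read back one lane via ENDIAN_LANE_N, so that the lane holding the architectural result (lane 0 from the architecture's point of view, which is not GCC's lane 0 on big-endian, as the deleted aarch64-builtins.c comment explained) is extracted on either endianness. A minimal C model of that lane remapping, under the assumption that ENDIAN_LANE_N mirrors the lane index on big-endian (not code from this patch):

/* Illustrative model only -- assumption about ENDIAN_LANE_N's
   behaviour, not taken from the patch.  */
static int
model_endian_lane_n (int nunits, int n, int bytes_big_endian)
{
  /* GCC's lane numbering is mirrored relative to the architectural
     one on big-endian, hence the nunits - 1 - n flip.  */
  return bytes_big_endian ? nunits - 1 - n : n;
}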
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index ab55e9e8c88..6915d9fd8b9 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -13235,121 +13235,103 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddv_s8 (int8x8_t __a)
{
- return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
+ return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddv_s16 (int16x4_t __a)
{
- return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
+ return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddv_s32 (int32x2_t __a)
{
- return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
+ return __builtin_aarch64_reduc_plus_scal_v2si (__a);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddv_u8 (uint8x8_t __a)
{
- return vget_lane_u8 ((uint8x8_t)
- __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
- 0);
+ return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddv_u16 (uint16x4_t __a)
{
- return vget_lane_u16 ((uint16x4_t)
- __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
- 0);
+ return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddv_u32 (uint32x2_t __a)
{
- return vget_lane_u32 ((uint32x2_t)
- __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
- 0);
+ return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
}
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddvq_s8 (int8x16_t __a)
{
- return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
- 0);
+ return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddvq_s16 (int16x8_t __a)
{
- return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
+ return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddvq_s32 (int32x4_t __a)
{
- return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
+ return __builtin_aarch64_reduc_plus_scal_v4si (__a);
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddvq_s64 (int64x2_t __a)
{
- return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
+ return __builtin_aarch64_reduc_plus_scal_v2di (__a);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddvq_u8 (uint8x16_t __a)
{
- return vgetq_lane_u8 ((uint8x16_t)
- __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
- 0);
+ return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddvq_u16 (uint16x8_t __a)
{
- return vgetq_lane_u16 ((uint16x8_t)
- __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
- 0);
+ return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddvq_u32 (uint32x4_t __a)
{
- return vgetq_lane_u32 ((uint32x4_t)
- __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
- 0);
+ return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
}
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddvq_u64 (uint64x2_t __a)
{
- return vgetq_lane_u64 ((uint64x2_t)
- __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
- 0);
+ return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddv_f32 (float32x2_t __a)
{
- float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
- return vget_lane_f32 (__t, 0);
+ return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddvq_f32 (float32x4_t __a)
{
- float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
- return vgetq_lane_f32 (__t, 0);
+ return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vaddvq_f64 (float64x2_t __a)
{
- float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
- return vgetq_lane_f64 (__t, 0);
+ return __builtin_aarch64_reduc_plus_scal_v2df (__a);
}
/* vbsl */
@@ -18520,106 +18502,91 @@ vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxv_f32 (float32x2_t __a)
{
- return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
- 0);
+ return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a);
}
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxv_s8 (int8x8_t __a)
{
- return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
+ return __builtin_aarch64_reduc_smax_scal_v8qi (__a);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxv_s16 (int16x4_t __a)
{
- return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
+ return __builtin_aarch64_reduc_smax_scal_v4hi (__a);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxv_s32 (int32x2_t __a)
{
- return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
+ return __builtin_aarch64_reduc_smax_scal_v2si (__a);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxv_u8 (uint8x8_t __a)
{
- return vget_lane_u8 ((uint8x8_t)
- __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxv_u16 (uint16x4_t __a)
{
- return vget_lane_u16 ((uint16x4_t)
- __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxv_u32 (uint32x2_t __a)
{
- return vget_lane_u32 ((uint32x2_t)
- __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a);
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxvq_f32 (float32x4_t __a)
{
- return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
- 0);
+ return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxvq_f64 (float64x2_t __a)
{
- return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
- 0);
+ return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a);
}
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxvq_s8 (int8x16_t __a)
{
- return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
+ return __builtin_aarch64_reduc_smax_scal_v16qi (__a);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxvq_s16 (int16x8_t __a)
{
- return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
+ return __builtin_aarch64_reduc_smax_scal_v8hi (__a);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxvq_s32 (int32x4_t __a)
{
- return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
+ return __builtin_aarch64_reduc_smax_scal_v4si (__a);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxvq_u8 (uint8x16_t __a)
{
- return vgetq_lane_u8 ((uint8x16_t)
- __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxvq_u16 (uint16x8_t __a)
{
- return vgetq_lane_u16 ((uint16x8_t)
- __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxvq_u32 (uint32x4_t __a)
{
- return vgetq_lane_u32 ((uint32x4_t)
- __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a);
}
/* vmaxnmv */
@@ -18627,20 +18594,19 @@ vmaxvq_u32 (uint32x4_t __a)
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmv_f32 (float32x2_t __a)
{
- return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
- 0);
+ return __builtin_aarch64_reduc_smax_scal_v2sf (__a);
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmvq_f32 (float32x4_t __a)
{
- return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
+ return __builtin_aarch64_reduc_smax_scal_v4sf (__a);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxnmvq_f64 (float64x2_t __a)
{
- return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
+ return __builtin_aarch64_reduc_smax_scal_v2df (__a);
}
/* vmin */
@@ -18766,107 +18732,91 @@ vminnmq_f64 (float64x2_t __a, float64x2_t __b)
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminv_f32 (float32x2_t __a)
{
- return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
- 0);
+ return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a);
}
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminv_s8 (int8x8_t __a)
{
- return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
- 0);
+ return __builtin_aarch64_reduc_smin_scal_v8qi (__a);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminv_s16 (int16x4_t __a)
{
- return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
+ return __builtin_aarch64_reduc_smin_scal_v4hi (__a);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminv_s32 (int32x2_t __a)
{
- return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
+ return __builtin_aarch64_reduc_smin_scal_v2si (__a);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminv_u8 (uint8x8_t __a)
{
- return vget_lane_u8 ((uint8x8_t)
- __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminv_u16 (uint16x4_t __a)
{
- return vget_lane_u16 ((uint16x4_t)
- __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminv_u32 (uint32x2_t __a)
{
- return vget_lane_u32 ((uint32x2_t)
- __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a);
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminvq_f32 (float32x4_t __a)
{
- return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
- 0);
+ return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminvq_f64 (float64x2_t __a)
{
- return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
- 0);
+ return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a);
}
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminvq_s8 (int8x16_t __a)
{
- return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
+ return __builtin_aarch64_reduc_smin_scal_v16qi (__a);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminvq_s16 (int16x8_t __a)
{
- return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
+ return __builtin_aarch64_reduc_smin_scal_v8hi (__a);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminvq_s32 (int32x4_t __a)
{
- return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
+ return __builtin_aarch64_reduc_smin_scal_v4si (__a);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminvq_u8 (uint8x16_t __a)
{
- return vgetq_lane_u8 ((uint8x16_t)
- __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminvq_u16 (uint16x8_t __a)
{
- return vgetq_lane_u16 ((uint16x8_t)
- __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminvq_u32 (uint32x4_t __a)
{
- return vgetq_lane_u32 ((uint32x4_t)
- __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
- 0);
+ return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a);
}
/* vminnmv */
@@ -18874,19 +18824,19 @@ vminvq_u32 (uint32x4_t __a)
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmv_f32 (float32x2_t __a)
{
- return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
+ return __builtin_aarch64_reduc_smin_scal_v2sf (__a);
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmvq_f32 (float32x4_t __a)
{
- return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
+ return __builtin_aarch64_reduc_smin_scal_v4sf (__a);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminnmvq_f64 (float64x2_t __a)
{
- return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
+ return __builtin_aarch64_reduc_smin_scal_v2df (__a);
}
/* vmla */
@@ -19640,7 +19590,7 @@ vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64 (float64x2_t __a)
{
- return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0);
+ return __builtin_aarch64_reduc_plus_scal_v2df (__a);
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
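
After the arm_neon.h changes above, the min/max reductions return the scalar element type directly; the unsigned forms call the new _uu-suffixed builtins with no intermediate casts. A short usage sketch (illustrative only):

/* Illustrative only -- not part of the patch.  */
#include <arm_neon.h>

float32_t min_across_f32 (float32x2_t v)
{
  /* Across-lanes float minimum; now a direct call to
     __builtin_aarch64_reduc_smin_nan_scal_v2sf.  */
  return vminv_f32 (v);
}

uint32_t min_across_u32 (uint32x4_t v)
{
  /* Unsigned across-lanes minimum; now a direct call to
     __builtin_aarch64_reduc_umin_scal_v4si_uu, no casts needed.  */
  return vminvq_u32 (v);
}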
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 917cf2db7f1..d42b5d8f9dd 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -213,8 +213,7 @@
UNSPEC_FMINNMV ; Used in aarch64-simd.md.
UNSPEC_FMINV ; Used in aarch64-simd.md.
UNSPEC_FADDV ; Used in aarch64-simd.md.
- UNSPEC_SADDV ; Used in aarch64-simd.md.
- UNSPEC_UADDV ; Used in aarch64-simd.md.
+ UNSPEC_ADDV ; Used in aarch64-simd.md.
UNSPEC_SMAXV ; Used in aarch64-simd.md.
UNSPEC_SMINV ; Used in aarch64-simd.md.
UNSPEC_UMAXV ; Used in aarch64-simd.md.
@@ -854,8 +853,6 @@
(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
UNSPEC_FMAXNMV UNSPEC_FMINNMV])
-(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV])
-
(define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
UNSPEC_SRHADD UNSPEC_URHADD
UNSPEC_SHSUB UNSPEC_UHSUB
@@ -960,7 +957,6 @@
(UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
(UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u")
(UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
- (UNSPEC_SADDV "s") (UNSPEC_UADDV "u")
(UNSPEC_SSLI "s") (UNSPEC_USLI "u")
(UNSPEC_SSRI "s") (UNSPEC_USRI "u")
(UNSPEC_USRA "u") (UNSPEC_SSRA "s")