aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorChristophe Lyon <christophe.lyon@linaro.org>2016-07-12 12:03:28 +0200
committerYvan Roux <yvan.roux@linaro.org>2016-08-25 12:45:42 +0000
commit1406b1e395f946abc1bfa6cc5a24d10b67ba06a2 (patch)
tree0ff8d0d4607689aad99ef2a4ea604d833ec300ca /gcc/config
parentecce98641f1f442de27b998a5fab54a9e9973a0f (diff)
gcc/
Backport from trunk r235402, r235403. 2016-04-25 Michael Collison <michael.collison@linaro.org> * config/arm/neon.md (widen_<us>sum<mode>): New patterns where mode is VQI to improve mixed mode vectorization. * config/arm/neon.md (vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3): New define_insn to match low half of signed vaddw. * config/arm/neon.md (vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3): New define_insn to match high half of signed vaddw. * config/arm/neon.md (vec_sel_widen_usum_lo<VQI:mode><VW:mode>3): New define_insn to match low half of unsigned vaddw. * config/arm/neon.md (vec_sel_widen_usum_hi<VQI:mode><VW:mode>3): New define_insn to match high half of unsigned vaddw. * config/arm/arm.c (arm_simd_vect_par_cnst_half): New function. (arm_simd_check_vect_par_cnst_half_p): Likewise. * config/arm/arm-protos.h (arm_simd_vect_par_cnst_half): Prototype for new function. (arm_simd_check_vect_par_cnst_half_p): Likewise. * config/arm/predicates.md (vect_par_constant_high): Support big endian and simplify by calling arm_simd_check_vect_par_cnst_half (vect_par_constant_low): Likewise. gcc/testsuite/ Backport from trunk r235402. 2016-04-25 Michael Collison <michael.collison@arm.com> * testsuite/gcc.target/arm/neon-vaddws16.c: New test. * testsuite/gcc.target/arm/neon-vaddws32.c: New test. * testsuite/gcc.target/arm/neon-vaddwu16.c: New test. * testsuite/gcc.target/arm/neon-vaddwu32.c: New test. * testsuite/gcc.target/arm/neon-vaddwu8.c: New test. * testsuite/lib/target-supports.exp (check_effective_target_vect_widen_sum_hi_to_si_pattern): Indicate that arm neon support vector widen sum of HImode TO SImode. Change-Id: I73e392d5089153973547e78cb918f3bf8e5594d0
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/arm/arm-protos.h4
-rw-r--r--gcc/config/arm/arm.c76
-rw-r--r--gcc/config/arm/neon.md123
-rw-r--r--gcc/config/arm/predicates.md50
4 files changed, 201 insertions, 52 deletions
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 0cfc900d9db..aaaabb761cf 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -50,7 +50,9 @@ extern tree arm_builtin_decl (unsigned code, bool initialize_p
ATTRIBUTE_UNUSED);
extern void arm_init_builtins (void);
extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
-
+extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high);
+extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
+ bool high);
#ifdef RTX_CODE
extern bool arm_vector_mode_supported_p (machine_mode);
extern bool arm_small_register_classes_for_mode_p (machine_mode);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8fd6c2000f0..776a0a79790 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -30322,4 +30322,80 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
return;
}
+
+/* Build the PARALLEL of lane indices that selects one half of a vector of
+   mode MODE, where "half" is meant architecturally: HIGH == TRUE asks for
+   the architectural high lanes, HIGH == FALSE for the low ones.
+
+   GCC's lane numbering does not line up with the architecture's, so the
+   mask differs with the target endian-ness.  An instruction that selects
+   the architectural low lanes on a big-endian target must be described
+   with a mask selecting GCC's high lanes.  For a four-lane vector:
+
+                 Big-Endian             Little-Endian
+
+   GCC             0   1   2   3           3   2   1   0
+                 | x | x | x | x |       | x | x | x | x |
+   Architecture    3   2   1   0           3   2   1   0
+
+   Low Mask:         { 2, 3 }                { 0, 1 }
+   High Mask:        { 0, 1 }                { 2, 3 }
+
+   The result holds GET_MODE_NUNITS (MODE) / 2 CONST_INT elements in
+   ascending order.  */
+
+rtx
+arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
+{
+  const int half = GET_MODE_NUNITS (mode) / 2;
+  /* The architectural high half occupies GCC's upper lane numbers on
+     little-endian targets and GCC's lower lane numbers on big-endian
+     ones; the low half takes whichever range is left.  */
+  const bool upper_gcc_lanes = BYTES_BIG_ENDIAN ? !high : high;
+  const int first_lane = upper_gcc_lanes ? half : 0;
+  rtvec lanes = rtvec_alloc (half);
+
+  for (int lane = 0; lane < half; lane++)
+    RTVEC_ELT (lanes, lane) = GEN_INT (first_lane + lane);
+
+  return gen_rtx_PARALLEL (mode, lanes);
+}
+
+/* Return true if OP is a PARALLEL whose elements are exactly the
+   CONST_INT lane numbers of the high (HIGH == TRUE) or low
+   (HIGH == FALSE) half of a vector of mode MODE, from the perspective
+   of the architecture.  See the diagram above
+   arm_simd_vect_par_cnst_half for more details.
+
+   Return false if MODE is not a vector mode, if OP does not have the
+   same number of elements as the expected mask, or if any element is
+   not a CONST_INT equal to the corresponding expected lane number.  */
+
+bool
+arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
+                                     bool high)
+{
+  /* Reject non-vector modes before MODE is used to build the reference
+     mask, so that no PARALLEL is constructed for a mode that can never
+     match.  */
+  if (!VECTOR_MODE_P (mode))
+    return false;
+
+  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
+  int count_ideal = XVECLEN (ideal, 0);
+
+  if (XVECLEN (op, 0) != count_ideal)
+    return false;
+
+  for (int i = 0; i < count_ideal; i++)
+    {
+      rtx elt_op = XVECEXP (op, 0, i);
+
+      /* INTVAL is only meaningful on a CONST_INT, so test the code
+         first.  */
+      if (!CONST_INT_P (elt_op)
+          || INTVAL (elt_op) != INTVAL (XVECEXP (ideal, 0, i)))
+        return false;
+    }
+
+  return true;
+}
+
#include "gt-arm.h"
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 879c07c13b6..6b4896de61f 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1204,16 +1204,133 @@
;; Widening operations
+;; Expander for the signed widening sum when operand 1 has a VQI mode.
+;; Operand 0 is the <V_double_width> destination, operand 1 the narrow
+;; input and operand 2 the <V_double_width> addend.  The expansion copies
+;; operand 2 into operand 0 (unless they are the same rtx) and then emits
+;; the low-half and the high-half vec_sel_widen_ssum insns, each of which
+;; accumulates into operand 0.
+(define_expand "widen_ssum<mode>3"
+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+ (plus:<V_double_width>
+ (sign_extend:<V_double_width>
+ (match_operand:VQI 1 "s_register_operand" ""))
+ (match_operand:<V_double_width> 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+ {
+ machine_mode mode = GET_MODE (operands[1]);
+ rtx p1, p2;
+
+ /* P1 selects the architectural low half of operand 1, P2 the high half.  */
+ p1 = arm_simd_vect_par_cnst_half (mode, false);
+ p2 = arm_simd_vect_par_cnst_half (mode, true);
+
+ /* Both insns below use operand 0 as their accumulator input, so seed it
+    with the addend first.  */
+ if (operands[0] != operands[2])
+ emit_move_insn (operands[0], operands[2]);
+
+ emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
+ operands[1],
+ p1,
+ operands[0]));
+ emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
+ operands[1],
+ p2,
+ operands[0]));
+ /* Everything has been emitted explicitly; do not emit the template.  */
+ DONE;
+ }
+)
+
+;; Signed vaddw on one half of a VQI register: operand 0 = operand 3 +
+;; sign_extend (vec_select (operand 1, operand 2)).  Operand 2 must
+;; satisfy vect_par_constant_low, and operand 3 is tied to the output by
+;; its "0" constraint.  The output template prints operand 1 with %f1 on
+;; big-endian targets and with %e1 otherwise.
+;; NOTE(review): the name iterates over VQI and VW independently, while
+;; the widen_ssum<mode>3 expander only calls the <mode><V_half> variants;
+;; confirm that the remaining mode combinations are intended.
+(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen>
+ (sign_extend:<VW:V_widen>
+ (vec_select:VW
+ (match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_low" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
+ "vaddw.<V_s_elem>\t%q0, %q3, %e1";
+}
+ [(set_attr "type" "neon_add_widen")])
+
+;; Signed vaddw on one half of a VQI register: operand 0 = operand 3 +
+;; sign_extend (vec_select (operand 1, operand 2)).  Operand 2 must
+;; satisfy vect_par_constant_high, and operand 3 is tied to the output by
+;; its "0" constraint.  The output template prints operand 1 with %e1 on
+;; big-endian targets and with %f1 otherwise.
+;; NOTE(review): the name iterates over VQI and VW independently, while
+;; the widen_ssum<mode>3 expander only calls the <mode><V_half> variants;
+;; confirm that the remaining mode combinations are intended.
+(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen>
+ (sign_extend:<VW:V_widen>
+ (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_high" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
+ "vaddw.<V_s_elem>\t%q0, %q3, %f1";
+}
+ [(set_attr "type" "neon_add_widen")])
+
(define_insn "widen_ssum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
- (plus:<V_widen> (sign_extend:<V_widen>
- (match_operand:VW 1 "s_register_operand" "%w"))
- (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+ (plus:<V_widen>
+ (sign_extend:<V_widen>
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand:<V_widen> 2 "s_register_operand" "w")))]
"TARGET_NEON"
"vaddw.<V_s_elem>\t%q0, %q2, %P1"
[(set_attr "type" "neon_add_widen")]
)
+;; Expander for the unsigned widening sum when operand 1 has a VQI mode.
+;; Operand 0 is the <V_double_width> destination, operand 1 the narrow
+;; input and operand 2 the <V_double_width> addend.  The expansion copies
+;; operand 2 into operand 0 (unless they are the same rtx) and then emits
+;; the low-half and the high-half vec_sel_widen_usum insns, each of which
+;; accumulates into operand 0.
+(define_expand "widen_usum<mode>3"
+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+ (plus:<V_double_width>
+ (zero_extend:<V_double_width>
+ (match_operand:VQI 1 "s_register_operand" ""))
+ (match_operand:<V_double_width> 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+ {
+ machine_mode mode = GET_MODE (operands[1]);
+ rtx p1, p2;
+
+ /* P1 selects the architectural low half of operand 1, P2 the high half.  */
+ p1 = arm_simd_vect_par_cnst_half (mode, false);
+ p2 = arm_simd_vect_par_cnst_half (mode, true);
+
+ /* Both insns below use operand 0 as their accumulator input, so seed it
+    with the addend first.  */
+ if (operands[0] != operands[2])
+ emit_move_insn (operands[0], operands[2]);
+
+ emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
+ operands[1],
+ p1,
+ operands[0]));
+ emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
+ operands[1],
+ p2,
+ operands[0]));
+ /* Everything has been emitted explicitly; do not emit the template.  */
+ DONE;
+ }
+)
+
+;; Unsigned vaddw on one half of a VQI register: operand 0 = operand 3 +
+;; zero_extend (vec_select (operand 1, operand 2)).  Operand 2 must
+;; satisfy vect_par_constant_low, and operand 3 is tied to the output by
+;; its "0" constraint.  The output template prints operand 1 with %f1 on
+;; big-endian targets and with %e1 otherwise.
+;; NOTE(review): the name iterates over VQI and VW independently, while
+;; the widen_usum<mode>3 expander only calls the <mode><V_half> variants;
+;; confirm that the remaining mode combinations are intended.
+(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen>
+ (zero_extend:<VW:V_widen>
+ (vec_select:VW
+ (match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_low" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
+ "vaddw.<V_u_elem>\t%q0, %q3, %e1";
+}
+ [(set_attr "type" "neon_add_widen")])
+
+;; Unsigned vaddw on one half of a VQI register: operand 0 = operand 3 +
+;; zero_extend (vec_select (operand 1, operand 2)).  Operand 2 must
+;; satisfy vect_par_constant_high, and operand 3 is tied to the output by
+;; its "0" constraint.  The output template prints operand 1 with %e1 on
+;; big-endian targets and with %f1 otherwise.
+;; NOTE(review): the name iterates over VQI and VW independently, while
+;; the widen_usum<mode>3 expander only calls the <mode><V_half> variants;
+;; confirm that the remaining mode combinations are intended.
+(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen>
+ (zero_extend:<VW:V_widen>
+ (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_high" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
+ "vaddw.<V_u_elem>\t%q0, %q3, %f1";
+}
+ [(set_attr "type" "neon_add_widen")])
+
(define_insn "widen_usum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (zero_extend:<V_widen>
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index b1cd556211a..ad92f6c5419 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -612,59 +612,13 @@
(define_special_predicate "vect_par_constant_high"
(match_code "parallel")
{
- HOST_WIDE_INT count = XVECLEN (op, 0);
- int i;
- int base = GET_MODE_NUNITS (mode);
-
- if ((count < 1)
- || (count != base/2))
- return false;
-
- if (!VECTOR_MODE_P (mode))
- return false;
-
- for (i = 0; i < count; i++)
- {
- rtx elt = XVECEXP (op, 0, i);
- int val;
-
- if (!CONST_INT_P (elt))
- return false;
-
- val = INTVAL (elt);
- if (val != (base/2) + i)
- return false;
- }
- return true;
+ return arm_simd_check_vect_par_cnst_half_p (op, mode, true);
})
(define_special_predicate "vect_par_constant_low"
(match_code "parallel")
{
- HOST_WIDE_INT count = XVECLEN (op, 0);
- int i;
- int base = GET_MODE_NUNITS (mode);
-
- if ((count < 1)
- || (count != base/2))
- return false;
-
- if (!VECTOR_MODE_P (mode))
- return false;
-
- for (i = 0; i < count; i++)
- {
- rtx elt = XVECEXP (op, 0, i);
- int val;
-
- if (!CONST_INT_P (elt))
- return false;
-
- val = INTVAL (elt);
- if (val != i)
- return false;
- }
- return true;
+ return arm_simd_check_vect_par_cnst_half_p (op, mode, false);
})
(define_predicate "const_double_vcvt_power_of_two_reciprocal"