aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Collison <michael.collison@linaro.org>2015-10-18 23:22:28 -0700
committerMichael Collison <michael.collison@linaro.org>2015-10-21 17:20:30 -0700
commitb46ae998110cf577bc589be2470e48ab454cf42e (patch)
treec5da07c4e7be2175b7d74c78e80e69990c94b21d
parentc4a359c9a56ca1009289245af29bb00f5d1d73cb (diff)
TCWG 833 changeslinaro-local/tcwg_317
part two of big endian for vaddw part three of big endian for vaddw
-rw-r--r--gcc/config/arm/neon.md171
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vaddws16.c18
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vaddws32.c17
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vaddwu16.c18
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vaddwu32.c17
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vaddwu8.c18
-rw-r--r--gcc/testsuite/lib/target-supports.exp1
7 files changed, 259 insertions, 1 deletions
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e5a2b0f1c9a..d8a3f205efd 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1174,6 +1174,91 @@
;; Widening operations
+(define_expand "widen_ssum<mode>3"
+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+ (plus:<V_double_width> (sign_extend:<V_double_width> (match_operand:VQI 1 "s_register_operand" ""))
+ (match_operand:<V_double_width> 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+ {
+ int i;
+ int temp1, temp2;
+ rtx temp3;
+ int half_elem = <V_mode_nunits>/2;
+ rtvec v1 = rtvec_alloc (half_elem);
+ rtvec v2 = rtvec_alloc (half_elem);
+ machine_mode mode = GET_MODE (operands[1]);
+ rtx p1, p2;
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ for (i = 0; i < half_elem; i++)
+ {
+ temp1 = half_elem - 1 - i;
+ temp2 = GET_MODE_NUNITS (mode) - 1 - temp1;
+ temp3 = GEN_INT (temp2);
+ RTVEC_ELT(v1, i) = temp3;
+ }
+ }
+ else
+ {
+
+ for (i = 0; i < half_elem; i++)
+ RTVEC_ELT (v1, i) = GEN_INT (i);
+ }
+
+ p1 = gen_rtx_PARALLEL (mode, v1);
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ for (i = 0; i < half_elem; i++)
+ {
+ temp1 = <V_mode_nunits> - 1 - i;
+ temp2 = GET_MODE_NUNITS (mode) - 1 - temp1;
+ temp3 = GEN_INT (temp2);
+ RTVEC_ELT(v2, i) = temp3;
+ }
+ }
+ else
+ {
+ for (i = half_elem; i < <V_mode_nunits>; i++)
+ RTVEC_ELT (v2, i - half_elem) = GEN_INT (i);
+ }
+
+ p2 = gen_rtx_PARALLEL (mode, v2);
+
+ if (operands[0] != operands[2])
+ emit_move_insn (operands[0], operands[2]);
+
+ emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0], operands[1], p1, operands[0]));
+ emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0], operands[1], p2, operands[0]));
+ DONE;
+ }
+)
+
+(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen> (sign_extend:<VW:V_widen> (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_low" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
+ "vaddw.<V_s_elem>\t%q0, %q3, %e1";
+}
+ [(set_attr "type" "neon_add_widen")])
+
+(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen> (sign_extend:<VW:V_widen> (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_high" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
+ "vaddw.<V_s_elem>\t%q0, %q3, %f1";
+}
+ [(set_attr "type" "neon_add_widen")])
+
(define_insn "widen_ssum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (sign_extend:<V_widen>
@@ -1184,6 +1269,90 @@
[(set_attr "type" "neon_add_widen")]
)
+(define_expand "widen_usum<mode>3"
+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+ (plus:<V_double_width> (zero_extend:<V_double_width> (match_operand:VQI 1 "s_register_operand" ""))
+ (match_operand:<V_double_width> 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+ {
+ int i;
+ int temp1, temp2;
+ rtx temp3;
+ int half_elem = <V_mode_nunits>/2;
+ rtvec v1 = rtvec_alloc (half_elem);
+ rtvec v2 = rtvec_alloc (half_elem);
+ machine_mode mode = GET_MODE (operands[1]);
+ rtx p1, p2;
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ for (i = 0; i < half_elem; i++)
+ {
+ temp1 = half_elem - 1 - i;
+ temp2 = GET_MODE_NUNITS (mode) - 1 - temp1;
+ temp3 = GEN_INT (temp2);
+ RTVEC_ELT(v1, i) = temp3;
+ }
+ }
+ else
+ {
+ for (i = 0; i < half_elem; i++)
+ RTVEC_ELT (v1, i) = GEN_INT (i);
+ }
+
+ p1 = gen_rtx_PARALLEL (mode, v1);
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ for (i = 0; i < half_elem; i++)
+ {
+ temp1 = <V_mode_nunits> - 1 - i;
+ temp2 = GET_MODE_NUNITS (mode) - 1 - temp1;
+ temp3 = GEN_INT (temp2);
+ RTVEC_ELT(v2, i) = temp3;
+ }
+ }
+ else
+ {
+ for (i = half_elem; i < <V_mode_nunits>; i++)
+ RTVEC_ELT (v2, i - half_elem) = GEN_INT (i);
+ }
+
+ p2 = gen_rtx_PARALLEL (mode, v2);
+
+ if (operands[0] != operands[2])
+ emit_move_insn (operands[0], operands[2]);
+
+ emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0], operands[1], p1, operands[0]));
+ emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0], operands[1], p2, operands[0]));
+ DONE;
+ }
+)
+
+(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen> (zero_extend:<VW:V_widen> (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_low" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
+ "vaddw.<V_u_elem>\t%q0, %q3, %e1";
+}
+ [(set_attr "type" "neon_add_widen")])
+
+(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen> (zero_extend:<VW:V_widen> (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_high" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
+ "vaddw.<V_u_elem>\t%q0, %q3, %f1";
+}
+ [(set_attr "type" "neon_add_widen")])
+
(define_insn "widen_usum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (zero_extend:<V_widen>
@@ -5331,7 +5500,7 @@ if (BYTES_BIG_ENDIAN)
[(set (match_operand:<V_unpack> 0 "register_operand" "=w")
(mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
(match_operand:VU 1 "register_operand" "w")
- (match_operand:VU 2 "vect_par_constant_low" "")))
+ (match_operand:VU 2 "vect_par_constant_low" "")))
(SE:<V_unpack> (vec_select:<V_HALF>
(match_operand:VU 3 "register_operand" "w")
(match_dup 2)))))]
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddws16.c b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c
new file mode 100644
index 00000000000..96c657e10f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, short * __restrict x)
+{
+ len = len & ~31;
+ int result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.s16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddws32.c b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c
new file mode 100644
index 00000000000..1bfdc138fa5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+int
+t6(int len, void * dummy, int * __restrict x)
+{
+ len = len & ~31;
+ long long result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.s32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c
new file mode 100644
index 00000000000..98f87685097
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+ len = len & ~31;
+ unsigned int result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw.u16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c
new file mode 100644
index 00000000000..4a72a398264
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+int
+t6(int len, void * dummy, unsigned int * __restrict x)
+{
+ len = len & ~31;
+ unsigned long long result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c
new file mode 100644
index 00000000000..9c9c68ab604
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, char * __restrict x)
+{
+ len = len & ~31;
+ unsigned short result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u8" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4d5b0a3df61..3fe7f4c7d31 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3943,6 +3943,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
} else {
set et_vect_widen_sum_hi_to_si_pattern_saved 0
if { [istarget powerpc*-*-*]
+ || [check_effective_target_arm_neon_ok]
|| [istarget ia64-*-*] } {
set et_vect_widen_sum_hi_to_si_pattern_saved 1
}