diff options
author | Michael Collison <michael.collison@linaro.org> | 2015-10-18 23:22:28 -0700 |
---|---|---|
committer | Michael Collison <michael.collison@linaro.org> | 2015-10-21 17:20:30 -0700 |
commit | b46ae998110cf577bc589be2470e48ab454cf42e (patch) | |
tree | c5da07c4e7be2175b7d74c78e80e69990c94b21d | |
parent | c4a359c9a56ca1009289245af29bb00f5d1d73cb (diff) |
TCWG 833 changes [linaro-local/tcwg_317]
part two of big endian for vaddw
part three of big endian for vaddw
-rw-r--r-- | gcc/config/arm/neon.md | 171 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddws16.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddws32.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddwu16.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddwu32.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddwu8.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 1 |
7 files changed, 259 insertions, 1 deletions
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e5a2b0f1c9a..d8a3f205efd 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1174,6 +1174,91 @@
 
 ;; Widening operations
 
+(define_expand "widen_ssum<mode>3"  ; op0 = op2 + sign_extend (op1), emitted as one vaddw per half of op1.
+  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+        (plus:<V_double_width> (sign_extend:<V_double_width> (match_operand:VQI 1 "s_register_operand" ""))
+                               (match_operand:<V_double_width> 2 "s_register_operand" "")))]
+  "TARGET_NEON"
+  {
+    int i;
+    int temp1, temp2;
+    rtx temp3;
+    int half_elem = <V_mode_nunits>/2;  /* Number of lanes each selector picks.  */
+    rtvec v1 = rtvec_alloc (half_elem);  /* Lane indices handed to the _lo insn.  */
+    rtvec v2 = rtvec_alloc (half_elem);  /* Lane indices handed to the _hi insn.  */
+    machine_mode mode = GET_MODE (operands[1]);
+    rtx p1, p2;
+
+    if (BYTES_BIG_ENDIAN)
+      {
+        for (i = 0; i < half_elem; i++)
+          {
+            temp1 = half_elem - 1 - i;
+            temp2 = GET_MODE_NUNITS (mode) - 1 - temp1;  /* == nunits - half_elem + i.  */
+            temp3 = GEN_INT (temp2);
+            RTVEC_ELT (v1, i) = temp3;
+          }
+      }
+    else
+      {
+        /* Little-endian: v1 holds lanes 0 .. half_elem - 1.  */
+        for (i = 0; i < half_elem; i++)
+          RTVEC_ELT (v1, i) = GEN_INT (i);
+      }
+
+    p1 = gen_rtx_PARALLEL (mode, v1);
+
+    if (BYTES_BIG_ENDIAN)
+      {
+        for (i = 0; i < half_elem; i++)
+          {
+            temp1 = <V_mode_nunits> - 1 - i;
+            temp2 = GET_MODE_NUNITS (mode) - 1 - temp1;  /* == i when both lane counts agree.  */
+            temp3 = GEN_INT (temp2);
+            RTVEC_ELT (v2, i) = temp3;
+          }
+      }
+    else
+      {
+        for (i = half_elem; i < <V_mode_nunits>; i++)  /* Little-endian: lanes half_elem .. nunits - 1.  */
+          RTVEC_ELT (v2, i - half_elem) = GEN_INT (i);
+      }
+
+    p2 = gen_rtx_PARALLEL (mode, v2);
+
+    if (operands[0] != operands[2])  /* Both insns below accumulate into operand 0.  */
+      emit_move_insn (operands[0], operands[2]);
+
+    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0], operands[1], p1, operands[0]));
+    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0], operands[1], p2, operands[0]));
+    DONE;
+  }
+)
+
+(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"  ; op0 = op3 + sign_extend (lanes of op1 picked by op2); op3 is tied to op0.
+  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+        (plus:<VW:V_widen> (sign_extend:<VW:V_widen> (vec_select:VW (match_operand:VQI 1 "s_register_operand" "w")  ; no "%": op2 is a lane selector, not a commutable addend
+                                                                    (match_operand:VQI 2 "vect_par_constant_low" "")))
+                           (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+  "TARGET_NEON"
+{
+  return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
+                            "vaddw.<V_s_elem>\t%q0, %q3, %e1";
+}
+  [(set_attr "type" "neon_add_widen")])
+
+(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"  ; As the _lo pattern, but op2 must satisfy vect_par_constant_high.
+  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+        (plus:<VW:V_widen> (sign_extend:<VW:V_widen> (vec_select:VW (match_operand:VQI 1 "s_register_operand" "w")  ; no "%": op2 is a lane selector, not a commutable addend
+                                                                    (match_operand:VQI 2 "vect_par_constant_high" "")))
+                           (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+  "TARGET_NEON"
+{
+  return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
+                            "vaddw.<V_s_elem>\t%q0, %q3, %f1";
+}
+  [(set_attr "type" "neon_add_widen")])
+
 (define_insn "widen_ssum<mode>3"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
         (plus:<V_widen> (sign_extend:<V_widen>
@@ -1184,6 +1269,90 @@
   [(set_attr "type" "neon_add_widen")]
 )
 
+(define_expand "widen_usum<mode>3"  ; op0 = op2 + zero_extend (op1), emitted as one vaddw per half of op1.
+  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+        (plus:<V_double_width> (zero_extend:<V_double_width> (match_operand:VQI 1 "s_register_operand" ""))
+                               (match_operand:<V_double_width> 2 "s_register_operand" "")))]
+  "TARGET_NEON"
+  {
+    int i;
+    int temp1, temp2;
+    rtx temp3;
+    int half_elem = <V_mode_nunits>/2;  /* Number of lanes each selector picks.  */
+    rtvec v1 = rtvec_alloc (half_elem);  /* Lane indices handed to the _lo insn.  */
+    rtvec v2 = rtvec_alloc (half_elem);  /* Lane indices handed to the _hi insn.  */
+    machine_mode mode = GET_MODE (operands[1]);
+    rtx p1, p2;
+
+    if (BYTES_BIG_ENDIAN)
+      {
+        for (i = 0; i < half_elem; i++)
+          {
+            temp1 = half_elem - 1 - i;
+            temp2 = GET_MODE_NUNITS (mode) - 1 - temp1;  /* == nunits - half_elem + i.  */
+            temp3 = GEN_INT (temp2);
+            RTVEC_ELT (v1, i) = temp3;
+          }
+      }
+    else
+      {
+        for (i = 0; i < half_elem; i++)  /* Little-endian: lanes 0 .. half_elem - 1.  */
+          RTVEC_ELT (v1, i) = GEN_INT (i);
+      }
+
+    p1 = gen_rtx_PARALLEL (mode, v1);
+
+    if (BYTES_BIG_ENDIAN)
+      {
+        for (i = 0; i < half_elem; i++)
+          {
+            temp1 = <V_mode_nunits> - 1 - i;
+            temp2 = GET_MODE_NUNITS (mode) - 1 - temp1;  /* == i when both lane counts agree.  */
+            temp3 = GEN_INT (temp2);
+            RTVEC_ELT (v2, i) = temp3;
+          }
+      }
+    else
+      {
+        for (i = half_elem; i < <V_mode_nunits>; i++)  /* Little-endian: lanes half_elem .. nunits - 1.  */
+          RTVEC_ELT (v2, i - half_elem) = GEN_INT (i);
+      }
+
+    p2 = gen_rtx_PARALLEL (mode, v2);
+
+    if (operands[0] != operands[2])  /* Both insns below accumulate into operand 0.  */
+      emit_move_insn (operands[0], operands[2]);
+
+    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0], operands[1], p1, operands[0]));
+    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0], operands[1], p2, operands[0]));
+    DONE;
+  }
+)
+
+(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"  ; op0 = op3 + zero_extend (lanes of op1 picked by op2); op3 is tied to op0.
+  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+        (plus:<VW:V_widen> (zero_extend:<VW:V_widen> (vec_select:VW (match_operand:VQI 1 "s_register_operand" "w")  ; no "%": op2 is a lane selector, not a commutable addend
+                                                                    (match_operand:VQI 2 "vect_par_constant_low" "")))
+                           (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+  "TARGET_NEON"
+{
+  return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
+                            "vaddw.<V_u_elem>\t%q0, %q3, %e1";
+}
+  [(set_attr "type" "neon_add_widen")])
+
+(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"  ; As the _lo pattern, but op2 must satisfy vect_par_constant_high.
+  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+        (plus:<VW:V_widen> (zero_extend:<VW:V_widen> (vec_select:VW (match_operand:VQI 1 "s_register_operand" "w")  ; no "%": op2 is a lane selector, not a commutable addend
+                                                                    (match_operand:VQI 2 "vect_par_constant_high" "")))
+                           (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+  "TARGET_NEON"
+{
+  return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
+                            "vaddw.<V_u_elem>\t%q0, %q3, %f1";
+}
+  [(set_attr "type" "neon_add_widen")])
+
 (define_insn "widen_usum<mode>3"
   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
         (plus:<V_widen> (zero_extend:<V_widen>
@@ -5331,7 +5500,7 @@ if (BYTES_BIG_ENDIAN)
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 1 "register_operand" "w")
-                          (match_operand:VU 2 "vect_par_constant_low" "")))
+                          (match_operand:VU 2 "vect_par_constant_low" "")))
                         (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 3 "register_operand" "w")
                           (match_dup 2)))))]
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddws16.c b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c
new file mode 100644
index 00000000000..96c657e10f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+/* Summing signed 16-bit elements into an int should use a widening add.  */
+int
+t6(int len, void * dummy, short * __restrict x)
+{
+  len = len & ~31;  /* Round len down to a multiple of 32.  */
+  int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.s16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddws32.c b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c
new file mode 100644
index 00000000000..1bfdc138fa5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+/* Summing signed 32-bit elements into a long long should use a widening add.  */
+int
+t6(int len, void * dummy, int * __restrict x)
+{
+  len = len & ~31;  /* Round len down to a multiple of 32.  */
+  long long result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.s32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c
new file mode 100644
index 00000000000..98f87685097
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+/* Summing unsigned 16-bit elements into an unsigned int should use a widening add.  */
+int
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;  /* Round len down to a multiple of 32.  */
+  unsigned int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c
new file mode 100644
index 00000000000..4a72a398264
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+/* Summing unsigned 32-bit elements into an unsigned long long should use a widening add.  */
+int
+t6(int len, void * dummy, unsigned int * __restrict x)
+{
+  len = len & ~31;  /* Round len down to a multiple of 32.  */
+  unsigned long long result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c
new file mode 100644
index 00000000000..9c9c68ab604
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+/* Explicitly unsigned elements, so the expected u8 form does not depend on plain-char signedness.  */
+int
+t6(int len, void * dummy, unsigned char * __restrict x)
+{
+  len = len & ~31;  /* Round len down to a multiple of 32.  */
+  unsigned short result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u8" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4d5b0a3df61..3fe7f4c7d31 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3943,6 +3943,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
     } else {
         set et_vect_widen_sum_hi_to_si_pattern_saved 0
         if { [istarget powerpc*-*-*]
+             || [check_effective_target_arm_neon_ok]
              || [istarget ia64-*-*] } {
             set et_vect_widen_sum_hi_to_si_pattern_saved 1
         }