diff options
-rw-r--r-- | gcc/ChangeLog | 33 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 35 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 48 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 22 | ||||
-rw-r--r-- | gcc/doc/sourcebuild.texi | 4 | ||||
-rw-r--r-- | gcc/genmatch.c | 17 | ||||
-rw-r--r-- | gcc/gimple-match-head.c | 37 | ||||
-rw-r--r-- | gcc/gimple-match.h | 29 | ||||
-rw-r--r-- | gcc/match.pd | 35 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c | 58 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c | 125 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c | 119 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c | 32 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c | 119 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 7 |
19 files changed, 786 insertions, 11 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 840bdf7c89e..fd187b92d39 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,36 @@ +2018-05-25 Richard Sandiford <richard.sandiford@linaro.org> + + * doc/sourcebuild.texi (vect_double_cond_arith): Document. + * gimple-match.h (gimple_match_op::MAX_NUM_OPS): Bump to 4. + (gimple_match_op::gimple_match_op): Add an overload for 4 operands. + (gimple_match_op::set_op): Likewise. + (gimple_resimplify4): Declare. + * genmatch.c (get_operand_type): Handle CFN_COND_* functions. + (expr::gen_transform): Likewise. + (decision_tree::gen): Generate a simplification routine for 4 operands. + * gimple-match-head.c (gimple_simplify): Add an overload for + 4 operands. In the top-level function, handle up to 4 call + arguments and call gimple_resimplify4. + (gimple_resimplify4): New function. + (build_call_internal): Pass a fourth operand. + (maybe_push_res_to_seq): Likewise. + * match.pd (UNCOND_BINARY, COND_BINARY): New operator lists. + Fold VEC_COND_EXPRs of an operation and a default value into + an IFN_COND_* function if possible. + * config/aarch64/iterators.md (UNSPEC_COND_MAX, UNSPEC_COND_MIN): + New unspecs. + (SVE_COND_FP_BINARY): Include them. + (optab, sve_fp_op): Handle them. + (SVE_INT_BINARY_REV): New code iterator. + (SVE_COND_FP_BINARY_REV): New int iterator. + (commutative): New int attribute. + * config/aarch64/aarch64-protos.h (aarch64_sve_prepare_conditional_op): + Declare. + * config/aarch64/aarch64.c (aarch64_sve_prepare_conditional_op): New + function. + * config/aarch64/aarch64-sve.md (cond_<optab><mode>): Use it. + (*cond_<optab><mode>): New patterns for reversed operands. + 2018-05-25 Richard Biener <rguenther@suse.de> * tree-vectorizer.h (STMT_VINFO_GROUP_*, GROUP_*): Remove. 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index eec86428741..4ea50acaa59 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -513,6 +513,7 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE); void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx); bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *); +void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool); #endif /* RTX_CODE */ void aarch64_init_builtins (void); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 4f918492ae4..0bb37e72bd4 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1769,7 +1769,8 @@ UNSPEC_SEL))] "TARGET_SVE" { - gcc_assert (rtx_equal_p (operands[2], operands[4])); + bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH); + aarch64_sve_prepare_conditional_op (operands, 5, commutative_p); }) ;; Predicated integer operations. @@ -1786,6 +1787,20 @@ "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" ) +;; Predicated integer operations with the operands reversed. +(define_insn "*cond_<optab><mode>" + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (unspec:SVE_I + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (SVE_INT_BINARY_REV:SVE_I + (match_operand:SVE_I 2 "register_operand" "w") + (match_operand:SVE_I 3 "register_operand" "0")) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" +) + ;; Set operand 0 to the last active element in operand 3, or to tied ;; operand 1 if no elements are active. 
(define_insn "fold_extract_last_<mode>" @@ -2567,7 +2582,7 @@ UNSPEC_SEL))] "TARGET_SVE" { - gcc_assert (rtx_equal_p (operands[2], operands[4])); + aarch64_sve_prepare_conditional_op (operands, 5, <commutative>); }) ;; Predicated floating-point operations. @@ -2586,6 +2601,22 @@ "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" ) +;; Predicated floating-point operations with the operands reversed. +(define_insn "*cond_<optab><mode>" + [(set (match_operand:SVE_F 0 "register_operand" "=w") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (unspec:SVE_F + [(match_dup 1) + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "0")] + SVE_COND_FP_BINARY) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "<sve_fp_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" +) + ;; Shift an SVE vector left and insert a scalar into element 0. (define_insn "vec_shl_insert_<mode>" [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 04dedd2f7e0..afc91850d6f 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -16041,6 +16041,54 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode, emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL)); } +/* Prepare a cond_<optab><mode> operation that has the operands + given by OPERANDS, where: + + - operand 0 is the destination + - operand 1 is a predicate + - operands 2 to NOPS - 2 are the operands to an operation that is + performed for active lanes + - operand NOPS - 1 specifies the values to use for inactive lanes. + + COMMUTATIVE_P is true if operands 2 and 3 are commutative. In that case, + no pattern is provided for a tie between operands 3 and NOPS - 1. 
*/ + +void +aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops, + bool commutative_p) +{ + /* We can do the operation directly if the "else" value matches one + of the other inputs. */ + for (unsigned int i = 2; i < nops - 1; ++i) + if (rtx_equal_p (operands[i], operands[nops - 1])) + { + if (i == 3 && commutative_p) + std::swap (operands[2], operands[3]); + return; + } + + /* If the "else" value is different from the other operands, we have + the choice of doing a SEL on the output or a SEL on an input. + Neither choice is better in all cases, but one advantage of + selecting the input is that it can avoid a move when the output + needs to be distinct from the inputs. E.g. if operand N maps to + register N, selecting the output would give: + + MOVPRFX Z0.S, Z2.S + ADD Z0.S, P1/M, Z0.S, Z3.S + SEL Z0.S, P1, Z0.S, Z4.S + + whereas selecting the input avoids the MOVPRFX: + + SEL Z0.S, P1, Z2.S, Z4.S + ADD Z0.S, P1/M, Z0.S, Z3.S. */ + machine_mode mode = GET_MODE (operands[0]); + rtx temp = gen_reg_rtx (mode); + rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]); + emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL)); + operands[2] = operands[nops - 1] = temp; +} + /* Implement TARGET_MODES_TIEABLE_P. In principle we should always return true. However due to issues with register allocation it is preferable to avoid tieing integer scalar and FP scalar modes. Executing integer diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index b57c7e221f8..4db3a4c368f 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -464,6 +464,8 @@ UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md. UNSPEC_COND_ADD ; Used in aarch64-sve.md. UNSPEC_COND_SUB ; Used in aarch64-sve.md. + UNSPEC_COND_MAX ; Used in aarch64-sve.md. + UNSPEC_COND_MIN ; Used in aarch64-sve.md. UNSPEC_COND_LT ; Used in aarch64-sve.md. UNSPEC_COND_LE ; Used in aarch64-sve.md. UNSPEC_COND_EQ ; Used in aarch64-sve.md. 
@@ -1203,6 +1205,8 @@ (define_code_iterator SVE_INT_BINARY [plus minus smax umax smin umin and ior xor]) +(define_code_iterator SVE_INT_BINARY_REV [minus]) + ;; SVE integer comparisons. (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu]) @@ -1529,7 +1533,10 @@ (define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART]) -(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB]) +(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB + UNSPEC_COND_MAX UNSPEC_COND_MIN]) + +(define_int_iterator SVE_COND_FP_BINARY_REV [UNSPEC_COND_SUB]) (define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE UNSPEC_COND_EQ UNSPEC_COND_NE @@ -1559,7 +1566,9 @@ (UNSPEC_IORV "ior") (UNSPEC_XORV "xor") (UNSPEC_COND_ADD "add") - (UNSPEC_COND_SUB "sub")]) + (UNSPEC_COND_SUB "sub") + (UNSPEC_COND_MAX "smax") + (UNSPEC_COND_MIN "smin")]) (define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") (UNSPEC_UMINV "umin") @@ -1771,4 +1780,11 @@ (UNSPEC_COND_GT "gt")]) (define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd") - (UNSPEC_COND_SUB "fsub")]) + (UNSPEC_COND_SUB "fsub") + (UNSPEC_COND_MAX "fmaxnm") + (UNSPEC_COND_MIN "fminnm")]) + +(define_int_attr commutative [(UNSPEC_COND_ADD "true") + (UNSPEC_COND_SUB "false") + (UNSPEC_COND_MIN "true") + (UNSPEC_COND_MAX "true")]) diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 596007d630d..00e53a657c8 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1425,6 +1425,10 @@ have different type from the value operands. @item vect_double Target supports hardware vectors of @code{double}. +@item vect_double_cond_arith +Target supports conditional addition, subtraction, minimum and maximum +on vectors of @code{double}, via the @code{cond_} optabs. + @item vect_element_align_preferred The target's preferred vector alignment is the same as the element alignment. 
diff --git a/gcc/genmatch.c b/gcc/genmatch.c index 5715dd16f55..d6bd90d1533 100644 --- a/gcc/genmatch.c +++ b/gcc/genmatch.c @@ -2370,6 +2370,18 @@ get_operand_type (id_base *op, unsigned pos, else if (*op == COND_EXPR && pos == 0) return "boolean_type_node"; + else if (strncmp (op->id, "CFN_COND_", 9) == 0) + { + /* IFN_COND_* operands 1 and later by default have the same type + as the result. The type of operand 0 needs to be specified + explicitly. */ + if (pos > 0 && expr_type) + return expr_type; + else if (pos > 0 && in_type) + return in_type; + else + return NULL; + } else { /* Otherwise all types should match - choose one in order of @@ -2429,7 +2441,8 @@ expr::gen_transform (FILE *f, int indent, const char *dest, bool gimple, in_type = NULL; } else if (*opr == COND_EXPR - || *opr == VEC_COND_EXPR) + || *opr == VEC_COND_EXPR + || strncmp (opr->id, "CFN_COND_", 9) == 0) { /* Conditions are of the same type as their first alternative. */ sprintf (optype, "TREE_TYPE (ops%d[1])", depth); @@ -3737,7 +3750,7 @@ decision_tree::gen (FILE *f, bool gimple) } fprintf (stderr, "removed %u duplicate tails\n", rcnt); - for (unsigned n = 1; n <= 3; ++n) + for (unsigned n = 1; n <= 4; ++n) { /* First generate split-out functions. 
*/ for (unsigned i = 0; i < root->kids.length (); i++) diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c index 4598781e61c..1a12bb35e01 100644 --- a/gcc/gimple-match-head.c +++ b/gcc/gimple-match-head.c @@ -51,6 +51,8 @@ static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree), code_helper, tree, tree, tree); static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree), code_helper, tree, tree, tree, tree); +static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree), + code_helper, tree, tree, tree, tree, tree); const unsigned int gimple_match_op::MAX_NUM_OPS; @@ -215,6 +217,30 @@ gimple_resimplify3 (gimple_seq *seq, gimple_match_op *res_op, return canonicalized; } +/* Helper that matches and simplifies the toplevel result from + a gimple_simplify run (where we don't want to build + a stmt in case it's used in in-place folding). Replaces + RES_OP with a simplified and/or canonicalized result and + returns whether any change was made. */ + +bool +gimple_resimplify4 (gimple_seq *seq, gimple_match_op *res_op, + tree (*valueize)(tree)) +{ + /* No constant folding is defined for four-operand functions. */ + + gimple_match_op res_op2 (*res_op); + if (gimple_simplify (&res_op2, seq, valueize, + res_op->code, res_op->type, + res_op->ops[0], res_op->ops[1], res_op->ops[2], + res_op->ops[3])) + { + *res_op = res_op2; + return true; + } + + return false; +} /* If in GIMPLE the operation described by RES_OP should be single-rhs, build a GENERIC tree for that expression and update RES_OP accordingly. 
*/ @@ -256,7 +282,8 @@ build_call_internal (internal_fn fn, gimple_match_op *res_op) return gimple_build_call_internal (fn, res_op->num_ops, res_op->op_or_null (0), res_op->op_or_null (1), - res_op->op_or_null (2)); + res_op->op_or_null (2), + res_op->op_or_null (3)); } /* Push the exploded expression described by RES_OP as a statement to @@ -343,7 +370,8 @@ maybe_push_res_to_seq (gimple_match_op *res_op, gimple_seq *seq, tree res) new_stmt = gimple_build_call (decl, num_ops, res_op->op_or_null (0), res_op->op_or_null (1), - res_op->op_or_null (2)); + res_op->op_or_null (2), + res_op->op_or_null (3)); } if (!res) { @@ -654,7 +682,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, /* ??? This way we can't simplify calls with side-effects. */ if (gimple_call_lhs (stmt) != NULL_TREE && gimple_call_num_args (stmt) >= 1 - && gimple_call_num_args (stmt) <= 3) + && gimple_call_num_args (stmt) <= 4) { bool valueized = false; combined_fn cfn; @@ -697,6 +725,9 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, case 3: return (gimple_resimplify3 (seq, res_op, valueize) || valueized); + case 4: + return (gimple_resimplify4 (seq, res_op, valueize) + || valueized); default: gcc_unreachable (); } diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h index 9a4d3bb630f..69b53f21157 100644 --- a/gcc/gimple-match.h +++ b/gcc/gimple-match.h @@ -49,17 +49,19 @@ struct gimple_match_op gimple_match_op (code_helper, tree, tree); gimple_match_op (code_helper, tree, tree, tree); gimple_match_op (code_helper, tree, tree, tree, tree); + gimple_match_op (code_helper, tree, tree, tree, tree, tree); void set_op (code_helper, tree, unsigned int); void set_op (code_helper, tree, tree); void set_op (code_helper, tree, tree, tree); void set_op (code_helper, tree, tree, tree, tree); + void set_op (code_helper, tree, tree, tree, tree, tree); void set_value (tree); tree op_or_null (unsigned int) const; /* The maximum value of NUM_OPS. 
*/ - static const unsigned int MAX_NUM_OPS = 3; + static const unsigned int MAX_NUM_OPS = 4; /* The operation being performed. */ code_helper code; @@ -113,6 +115,17 @@ gimple_match_op::gimple_match_op (code_helper code_in, tree type_in, ops[2] = op2; } +inline +gimple_match_op::gimple_match_op (code_helper code_in, tree type_in, + tree op0, tree op1, tree op2, tree op3) + : code (code_in), type (type_in), num_ops (4) +{ + ops[0] = op0; + ops[1] = op1; + ops[2] = op2; + ops[3] = op3; +} + /* Change the operation performed to CODE_IN, the type of the result to TYPE_IN, and the number of operands to NUM_OPS_IN. The caller needs to set the operands itself. */ @@ -160,6 +173,19 @@ gimple_match_op::set_op (code_helper code_in, tree type_in, ops[2] = op2; } +inline void +gimple_match_op::set_op (code_helper code_in, tree type_in, + tree op0, tree op1, tree op2, tree op3) +{ + code = code_in; + type = type_in; + num_ops = 4; + ops[0] = op0; + ops[1] = op1; + ops[2] = op2; + ops[3] = op3; +} + /* Set the "operation" to be the single value VALUE, such as a constant or SSA_NAME. */ @@ -196,6 +222,7 @@ bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *, bool gimple_resimplify1 (gimple_seq *, gimple_match_op *, tree (*)(tree)); bool gimple_resimplify2 (gimple_seq *, gimple_match_op *, tree (*)(tree)); bool gimple_resimplify3 (gimple_seq *, gimple_match_op *, tree (*)(tree)); +bool gimple_resimplify4 (gimple_seq *, gimple_match_op *, tree (*)(tree)); tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *, tree res = NULL_TREE); void maybe_build_generic_op (gimple_match_op *); diff --git a/gcc/match.pd b/gcc/match.pd index 8a71141eac9..f08571ef28c 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -74,6 +74,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (FLOOR) DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL) DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND) DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + +/* Binary operations and their associated IFN_COND_* function. 
*/ +(define_operator_list UNCOND_BINARY + plus minus + min max + bit_and bit_ior bit_xor) +(define_operator_list COND_BINARY + IFN_COND_ADD IFN_COND_SUB + IFN_COND_MIN IFN_COND_MAX + IFN_COND_AND IFN_COND_IOR IFN_COND_XOR) /* As opposed to convert?, this still creates a single pattern, so it is not a suitable replacement for convert? in all cases. */ @@ -4780,3 +4790,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (cmp (popcount @0) integer_zerop) (rep @0 { build_zero_cst (TREE_TYPE (@0)); })))) + +/* Simplify: + + a = a1 op a2 + r = c ? a : b; + + to: + + r = c ? a1 op a2 : b; + + if the target can do it in one go. This makes the operation conditional + on c, so could drop potentially-trapping arithmetic, but that's a valid + simplification if the result of the operation isn't needed. */ +(for uncond_op (UNCOND_BINARY) + cond_op (COND_BINARY) + (simplify + (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) + (with { tree op_type = TREE_TYPE (@4); } + (if (element_precision (type) == element_precision (op_type)) + (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)))))) + (simplify + (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) + (with { tree op_type = TREE_TYPE (@4); } + (if (element_precision (type) == element_precision (op_type)) + (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))))))) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 59b230956e8..c5b2c631b5d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,15 @@ +2018-05-25 Richard Sandiford <richard.sandiford@linaro.org> + + * lib/target-supports.exp + (check_effective_target_vect_double_cond_arith): New proc. + * gcc.dg/vect/vect-cond-arith-1.c: New test. + * gcc.target/aarch64/sve/vcond_8.c: Likewise. + * gcc.target/aarch64/sve/vcond_8_run.c: Likewise. + * gcc.target/aarch64/sve/vcond_9.c: Likewise. + * gcc.target/aarch64/sve/vcond_9_run.c: Likewise. + * gcc.target/aarch64/sve/vcond_12.c: Likewise. 
+ * gcc.target/aarch64/sve/vcond_12_run.c: Likewise. + 2018-05-25 Janus Weil <janus@gcc.gnu.org> PR fortran/85839 diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c new file mode 100644 index 00000000000..9f2fccd7187 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c @@ -0,0 +1,58 @@ +/* { dg-additional-options "-fdump-tree-optimized -fno-trapping-math -ffinite-math-only" } */ + +#include "tree-vect.h" + +#define N (VECTOR_BITS * 11 / 64 + 3) + +#define add(A, B) ((A) + (B)) +#define sub(A, B) ((A) - (B)) + +#define DEF(OP) \ + void __attribute__ ((noipa)) \ + f_##OP (double *restrict a, double *restrict b, double x) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + double truev = OP (b[i], x); \ + a[i] = b[i] < 100 ? truev : b[i]; \ + } \ + } + +#define TEST(OP) \ + { \ + f_##OP (a, b, 10); \ + for (int i = 0; i < N; ++i) \ + { \ + int bval = (i % 17) * 10; \ + int truev = OP (bval, 10); \ + if (a[i] != (bval < 100 ? truev : bval)) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +#define FOR_EACH_OP(T) \ + T (add) \ + T (sub) \ + T (__builtin_fmax) \ + T (__builtin_fmin) + +FOR_EACH_OP (DEF) + +int +main (void) +{ + double a[N], b[N]; + for (int i = 0; i < N; ++i) + { + b[i] = (i % 17) * 10; + asm volatile ("" ::: "memory"); + } + FOR_EACH_OP (TEST) + return 0; +} + +/* { dg-final { scan-tree-dump { = \.COND_ADD} "optimized" { target vect_double_cond_arith } } } */ +/* { dg-final { scan-tree-dump { = \.COND_SUB} "optimized" { target vect_double_cond_arith } } } */ +/* { dg-final { scan-tree-dump { = \.COND_MAX} "optimized" { target vect_double_cond_arith } } } */ +/* { dg-final { scan-tree-dump { = \.COND_MIN} "optimized" { target vect_double_cond_arith } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c new file mode 100644 index 00000000000..95b371a1773 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c @@ -0,0 +1,125 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ + +#include <stdint.h> + +#define add(A, B) ((A) + (B)) +#define sub(A, B) ((A) - (B)) +#define max(A, B) ((A) > (B) ? (A) : (B)) +#define min(A, B) ((A) < (B) ? (A) : (B)) +#define and(A, B) ((A) & (B)) +#define ior(A, B) ((A) | (B)) +#define xor(A, B) ((A) ^ (B)) + +#define N 121 + +#define DEF_LOOP(TYPE, CMPTYPE, OP) \ + void __attribute__((noipa)) \ + f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond, \ + CMPTYPE limit, TYPE src2v, TYPE elsev) \ + { \ + TYPE induc = 0; \ + for (unsigned int i = 0; i < N; ++i, induc += 1) \ + { \ + TYPE truev = OP (induc, src2v); \ + dest[i] = cond[i] < limit ? truev : elsev; \ + } \ + } + +#define FOR_EACH_INT_TYPE(T, TYPE) \ + T (TYPE, TYPE, add) \ + T (TYPE, TYPE, sub) \ + T (TYPE, TYPE, max) \ + T (TYPE, TYPE, min) \ + T (TYPE, TYPE, and) \ + T (TYPE, TYPE, ior) \ + T (TYPE, TYPE, xor) + +#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \ + T (TYPE, CMPTYPE, add) \ + T (TYPE, CMPTYPE, sub) \ + T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \ + T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX) + +#define FOR_EACH_LOOP(T) \ + FOR_EACH_INT_TYPE (T, int8_t) \ + FOR_EACH_INT_TYPE (T, int16_t) \ + FOR_EACH_INT_TYPE (T, int32_t) \ + FOR_EACH_INT_TYPE (T, int64_t) \ + FOR_EACH_INT_TYPE (T, uint8_t) \ + FOR_EACH_INT_TYPE (T, uint16_t) \ + FOR_EACH_INT_TYPE (T, uint32_t) \ + FOR_EACH_INT_TYPE (T, uint64_t) \ + FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \ + FOR_EACH_FP_TYPE (T, float, float, f32) \ + FOR_EACH_FP_TYPE (T, double, double, f64) + +FOR_EACH_LOOP (DEF_LOOP) + +/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */ + +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b,} 14 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h,} 18 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s,} 18 } } */ +/* { dg-final { scan-assembler-times 
{\tsel\tz[0-9]+\.d,} 18 } } */ + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, 
p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c new file mode 100644 index 
00000000000..50a98c84a59 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c @@ -0,0 +1,30 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ + +#include "vcond_12.c" + +#define TEST_LOOP(TYPE, CMPTYPE, OP) \ + { \ + TYPE dest[N]; \ + CMPTYPE cond[N]; \ + for (unsigned int i = 0; i < N; ++i) \ + cond[i] = i % 5; \ + TYPE src2v = 14; \ + TYPE elsev = 17; \ + f_##OP##_##TYPE (dest, cond, 3, src2v, elsev); \ + TYPE induc = 0; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE if_true = OP (induc, src2v); \ + if (dest[i] != (i % 5 < 3 ? if_true : elsev)) \ + __builtin_abort (); \ + induc += 1; \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + FOR_EACH_LOOP (TEST_LOOP); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c new file mode 100644 index 00000000000..c32ab596716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c @@ -0,0 +1,119 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */ + +#include <stdint.h> + +#define add(A, B) ((A) + (B)) +#define sub(A, B) ((A) - (B)) +#define max(A, B) ((A) > (B) ? (A) : (B)) +#define min(A, B) ((A) < (B) ? (A) : (B)) +#define and(A, B) ((A) & (B)) +#define ior(A, B) ((A) | (B)) +#define xor(A, B) ((A) ^ (B)) + +#define DEF_LOOP(TYPE, CMPTYPE, OP) \ + void __attribute__((noipa)) \ + f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond, \ + CMPTYPE limit, TYPE *restrict src, \ + TYPE val, unsigned int n) \ + { \ + for (unsigned int i = 0; i < n; ++i) \ + { \ + TYPE truev = OP (src[i], val); \ + dest[i] = cond[i] < limit ? 
truev : src[i]; \ + } \ + } + +#define FOR_EACH_INT_TYPE(T, TYPE) \ + T (TYPE, TYPE, add) \ + T (TYPE, TYPE, sub) \ + T (TYPE, TYPE, max) \ + T (TYPE, TYPE, min) \ + T (TYPE, TYPE, and) \ + T (TYPE, TYPE, ior) \ + T (TYPE, TYPE, xor) + +#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \ + T (TYPE, CMPTYPE, add) \ + T (TYPE, CMPTYPE, sub) \ + T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \ + T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX) + +#define FOR_EACH_LOOP(T) \ + FOR_EACH_INT_TYPE (T, int8_t) \ + FOR_EACH_INT_TYPE (T, int16_t) \ + FOR_EACH_INT_TYPE (T, int32_t) \ + FOR_EACH_INT_TYPE (T, int64_t) \ + FOR_EACH_INT_TYPE (T, uint8_t) \ + FOR_EACH_INT_TYPE (T, uint16_t) \ + FOR_EACH_INT_TYPE (T, uint32_t) \ + FOR_EACH_INT_TYPE (T, uint64_t) \ + FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \ + FOR_EACH_FP_TYPE (T, float, float, f32) \ + FOR_EACH_FP_TYPE (T, double, double, f64) + +FOR_EACH_LOOP (DEF_LOOP) + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ +/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */ + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { 
dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { 
scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c new file mode 100644 index 00000000000..5f45e1667d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c @@ -0,0 +1,32 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */ + +#include "vcond_8.c" + +#define N 187 + +#define TEST_LOOP(TYPE, CMPTYPE, OP) \ + { \ + TYPE dest[N], src[N]; \ + CMPTYPE cond[N]; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + src[i] = i * 3; \ + cond[i] = i % 5; \ + } \ + f_##OP##_##TYPE (dest, cond, 3, src, 77, N); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE if_false = i * 3; \ + TYPE if_true = OP (if_false, (TYPE) 77); \ + if (dest[i] != (i % 5 < 3 ? 
if_true : if_false)) \ + __builtin_abort (); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + FOR_EACH_LOOP (TEST_LOOP); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c new file mode 100644 index 00000000000..618e187d587 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c @@ -0,0 +1,119 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */ + +#include <stdint.h> + +#define add(A, B) ((A) + (B)) +#define sub(A, B) ((A) - (B)) +#define max(A, B) ((A) > (B) ? (A) : (B)) +#define min(A, B) ((A) < (B) ? (A) : (B)) +#define and(A, B) ((A) & (B)) +#define ior(A, B) ((A) | (B)) +#define xor(A, B) ((A) ^ (B)) + +#define DEF_LOOP(TYPE, CMPTYPE, OP) \ + void __attribute__((noipa)) \ + f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond, \ + CMPTYPE limit, TYPE *restrict src1, \ + TYPE *restrict src2, unsigned int n) \ + { \ + for (unsigned int i = 0; i < n; ++i) \ + { \ + TYPE truev = OP (src1[i], src2[i]); \ + dest[i] = cond[i] < limit ? 
truev : src2[i]; \ + } \ + } + +#define FOR_EACH_INT_TYPE(T, TYPE) \ + T (TYPE, TYPE, add) \ + T (TYPE, TYPE, sub) \ + T (TYPE, TYPE, max) \ + T (TYPE, TYPE, min) \ + T (TYPE, TYPE, and) \ + T (TYPE, TYPE, ior) \ + T (TYPE, TYPE, xor) + +#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \ + T (TYPE, CMPTYPE, add) \ + T (TYPE, CMPTYPE, sub) \ + T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \ + T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX) + +#define FOR_EACH_LOOP(T) \ + FOR_EACH_INT_TYPE (T, int8_t) \ + FOR_EACH_INT_TYPE (T, int16_t) \ + FOR_EACH_INT_TYPE (T, int32_t) \ + FOR_EACH_INT_TYPE (T, int64_t) \ + FOR_EACH_INT_TYPE (T, uint8_t) \ + FOR_EACH_INT_TYPE (T, uint16_t) \ + FOR_EACH_INT_TYPE (T, uint32_t) \ + FOR_EACH_INT_TYPE (T, uint64_t) \ + FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \ + FOR_EACH_FP_TYPE (T, float, float, f32) \ + FOR_EACH_FP_TYPE (T, double, double, f64) + +FOR_EACH_LOOP (DEF_LOOP) + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ +/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */ + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { 
dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { 
scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c new file mode 100644 index 00000000000..14f32420e1e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c @@ -0,0 +1,34 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */ + +#include "vcond_9.c" + +#define N 187 + +#define TEST_LOOP(TYPE, CMPTYPE, OP) \ + { \ + TYPE dest[N], src1[N], src2[N]; \ + CMPTYPE cond[N]; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + src1[i] = i * 4 - i % 7; \ + src2[i] = i * 3 + 1; \ + cond[i] = i % 5; \ + } \ + f_##OP##_##TYPE (dest, cond, 3, src1, src2, N); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE src1v = i * 4 - i % 7; \ + TYPE src2v = i * 3 + 1; \ + TYPE if_true = OP (src1v, src2v); \ + if (dest[i] != (i % 5 < 3 ? 
if_true : src2v)) \ + __builtin_abort (); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + FOR_EACH_LOOP (TEST_LOOP); + return 0; +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0a53d7b1aad..0f8edce69bf 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -5590,6 +5590,13 @@ proc check_effective_target_vect_double { } { return $et_vect_double_saved($et_index) } +# Return 1 if the target supports conditional addition, subtraction, minimum +# and maximum on vectors of double, via the cond_ optabs. Return 0 otherwise. + +proc check_effective_target_vect_double_cond_arith { } { + return [check_effective_target_aarch64_sve] +} + # Return 1 if the target supports hardware vectors of long long, 0 otherwise. # # This won't change for different subtargets so cache the result. |