19 files changed, 786 insertions, 11 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 840bdf7c89e..fd187b92d39 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,36 @@
+2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* doc/sourcebuild.texi (vect_double_cond_arith): Document.
+	* gimple-match.h (gimple_match_op::MAX_NUM_OPS): Bump to 4.
+	(gimple_match_op::gimple_match_op): Add an overload for 4 operands.
+	(gimple_match_op::set_op): Likewise.
+	(gimple_resimplify4): Declare.
+	* genmatch.c (get_operand_type): Handle CFN_COND_* functions.
+	(expr::gen_transform): Likewise.
+	(decision_tree::gen): Generate a simplification routine for 4 operands.
+	* gimple-match-head.c (gimple_simplify): Add an overload for
+	4 operands.  In the top-level function, handle up to 4 call
+	arguments and call gimple_resimplify4.
+	(gimple_resimplify4): New function.
+	(build_call_internal): Pass a fourth operand.
+	(maybe_push_res_to_seq): Likewise.
+	* match.pd (UNCOND_BINARY, COND_BINARY): New operator lists.
+	Fold VEC_COND_EXPRs of an operation and a default value into
+	an IFN_COND_* function if possible.
+	* config/aarch64/iterators.md (UNSPEC_COND_MAX, UNSPEC_COND_MIN):
+	New unspecs.
+	(SVE_COND_FP_BINARY): Include them.
+	(optab, sve_fp_op): Handle them.
+	(SVE_INT_BINARY_REV): New code iterator.
+	(SVE_COND_FP_BINARY_REV): New int iterator.
+	(commutative): New int attribute.
+	* config/aarch64/aarch64-protos.h (aarch64_sve_prepare_conditional_op):
+	Declare.
+	* config/aarch64/aarch64.c (aarch64_sve_prepare_conditional_op): New
+	function.
+	* config/aarch64/aarch64-sve.md (cond_<optab><mode>): Use it.
+	(*cond_<optab><mode>): New patterns for reversed operands.
+
 2018-05-25  Richard Biener  <rguenther@suse.de>
 
 	* tree-vectorizer.h (STMT_VINFO_GROUP_*, GROUP_*): Remove.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index eec86428741..4ea50acaa59 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -513,6 +513,7 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
 void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
 bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
+void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
 #endif /* RTX_CODE */
 
 void aarch64_init_builtins (void);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 4f918492ae4..0bb37e72bd4 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1769,7 +1769,8 @@
 	  UNSPEC_SEL))]
   "TARGET_SVE"
 {
-  gcc_assert (rtx_equal_p (operands[2], operands[4]));
+  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
+  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
 })
 
 ;; Predicated integer operations.
@@ -1786,6 +1787,20 @@
   "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
 
+;; Predicated integer operations with the operands reversed.
+(define_insn "*cond_<optab><mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY_REV:SVE_I
+	     (match_operand:SVE_I 2 "register_operand" "w")
+	     (match_operand:SVE_I 3 "register_operand" "0"))
+	   (match_dup 3)]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+)
+
 ;; Set operand 0 to the last active element in operand 3, or to tied
 ;; operand 1 if no elements are active.
 (define_insn "fold_extract_last_<mode>"
@@ -2567,7 +2582,7 @@
 	  UNSPEC_SEL))]
   "TARGET_SVE"
 {
-  gcc_assert (rtx_equal_p (operands[2], operands[4]));
+  aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
 })
 
 ;; Predicated floating-point operations.
@@ -2586,6 +2601,22 @@
   "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
 
+;; Predicated non-commutative floating-point operations with operands reversed.
+(define_insn "*cond_<optab><mode>"
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (unspec:SVE_F
+	     [(match_dup 1)
+	      (match_operand:SVE_F 2 "register_operand" "w")
+	      (match_operand:SVE_F 3 "register_operand" "0")]
+	     SVE_COND_FP_BINARY_REV)
+	   (match_dup 3)]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "<sve_fp_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+)
+
 ;; Shift an SVE vector left and insert a scalar into element 0.
 (define_insn "vec_shl_insert_<mode>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 04dedd2f7e0..afc91850d6f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -16041,6 +16041,54 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
   emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
 }
 
+/* Prepare a cond_<optab><mode> operation that has the operands
+   given by OPERANDS, where:
+
+   - operand 0 is the destination
+   - operand 1 is a predicate
+   - operands 2 to NOPS - 2 are the operands to an operation that is
+     performed for active lanes
+   - operand NOPS - 1 specifies the values to use for inactive lanes.
+
+   COMMUTATIVE_P is true if operands 2 and 3 are commutative.  In that case,
+   no pattern is provided for a tie between operands 3 and NOPS - 1.  */
+
+void
+aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
+				    bool commutative_p)
+{
+  /* We can do the operation directly if the "else" value matches one
+     of the other inputs.  */
+  for (unsigned int i = 2; i < nops - 1; ++i)
+    if (rtx_equal_p (operands[i], operands[nops - 1]))
+      {
+	if (i == 3 && commutative_p)
+	  std::swap (operands[2], operands[3]);
+	return;
+      }
+
+  /* If the "else" value is different from the other operands, we have
+     the choice of doing a SEL on the output or a SEL on an input.
+     Neither choice is better in all cases, but one advantage of
+     selecting the input is that it can avoid a move when the output
+     needs to be distinct from the inputs.  E.g. if operand N maps to
+     register N, selecting the output would give:
+
+	MOVPRFX Z0.S, Z2.S
+	ADD Z0.S, P1/M, Z0.S, Z3.S
+	SEL Z0.S, P1, Z0.S, Z4.S
+
+     whereas selecting the input avoids the MOVPRFX:
+
+	SEL Z0.S, P1, Z2.S, Z4.S
+	ADD Z0.S, P1/M, Z0.S, Z3.S.  */
+  machine_mode mode = GET_MODE (operands[0]);
+  rtx temp = gen_reg_rtx (mode);
+  rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
+  emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL));
+  operands[2] = operands[nops - 1] = temp;
+}
+
 /* Implement TARGET_MODES_TIEABLE_P.  In principle we should always return
    true.  However due to issues with register allocation it is preferable
    to avoid tieing integer scalar and FP scalar modes.  Executing integer
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b57c7e221f8..4db3a4c368f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -464,6 +464,8 @@
     UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md.
     UNSPEC_COND_ADD	; Used in aarch64-sve.md.
     UNSPEC_COND_SUB	; Used in aarch64-sve.md.
+    UNSPEC_COND_MAX	; Used in aarch64-sve.md.
+    UNSPEC_COND_MIN	; Used in aarch64-sve.md.
     UNSPEC_COND_LT	; Used in aarch64-sve.md.
     UNSPEC_COND_LE	; Used in aarch64-sve.md.
     UNSPEC_COND_EQ	; Used in aarch64-sve.md.
@@ -1203,6 +1205,8 @@
 (define_code_iterator SVE_INT_BINARY [plus minus smax umax smin umin
 				      and ior xor])
 
+(define_code_iterator SVE_INT_BINARY_REV [minus])
+
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
@@ -1529,7 +1533,10 @@
 
 (define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART])
 
-(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB])
+(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB
+					 UNSPEC_COND_MAX UNSPEC_COND_MIN])
+
+(define_int_iterator SVE_COND_FP_BINARY_REV [UNSPEC_COND_SUB])
 
 (define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE
 				      UNSPEC_COND_EQ UNSPEC_COND_NE
@@ -1559,7 +1566,9 @@
 			(UNSPEC_IORV "ior")
 			(UNSPEC_XORV "xor")
 			(UNSPEC_COND_ADD "add")
-			(UNSPEC_COND_SUB "sub")])
+			(UNSPEC_COND_SUB "sub")
+			(UNSPEC_COND_MAX "smax")
+			(UNSPEC_COND_MIN "smin")])
 
 (define_int_attr  maxmin_uns [(UNSPEC_UMAXV "umax")
 			      (UNSPEC_UMINV "umin")
@@ -1771,4 +1780,11 @@
 			 (UNSPEC_COND_GT "gt")])
 
 (define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd")
-			    (UNSPEC_COND_SUB "fsub")])
+			    (UNSPEC_COND_SUB "fsub")
+			    (UNSPEC_COND_MAX "fmaxnm")
+			    (UNSPEC_COND_MIN "fminnm")])
+
+(define_int_attr commutative [(UNSPEC_COND_ADD "true")
+			      (UNSPEC_COND_SUB "false")
+			      (UNSPEC_COND_MIN "true")
+			      (UNSPEC_COND_MAX "true")])
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 596007d630d..00e53a657c8 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1425,6 +1425,10 @@ have different type from the value operands.
 @item vect_double
 Target supports hardware vectors of @code{double}.
 
+@item vect_double_cond_arith
+Target supports conditional addition, subtraction, minimum and maximum
+on vectors of @code{double}, via the @code{cond_} optabs.
+
 @item vect_element_align_preferred
 The target's preferred vector alignment is the same as the element
 alignment.
diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 5715dd16f55..d6bd90d1533 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -2370,6 +2370,18 @@ get_operand_type (id_base *op, unsigned pos,
   else if (*op == COND_EXPR
 	   && pos == 0)
     return "boolean_type_node";
+  else if (strncmp (op->id, "CFN_COND_", 9) == 0)
+    {
+      /* IFN_COND_* operands 1 and later by default have the same type
+	 as the result.  The type of operand 0 needs to be specified
+	 explicitly.  */
+      if (pos > 0 && expr_type)
+	return expr_type;
+      else if (pos > 0 && in_type)
+	return in_type;
+      else
+	return NULL;
+    }
   else
     {
       /* Otherwise all types should match - choose one in order of
@@ -2429,7 +2441,8 @@ expr::gen_transform (FILE *f, int indent, const char *dest, bool gimple,
       in_type = NULL;
     }
   else if (*opr == COND_EXPR
-	   || *opr == VEC_COND_EXPR)
+	   || *opr == VEC_COND_EXPR
+	   || strncmp (opr->id, "CFN_COND_", 9) == 0)
     {
       /* Conditions are of the same type as their first alternative.  */
       sprintf (optype, "TREE_TYPE (ops%d[1])", depth);
@@ -3737,7 +3750,7 @@ decision_tree::gen (FILE *f, bool gimple)
     }
   fprintf (stderr, "removed %u duplicate tails\n", rcnt);
 
-  for (unsigned n = 1; n <= 3; ++n)
+  for (unsigned n = 1; n <= 4; ++n)
     {
       /* First generate split-out functions.  */
       for (unsigned i = 0; i < root->kids.length (); i++)
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 4598781e61c..1a12bb35e01 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -51,6 +51,8 @@ static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
 			     code_helper, tree, tree, tree);
 static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
 			     code_helper, tree, tree, tree, tree);
+static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
+			     code_helper, tree, tree, tree, tree, tree);
 
 const unsigned int gimple_match_op::MAX_NUM_OPS;
 
@@ -215,6 +217,30 @@ gimple_resimplify3 (gimple_seq *seq, gimple_match_op *res_op,
   return canonicalized;
 }
 
+/* Helper that matches and simplifies the toplevel result from
+   a gimple_simplify run (where we don't want to build
+   a stmt in case it's used in in-place folding).  Replaces
+   RES_OP with a simplified and/or canonicalized result and
+   returns whether any change was made.  */
+
+bool
+gimple_resimplify4 (gimple_seq *seq, gimple_match_op *res_op,
+		    tree (*valueize)(tree))
+{
+  /* No constant folding is defined for four-operand functions.  */
+
+  gimple_match_op res_op2 (*res_op);
+  if (gimple_simplify (&res_op2, seq, valueize,
+		       res_op->code, res_op->type,
+		       res_op->ops[0], res_op->ops[1], res_op->ops[2],
+		       res_op->ops[3]))
+    {
+      *res_op = res_op2;
+      return true;
+    }
+
+  return false;
+}
 
 /* If in GIMPLE the operation described by RES_OP should be single-rhs,
    build a GENERIC tree for that expression and update RES_OP accordingly.  */
@@ -256,7 +282,8 @@ build_call_internal (internal_fn fn, gimple_match_op *res_op)
   return gimple_build_call_internal (fn, res_op->num_ops,
 				     res_op->op_or_null (0),
 				     res_op->op_or_null (1),
-				     res_op->op_or_null (2));
+				     res_op->op_or_null (2),
+				     res_op->op_or_null (3));
 }
 
 /* Push the exploded expression described by RES_OP as a statement to
@@ -343,7 +370,8 @@ maybe_push_res_to_seq (gimple_match_op *res_op, gimple_seq *seq, tree res)
 	  new_stmt = gimple_build_call (decl, num_ops,
 					res_op->op_or_null (0),
 					res_op->op_or_null (1),
-					res_op->op_or_null (2));
+					res_op->op_or_null (2),
+					res_op->op_or_null (3));
 	}
       if (!res)
 	{
@@ -654,7 +682,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
       /* ???  This way we can't simplify calls with side-effects.  */
       if (gimple_call_lhs (stmt) != NULL_TREE
 	  && gimple_call_num_args (stmt) >= 1
-	  && gimple_call_num_args (stmt) <= 3)
+	  && gimple_call_num_args (stmt) <= 4)
 	{
 	  bool valueized = false;
 	  combined_fn cfn;
@@ -697,6 +725,9 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
 	    case 3:
 	      return (gimple_resimplify3 (seq, res_op, valueize)
 		      || valueized);
+	    case 4:
+	      return (gimple_resimplify4 (seq, res_op, valueize)
+		      || valueized);
 	    default:
 	     gcc_unreachable ();
 	    }
diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
index 9a4d3bb630f..69b53f21157 100644
--- a/gcc/gimple-match.h
+++ b/gcc/gimple-match.h
@@ -49,17 +49,19 @@ struct gimple_match_op
   gimple_match_op (code_helper, tree, tree);
   gimple_match_op (code_helper, tree, tree, tree);
   gimple_match_op (code_helper, tree, tree, tree, tree);
+  gimple_match_op (code_helper, tree, tree, tree, tree, tree);
 
   void set_op (code_helper, tree, unsigned int);
   void set_op (code_helper, tree, tree);
   void set_op (code_helper, tree, tree, tree);
   void set_op (code_helper, tree, tree, tree, tree);
+  void set_op (code_helper, tree, tree, tree, tree, tree);
   void set_value (tree);
 
   tree op_or_null (unsigned int) const;
 
   /* The maximum value of NUM_OPS.  */
-  static const unsigned int MAX_NUM_OPS = 3;
+  static const unsigned int MAX_NUM_OPS = 4;
 
   /* The operation being performed.  */
   code_helper code;
@@ -113,6 +115,17 @@ gimple_match_op::gimple_match_op (code_helper code_in, tree type_in,
   ops[2] = op2;
 }
 
+inline
+gimple_match_op::gimple_match_op (code_helper code_in, tree type_in,
+				  tree op0, tree op1, tree op2, tree op3)
+  : code (code_in), type (type_in), num_ops (4)
+{
+  ops[0] = op0;
+  ops[1] = op1;
+  ops[2] = op2;
+  ops[3] = op3;
+}
+
 /* Change the operation performed to CODE_IN, the type of the result to
    TYPE_IN, and the number of operands to NUM_OPS_IN.  The caller needs
    to set the operands itself.  */
@@ -160,6 +173,19 @@ gimple_match_op::set_op (code_helper code_in, tree type_in,
   ops[2] = op2;
 }
 
+inline void
+gimple_match_op::set_op (code_helper code_in, tree type_in,
+			 tree op0, tree op1, tree op2, tree op3)
+{
+  code = code_in;
+  type = type_in;
+  num_ops = 4;
+  ops[0] = op0;
+  ops[1] = op1;
+  ops[2] = op2;
+  ops[3] = op3;
+}
+
 /* Set the "operation" to be the single value VALUE, such as a constant
    or SSA_NAME.  */
 
@@ -196,6 +222,7 @@ bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *,
 bool gimple_resimplify1 (gimple_seq *, gimple_match_op *, tree (*)(tree));
 bool gimple_resimplify2 (gimple_seq *, gimple_match_op *, tree (*)(tree));
 bool gimple_resimplify3 (gimple_seq *, gimple_match_op *, tree (*)(tree));
+bool gimple_resimplify4 (gimple_seq *, gimple_match_op *, tree (*)(tree));
 tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *,
 			    tree res = NULL_TREE);
 void maybe_build_generic_op (gimple_match_op *);
diff --git a/gcc/match.pd b/gcc/match.pd
index 8a71141eac9..f08571ef28c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -74,6 +74,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (FLOOR)
 DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
 DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
 DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+
+/* Binary operations and their associated IFN_COND_* function.  */
+(define_operator_list UNCOND_BINARY
+  plus minus
+  min max
+  bit_and bit_ior bit_xor)
+(define_operator_list COND_BINARY
+  IFN_COND_ADD IFN_COND_SUB
+  IFN_COND_MIN IFN_COND_MAX
+  IFN_COND_AND IFN_COND_IOR IFN_COND_XOR)
     
 /* As opposed to convert?, this still creates a single pattern, so
    it is not a suitable replacement for convert? in all cases.  */
@@ -4780,3 +4790,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (simplify
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
+
+/* Simplify:
+
+     a = a1 op a2
+     r = c ? a : b;
+
+   to:
+
+     r = c ? a1 op a2 : b;
+
+   if the target can do it in one go.  This makes the operation conditional
+   on c, so could drop potentially-trapping arithmetic, but that's a valid
+   simplification if the result of the operation isn't needed.  */
+(for uncond_op (UNCOND_BINARY)
+     cond_op (COND_BINARY)
+ (simplify
+  (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
+  (with { tree op_type = TREE_TYPE (@4); }
+   (if (element_precision (type) == element_precision (op_type))
+    (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
+ (simplify
+  (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
+  (with { tree op_type = TREE_TYPE (@4); }
+   (if (element_precision (type) == element_precision (op_type))
+    (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 59b230956e8..c5b2c631b5d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* lib/target-supports.exp
+	(check_effective_target_vect_double_cond_arith): New proc.
+	* gcc.dg/vect/vect-cond-arith-1.c: New test.
+	* gcc.target/aarch64/sve/vcond_8.c: Likewise.
+	* gcc.target/aarch64/sve/vcond_8_run.c: Likewise.
+	* gcc.target/aarch64/sve/vcond_9.c: Likewise.
+	* gcc.target/aarch64/sve/vcond_9_run.c: Likewise.
+	* gcc.target/aarch64/sve/vcond_12.c: Likewise.
+	* gcc.target/aarch64/sve/vcond_12_run.c: Likewise.
+
 2018-05-25  Janus Weil  <janus@gcc.gnu.org>
 
 	PR fortran/85839
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c
new file mode 100644
index 00000000000..9f2fccd7187
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c
@@ -0,0 +1,58 @@
+/* { dg-additional-options "-fdump-tree-optimized -fno-trapping-math -ffinite-math-only" } */
+
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS * 11 / 64 + 3)
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+
+#define DEF(OP)							\
+  void __attribute__ ((noipa))					\
+  f_##OP (double *restrict a, double *restrict b, double x)	\
+  {								\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	double truev = OP (b[i], x);				\
+	a[i] = b[i] < 100 ? truev : b[i];			\
+      }								\
+  }
+
+#define TEST(OP)					\
+  {							\
+    f_##OP (a, b, 10);					\
+    for (int i = 0; i < N; ++i)				\
+      {							\
+	int bval = (i % 17) * 10;			\
+	int truev = OP (bval, 10);			\
+	if (a[i] != (bval < 100 ? truev : bval))	\
+	__builtin_abort ();				\
+	asm volatile ("" ::: "memory");			\
+      }							\
+  }
+
+#define FOR_EACH_OP(T)				\
+  T (add)					\
+  T (sub)					\
+  T (__builtin_fmax)				\
+  T (__builtin_fmin)
+
+FOR_EACH_OP (DEF)
+
+int
+main (void)
+{
+  double a[N], b[N];
+  for (int i = 0; i < N; ++i)
+    {
+      b[i] = (i % 17) * 10;
+      asm volatile ("" ::: "memory");
+    }
+  FOR_EACH_OP (TEST)
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { = \.COND_ADD} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_SUB} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_MAX} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_MIN} "optimized" { target vect_double_cond_arith } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c
new file mode 100644
index 00000000000..95b371a1773
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+#define N 121
+
+#define DEF_LOOP(TYPE, CMPTYPE, OP)				\
+  void __attribute__((noipa))					\
+  f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond,	\
+		   CMPTYPE limit, TYPE src2v, TYPE elsev)	\
+  {								\
+    TYPE induc = 0;						\
+    for (unsigned int i = 0; i < N; ++i, induc += 1)		\
+      {								\
+	TYPE truev = OP (induc, src2v);				\
+	dest[i] = cond[i] < limit ? truev : elsev;		\
+      }								\
+  }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) \
+  T (TYPE, TYPE, add) \
+  T (TYPE, TYPE, sub) \
+  T (TYPE, TYPE, max) \
+  T (TYPE, TYPE, min) \
+  T (TYPE, TYPE, and) \
+  T (TYPE, TYPE, ior) \
+  T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+  T (TYPE, CMPTYPE, add) \
+  T (TYPE, CMPTYPE, sub) \
+  T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
+  T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
+
+#define FOR_EACH_LOOP(T) \
+  FOR_EACH_INT_TYPE (T, int8_t) \
+  FOR_EACH_INT_TYPE (T, int16_t) \
+  FOR_EACH_INT_TYPE (T, int32_t) \
+  FOR_EACH_INT_TYPE (T, int64_t) \
+  FOR_EACH_INT_TYPE (T, uint8_t) \
+  FOR_EACH_INT_TYPE (T, uint16_t) \
+  FOR_EACH_INT_TYPE (T, uint32_t) \
+  FOR_EACH_INT_TYPE (T, uint64_t) \
+  FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
+  FOR_EACH_FP_TYPE (T, float, float, f32) \
+  FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b,} 14 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h,} 18 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s,} 18 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d,} 18 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c
new file mode 100644
index 00000000000..50a98c84a59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include "vcond_12.c"
+
+#define TEST_LOOP(TYPE, CMPTYPE, OP)				\
+  {								\
+    TYPE dest[N];						\
+    CMPTYPE cond[N];						\
+    for (unsigned int i = 0; i < N; ++i)			\
+      cond[i] = i % 5;						\
+    TYPE src2v = 14;						\
+    TYPE elsev = 17;						\
+    f_##OP##_##TYPE (dest, cond, 3, src2v, elsev);		\
+    TYPE induc = 0;						\
+    for (unsigned int i = 0; i < N; ++i)			\
+      {								\
+	TYPE if_true = OP (induc, src2v);			\
+	if (dest[i] != (i % 5 < 3 ? if_true : elsev))		\
+	  __builtin_abort ();					\
+	induc += 1;						\
+      }								\
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  FOR_EACH_LOOP (TEST_LOOP);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c
new file mode 100644
index 00000000000..c32ab596716
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+#define DEF_LOOP(TYPE, CMPTYPE, OP)				\
+  void __attribute__((noipa))					\
+  f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond,	\
+		   CMPTYPE limit, TYPE *restrict src,		\
+		   TYPE val, unsigned int n)			\
+  {								\
+    for (unsigned int i = 0; i < n; ++i)			\
+      {								\
+	TYPE truev = OP (src[i], val);				\
+	dest[i] = cond[i] < limit ? truev : src[i];		\
+      }								\
+  }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) \
+  T (TYPE, TYPE, add) \
+  T (TYPE, TYPE, sub) \
+  T (TYPE, TYPE, max) \
+  T (TYPE, TYPE, min) \
+  T (TYPE, TYPE, and) \
+  T (TYPE, TYPE, ior) \
+  T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+  T (TYPE, CMPTYPE, add) \
+  T (TYPE, CMPTYPE, sub) \
+  T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
+  T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
+
+#define FOR_EACH_LOOP(T) \
+  FOR_EACH_INT_TYPE (T, int8_t) \
+  FOR_EACH_INT_TYPE (T, int16_t) \
+  FOR_EACH_INT_TYPE (T, int32_t) \
+  FOR_EACH_INT_TYPE (T, int64_t) \
+  FOR_EACH_INT_TYPE (T, uint8_t) \
+  FOR_EACH_INT_TYPE (T, uint16_t) \
+  FOR_EACH_INT_TYPE (T, uint32_t) \
+  FOR_EACH_INT_TYPE (T, uint64_t) \
+  FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
+  FOR_EACH_FP_TYPE (T, float, float, f32) \
+  FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c
new file mode 100644
index 00000000000..5f45e1667d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c
@@ -0,0 +1,32 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include "vcond_8.c"
+
+#define N 187
+
+#define TEST_LOOP(TYPE, CMPTYPE, OP)				\
+  { /* Exercise f_OP_TYPE and verify every element of DEST.  */	\
+    CMPTYPE cond[N];						\
+    TYPE src[N], dest[N];					\
+    for (unsigned int k = 0; k < N; k++)			\
+      {								\
+	cond[k] = k % 5;					\
+	src[k] = k * 3;						\
+      }								\
+    f_##OP##_##TYPE (dest, cond, 3, src, 77, N);		\
+    for (unsigned int k = 0; k < N; k++)			\
+      { /* Recompute the expected value of element k.  */	\
+	TYPE kept = k * 3;					\
+	TYPE updated = OP (kept, (TYPE) 77);			\
+	if ((k % 5 >= 3 ? kept : updated) != dest[k])		\
+	  __builtin_abort ();					\
+      }								\
+  }
+
+int __attribute__ ((optimize (1)))	/* Build main at -O1; dg-options use -O2.  */
+main (void)
+{
+  FOR_EACH_LOOP (TEST_LOOP);	/* FOR_EACH_LOOP presumably comes from vcond_8.c.  */
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c
new file mode 100644
index 00000000000..618e187d587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))	/* OP names also form f_OP_TYPE.  */
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))	/* One argument is evaluated twice.  */
+#define min(A, B) ((A) < (B) ? (A) : (B))	/* One argument is evaluated twice.  */
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+#define DEF_LOOP(TYPE, CMPTYPE, OP)				\
+  void __attribute__((noipa)) /* No inlining or cloning.  */	\
+  f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond,	\
+		   CMPTYPE limit, TYPE *restrict src1,		\
+		   TYPE *restrict src2, unsigned int n)		\
+  { /* Store OP (src1[i], src2[i]) where cond[i] < limit.  */	\
+    for (unsigned int i = 0; i < n; ++i)			\
+      { /* The fallback is the second operand, src2[i].  */	\
+	TYPE truev = OP (src1[i], src2[i]);			\
+	dest[i] = cond[i] < limit ? truev : src2[i];		\
+      }								\
+  }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) /* T's second argument repeats TYPE.  */ \
+  T (TYPE, TYPE, add) \
+  T (TYPE, TYPE, sub) \
+  T (TYPE, TYPE, max) \
+  T (TYPE, TYPE, min) \
+  T (TYPE, TYPE, and) \
+  T (TYPE, TYPE, ior) \
+  T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+  T (TYPE, CMPTYPE, add) \
+  T (TYPE, CMPTYPE, sub) \
+  T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) /* e.g. __builtin_fmaxf32 */ \
+  T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX) /* e.g. __builtin_fminf32 */
+
+#define FOR_EACH_LOOP(T) /* Apply T to each type/operation below.  */ \
+  FOR_EACH_INT_TYPE (T, int8_t) \
+  FOR_EACH_INT_TYPE (T, int16_t) \
+  FOR_EACH_INT_TYPE (T, int32_t) \
+  FOR_EACH_INT_TYPE (T, int64_t) \
+  FOR_EACH_INT_TYPE (T, uint8_t) \
+  FOR_EACH_INT_TYPE (T, uint16_t) \
+  FOR_EACH_INT_TYPE (T, uint32_t) \
+  FOR_EACH_INT_TYPE (T, uint64_t) \
+  FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) /* uint16_t is CMPTYPE.  */ \
+  FOR_EACH_FP_TYPE (T, float, float, f32) \
+  FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP) /* Defines every f_OP_TYPE function.  */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c
new file mode 100644
index 00000000000..14f32420e1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include "vcond_9.c"
+
+#define N 187
+
+#define TEST_LOOP(TYPE, CMPTYPE, OP)				\
+  { /* Exercise f_OP_TYPE and verify every element of DEST.  */	\
+    CMPTYPE cond[N];						\
+    TYPE src1[N], src2[N], dest[N];				\
+    for (unsigned int k = 0; k < N; k++)			\
+      {								\
+	cond[k] = k % 5;					\
+	src1[k] = k * 4 - k % 7;				\
+	src2[k] = k * 3 + 1;					\
+      }								\
+    f_##OP##_##TYPE (dest, cond, 3, src1, src2, N);		\
+    for (unsigned int k = 0; k < N; k++)			\
+      { /* Recompute the expected value of element k.  */	\
+	TYPE lhs = k * 4 - k % 7;				\
+	TYPE rhs = k * 3 + 1;					\
+	TYPE updated = OP (lhs, rhs);				\
+	if ((k % 5 >= 3 ? rhs : updated) != dest[k])		\
+	  __builtin_abort ();					\
+      }								\
+  }
+
+int __attribute__ ((optimize (1)))	/* Build main at -O1; dg-options use -O2.  */
+main (void)
+{
+  FOR_EACH_LOOP (TEST_LOOP);	/* FOR_EACH_LOOP comes from vcond_9.c.  */
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 0a53d7b1aad..0f8edce69bf 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5590,6 +5590,13 @@ proc check_effective_target_vect_double { } {
     return $et_vect_double_saved($et_index)
 }
 
+# Return 1 if the target supports conditional addition, subtraction, minimum
+# and maximum on vectors of double, via the cond_ optabs.  Return 0 otherwise.
+# The result is taken directly from the aarch64_sve effective-target check.
+proc check_effective_target_vect_double_cond_arith { } {
+    return [check_effective_target_aarch64_sve]
+}
+
 # Return 1 if the target supports hardware vectors of long long, 0 otherwise.
 #
 # This won't change for different subtargets so cache the result.