about summary refs log tree commit diff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- gcc/tree-vect-loop.c 171
1 files changed, 63 insertions, 108 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c74a485cc2f6..e50dca91b631 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4551,14 +4551,9 @@ get_initial_defs_for_reduction (slp_tree slp_node,
unsigned HOST_WIDE_INT nunits;
unsigned j, number_of_places_left_in_vector;
tree vector_type;
- tree vop;
- int group_size = stmts.length ();
- unsigned int vec_num, i;
- unsigned number_of_copies = 1;
- vec<tree> voprnds;
- voprnds.create (number_of_vectors);
+ unsigned int group_size = stmts.length ();
+ unsigned int i;
struct loop *loop;
- auto_vec<tree, 16> permute_results;
vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
@@ -4589,119 +4584,79 @@ get_initial_defs_for_reduction (slp_tree slp_node,
if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
nunits = group_size;
- number_of_copies = nunits * number_of_vectors / group_size;
-
number_of_places_left_in_vector = nunits;
bool constant_p = true;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
- for (j = 0; j < number_of_copies; j++)
+ gimple_seq ctor_seq = NULL;
+ for (j = 0; j < nunits * number_of_vectors; ++j)
{
- for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
- {
- tree op;
- /* Get the def before the loop. In reduction chain we have only
- one initial value. */
- if ((j != (number_of_copies - 1)
- || (reduc_chain && i != 0))
- && neutral_op)
- op = neutral_op;
- else
- op = PHI_ARG_DEF_FROM_EDGE (stmt, pe);
+ tree op;
+ i = j % group_size;
+ stmt_vinfo = vinfo_for_stmt (stmts[i]);
- /* Create 'vect_ = {op0,op1,...,opn}'. */
- number_of_places_left_in_vector--;
- elts[number_of_places_left_in_vector] = op;
- if (!CONSTANT_CLASS_P (op))
- constant_p = false;
+ /* Get the def before the loop. In reduction chain we have only
+ one initial value. Else we have as many as PHIs in the group. */
+ if (reduc_chain)
+ op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
+ else if (((vec_oprnds->length () + 1) * nunits
+ - number_of_places_left_in_vector >= group_size)
+ && neutral_op)
+ op = neutral_op;
+ else
+ op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
- if (number_of_places_left_in_vector == 0)
- {
- gimple_seq ctor_seq = NULL;
- tree init;
- if (constant_p && !neutral_op
- ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
- : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
- /* Build the vector directly from ELTS. */
- init = gimple_build_vector (&ctor_seq, &elts);
- else if (neutral_op)
- {
- /* Build a vector of the neutral value and shift the
- other elements into place. */
- init = gimple_build_vector_from_val (&ctor_seq, vector_type,
- neutral_op);
- int k = nunits;
- while (k > 0 && elts[k - 1] == neutral_op)
- k -= 1;
- while (k > 0)
- {
- k -= 1;
- gcall *call = gimple_build_call_internal
- (IFN_VEC_SHL_INSERT, 2, init, elts[k]);
- init = make_ssa_name (vector_type);
- gimple_call_set_lhs (call, init);
- gimple_seq_add_stmt (&ctor_seq, call);
- }
- }
- else
+ /* Create 'vect_ = {op0,op1,...,opn}'. */
+ number_of_places_left_in_vector--;
+ elts[nunits - number_of_places_left_in_vector - 1] = op;
+ if (!CONSTANT_CLASS_P (op))
+ constant_p = false;
+
+ if (number_of_places_left_in_vector == 0)
+ {
+ tree init;
+ if (constant_p && !neutral_op
+ ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
+ : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
+ /* Build the vector directly from ELTS. */
+ init = gimple_build_vector (&ctor_seq, &elts);
+ else if (neutral_op)
+ {
+ /* Build a vector of the neutral value and shift the
+ other elements into place. */
+ init = gimple_build_vector_from_val (&ctor_seq, vector_type,
+ neutral_op);
+ int k = nunits;
+ while (k > 0 && elts[k - 1] == neutral_op)
+ k -= 1;
+ while (k > 0)
{
- /* First time round, duplicate ELTS to fill the
- required number of vectors, then cherry pick the
- appropriate result for each iteration. */
- if (vec_oprnds->is_empty ())
- duplicate_and_interleave (&ctor_seq, vector_type, elts,
- number_of_vectors,
- permute_results);
- init = permute_results[number_of_vectors - j - 1];
+ k -= 1;
+ gcall *call = gimple_build_call_internal
+ (IFN_VEC_SHL_INSERT, 2, init, elts[k]);
+ init = make_ssa_name (vector_type);
+ gimple_call_set_lhs (call, init);
+ gimple_seq_add_stmt (&ctor_seq, call);
}
- if (ctor_seq != NULL)
- gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
- voprnds.quick_push (init);
-
- number_of_places_left_in_vector = nunits;
- elts.new_vector (vector_type, nunits, 1);
- elts.quick_grow (nunits);
- constant_p = true;
- }
- }
- }
-
- /* Since the vectors are created in the reverse order, we should invert
- them. */
- vec_num = voprnds.length ();
- for (j = vec_num; j != 0; j--)
- {
- vop = voprnds[j - 1];
- vec_oprnds->quick_push (vop);
- }
-
- voprnds.release ();
-
- /* In case that VF is greater than the unrolling factor needed for the SLP
- group of stmts, NUMBER_OF_VECTORS to be created is greater than
- NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
- to replicate the vectors. */
- tree neutral_vec = NULL;
- while (number_of_vectors > vec_oprnds->length ())
- {
- if (neutral_op)
- {
- if (!neutral_vec)
+ }
+ else
{
- gimple_seq ctor_seq = NULL;
- neutral_vec = gimple_build_vector_from_val
- (&ctor_seq, vector_type, neutral_op);
- if (ctor_seq != NULL)
- gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
+ /* First time round, duplicate ELTS to fill the
+ required number of vectors. */
+ duplicate_and_interleave (&ctor_seq, vector_type, elts,
+ number_of_vectors, *vec_oprnds);
+ break;
}
- vec_oprnds->quick_push (neutral_vec);
- }
- else
- {
- for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
- vec_oprnds->quick_push (vop);
- }
+ vec_oprnds->quick_push (init);
+
+ number_of_places_left_in_vector = nunits;
+ elts.new_vector (vector_type, nunits, 1);
+ elts.quick_grow (nunits);
+ constant_p = true;
+ }
}
+ if (ctor_seq != NULL)
+ gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
}