From a3842acd872541a0f94709bb0673d64b81d525f9 Mon Sep 17 00:00:00 2001 From: "J\"orn Rennecke" Date: Thu, 4 Jun 2009 17:48:38 +0000 Subject: * tree-parloops.c (parallelize_loops): Don't check for vector phi nodes when processing a loop that should be executed on a different target than CFUN. * tree-vectorizer.c (vectorize_loops): Also analyze and vectorize loops for different targets than CFUN. * tree-vect-transform.c (vect_transform_loop): If the loop is for a different target than CFUN, don't do alignment peeling. Switch to loop target during transformation and back afterwards. * cfgloop.c (alloc_loop): Initialize target_arch member from CFUN. * tree-ssa-loop.c (pass_vectorize): Add TODO_rebuild_alias to todo_flags_finish. * tree-vect-transform.c (vect_create_addr_base_for_vector_ref): New parameter ptr_alias_set. Changed all callers. git-svn-id: https://gcc.gnu.org/svn/gcc/branches/arc-milepost-branch@148179 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.multi-target | 18 ++++++++++++++++++ gcc/cfgloop.c | 1 + gcc/tree-parloops.c | 6 +++--- gcc/tree-ssa-loop.c | 2 +- gcc/tree-vect-transform.c | 35 +++++++++++++++++++++++++---------- gcc/tree-vectorizer.c | 39 +++++++++++++++++++++++++++------------ 6 files changed, 75 insertions(+), 26 deletions(-) diff --git a/gcc/ChangeLog.multi-target b/gcc/ChangeLog.multi-target index 8b297811280..515fc6b7f27 100644 --- a/gcc/ChangeLog.multi-target +++ b/gcc/ChangeLog.multi-target @@ -1,3 +1,21 @@ +2009-06-04 J"orn Rennecke + + * tree-parloops.c (parallelize_loops): Don't check for vector phi + nodes when processing a loop that should be executed on a different + target than CFUN. + * tree-vectorizer.c (vectorize_loops): Also analyze and vectorize + loops for different targets than CFUN. + * tree-vect-transform.c (vect_transform_loop): If the loop is for a + different target than CFUN, don't do alignment peeling. Switch + to loop target during transformation and back afterwards. 
+ + * cfgloop.c (alloc_loop): Initialize target_arch member from CFUN. + + * tree-ssa-loop.c (pass_vectorize): Add TODO_rebuild_alias to + todo_flags_finish. + * tree-vect-transform.c (vect_create_addr_base_for_vector_ref): New + parameter ptr_alias_set. Changed all callers. + 2009-06-01 J"orn Rennecke * target.h (struct gcc_target): Add member ptr_mode. diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index e74284e8988..c1d096420cc 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -338,6 +338,7 @@ alloc_loop (void) loop->exits = GGC_CNEW (struct loop_exit); loop->exits->next = loop->exits->prev = loop->exits; + loop->target_arch = cfun->target_arch; return loop; } diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 6c24a03bac9..8d4b3a5524c 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -1887,12 +1887,12 @@ parallelize_loops (void) /* And of course, the loop must be parallelizable. */ || !can_duplicate_loop_p (loop) || loop_has_blocks_with_irreducible_flag (loop) - /* FIXME: the check for vector phi nodes could be removed. */ - || loop_has_vector_phi_nodes (loop) || (loop->target_arch != cfun->target_arch ? !number_of_iterations_exit (loop, single_dom_exit (loop), &niter_desc, false) - : (flag_tree_parallelize_loops <= 1 + /* FIXME: the check for vector phi nodes could be removed. 
*/ + : (loop_has_vector_phi_nodes (loop) + || flag_tree_parallelize_loops <= 1 || !loop_parallel_p (loop, reduction_list, &niter_desc)))) continue; diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index 6fb1dad0987..b80c288c0dc 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -243,7 +243,7 @@ struct gimple_opt_pass pass_vectorize = 0, /* properties_provided */ 0, /* properties_destroyed */ TODO_verify_loops, /* todo_flags_start */ - TODO_dump_func | TODO_update_ssa + TODO_dump_func | TODO_update_ssa | TODO_rebuild_alias | TODO_ggc_collect /* todo_flags_finish */ } }; diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 13f152ba191..08035a3b8ec 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -53,7 +53,7 @@ static tree vect_create_destination_var (tree, tree); static tree vect_create_data_ref_ptr (gimple, struct loop*, tree, tree *, gimple *, bool, bool *, tree); static tree vect_create_addr_base_for_vector_ref - (gimple, gimple_seq *, tree, struct loop *); + (gimple, gimple_seq *, tree, struct loop *, alias_set_type); static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); static tree vect_get_vec_def_for_operand (tree, gimple, tree *); static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *); @@ -875,7 +875,8 @@ static tree vect_create_addr_base_for_vector_ref (gimple stmt, gimple_seq *new_stmt_list, tree offset, - struct loop *loop) + struct loop *loop, + alias_set_type ptr_alias_set) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); @@ -944,10 +945,18 @@ vect_create_addr_base_for_vector_ref (gimple stmt, /* addr_expr = addr_base */ addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, get_name (base_name)); + if (ptr_alias_set) + DECL_POINTER_ALIAS_SET (addr_expr) = ptr_alias_set; + /* FIXME: as addr_expr has no memory tag, alias analysis thinks it + 'points-to anything' . 
*/ add_referenced_var (addr_expr); vec_stmt = fold_convert (vect_ptr_type, addr_base); addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, get_name (base_name)); + if (ptr_alias_set) + DECL_POINTER_ALIAS_SET (addr_expr2) = ptr_alias_set; + /* FIXME: as addr_expr2 has no memory tag, alias analysis thinks it + 'points-to anything' . */ add_referenced_var (addr_expr2); vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr2); gimple_seq_add_seq (new_stmt_list, seq); @@ -1034,6 +1043,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, tree indx_before_incr, indx_after_incr; gimple incr; tree step; + alias_set_type ptr_alias_set = 0; /* Check the step (evolution) of the load in LOOP, and record whether it's invariant. */ @@ -1082,7 +1092,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, && TYPE_RESTRICT (TREE_TYPE (DR_BASE_ADDRESS (dr)))) { get_alias_set (base_name); - DECL_POINTER_ALIAS_SET (vect_ptr) + DECL_POINTER_ALIAS_SET (vect_ptr) = ptr_alias_set = DECL_POINTER_ALIAS_SET (SSA_NAME_VAR (DR_BASE_ADDRESS (dr))); } @@ -1141,7 +1151,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, /* Create: (&(base[init_val+offset]) in the loop preheader. */ new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, - offset, loop); + offset, loop, ptr_alias_set); pe = loop_preheader_edge (loop); if (new_stmt_list) { @@ -5659,7 +5669,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, { /* Generate the INIT_ADDR computation outside LOOP. 
*/ init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts, - NULL_TREE, loop); + NULL_TREE, loop, 0); pe = loop_preheader_edge (loop); new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); gcc_assert (!new_bb); @@ -7621,7 +7631,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) { gimple_seq new_stmts = NULL; tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt, - &new_stmts, NULL_TREE, loop); + &new_stmts, NULL_TREE, loop, 0); tree ptr_type = TREE_TYPE (start_addr); tree size = TYPE_SIZE (ptr_type); tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); @@ -7859,7 +7869,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, /* create: addr_tmp = (int)(address_of_first_vector) */ addr_base = vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list, - NULL_TREE, loop); + NULL_TREE, loop, 0); if (new_stmt_list != NULL) gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list); @@ -8024,10 +8034,10 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, addr_base_a = vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list, - NULL_TREE, loop); + NULL_TREE, loop, 0); addr_base_b = vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list, - NULL_TREE, loop); + NULL_TREE, loop, 0); segment_length_a = vect_vfa_segment_size (dr_a, vect_factor); segment_length_b = vect_vfa_segment_size (dr_b, vect_factor); @@ -8328,6 +8338,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) bool strided_store; bool slp_scheduled = false; unsigned int nunits; + bool arch_change = loop->target_arch != cfun->target_arch; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vec_transform_loop ==="); @@ -8343,7 +8354,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) /* Peel the loop if there are data refs with unknown alignment. Only one data ref with unknown store is allowed. 
*/ - if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) + if (!arch_change && LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) vect_do_peeling_for_alignment (loop_vinfo); /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a @@ -8369,6 +8380,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) split_edge (loop_preheader_edge (loop)); + targetm_pnt = targetm_array[loop->target_arch]; + /* FORNOW: the vectorizer supports only loops which body consist of one basic block (header + empty latch). When the vectorizer will support more involved loop forms, the order by which the BBs are @@ -8517,6 +8530,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) until all the loops have been transformed? */ update_ssa (TODO_update_ssa); + targetm_pnt = targetm_array[cfun->target_arch]; + if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) fprintf (vect_dump, "LOOP VECTORIZED."); if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index a455896a6f1..d3791690f3d 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -520,7 +520,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, update_phi = gsi_stmt (gsi_update); /* Virtual phi; Mark it for renaming. We actually want to call - mar_sym_for_renaming, but since all ssa renaming datastructures + mark_sym_for_renaming, but since all ssa renaming datastructures are going to be freed before we get to call ssa_update, we just record this name for now in a bitmap, and will mark it for renaming later. 
*/ @@ -2788,12 +2788,9 @@ vectorize_loops (void) destroy_loop_vec_info (loop_vinfo, true); loop_vinfo = 0; } - if (best_arch == (int) cfun->target_arch) - { - targetm_pnt = targetm_array[best_arch]; - loop_vinfo = vect_analyze_loop (loop); - target_arch = best_arch; - } + targetm_pnt = targetm_array[best_arch]; + loop_vinfo = vect_analyze_loop (loop); + target_arch = best_arch; } targetm_pnt = targetm_array[cfun->target_arch]; loop->aux = loop_vinfo; @@ -2803,12 +2800,30 @@ vectorize_loops (void) if (best_arch != (int) cfun->target_arch) { - /* This loop should be vectorized for another target. Since we - might to have more than one thread on this other target, but - do the reduction on the main processor, leave this to - parallelize_loops. */ + /* This loop should be vectorized for another target. + We do the vectorization now because, if required, alias checks + and a loop version for the aliased case should run on the main + target (saving code space on the extra target). + Likewise, peeling to obtain the vectorization factor + (vect_do_peeling_for_loop_bound) should be done for the main + target. ??? We might want to extend this peeling to do + a bit of looping to work concurrently with the extra target. + ??? This is good for arc-mxp or ppc-spu, but h8300-sh64 would + be better off (at least if power is no object once we activate + the sh64) doing more work on the sh64. + Alignment checks will not be necessary because alignment + mismatch is taken care of during data transfer. + (Might need to modify this aspect if the DMA mechanism for + some target architecture pair has alignment restrictions). + Since we might want to have more than one thread on this other + target, but do the reduction on the main processor, we leave + the outlining to parallelize_loops. + As parallelize_loops will see the vectorized loop, there should + be no trouble with a thread other than on the main target + getting vector subunits not making up a full vector. 
+ An additional task that the vectorization will have to do now + is to translate pointers to use the appropriate ptr_mode. */ loop->target_arch = best_arch; - continue; } vect_transform_loop (loop_vinfo); num_vectorized_loops++; -- cgit v1.2.3