diff options
author | J"orn Rennecke <joern.rennecke@st.com> | 2009-06-01 19:54:52 +0000 |
---|---|---|
committer | J"orn Rennecke <joern.rennecke@st.com> | 2009-06-01 19:54:52 +0000 |
commit | 2a9ff8bf117b1d9cdbefb32acaafa46223de7850 (patch) | |
tree | 629c2751da45e24969e054ca82164c031bd60b5f | |
parent | 4f362fecae69eb1b89c45ba4c16aa56d502367d4 (diff) |
* target.h (struct gcc_target): Add member ptr_mode.
* target-def.h (TARGET_INITIALIZER): Add initializer for ptr_mode.
* tree-ssa-loop.c (gate_tree_parallelize_loops): Also enable if a
loop is to be executed on another target.
* tree-parloops.c (separate_decls_in_region): New parameter new_target.
Changed caller.
(create_loop_fn): New parameter target_arch. Changed caller.
(parallelize_loops): Also enable for loops that are to be executed
on another target.
* tree-vectorizer.c (vectorize_loops): If a loop is to be vectorized
for a different target, only set the target_arch field in loop and
ignore the loop for now. Use cfun->target_arch to restore target.
* tree-vectorizer.h (struct _loop_vec_info): Remove target_arch member.
* cfgloop.h (struct loop): Add target_arch member.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/arc-milepost-branch@148068 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog.multi-target | 17 | ||||
-rw-r--r-- | gcc/cfgloop.h | 2 | ||||
-rw-r--r-- | gcc/target-def.h | 1 | ||||
-rw-r--r-- | gcc/target.h | 3 | ||||
-rw-r--r-- | gcc/tree-parloops.c | 34 | ||||
-rw-r--r-- | gcc/tree-ssa-loop.c | 10 | ||||
-rw-r--r-- | gcc/tree-vectorizer.c | 35 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 5 |
8 files changed, 85 insertions, 22 deletions
diff --git a/gcc/ChangeLog.multi-target b/gcc/ChangeLog.multi-target index 1a31c57e130..8b297811280 100644 --- a/gcc/ChangeLog.multi-target +++ b/gcc/ChangeLog.multi-target @@ -1,3 +1,20 @@ +2009-06-01 J"orn Rennecke <joern.rennecke@arc.com> + + * target.h (struct gcc_target): Add member ptr_mode. + * target-def.h (TARGET_INITIALIZER): Add initializer for ptr_mode. + * tree-ssa-loop.c (gate_tree_parallelize_loops): Also enable if a + loop is to be executed on another target. + * tree-parloop.c (separate_decls_in_region): New parameter new_target. + Changed caller. + (create loop_fn): New parameter target_arch. Changed caller. + (parallelize_loops): Also enable for loops that are to be executed + on another target. + * tree-vectorizer.c (vectorize_loops): If a loop is to be vectorized + for a different target, only set the target_arch filed in loop and + ignore the lop for now. Use cfun->target_arch to restore target. + * tree-vectorizer.h (struct _loop_vec_info): Remove target_arch member. + * cfgloop.h (struct loop): Add target_arch member. + 2009-05-23 J"orn Rennecke <joern.rennecke@arc.com> * doc/tm.texi: Adjust struct machine_function documentation. diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 73aae7d6cf9..3f597de7d1e 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -152,6 +152,8 @@ struct loop GTY ((chain_next ("%h.next"))) bool any_upper_bound; bool any_estimate; + /* For what target is this loop to be vectorized? targetm_array index. */ + unsigned target_arch : 8; /* An integer estimation of the number of iterations. Estimate_state describes what is the state of the estimation. 
*/ diff --git a/gcc/target-def.h b/gcc/target-def.h index 374f30fedb1..d5579c87865 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -846,6 +846,7 @@ { \ TARGET_NAME, \ TARGET_NUM, \ + &ptr_mode, \ TARGET_ASM_OUT, \ TARGET_SCHED, \ TARGET_VECTORIZE, \ diff --git a/gcc/target.h b/gcc/target.h index 2ae2c7c420c..50f3eb67a09 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -702,6 +702,9 @@ struct gcc_target Initialized with the Makefile-generated TARGET_NUM. */ int target_arch; + /* Points to the ptr_mode variable for this target. */ + enum machine_mode *ptr_mode; + /* Functions that output assembler for the target. */ struct asm_out asm_out; diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 4e9b102973a..6c24a03bac9 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -1156,7 +1156,7 @@ create_loads_and_stores_for_name (void **slot, void *data) static void separate_decls_in_region (edge entry, edge exit, htab_t reduction_list, tree *arg_struct, tree *new_arg_struct, - struct clsn_data *ld_st_data) + struct clsn_data *ld_st_data, unsigned new_target) { basic_block bb1 = split_edge (entry); @@ -1220,7 +1220,10 @@ separate_decls_in_region (edge entry, edge exit, htab_t reduction_list, /* Create the loads and stores. */ *arg_struct = create_tmp_var (type, ".paral_data_store"); add_referenced_var (*arg_struct); - nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load"); + nvar = create_tmp_var (build_pointer_type_for_mode + (type, *targetm_array[new_target]->ptr_mode, + false), + ".paral_data_load"); add_referenced_var (nvar); *new_arg_struct = make_ssa_name (nvar, NULL); @@ -1270,7 +1273,7 @@ parallelized_function_p (tree fn) a parallelized loop. 
*/ static tree -create_loop_fn (void) +create_loop_fn (unsigned int target_arch) { char buf[100]; char *tname; @@ -1312,6 +1315,15 @@ create_loop_fn (void) TREE_USED (t) = 1; DECL_ARGUMENTS (decl) = t; + if (target_arch) + { + const char *target_name = targetm_array[target_arch]->name; + + tree value = build_string (strlen (target_name), target_name); + decl_attributes (&decl, build_tree_list (get_identifier ("target_arch"), + build_tree_list (NULL, value)), + 0); + } allocate_struct_function (decl, false); /* The call to allocate_struct_function clobbers CFUN, so we need to restore @@ -1791,11 +1803,15 @@ gen_parallel_loop (struct loop *loop, htab_t reduction_list, /* In the old loop, move all variables non-local to the loop to a structure and back, and create separate decls for the variables used in loop. */ separate_decls_in_region (entry, exit, reduction_list, &arg_struct, - &new_arg_struct, &clsn_data); + &new_arg_struct, &clsn_data, loop->target_arch); /* Create the parallel constructs. */ - parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct, - new_arg_struct, n_threads); + parallel_head + = create_parallel_loop (loop, create_loop_fn (loop->target_arch), + arg_struct, new_arg_struct, n_threads); + /* ??? for loop->target_arch != cfun->target_arch, should create another + function so that a small slice of the loop can be run on the main + processor. */ if (htab_elements (reduction_list) > 0) create_call_for_reduction (loop, reduction_list, &clsn_data); @@ -1873,7 +1889,11 @@ parallelize_loops (void) || loop_has_blocks_with_irreducible_flag (loop) /* FIXME: the check for vector phi nodes could be removed. */ || loop_has_vector_phi_nodes (loop) - || !loop_parallel_p (loop, reduction_list, &niter_desc)) + || (loop->target_arch != cfun->target_arch + ? 
!number_of_iterations_exit (loop, single_dom_exit (loop), + &niter_desc, false) + : (flag_tree_parallelize_loops <= 1 + || !loop_parallel_p (loop, reduction_list, &niter_desc)))) continue; changed = true; diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index 33cb130e4ca..6fb1dad0987 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -587,7 +587,15 @@ struct gimple_opt_pass pass_complete_unrolli = static bool gate_tree_parallelize_loops (void) { - return flag_tree_parallelize_loops > 1; + struct loop *loop; + loop_iterator li; + + if (flag_tree_parallelize_loops > 1) + return true; + FOR_EACH_LOOP (li, loop, 0) + if (loop->target_arch != cfun->target_arch) + return true; + return false; } static unsigned diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 706dafe2a26..a455896a6f1 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -2772,7 +2772,10 @@ vectorize_loops (void) loop_vinfo = vect_analyze_loop (loop); if (!loop_vinfo) continue; - if (LOOP_VINFO_VECT_FACTOR (loop_vinfo) > best_factor) + /* FIXME: insert some machine learning heuristic here to + better compare the targets. 
*/ + if (LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) + && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > best_factor) { best_arch = target_arch; best_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); @@ -2781,21 +2784,33 @@ vectorize_loops (void) if (best_arch >= 0 && target_arch != best_arch) { if (loop_vinfo) - destroy_loop_vec_info (loop_vinfo, true); - targetm_pnt = targetm_array[best_arch]; - loop_vinfo = vect_analyze_loop (loop); - target_arch = best_arch; + { + destroy_loop_vec_info (loop_vinfo, true); + loop_vinfo = 0; + } + if (best_arch == (int) cfun->target_arch) + { + targetm_pnt = targetm_array[best_arch]; + loop_vinfo = vect_analyze_loop (loop); + target_arch = best_arch; + } } - targetm_pnt = &this_targetm; + targetm_pnt = targetm_array[cfun->target_arch]; loop->aux = loop_vinfo; - if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) + if (best_arch < 0) continue; - loop_vinfo->target_arch = target_arch; - targetm_pnt = targetm_array[target_arch]; + if (best_arch != (int) cfun->target_arch) + { + /* This loop should be vectorized for another target. Since we + might to have more than one thread on this other target, but + do the reduction on the main processor, leave this to + parallelize_loops. */ + loop->target_arch = best_arch; + continue; + } vect_transform_loop (loop_vinfo); - targetm_pnt = &this_targetm; num_vectorized_loops++; } vect_loop_location = UNKNOWN_LOC; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index fc6a3fa8eca..53ebbea2ed3 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -72,7 +72,7 @@ enum verbosity_levels { REPORT_DR_DETAILS, REPORT_BAD_FORM_LOOPS, REPORT_OUTER_LOOPS, - REPORT_SLP, + REPORT_SLP, /* report Superword Level Parallelism analysis details. */ REPORT_DETAILS, /* New verbosity levels should be added before this one. */ MAX_VERBOSITY_LEVEL @@ -187,9 +187,6 @@ typedef struct _loop_vec_info { /* Is the loop vectorizable? */ bool vectorizable; - /* For what target is this loop to be vectorized? 
targetm_array index. */ - unsigned int target_arch : 8; - /* Unrolling factor */ int vectorization_factor; |