aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJ"orn Rennecke <joern.rennecke@st.com>2009-06-01 19:54:52 +0000
committerJ"orn Rennecke <joern.rennecke@st.com>2009-06-01 19:54:52 +0000
commit2a9ff8bf117b1d9cdbefb32acaafa46223de7850 (patch)
tree629c2751da45e24969e054ca82164c031bd60b5f
parent4f362fecae69eb1b89c45ba4c16aa56d502367d4 (diff)
* target.h (struct gcc_target): Add member ptr_mode.
* target-def.h (TARGET_INITIALIZER): Add initializer for ptr_mode. * tree-ssa-loop.c (gate_tree_parallelize_loops): Also enable if a loop is to be executed on another target. * tree-parloops.c (separate_decls_in_region): New parameter new_target. Changed caller. (create_loop_fn): New parameter target_arch. Changed caller. (parallelize_loops): Also enable for loops that are to be executed on another target. * tree-vectorizer.c (vectorize_loops): If a loop is to be vectorized for a different target, only set the target_arch field in loop and ignore the loop for now. Use cfun->target_arch to restore target. * tree-vectorizer.h (struct _loop_vec_info): Remove target_arch member. * cfgloop.h (struct loop): Add target_arch member. git-svn-id: https://gcc.gnu.org/svn/gcc/branches/arc-milepost-branch@148068 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog.multi-target17
-rw-r--r--gcc/cfgloop.h2
-rw-r--r--gcc/target-def.h1
-rw-r--r--gcc/target.h3
-rw-r--r--gcc/tree-parloops.c34
-rw-r--r--gcc/tree-ssa-loop.c10
-rw-r--r--gcc/tree-vectorizer.c35
-rw-r--r--gcc/tree-vectorizer.h5
8 files changed, 85 insertions, 22 deletions
diff --git a/gcc/ChangeLog.multi-target b/gcc/ChangeLog.multi-target
index 1a31c57e130..8b297811280 100644
--- a/gcc/ChangeLog.multi-target
+++ b/gcc/ChangeLog.multi-target
@@ -1,3 +1,20 @@
+2009-06-01 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * target.h (struct gcc_target): Add member ptr_mode.
+ * target-def.h (TARGET_INITIALIZER): Add initializer for ptr_mode.
+ * tree-ssa-loop.c (gate_tree_parallelize_loops): Also enable if a
+ loop is to be executed on another target.
+ * tree-parloops.c (separate_decls_in_region): New parameter new_target.
+ Changed caller.
+ (create_loop_fn): New parameter target_arch. Changed caller.
+ (parallelize_loops): Also enable for loops that are to be executed
+ on another target.
+ * tree-vectorizer.c (vectorize_loops): If a loop is to be vectorized
+ for a different target, only set the target_arch field in loop and
+ ignore the loop for now. Use cfun->target_arch to restore target.
+ * tree-vectorizer.h (struct _loop_vec_info): Remove target_arch member.
+ * cfgloop.h (struct loop): Add target_arch member.
+
2009-05-23 J"orn Rennecke <joern.rennecke@arc.com>
* doc/tm.texi: Adjust struct machine_function documentation.
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 73aae7d6cf9..3f597de7d1e 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -152,6 +152,8 @@ struct loop GTY ((chain_next ("%h.next")))
bool any_upper_bound;
bool any_estimate;
+ /* For what target is this loop to be vectorized? targetm_array index. */
+ unsigned target_arch : 8;
/* An integer estimation of the number of iterations. Estimate_state
describes what is the state of the estimation. */
diff --git a/gcc/target-def.h b/gcc/target-def.h
index 374f30fedb1..d5579c87865 100644
--- a/gcc/target-def.h
+++ b/gcc/target-def.h
@@ -846,6 +846,7 @@
{ \
TARGET_NAME, \
TARGET_NUM, \
+ &ptr_mode, \
TARGET_ASM_OUT, \
TARGET_SCHED, \
TARGET_VECTORIZE, \
diff --git a/gcc/target.h b/gcc/target.h
index 2ae2c7c420c..50f3eb67a09 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -702,6 +702,9 @@ struct gcc_target
Initialized with the Makefile-generated TARGET_NUM. */
int target_arch;
+ /* Points to the ptr_mode variable for this target. */
+ enum machine_mode *ptr_mode;
+
/* Functions that output assembler for the target. */
struct asm_out asm_out;
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 4e9b102973a..6c24a03bac9 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1156,7 +1156,7 @@ create_loads_and_stores_for_name (void **slot, void *data)
static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
tree *arg_struct, tree *new_arg_struct,
- struct clsn_data *ld_st_data)
+ struct clsn_data *ld_st_data, unsigned new_target)
{
basic_block bb1 = split_edge (entry);
@@ -1220,7 +1220,10 @@ separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
/* Create the loads and stores. */
*arg_struct = create_tmp_var (type, ".paral_data_store");
add_referenced_var (*arg_struct);
- nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
+ nvar = create_tmp_var (build_pointer_type_for_mode
+ (type, *targetm_array[new_target]->ptr_mode,
+ false),
+ ".paral_data_load");
add_referenced_var (nvar);
*new_arg_struct = make_ssa_name (nvar, NULL);
@@ -1270,7 +1273,7 @@ parallelized_function_p (tree fn)
a parallelized loop. */
static tree
-create_loop_fn (void)
+create_loop_fn (unsigned int target_arch)
{
char buf[100];
char *tname;
@@ -1312,6 +1315,15 @@ create_loop_fn (void)
TREE_USED (t) = 1;
DECL_ARGUMENTS (decl) = t;
+ if (target_arch)
+ {
+ const char *target_name = targetm_array[target_arch]->name;
+
+ tree value = build_string (strlen (target_name), target_name);
+ decl_attributes (&decl, build_tree_list (get_identifier ("target_arch"),
+ build_tree_list (NULL, value)),
+ 0);
+ }
allocate_struct_function (decl, false);
/* The call to allocate_struct_function clobbers CFUN, so we need to restore
@@ -1791,11 +1803,15 @@ gen_parallel_loop (struct loop *loop, htab_t reduction_list,
/* In the old loop, move all variables non-local to the loop to a structure
and back, and create separate decls for the variables used in loop. */
separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
- &new_arg_struct, &clsn_data);
+ &new_arg_struct, &clsn_data, loop->target_arch);
/* Create the parallel constructs. */
- parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
- new_arg_struct, n_threads);
+ parallel_head
+ = create_parallel_loop (loop, create_loop_fn (loop->target_arch),
+ arg_struct, new_arg_struct, n_threads);
+ /* ??? for loop->target_arch != cfun->target_arch, should create another
+ function so that a small slice of the loop can be run on the main
+ processor. */
if (htab_elements (reduction_list) > 0)
create_call_for_reduction (loop, reduction_list, &clsn_data);
@@ -1873,7 +1889,11 @@ parallelize_loops (void)
|| loop_has_blocks_with_irreducible_flag (loop)
/* FIXME: the check for vector phi nodes could be removed. */
|| loop_has_vector_phi_nodes (loop)
- || !loop_parallel_p (loop, reduction_list, &niter_desc))
+ || (loop->target_arch != cfun->target_arch
+ ? !number_of_iterations_exit (loop, single_dom_exit (loop),
+ &niter_desc, false)
+ : (flag_tree_parallelize_loops <= 1
+ || !loop_parallel_p (loop, reduction_list, &niter_desc))))
continue;
changed = true;
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 33cb130e4ca..6fb1dad0987 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -587,7 +587,15 @@ struct gimple_opt_pass pass_complete_unrolli =
static bool
gate_tree_parallelize_loops (void)
{
- return flag_tree_parallelize_loops > 1;
+ struct loop *loop;
+ loop_iterator li;
+
+ if (flag_tree_parallelize_loops > 1)
+ return true;
+ FOR_EACH_LOOP (li, loop, 0)
+ if (loop->target_arch != cfun->target_arch)
+ return true;
+ return false;
}
static unsigned
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 706dafe2a26..a455896a6f1 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -2772,7 +2772,10 @@ vectorize_loops (void)
loop_vinfo = vect_analyze_loop (loop);
if (!loop_vinfo)
continue;
- if (LOOP_VINFO_VECT_FACTOR (loop_vinfo) > best_factor)
+ /* FIXME: insert some machine learning heuristic here to
+ better compare the targets. */
+ if (LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)
+ && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > best_factor)
{
best_arch = target_arch;
best_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -2781,21 +2784,33 @@ vectorize_loops (void)
if (best_arch >= 0 && target_arch != best_arch)
{
if (loop_vinfo)
- destroy_loop_vec_info (loop_vinfo, true);
- targetm_pnt = targetm_array[best_arch];
- loop_vinfo = vect_analyze_loop (loop);
- target_arch = best_arch;
+ {
+ destroy_loop_vec_info (loop_vinfo, true);
+ loop_vinfo = 0;
+ }
+ if (best_arch == (int) cfun->target_arch)
+ {
+ targetm_pnt = targetm_array[best_arch];
+ loop_vinfo = vect_analyze_loop (loop);
+ target_arch = best_arch;
+ }
}
- targetm_pnt = &this_targetm;
+ targetm_pnt = targetm_array[cfun->target_arch];
loop->aux = loop_vinfo;
- if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
+ if (best_arch < 0)
continue;
- loop_vinfo->target_arch = target_arch;
- targetm_pnt = targetm_array[target_arch];
+ if (best_arch != (int) cfun->target_arch)
+ {
+ /* This loop should be vectorized for another target. Since we
+ might want to have more than one thread on this other target, but
+ do the reduction on the main processor, leave this to
+ parallelize_loops. */
+ loop->target_arch = best_arch;
+ continue;
+ }
vect_transform_loop (loop_vinfo);
- targetm_pnt = &this_targetm;
num_vectorized_loops++;
}
vect_loop_location = UNKNOWN_LOC;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index fc6a3fa8eca..53ebbea2ed3 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -72,7 +72,7 @@ enum verbosity_levels {
REPORT_DR_DETAILS,
REPORT_BAD_FORM_LOOPS,
REPORT_OUTER_LOOPS,
- REPORT_SLP,
+ REPORT_SLP, /* report Superword Level Parallelism analysis details. */
REPORT_DETAILS,
/* New verbosity levels should be added before this one. */
MAX_VERBOSITY_LEVEL
@@ -187,9 +187,6 @@ typedef struct _loop_vec_info {
/* Is the loop vectorizable? */
bool vectorizable;
- /* For what target is this loop to be vectorized? targetm_array index. */
- unsigned int target_arch : 8;
-
/* Unrolling factor */
int vectorization_factor;