aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vectorizer.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r--gcc/tree-vectorizer.c39
1 files changed, 27 insertions, 12 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index a455896a6f1..d3791690f3d 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -520,7 +520,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
update_phi = gsi_stmt (gsi_update);
/* Virtual phi; Mark it for renaming. We actually want to call
- mar_sym_for_renaming, but since all ssa renaming datastructures
+ mark_sym_for_renaming, but since all ssa renaming datastructures
are going to be freed before we get to call ssa_update, we just
record this name for now in a bitmap, and will mark it for
renaming later. */
@@ -2788,12 +2788,9 @@ vectorize_loops (void)
destroy_loop_vec_info (loop_vinfo, true);
loop_vinfo = 0;
}
- if (best_arch == (int) cfun->target_arch)
- {
- targetm_pnt = targetm_array[best_arch];
- loop_vinfo = vect_analyze_loop (loop);
- target_arch = best_arch;
- }
+ targetm_pnt = targetm_array[best_arch];
+ loop_vinfo = vect_analyze_loop (loop);
+ target_arch = best_arch;
}
targetm_pnt = targetm_array[cfun->target_arch];
loop->aux = loop_vinfo;
@@ -2803,12 +2800,30 @@ vectorize_loops (void)
if (best_arch != (int) cfun->target_arch)
{
- /* This loop should be vectorized for another target. Since we
- might to have more than one thread on this other target, but
- do the reduction on the main processor, leave this to
- parallelize_loops. */
+ /* This loop should be vectorized for another target.
+ We do the vectorization now because, if required, alias checks
+ and a loop version for the aliased case should run on the main
+ target (saving code space on the extra target).
+ Likewise, peeling to obtain the vectorization factor
+ (vect_do_peeling_for_loop_bound) should be done for the main
+ target. ??? We might want to extend this peeling to do
+ a bit of looping to work concurrently with the extra target.
+ ??? This is good for arc-mxp or ppc-spu, but h8300-sh64 would
+ be better off (at least if power is no object once we activate
+ the sh64) doing more work on the sh64.
+ Alignment checks will not be necessary because alignment
+ mismatch is taken care of during data transfer.
+ (Might need to modify this aspect if the DMA mechanism for
+ some target architecture pair has alignment restrictions).
+ Since we might have more than one thread on this other
+ target, but do the reduction on the main processor, we leave
+ the outlining to parallelize_loops.
+ As parallelize_loops will see the vectorized loop, there should
+ be no trouble with a thread other than on the main target
+ getting vector subunits not making up a full vector.
+ An additional task that the vectorization will have to do now
+ is to translate pointers to use the appropriate ptr_mode. */
loop->target_arch = best_arch;
- continue;
}
vect_transform_loop (loop_vinfo);
num_vectorized_loops++;