diff options
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r-- | gcc/tree-vectorizer.c | 39 |
1 files changed, 27 insertions, 12 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index a455896a6f1..d3791690f3d 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -520,7 +520,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, update_phi = gsi_stmt (gsi_update); /* Virtual phi; Mark it for renaming. We actually want to call - mar_sym_for_renaming, but since all ssa renaming datastructures + mark_sym_for_renaming, but since all ssa renaming datastructures are going to be freed before we get to call ssa_update, we just record this name for now in a bitmap, and will mark it for renaming later. */ @@ -2788,12 +2788,9 @@ vectorize_loops (void) destroy_loop_vec_info (loop_vinfo, true); loop_vinfo = 0; } - if (best_arch == (int) cfun->target_arch) - { - targetm_pnt = targetm_array[best_arch]; - loop_vinfo = vect_analyze_loop (loop); - target_arch = best_arch; - } + targetm_pnt = targetm_array[best_arch]; + loop_vinfo = vect_analyze_loop (loop); + target_arch = best_arch; } targetm_pnt = targetm_array[cfun->target_arch]; loop->aux = loop_vinfo; @@ -2803,12 +2800,30 @@ vectorize_loops (void) if (best_arch != (int) cfun->target_arch) { - /* This loop should be vectorized for another target. Since we - might to have more than one thread on this other target, but - do the reduction on the main processor, leave this to - parallelize_loops. */ + /* This loop should be vectorized for another target. + We do the vectorization now because, if required, alias checks + and a loop version for the aliased case should run on the main + target (saving code space on the extra target). + Likewise, peeling to obtain the vectorization factor + (vect_do_peeling_for_loop_bound) should be done for the main + target. ??? We might want to extend this peeling to do + a bit of looping to work concurrently with the extra target. + ??? 
This is good for arc-mxp or ppc-spu, but h8300-sh64 would + be better off (at least if power is no object once we activate + the sh64) doing more work on the sh64. + Alignment checks will not be necessary because alignment + mismatch is taken care of during data transfer. + (Might need to modify this aspect if the DMA mechanism for + some target architecture pair has alignment restrictions). + Since we might have more than one thread on this other + target, but do the reduction on the main processor, we leave + the outlining to parallelize_loops. + As parallelize_loops will see the vectorized loop, there should + be no trouble with a thread other than on the main target + getting vector subunits not making up a full vector. + An additional task that vectorization will have to do now + is to translate pointers to use the appropriate ptr_mode. */ loop->target_arch = best_arch; - continue; } vect_transform_loop (loop_vinfo); num_vectorized_loops++; |