From 713e0f0fbfdef0ff16984a215dcf6242447e7dab Mon Sep 17 00:00:00 2001 From: Razya Ladelsky Date: Thu, 22 Oct 2009 14:43:40 +0000 Subject: 2009-10-22 Razya Ladelsky * cfgloopmanip.c (duplicate_subloops): Export. * tree-parloops.c (loop_parallel_p): Dump if loop is innermost. (transform_to_exit_first_loop): Duplicate bbs starting from header up to loop->latch instead of exit->src. Initialize control variable to the correct number of iterations. (gather_scalar_reductions): Do not register double reductions. (parallelize_loops): Dump which loop is tested. Indicate whether the parallelized loop is inner or not. Remove the innermost-loop requirement. * cfgloop.h (duplicate_subloops): Export. * tree-cfg.c (add_phi_args_after_redirect): New function. (gimple_duplicate_sese_tail): Remove the no-subloops constraint. Call duplicate_subloops. Update number of iterations at the exit condition. Don't redirect nexits always to the loop exit. Redirect copied edges from latch to the loop exit. * testsuite/libgomp.graphite/force-parallel-2.c: Adjust scan. * testsuite/gcc.dg/autopar/outer-1.c: New testcase. * testsuite/gcc.dg/autopar/outer-2.c: New testcase. * testsuite/gcc.dg/autopar/outer-3.c: New testcase. * testsuite/gcc.dg/autopar/outer-4.c: New testcase. * testsuite/gcc.dg/autopar/outer-5.c: New testcase. * testsuite/gcc.dg/autopar/outer-6.c: New testcase. git-svn-id: https://gcc.gnu.org/svn/gcc/trunk@153457 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 26 ++++ gcc/cfgloop.h | 1 + gcc/cfgloopmanip.c | 3 +- gcc/testsuite/gcc.dg/autopar/outer-1.c | 33 +++++ gcc/testsuite/gcc.dg/autopar/outer-2.c | 33 +++++ gcc/testsuite/gcc.dg/autopar/outer-3.c | 33 +++++ gcc/testsuite/gcc.dg/autopar/outer-4.c | 38 ++++++ gcc/testsuite/gcc.dg/autopar/outer-5.c | 52 ++++++++ gcc/testsuite/gcc.dg/autopar/outer-6.c | 50 +++++++ gcc/tree-cfg.c | 146 ++++++++++++++++++--- gcc/tree-parloops.c | 66 +++++++--- .../testsuite/libgomp.graphite/force-parallel-2.c | 2 +- 12 files changed, 449 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/autopar/outer-1.c create mode 100644 gcc/testsuite/gcc.dg/autopar/outer-2.c create mode 100644 gcc/testsuite/gcc.dg/autopar/outer-3.c create mode 100644 gcc/testsuite/gcc.dg/autopar/outer-4.c create mode 100644 gcc/testsuite/gcc.dg/autopar/outer-5.c create mode 100644 gcc/testsuite/gcc.dg/autopar/outer-6.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d93585473e1..54890010683 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2009-10-22 Razya Ladelsky + + * cfgloopmanip.c (duplicate_subloops): Export. + * tree-parloops.c (loop_parallel_p): Dump if loop is innermost. + (transform_to_exit_first_loop): Duplicate bbs starting from + header up to loop->latch instead of exit->src. + Initialize control variable to the correct number of iterations. + (gather_scalar_reductions): Do not register double reductions. + (parallelize_loops): Dump which loop is tested. + Indicate whether the parallelized loop is inner or not. + Remove the innermost-loop requirement. + * cfgloop.h (duplicate_subloops): Export. + * tree-cfg.c (add_phi_args_after_redirect): New function. + (gimple_duplicate_sese_tail): Remove the no-subloops constraint. + Call duplicate_subloops. + Update number of iterations at the exit condition. + Don't redirect nexits always to the loop exit. + Redirect copied edges from latch to the loop exit. + * testsuite/libgomp.graphite/force-parallel-2.c: Adjust scan. + * testsuite/gcc.dg/autopar/outer-1.c: New testcase. + * testsuite/gcc.dg/autopar/outer-2.c: New testcase. + * testsuite/gcc.dg/autopar/outer-3.c: New testcase. + * testsuite/gcc.dg/autopar/outer-4.c: New testcase. + * testsuite/gcc.dg/autopar/outer-5.c: New testcase. + * testsuite/gcc.dg/autopar/outer-6.c: New testcase. + 2009-10-22 Jan Hubicka * ipa-cp.c (ipcp_read_summary): Remove now invalid FIXME and diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index d6c26bf0ba0..4abdf8bfe15 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -288,6 +288,7 @@ extern edge create_empty_if_region_on_edge (edge, tree); extern struct loop *create_empty_loop_on_edge (edge, tree, tree, tree, tree, tree *, tree *, struct loop *); extern struct loop * duplicate_loop (struct loop *, struct loop *); +extern void duplicate_subloops (struct loop *, struct loop *); extern bool duplicate_loop_to_header_edge (struct loop *, edge, unsigned, sbitmap, edge, VEC (edge, heap) **, int); diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index a357a2fd77a..6f24415a72d 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -32,7 +32,6 @@ along with GCC; see the file COPYING3. If not see #include "output.h" #include "tree-flow.h" -static void duplicate_subloops (struct loop *, struct loop *); static void copy_loops_to (struct loop **, int, struct loop *); static void loop_redirect_edge (edge, basic_block); @@ -886,7 +885,7 @@ duplicate_loop (struct loop *loop, struct loop *target) /* Copies structure of subloops of LOOP into TARGET loop, placing newly created loops into loop tree. */ -static void +void duplicate_subloops (struct loop *loop, struct loop *target) { struct loop *aloop, *cloop; diff --git a/gcc/testsuite/gcc.dg/autopar/outer-1.c b/gcc/testsuite/gcc.dg/autopar/outer-1.c new file mode 100644 index 00000000000..913d390f684 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/outer-1.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +void parloop (int N) +{ + int i, j; + int x[10000][10000]; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + x[i][j] = i + j + 3; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + if (x[i][j] != i + j + 3) + abort (); +} + +int main(void) +{ + parloop(10000); + + return 0; +} + + +/* Check that outer loop is parallelized. */ +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "parloops" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/outer-2.c b/gcc/testsuite/gcc.dg/autopar/outer-2.c new file mode 100644 index 00000000000..351b3499e89 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/outer-2.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +void parloop (int N) +{ + int i, j,ii; + int x[400][10][400]; + +for (ii = 0; ii < N; ii++) + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + x[i][j][ii] = ii+i + j + 3; + +for (ii = 0; ii < N; ii++) + for (i = 0; i < N;i++) + for (j = 0; j < N; j++) + if (x[i][j][ii] != ii+i + j + 3) + abort (); +} + +int main(void) +{ + parloop(400); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "parloops" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/outer-3.c b/gcc/testsuite/gcc.dg/autopar/outer-3.c new file mode 100644 index 00000000000..2f1033f9830 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/outer-3.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +void parloop (int N) +{ + int i, j; + int x[500][500]; + + for (i = 0; i < N; i++) + for (j = 0; j < i; j++) + x[i][j] = i + j + 3; + + for (i = 0; i < N; i++) + for (j = 0; j < i; j++) + if (x[i][j] != i + j + 3) + abort (); +} + +int main(void) +{ + parloop(500); + + return 0; +} + + +/* Check that outer loop is parallelized. */ +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "parloops" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/outer-4.c b/gcc/testsuite/gcc.dg/autopar/outer-4.c new file mode 100644 index 00000000000..d6ad687d408 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/outer-4.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +int g_sum=0; +int x[500][500]; + +__attribute__((noinline)) +void parloop (int N) +{ + int i, j; + int sum; + + /* Double reduction is currently not supported, outer loop is not + parallelized. Inner reduction is detected, inner loop is + parallelized. */ + sum = 0; + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + sum += x[i][j]; + + g_sum = sum; +} + +int main(void) +{ + parloop(500); + + return 0; +} + + +/* Check that outer loop is parallelized. */ +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 0 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 1 "parloops" } } */ +/* { dg-final { cleanup-tree-dump "parloops" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/outer-5.c b/gcc/testsuite/gcc.dg/autopar/outer-5.c new file mode 100644 index 00000000000..e172b3f9332 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/outer-5.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +int x[500][500]; +int y[500]; +int g_sum=0; + +__attribute__((noinline)) +void init (int i, int j) +{ + x[i][j]=1; +} + +__attribute__((noinline)) +void parloop (int N) +{ + int i, j; + int sum; + + /* Inner cycle is currently not supported, outer loop is not + parallelized. Inner reduction is detected, inner loop is + parallelized. */ + for (i = 0; i < N; i++) + { + sum = 0; + for (j = 0; j < N; j++) + sum += x[i][j]; + y[i]=sum; + } + g_sum = sum; +} + +int main(void) +{ + int i,j; + for (i = 0; i < 500; i++) + for (j = 0; j < 500; j++) + init(i, j); + + parloop(500); + + return 0; +} + + +/* Check that outer loop is parallelized. */ +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 0 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 1 "parloops" } } */ +/* { dg-final { cleanup-tree-dump "parloops" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/outer-6.c b/gcc/testsuite/gcc.dg/autopar/outer-6.c new file mode 100644 index 00000000000..67b89d05982 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/outer-6.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +void abort (void); + +int x[500][500]; +int y[500]; +int g_sum=0; + +__attribute__((noinline)) +void init (int i, int j) +{ + x[i][j]=1; +} + +__attribute__((noinline)) +void parloop (int N) +{ + int i, j; + int sum; + + /* Outer loop reduction, outerloop is parallelized. */ + sum=0; + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + y[i]=x[i][j]; + sum += y[i]; + } + g_sum = sum; +} + +int main(void) +{ + int i,j; + for (i = 0; i < 500; i++) + for (j = 0; j < 500; j++) + init(i, j); + + parloop(500); + + return 0; +} + + +/* Check that outer loop is parallelized. */ +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */ +/* { dg-final { cleanup-tree-dump "parloops" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 1c820a3e3c5..7f31868ea7b 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4850,6 +4850,31 @@ gimple_duplicate_bb (basic_block bb) return new_bb; } +/* Add phi arguments to the phi nodes in E_COPY->dest according to + the phi arguments coming from the equivalent edge at + the phi nodes of DEST. */ + +static void +add_phi_args_after_redirect (edge e_copy, edge orig_e) +{ + gimple_stmt_iterator psi, psi_copy; + gimple phi, phi_copy; + tree def; + + for (psi = gsi_start_phis (orig_e->dest), + psi_copy = gsi_start_phis (e_copy->dest); + !gsi_end_p (psi); + gsi_next (&psi), gsi_next (&psi_copy)) + { + + phi = gsi_stmt (psi); + phi_copy = gsi_stmt (psi_copy); + def = PHI_ARG_DEF_FROM_EDGE (phi, orig_e); + add_phi_arg (phi_copy, def, e_copy, + gimple_phi_arg_location_from_edge (phi, orig_e)); + } +} + /* Adds phi node arguments for edge E_COPY after basic block duplication. */ static void @@ -5131,9 +5156,14 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU int total_freq = 0, exit_freq = 0; gcov_type total_count = 0, exit_count = 0; edge exits[2], nexits[2], e; - gimple_stmt_iterator gsi; + gimple_stmt_iterator gsi,gsi1; gimple cond_stmt; - edge sorig, snew; + edge sorig, snew, orig_e; + basic_block exit_bb; + edge_iterator ei; + VEC (edge, heap) *redirect_edges; + basic_block iters_bb, orig_src; + tree new_rhs; gcc_assert (EDGE_COUNT (exit->src->succs) == 2); exits[0] = exit; @@ -5149,17 +5179,13 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU it will work, but the resulting code will not be correct. */ for (i = 0; i < n_region; i++) { - /* We do not handle subloops, i.e. all the blocks must belong to the - same loop. */ - if (region[i]->loop_father != orig_loop) - return false; - if (region[i] == orig_loop->latch) return false; } initialize_original_copy_tables (); set_loop_copy (orig_loop, loop); + duplicate_subloops (orig_loop, loop); if (!region_copy) { @@ -5225,8 +5251,36 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU cond_stmt = last_stmt (exit->src); gcc_assert (gimple_code (cond_stmt) == GIMPLE_COND); cond_stmt = gimple_copy (cond_stmt); + + /* If the block consisting of the exit condition has the latch as + successor, then the body of the loop is executed before + the exit consition is tested. In such case, moving the + condition to the entry, causes that the loop will iterate + one less iteration (which is the wanted outcome, since we + peel out the last iteration). If the body is executed after + the condition, moving the condition to the entry requires + decrementing one iteration. */ + if (exits[1]->dest == orig_loop->latch) + new_rhs = gimple_cond_rhs (cond_stmt); + else + { + new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)), + gimple_cond_rhs (cond_stmt), + build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1)); + + if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME) + { + iters_bb = gimple_bb (SSA_NAME_DEF_STMT (gimple_cond_rhs (cond_stmt))); + for (gsi1 = gsi_start_bb (iters_bb); !gsi_end_p (gsi1); gsi_next (&gsi1)) + if (gsi_stmt (gsi1)==SSA_NAME_DEF_STMT (gimple_cond_rhs (cond_stmt))) + break; + + new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true, + NULL_TREE,false,GSI_CONTINUE_LINKING); + } + } + gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs)); gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt))); - gimple_cond_set_rhs (cond_stmt, unshare_expr (gimple_cond_rhs (cond_stmt))); gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); sorig = single_succ_edge (switch_bb); @@ -5238,25 +5292,87 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU /* Add the PHI node arguments. */ add_phi_args_after_copy (region_copy, n_region, snew); - + /* Get rid of now superfluous conditions and associated edges (and phi node arguments). */ + exit_bb = exit->dest; + e = redirect_edge_and_branch (exits[0], exits[1]->dest); PENDING_STMT (e) = NULL; - e = redirect_edge_and_branch (nexits[1], nexits[0]->dest); - PENDING_STMT (e) = NULL; - + + /* If the block consisting of the exit condition has the latch as + successor, then the body of the loop is executed before + the exit consition is tested. + + { body } + { cond } (exit[0]) -> { latch } + | + V (exit[1]) + + { exit_bb } + + + In such case, the equivalent copied edge nexits[1] + (for the peeled iteration) needs to be redirected to exit_bb. + + Otherwise, + + { cond } (exit[0]) -> { body } + | + V (exit[1]) + + { exit_bb } + + + exit[0] is pointing to the body of the loop, + and the equivalent nexits[0] needs to be redirected to + the copied body (of the peeled iteration). */ + + if (exits[1]->dest == orig_loop->latch) + e = redirect_edge_and_branch (nexits[1], nexits[0]->dest); + else + e = redirect_edge_and_branch (nexits[0], nexits[1]->dest); + PENDING_STMT (e) = NULL; + + redirect_edges = VEC_alloc (edge, heap, 10); + + for (i = 0; i < n_region; i++) + region_copy[i]->flags |= BB_DUPLICATED; + + /* Iterate all incoming edges to latch. All those coming from + copied bbs will be redicrecred to exit_bb. */ + FOR_EACH_EDGE (e, ei, orig_loop->latch->preds) + { + if (e->src->flags & BB_DUPLICATED) + VEC_safe_push (edge, heap, redirect_edges, e); + } + + for (i = 0; i < n_region; i++) + region_copy[i]->flags &= ~BB_DUPLICATED; + + for (i = 0; VEC_iterate (edge, redirect_edges, i, e); ++i) + { + e = redirect_edge_and_branch (e, exit_bb); + PENDING_STMT (e) = NULL; + orig_src = get_bb_original (e->src); + orig_e = find_edge (orig_src, orig_loop->latch); + add_phi_args_after_redirect (e, orig_e); + } + + VEC_free (edge, heap, redirect_edges); + + /* Anything that is outside of the region, but was dominated by something inside needs to update dominance info. */ iterate_fix_dominators (CDI_DOMINATORS, doms, false); VEC_free (basic_block, heap, doms); - + /* Update the SSA web. */ update_ssa (TODO_update_ssa); - + if (free_region_copy) free (region_copy); - + free_original_copy_tables (); return true; } diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 681e04612e4..61e372a54c3 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -255,7 +255,13 @@ loop_parallel_p (struct loop *loop) bool ret = false; if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "\nConsidering loop %d\n", loop->num); + { + fprintf (dump_file, "Considering loop %d\n", loop->num); + if (!loop->inner) + fprintf (dump_file, "loop is innermost\n"); + else + fprintf (dump_file, "loop NOT innermost\n"); + } /* Check for problems with dependences. If the loop can be reversed, the iterations are independent. */ @@ -1289,8 +1295,9 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit bool ok; edge exit = single_dom_exit (loop), hpred; tree control, control_name, res, t; - gimple phi, nphi, cond_stmt, stmt; + gimple phi, nphi, cond_stmt, stmt, cond_nit; gimple_stmt_iterator gsi; + tree nit_1; split_block_after_labels (loop->header); orig_header = single_succ (loop->header); @@ -1308,7 +1315,6 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit res = PHI_RESULT (phi); t = make_ssa_name (SSA_NAME_VAR (res), phi); SET_PHI_RESULT (phi, t); - nphi = create_phi_node (res, orig_header); SSA_NAME_DEF_STMT (res) = nphi; add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION); @@ -1320,10 +1326,11 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit control = t; } } - bbs = get_loop_body_in_dom_order (loop); - for (n = 0; bbs[n] != exit->src; n++) + + for (n = 0; bbs[n] != loop->latch; n++) continue; + n--; nbbs = XNEWVEC (basic_block, n); ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit, bbs + 1, n, nbbs); @@ -1358,7 +1365,6 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit struct reduction_info *red; tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit); - red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val)); if (red) { @@ -1374,12 +1380,15 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit } gcc_assert (control_name != NULL_TREE); - /* Initialize the control variable to NIT. */ + /* Initialize the control variable to number of iterations + according to the rhs of the exit condition. */ gsi = gsi_after_labels (ex_bb); - nit = force_gimple_operand_gsi (&gsi, - fold_convert (TREE_TYPE (control_name), nit), + cond_nit = last_stmt (exit->src); + nit_1 = gimple_cond_rhs (cond_nit); + nit_1 = force_gimple_operand_gsi (&gsi, + fold_convert (TREE_TYPE (control_name), nit_1), false, NULL_TREE, false, GSI_SAME_STMT); - stmt = gimple_build_assign (control_name, nit); + stmt = gimple_build_assign (control_name, nit_1); gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); SSA_NAME_DEF_STMT (control_name) = stmt; } @@ -1740,7 +1749,7 @@ gather_scalar_reductions (loop_p loop, htab_t reduction_list) && simple_loop_info) { gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc); - if (reduc_stmt) + if (reduc_stmt && !double_reduc) build_new_reduction (reduction_list, reduc_stmt, phi); } } @@ -1890,15 +1899,32 @@ parallelize_loops (void) FOR_EACH_LOOP (li, loop, 0) { htab_empty (reduction_list); - - /* If we use autopar in graphite pass, we use it's marked dependency + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Trying loop %d as candidate\n",loop->num); + if (loop->inner) + fprintf (dump_file, "loop %d is not innermost\n",loop->num); + else + fprintf (dump_file, "loop %d is innermost\n",loop->num); + } + + /* If we use autopar in graphite pass, we use its marked dependency checking results. */ if (flag_loop_parallelize_all && !loop->can_be_parallel) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "loop is not parallel according to graphite\n"); continue; + } - /* FIXME: Only consider innermost loops with just one exit. */ - if (loop->inner || !single_dom_exit (loop)) + if (!single_dom_exit (loop)) + { + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "loop is !single_dom_exit\n"); + continue; + } if (/* And of course, the loop must be parallelizable. */ !can_duplicate_loop_p (loop) @@ -1915,7 +1941,7 @@ parallelize_loops (void) /* Do not bother with loops in cold areas. */ || optimize_loop_nest_for_size_p (loop))) continue; - + if (!try_get_loop_niter (loop, &niter_desc)) continue; @@ -1926,6 +1952,14 @@ parallelize_loops (void) continue; changed = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "parallelizing "); + if (loop->inner) + fprintf (dump_file, "outer loop\n"); + else + fprintf (dump_file, "inner loop\n"); + } gen_parallel_loop (loop, reduction_list, n_threads, &niter_desc); verify_flow_info (); diff --git a/libgomp/testsuite/libgomp.graphite/force-parallel-2.c b/libgomp/testsuite/libgomp.graphite/force-parallel-2.c index a198fed658a..7f2c823f73e 100644 --- a/libgomp/testsuite/libgomp.graphite/force-parallel-2.c +++ b/libgomp/testsuite/libgomp.graphite/force-parallel-2.c @@ -23,7 +23,7 @@ int main(void) } /* Check that parallel code generation part make the right answer. */ -/* { dg-final { scan-tree-dump-times "2 loops carried no dependency" 1 "graphite" } } */ +/* { dg-final { scan-tree-dump-times "2 loops carried no dependency" 2 "graphite" } } */ /* { dg-final { cleanup-tree-dump "graphite" } } */ /* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */ /* { dg-final { cleanup-tree-dump "parloops" } } */ -- cgit v1.2.3