mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-25 20:03:58 +08:00
cfgloopmanip.c (duplicate_subloops): Export.
2009-10-22 Razya Ladelsky <razya@il.ibm.com> * cfgloopmanip.c (duplicate_subloops): Export. * tree-parloops.c (loop_parallel_p): Dump if loop is innermost. (transform_to_exit_first_loop): Duplicate bbs starting from header up to loop->latch instead of exit->src. Initialize control variable to the correct number of iterations. (gather_scalar_reductions): Do not register double reductions. (parallelize_loops): Dump which loop is tested. Indicate whether the parallelized loop is inner or not. Remove the innermost-loop requirement. * cfgloop.h (duplicate_subloops): Export. * tree-cfg.c (add_phi_args_after_redirect): New function. (gimple_duplicate_sese_tail): Remove the no-subloops constraint. Call duplicate_subloops. Update number of iterations at the exit condition. Don't redirect nexits always to the loop exit. Redirect copied edges from latch to the loop exit. * testsuite/libgomp.graphite/force-parallel-2.c: Adjust scan. * testsuite/gcc.dg/autopar/outer-1.c: New testcase. * testsuite/gcc.dg/autopar/outer-2.c: New testcase. * testsuite/gcc.dg/autopar/outer-3.c: New testcase. * testsuite/gcc.dg/autopar/outer-4.c: New testcase. * testsuite/gcc.dg/autopar/outer-5.c: New testcase. * testsuite/gcc.dg/autopar/outer-6.c: New testcase. From-SVN: r153457
This commit is contained in:
parent
0d4958d022
commit
487102294c
@ -1,3 +1,29 @@
|
|||||||
|
2009-10-22 Razya Ladelsky <razya@il.ibm.com>
|
||||||
|
|
||||||
|
* cfgloopmanip.c (duplicate_subloops): Export.
|
||||||
|
* tree-parloops.c (loop_parallel_p): Dump if loop is innermost.
|
||||||
|
(transform_to_exit_first_loop): Duplicate bbs starting from
|
||||||
|
header up to loop->latch instead of exit->src.
|
||||||
|
Initialize control variable to the correct number of iterations.
|
||||||
|
(gather_scalar_reductions): Do not register double reductions.
|
||||||
|
(parallelize_loops): Dump which loop is tested.
|
||||||
|
Indicate whether the parallelized loop is inner or not.
|
||||||
|
Remove the innermost-loop requirement.
|
||||||
|
* cfgloop.h (duplicate_subloops): Export.
|
||||||
|
* tree-cfg.c (add_phi_args_after_redirect): New function.
|
||||||
|
(gimple_duplicate_sese_tail): Remove the no-subloops constraint.
|
||||||
|
Call duplicate_subloops.
|
||||||
|
Update number of iterations at the exit condition.
|
||||||
|
Don't redirect nexits always to the loop exit.
|
||||||
|
Redirect copied edges from latch to the loop exit.
|
||||||
|
* testsuite/libgomp.graphite/force-parallel-2.c: Adjust scan.
|
||||||
|
* testsuite/gcc.dg/autopar/outer-1.c: New testcase.
|
||||||
|
* testsuite/gcc.dg/autopar/outer-2.c: New testcase.
|
||||||
|
* testsuite/gcc.dg/autopar/outer-3.c: New testcase.
|
||||||
|
* testsuite/gcc.dg/autopar/outer-4.c: New testcase.
|
||||||
|
* testsuite/gcc.dg/autopar/outer-5.c: New testcase.
|
||||||
|
* testsuite/gcc.dg/autopar/outer-6.c: New testcase.
|
||||||
|
|
||||||
2009-10-22 Jan Hubicka <jh@suse.cz>
|
2009-10-22 Jan Hubicka <jh@suse.cz>
|
||||||
|
|
||||||
* ipa-cp.c (ipcp_read_summary): Remove now invalid FIXME and
|
* ipa-cp.c (ipcp_read_summary): Remove now invalid FIXME and
|
||||||
|
@ -288,6 +288,7 @@ extern edge create_empty_if_region_on_edge (edge, tree);
|
|||||||
extern struct loop *create_empty_loop_on_edge (edge, tree, tree, tree, tree,
|
extern struct loop *create_empty_loop_on_edge (edge, tree, tree, tree, tree,
|
||||||
tree *, tree *, struct loop *);
|
tree *, tree *, struct loop *);
|
||||||
extern struct loop * duplicate_loop (struct loop *, struct loop *);
|
extern struct loop * duplicate_loop (struct loop *, struct loop *);
|
||||||
|
extern void duplicate_subloops (struct loop *, struct loop *);
|
||||||
extern bool duplicate_loop_to_header_edge (struct loop *, edge,
|
extern bool duplicate_loop_to_header_edge (struct loop *, edge,
|
||||||
unsigned, sbitmap, edge,
|
unsigned, sbitmap, edge,
|
||||||
VEC (edge, heap) **, int);
|
VEC (edge, heap) **, int);
|
||||||
|
@ -32,7 +32,6 @@ along with GCC; see the file COPYING3. If not see
|
|||||||
#include "output.h"
|
#include "output.h"
|
||||||
#include "tree-flow.h"
|
#include "tree-flow.h"
|
||||||
|
|
||||||
static void duplicate_subloops (struct loop *, struct loop *);
|
|
||||||
static void copy_loops_to (struct loop **, int,
|
static void copy_loops_to (struct loop **, int,
|
||||||
struct loop *);
|
struct loop *);
|
||||||
static void loop_redirect_edge (edge, basic_block);
|
static void loop_redirect_edge (edge, basic_block);
|
||||||
@ -886,7 +885,7 @@ duplicate_loop (struct loop *loop, struct loop *target)
|
|||||||
|
|
||||||
/* Copies structure of subloops of LOOP into TARGET loop, placing
|
/* Copies structure of subloops of LOOP into TARGET loop, placing
|
||||||
newly created loops into loop tree. */
|
newly created loops into loop tree. */
|
||||||
static void
|
void
|
||||||
duplicate_subloops (struct loop *loop, struct loop *target)
|
duplicate_subloops (struct loop *loop, struct loop *target)
|
||||||
{
|
{
|
||||||
struct loop *aloop, *cloop;
|
struct loop *aloop, *cloop;
|
||||||
|
33
gcc/testsuite/gcc.dg/autopar/outer-1.c
Normal file
33
gcc/testsuite/gcc.dg/autopar/outer-1.c
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
void abort (void);
|
||||||
|
|
||||||
|
void parloop (int N)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
int x[10000][10000];
|
||||||
|
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
for (j = 0; j < N; j++)
|
||||||
|
x[i][j] = i + j + 3;
|
||||||
|
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
for (j = 0; j < N; j++)
|
||||||
|
if (x[i][j] != i + j + 3)
|
||||||
|
abort ();
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
parloop(10000);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Check that outer loop is parallelized. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
33
gcc/testsuite/gcc.dg/autopar/outer-2.c
Normal file
33
gcc/testsuite/gcc.dg/autopar/outer-2.c
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
void abort (void);
|
||||||
|
|
||||||
|
void parloop (int N)
|
||||||
|
{
|
||||||
|
int i, j,ii;
|
||||||
|
int x[400][10][400];
|
||||||
|
|
||||||
|
for (ii = 0; ii < N; ii++)
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
for (j = 0; j < N; j++)
|
||||||
|
x[i][j][ii] = ii+i + j + 3;
|
||||||
|
|
||||||
|
for (ii = 0; ii < N; ii++)
|
||||||
|
for (i = 0; i < N;i++)
|
||||||
|
for (j = 0; j < N; j++)
|
||||||
|
if (x[i][j][ii] != ii+i + j + 3)
|
||||||
|
abort ();
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
parloop(400);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
33
gcc/testsuite/gcc.dg/autopar/outer-3.c
Normal file
33
gcc/testsuite/gcc.dg/autopar/outer-3.c
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
void abort (void);
|
||||||
|
|
||||||
|
void parloop (int N)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
int x[500][500];
|
||||||
|
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
for (j = 0; j < i; j++)
|
||||||
|
x[i][j] = i + j + 3;
|
||||||
|
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
for (j = 0; j < i; j++)
|
||||||
|
if (x[i][j] != i + j + 3)
|
||||||
|
abort ();
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
parloop(500);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Check that outer loop is parallelized. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
38
gcc/testsuite/gcc.dg/autopar/outer-4.c
Normal file
38
gcc/testsuite/gcc.dg/autopar/outer-4.c
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
void abort (void);
|
||||||
|
|
||||||
|
int g_sum=0;
|
||||||
|
int x[500][500];
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
void parloop (int N)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
int sum;
|
||||||
|
|
||||||
|
/* Double reduction is currently not supported, outer loop is not
|
||||||
|
parallelized. Inner reduction is detected, inner loop is
|
||||||
|
parallelized. */
|
||||||
|
sum = 0;
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
for (j = 0; j < N; j++)
|
||||||
|
sum += x[i][j];
|
||||||
|
|
||||||
|
g_sum = sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
parloop(500);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Check that the inner loop, and not the outer loop, is parallelized. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 0 "parloops" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 1 "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
52
gcc/testsuite/gcc.dg/autopar/outer-5.c
Normal file
52
gcc/testsuite/gcc.dg/autopar/outer-5.c
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
void abort (void);
|
||||||
|
|
||||||
|
int x[500][500];
|
||||||
|
int y[500];
|
||||||
|
int g_sum=0;
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
void init (int i, int j)
|
||||||
|
{
|
||||||
|
x[i][j]=1;
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
void parloop (int N)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
int sum;
|
||||||
|
|
||||||
|
/* Inner cycle is currently not supported, outer loop is not
|
||||||
|
parallelized. Inner reduction is detected, inner loop is
|
||||||
|
parallelized. */
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
sum = 0;
|
||||||
|
for (j = 0; j < N; j++)
|
||||||
|
sum += x[i][j];
|
||||||
|
y[i]=sum;
|
||||||
|
}
|
||||||
|
g_sum = sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
for (i = 0; i < 500; i++)
|
||||||
|
for (j = 0; j < 500; j++)
|
||||||
|
init(i, j);
|
||||||
|
|
||||||
|
parloop(500);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Check that the inner loop, and not the outer loop, is parallelized. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 0 "parloops" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 1 "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
50
gcc/testsuite/gcc.dg/autopar/outer-6.c
Normal file
50
gcc/testsuite/gcc.dg/autopar/outer-6.c
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
void abort (void);
|
||||||
|
|
||||||
|
int x[500][500];
|
||||||
|
int y[500];
|
||||||
|
int g_sum=0;
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
void init (int i, int j)
|
||||||
|
{
|
||||||
|
x[i][j]=1;
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
void parloop (int N)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
int sum;
|
||||||
|
|
||||||
|
/* Outer loop reduction, outer loop is parallelized. */
|
||||||
|
sum=0;
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
for (j = 0; j < N; j++)
|
||||||
|
y[i]=x[i][j];
|
||||||
|
sum += y[i];
|
||||||
|
}
|
||||||
|
g_sum = sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
for (i = 0; i < 500; i++)
|
||||||
|
for (j = 0; j < 500; j++)
|
||||||
|
init(i, j);
|
||||||
|
|
||||||
|
parloop(500);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Check that outer loop is parallelized. */
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
||||||
|
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
146
gcc/tree-cfg.c
146
gcc/tree-cfg.c
@ -4850,6 +4850,31 @@ gimple_duplicate_bb (basic_block bb)
|
|||||||
return new_bb;
|
return new_bb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Add phi arguments to the phi nodes in E_COPY->dest according to
|
||||||
|
the phi arguments coming from the equivalent edge at
|
||||||
|
the phi nodes of ORIG_E->dest. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
add_phi_args_after_redirect (edge e_copy, edge orig_e)
|
||||||
|
{
|
||||||
|
gimple_stmt_iterator psi, psi_copy;
|
||||||
|
gimple phi, phi_copy;
|
||||||
|
tree def;
|
||||||
|
|
||||||
|
for (psi = gsi_start_phis (orig_e->dest),
|
||||||
|
psi_copy = gsi_start_phis (e_copy->dest);
|
||||||
|
!gsi_end_p (psi);
|
||||||
|
gsi_next (&psi), gsi_next (&psi_copy))
|
||||||
|
{
|
||||||
|
|
||||||
|
phi = gsi_stmt (psi);
|
||||||
|
phi_copy = gsi_stmt (psi_copy);
|
||||||
|
def = PHI_ARG_DEF_FROM_EDGE (phi, orig_e);
|
||||||
|
add_phi_arg (phi_copy, def, e_copy,
|
||||||
|
gimple_phi_arg_location_from_edge (phi, orig_e));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Adds phi node arguments for edge E_COPY after basic block duplication. */
|
/* Adds phi node arguments for edge E_COPY after basic block duplication. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -5131,9 +5156,14 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU
|
|||||||
int total_freq = 0, exit_freq = 0;
|
int total_freq = 0, exit_freq = 0;
|
||||||
gcov_type total_count = 0, exit_count = 0;
|
gcov_type total_count = 0, exit_count = 0;
|
||||||
edge exits[2], nexits[2], e;
|
edge exits[2], nexits[2], e;
|
||||||
gimple_stmt_iterator gsi;
|
gimple_stmt_iterator gsi,gsi1;
|
||||||
gimple cond_stmt;
|
gimple cond_stmt;
|
||||||
edge sorig, snew;
|
edge sorig, snew, orig_e;
|
||||||
|
basic_block exit_bb;
|
||||||
|
edge_iterator ei;
|
||||||
|
VEC (edge, heap) *redirect_edges;
|
||||||
|
basic_block iters_bb, orig_src;
|
||||||
|
tree new_rhs;
|
||||||
|
|
||||||
gcc_assert (EDGE_COUNT (exit->src->succs) == 2);
|
gcc_assert (EDGE_COUNT (exit->src->succs) == 2);
|
||||||
exits[0] = exit;
|
exits[0] = exit;
|
||||||
@ -5149,17 +5179,13 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU
|
|||||||
it will work, but the resulting code will not be correct. */
|
it will work, but the resulting code will not be correct. */
|
||||||
for (i = 0; i < n_region; i++)
|
for (i = 0; i < n_region; i++)
|
||||||
{
|
{
|
||||||
/* We do not handle subloops, i.e. all the blocks must belong to the
|
|
||||||
same loop. */
|
|
||||||
if (region[i]->loop_father != orig_loop)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (region[i] == orig_loop->latch)
|
if (region[i] == orig_loop->latch)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
initialize_original_copy_tables ();
|
initialize_original_copy_tables ();
|
||||||
set_loop_copy (orig_loop, loop);
|
set_loop_copy (orig_loop, loop);
|
||||||
|
duplicate_subloops (orig_loop, loop);
|
||||||
|
|
||||||
if (!region_copy)
|
if (!region_copy)
|
||||||
{
|
{
|
||||||
@ -5225,8 +5251,36 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU
|
|||||||
cond_stmt = last_stmt (exit->src);
|
cond_stmt = last_stmt (exit->src);
|
||||||
gcc_assert (gimple_code (cond_stmt) == GIMPLE_COND);
|
gcc_assert (gimple_code (cond_stmt) == GIMPLE_COND);
|
||||||
cond_stmt = gimple_copy (cond_stmt);
|
cond_stmt = gimple_copy (cond_stmt);
|
||||||
|
|
||||||
|
/* If the block consisting of the exit condition has the latch as
|
||||||
|
successor, then the body of the loop is executed before
|
||||||
|
the exit condition is tested. In such a case, moving the
|
||||||
|
condition to the entry causes the loop to iterate
|
||||||
|
one less iteration (which is the wanted outcome, since we
|
||||||
|
peel out the last iteration). If the body is executed after
|
||||||
|
the condition, moving the condition to the entry requires
|
||||||
|
decrementing one iteration. */
|
||||||
|
if (exits[1]->dest == orig_loop->latch)
|
||||||
|
new_rhs = gimple_cond_rhs (cond_stmt);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)),
|
||||||
|
gimple_cond_rhs (cond_stmt),
|
||||||
|
build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1));
|
||||||
|
|
||||||
|
if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME)
|
||||||
|
{
|
||||||
|
iters_bb = gimple_bb (SSA_NAME_DEF_STMT (gimple_cond_rhs (cond_stmt)));
|
||||||
|
for (gsi1 = gsi_start_bb (iters_bb); !gsi_end_p (gsi1); gsi_next (&gsi1))
|
||||||
|
if (gsi_stmt (gsi1)==SSA_NAME_DEF_STMT (gimple_cond_rhs (cond_stmt)))
|
||||||
|
break;
|
||||||
|
|
||||||
|
new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true,
|
||||||
|
NULL_TREE,false,GSI_CONTINUE_LINKING);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs));
|
||||||
gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
|
gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
|
||||||
gimple_cond_set_rhs (cond_stmt, unshare_expr (gimple_cond_rhs (cond_stmt)));
|
|
||||||
gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
|
gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
|
||||||
|
|
||||||
sorig = single_succ_edge (switch_bb);
|
sorig = single_succ_edge (switch_bb);
|
||||||
@ -5238,25 +5292,87 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU
|
|||||||
|
|
||||||
/* Add the PHI node arguments. */
|
/* Add the PHI node arguments. */
|
||||||
add_phi_args_after_copy (region_copy, n_region, snew);
|
add_phi_args_after_copy (region_copy, n_region, snew);
|
||||||
|
|
||||||
/* Get rid of now superfluous conditions and associated edges (and phi node
|
/* Get rid of now superfluous conditions and associated edges (and phi node
|
||||||
arguments). */
|
arguments). */
|
||||||
|
exit_bb = exit->dest;
|
||||||
|
|
||||||
e = redirect_edge_and_branch (exits[0], exits[1]->dest);
|
e = redirect_edge_and_branch (exits[0], exits[1]->dest);
|
||||||
PENDING_STMT (e) = NULL;
|
PENDING_STMT (e) = NULL;
|
||||||
e = redirect_edge_and_branch (nexits[1], nexits[0]->dest);
|
|
||||||
PENDING_STMT (e) = NULL;
|
/* If the block consisting of the exit condition has the latch as
|
||||||
|
successor, then the body of the loop is executed before
|
||||||
|
the exit condition is tested.
|
||||||
|
|
||||||
|
{ body }
|
||||||
|
{ cond } (exit[0]) -> { latch }
|
||||||
|
|
|
||||||
|
V (exit[1])
|
||||||
|
|
||||||
|
{ exit_bb }
|
||||||
|
|
||||||
|
|
||||||
|
In such case, the equivalent copied edge nexits[1]
|
||||||
|
(for the peeled iteration) needs to be redirected to exit_bb.
|
||||||
|
|
||||||
|
Otherwise,
|
||||||
|
|
||||||
|
{ cond } (exit[0]) -> { body }
|
||||||
|
|
|
||||||
|
V (exit[1])
|
||||||
|
|
||||||
|
{ exit_bb }
|
||||||
|
|
||||||
|
|
||||||
|
exit[0] is pointing to the body of the loop,
|
||||||
|
and the equivalent nexits[0] needs to be redirected to
|
||||||
|
the copied body (of the peeled iteration). */
|
||||||
|
|
||||||
|
if (exits[1]->dest == orig_loop->latch)
|
||||||
|
e = redirect_edge_and_branch (nexits[1], nexits[0]->dest);
|
||||||
|
else
|
||||||
|
e = redirect_edge_and_branch (nexits[0], nexits[1]->dest);
|
||||||
|
PENDING_STMT (e) = NULL;
|
||||||
|
|
||||||
|
redirect_edges = VEC_alloc (edge, heap, 10);
|
||||||
|
|
||||||
|
for (i = 0; i < n_region; i++)
|
||||||
|
region_copy[i]->flags |= BB_DUPLICATED;
|
||||||
|
|
||||||
|
/* Iterate all incoming edges to latch. All those coming from
|
||||||
|
copied bbs will be redirected to exit_bb. */
|
||||||
|
FOR_EACH_EDGE (e, ei, orig_loop->latch->preds)
|
||||||
|
{
|
||||||
|
if (e->src->flags & BB_DUPLICATED)
|
||||||
|
VEC_safe_push (edge, heap, redirect_edges, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < n_region; i++)
|
||||||
|
region_copy[i]->flags &= ~BB_DUPLICATED;
|
||||||
|
|
||||||
|
for (i = 0; VEC_iterate (edge, redirect_edges, i, e); ++i)
|
||||||
|
{
|
||||||
|
e = redirect_edge_and_branch (e, exit_bb);
|
||||||
|
PENDING_STMT (e) = NULL;
|
||||||
|
orig_src = get_bb_original (e->src);
|
||||||
|
orig_e = find_edge (orig_src, orig_loop->latch);
|
||||||
|
add_phi_args_after_redirect (e, orig_e);
|
||||||
|
}
|
||||||
|
|
||||||
|
VEC_free (edge, heap, redirect_edges);
|
||||||
|
|
||||||
|
|
||||||
/* Anything that is outside of the region, but was dominated by something
|
/* Anything that is outside of the region, but was dominated by something
|
||||||
inside needs to update dominance info. */
|
inside needs to update dominance info. */
|
||||||
iterate_fix_dominators (CDI_DOMINATORS, doms, false);
|
iterate_fix_dominators (CDI_DOMINATORS, doms, false);
|
||||||
VEC_free (basic_block, heap, doms);
|
VEC_free (basic_block, heap, doms);
|
||||||
|
|
||||||
/* Update the SSA web. */
|
/* Update the SSA web. */
|
||||||
update_ssa (TODO_update_ssa);
|
update_ssa (TODO_update_ssa);
|
||||||
|
|
||||||
if (free_region_copy)
|
if (free_region_copy)
|
||||||
free (region_copy);
|
free (region_copy);
|
||||||
|
|
||||||
free_original_copy_tables ();
|
free_original_copy_tables ();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -255,7 +255,13 @@ loop_parallel_p (struct loop *loop)
|
|||||||
bool ret = false;
|
bool ret = false;
|
||||||
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
|
{
|
||||||
|
fprintf (dump_file, "Considering loop %d\n", loop->num);
|
||||||
|
if (!loop->inner)
|
||||||
|
fprintf (dump_file, "loop is innermost\n");
|
||||||
|
else
|
||||||
|
fprintf (dump_file, "loop NOT innermost\n");
|
||||||
|
}
|
||||||
|
|
||||||
/* Check for problems with dependences. If the loop can be reversed,
|
/* Check for problems with dependences. If the loop can be reversed,
|
||||||
the iterations are independent. */
|
the iterations are independent. */
|
||||||
@ -1289,8 +1295,9 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit
|
|||||||
bool ok;
|
bool ok;
|
||||||
edge exit = single_dom_exit (loop), hpred;
|
edge exit = single_dom_exit (loop), hpred;
|
||||||
tree control, control_name, res, t;
|
tree control, control_name, res, t;
|
||||||
gimple phi, nphi, cond_stmt, stmt;
|
gimple phi, nphi, cond_stmt, stmt, cond_nit;
|
||||||
gimple_stmt_iterator gsi;
|
gimple_stmt_iterator gsi;
|
||||||
|
tree nit_1;
|
||||||
|
|
||||||
split_block_after_labels (loop->header);
|
split_block_after_labels (loop->header);
|
||||||
orig_header = single_succ (loop->header);
|
orig_header = single_succ (loop->header);
|
||||||
@ -1308,7 +1315,6 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit
|
|||||||
res = PHI_RESULT (phi);
|
res = PHI_RESULT (phi);
|
||||||
t = make_ssa_name (SSA_NAME_VAR (res), phi);
|
t = make_ssa_name (SSA_NAME_VAR (res), phi);
|
||||||
SET_PHI_RESULT (phi, t);
|
SET_PHI_RESULT (phi, t);
|
||||||
|
|
||||||
nphi = create_phi_node (res, orig_header);
|
nphi = create_phi_node (res, orig_header);
|
||||||
SSA_NAME_DEF_STMT (res) = nphi;
|
SSA_NAME_DEF_STMT (res) = nphi;
|
||||||
add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
|
add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
|
||||||
@ -1320,10 +1326,11 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit
|
|||||||
control = t;
|
control = t;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bbs = get_loop_body_in_dom_order (loop);
|
bbs = get_loop_body_in_dom_order (loop);
|
||||||
for (n = 0; bbs[n] != exit->src; n++)
|
|
||||||
|
for (n = 0; bbs[n] != loop->latch; n++)
|
||||||
continue;
|
continue;
|
||||||
|
n--;
|
||||||
nbbs = XNEWVEC (basic_block, n);
|
nbbs = XNEWVEC (basic_block, n);
|
||||||
ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
|
ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
|
||||||
bbs + 1, n, nbbs);
|
bbs + 1, n, nbbs);
|
||||||
@ -1358,7 +1365,6 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit
|
|||||||
struct reduction_info *red;
|
struct reduction_info *red;
|
||||||
|
|
||||||
tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
|
tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
|
||||||
|
|
||||||
red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
|
red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
|
||||||
if (red)
|
if (red)
|
||||||
{
|
{
|
||||||
@ -1374,12 +1380,15 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit
|
|||||||
}
|
}
|
||||||
gcc_assert (control_name != NULL_TREE);
|
gcc_assert (control_name != NULL_TREE);
|
||||||
|
|
||||||
/* Initialize the control variable to NIT. */
|
/* Initialize the control variable to number of iterations
|
||||||
|
according to the rhs of the exit condition. */
|
||||||
gsi = gsi_after_labels (ex_bb);
|
gsi = gsi_after_labels (ex_bb);
|
||||||
nit = force_gimple_operand_gsi (&gsi,
|
cond_nit = last_stmt (exit->src);
|
||||||
fold_convert (TREE_TYPE (control_name), nit),
|
nit_1 = gimple_cond_rhs (cond_nit);
|
||||||
|
nit_1 = force_gimple_operand_gsi (&gsi,
|
||||||
|
fold_convert (TREE_TYPE (control_name), nit_1),
|
||||||
false, NULL_TREE, false, GSI_SAME_STMT);
|
false, NULL_TREE, false, GSI_SAME_STMT);
|
||||||
stmt = gimple_build_assign (control_name, nit);
|
stmt = gimple_build_assign (control_name, nit_1);
|
||||||
gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
|
gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
|
||||||
SSA_NAME_DEF_STMT (control_name) = stmt;
|
SSA_NAME_DEF_STMT (control_name) = stmt;
|
||||||
}
|
}
|
||||||
@ -1740,7 +1749,7 @@ gather_scalar_reductions (loop_p loop, htab_t reduction_list)
|
|||||||
&& simple_loop_info)
|
&& simple_loop_info)
|
||||||
{
|
{
|
||||||
gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
|
gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
|
||||||
if (reduc_stmt)
|
if (reduc_stmt && !double_reduc)
|
||||||
build_new_reduction (reduction_list, reduc_stmt, phi);
|
build_new_reduction (reduction_list, reduc_stmt, phi);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1890,15 +1899,32 @@ parallelize_loops (void)
|
|||||||
FOR_EACH_LOOP (li, loop, 0)
|
FOR_EACH_LOOP (li, loop, 0)
|
||||||
{
|
{
|
||||||
htab_empty (reduction_list);
|
htab_empty (reduction_list);
|
||||||
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
/* If we use autopar in graphite pass, we use it's marked dependency
|
{
|
||||||
|
fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
|
||||||
|
if (loop->inner)
|
||||||
|
fprintf (dump_file, "loop %d is not innermost\n",loop->num);
|
||||||
|
else
|
||||||
|
fprintf (dump_file, "loop %d is innermost\n",loop->num);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we use autopar in graphite pass, we use its marked dependency
|
||||||
checking results. */
|
checking results. */
|
||||||
if (flag_loop_parallelize_all && !loop->can_be_parallel)
|
if (flag_loop_parallelize_all && !loop->can_be_parallel)
|
||||||
|
{
|
||||||
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
fprintf (dump_file, "loop is not parallel according to graphite\n");
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* FIXME: Only consider innermost loops with just one exit. */
|
if (!single_dom_exit (loop))
|
||||||
if (loop->inner || !single_dom_exit (loop))
|
{
|
||||||
|
|
||||||
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
fprintf (dump_file, "loop is !single_dom_exit\n");
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (/* And of course, the loop must be parallelizable. */
|
if (/* And of course, the loop must be parallelizable. */
|
||||||
!can_duplicate_loop_p (loop)
|
!can_duplicate_loop_p (loop)
|
||||||
@ -1915,7 +1941,7 @@ parallelize_loops (void)
|
|||||||
/* Do not bother with loops in cold areas. */
|
/* Do not bother with loops in cold areas. */
|
||||||
|| optimize_loop_nest_for_size_p (loop)))
|
|| optimize_loop_nest_for_size_p (loop)))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!try_get_loop_niter (loop, &niter_desc))
|
if (!try_get_loop_niter (loop, &niter_desc))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -1926,6 +1952,14 @@ parallelize_loops (void)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
changed = true;
|
changed = true;
|
||||||
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
{
|
||||||
|
fprintf (dump_file, "parallelizing ");
|
||||||
|
if (loop->inner)
|
||||||
|
fprintf (dump_file, "outer loop\n");
|
||||||
|
else
|
||||||
|
fprintf (dump_file, "inner loop\n");
|
||||||
|
}
|
||||||
gen_parallel_loop (loop, reduction_list,
|
gen_parallel_loop (loop, reduction_list,
|
||||||
n_threads, &niter_desc);
|
n_threads, &niter_desc);
|
||||||
verify_flow_info ();
|
verify_flow_info ();
|
||||||
|
@ -23,7 +23,7 @@ int main(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Check that parallel code generation part make the right answer. */
|
/* Check that parallel code generation part make the right answer. */
|
||||||
/* { dg-final { scan-tree-dump-times "2 loops carried no dependency" 1 "graphite" } } */
|
/* { dg-final { scan-tree-dump-times "2 loops carried no dependency" 2 "graphite" } } */
|
||||||
/* { dg-final { cleanup-tree-dump "graphite" } } */
|
/* { dg-final { cleanup-tree-dump "graphite" } } */
|
||||||
/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
|
/* { dg-final { scan-tree-dump-times "loopfn" 5 "optimized" } } */
|
||||||
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
||||||
|
Loading…
Reference in New Issue
Block a user