From 7103facc65f999274883bdfd6781d6e2f0ff6f97 Mon Sep 17 00:00:00 2001 From: rguenth Date: Fri, 25 Oct 2013 11:51:11 +0000 Subject: [PATCH] 2013-10-25 Richard Biener PR tree-optimization/58626 * tree-loop-distribution.c (enum rdg_dep_type): Remove anti_dd, output_dd and input_dd. (struct rdg_edge): Remove level and relation members. (RDGE_LEVEL, RDGE_RELATION): Remove. (dot_rdg_1): Adjust. (create_rdg_edge_for_ddr): Remove. (create_rdg_edges_for_scalar): Adjust. (create_edge_for_control_dependence): Likewise. (create_rdg_edges): Split into ... (create_rdg_flow_edges): ... this (create_rdg_cd_edges): ... and this. (free_rdg): Adjust. (build_rdg): Likewise, do not compute data dependences or add edges for them. (pg_add_dependence_edges): New function. (pgcmp): Likewise. (distribute_loop): First apply all non-dependence based partition mergings. Then compute dependences between partitions and merge and order partitions according to them. * gcc.dg/torture/pr58626.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204062 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 23 ++ gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.dg/torture/pr58626.c | 20 ++ gcc/tree-loop-distribution.c | 399 ++++++++++++++++++++------------- 4 files changed, 287 insertions(+), 160 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr58626.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1c7bc5436d9..98286d132f3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2013-10-25 Richard Biener + + PR tree-optimization/58626 + * tree-loop-distribution.c (enum rdg_dep_type): Remove + anti_dd, output_dd and input_dd. + (struct rdg_edge): Remove level and relation members. + (RDGE_LEVEL, RDGE_RELATION): Remove. + (dot_rdg_1): Adjust. + (create_rdg_edge_for_ddr): Remove. + (create_rdg_edges_for_scalar): Adjust. + (create_edge_for_control_dependence): Likewise. + (create_rdg_edges): Split into ... + (create_rdg_flow_edges): ... this + (create_rdg_cd_edges): ... and this. + (free_rdg): Adjust. + (build_rdg): Likewise, do not compute data dependences or + add edges for them. + (pg_add_dependence_edges): New function. + (pgcmp): Likewise. + (distribute_loop): First apply all non-dependence based + partition mergings. Then compute dependences between partitions + and merge and order partitions according to them. + 2013-10-25 Eric Botcazou PR rtl-optimization/58831 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f6b958dce2e..1134e3ba28a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2013-10-25 Richard Biener + + PR tree-optimization/58626 + * gcc.dg/torture/pr58626.c: New testcase. + 2013-10-25 Paolo Carlini PR c++/54812 diff --git a/gcc/testsuite/gcc.dg/torture/pr58626.c b/gcc/testsuite/gcc.dg/torture/pr58626.c new file mode 100644 index 00000000000..1416384b7a6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr58626.c @@ -0,0 +1,20 @@ +/* { dg-do run } */ + +extern void abort (void); + +int a[8][6] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; +int b; + +int main(void) +{ + for (b = 0; b <= 1; b++) { + a[1][3] = 0; + int c; + for (c = 0; c <= 1; c++) { + a[c + 1][b] = a[c + 2][b]; + } + } + if (a[1][1] != 1) + abort (); + return 0; +} diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 15f3809ed44..353ce247de0 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -96,15 +96,6 @@ enum rdg_dep_type /* Read After Write (RAW). */ flow_dd = 'f', - /* Write After Read (WAR). */ - anti_dd = 'a', - - /* Write After Write (WAW). */ - output_dd = 'o', - - /* Read After Read (RAR). */ - input_dd = 'i', - /* Control dependence (execute conditional on). */ control_dd = 'c' }; @@ -115,19 +106,9 @@ typedef struct rdg_edge { /* Type of the dependence. */ enum rdg_dep_type type; - - /* Levels of the dependence: the depth of the loops that carry the - dependence. */ - unsigned level; - - /* Dependence relation between data dependences, NULL when one of - the vertices is a scalar. */ - ddr_p relation; } *rdg_edge_p; #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type -#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level -#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation /* Dump vertex I in RDG to FILE. */ @@ -215,23 +196,11 @@ dot_rdg_1 (FILE *file, struct graph *rdg) for (e = v->succ; e; e = e->succ_next) switch (RDGE_TYPE (e)) { - case input_dd: - fprintf (file, "%d -> %d [label=input] \n", i, e->dest); - break; - - case output_dd: - fprintf (file, "%d -> %d [label=output] \n", i, e->dest); - break; - case flow_dd: /* These are the most common dependences: don't print these. */ fprintf (file, "%d -> %d \n", i, e->dest); break; - case anti_dd: - fprintf (file, "%d -> %d [label=anti] \n", i, e->dest); - break; - case control_dd: fprintf (file, "%d -> %d [label=control] \n", i, e->dest); break; @@ -273,52 +242,6 @@ rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt) return index; } -/* Creates an edge in RDG for each distance vector from DDR. The - order that we keep track of in the RDG is the order in which - statements have to be executed. */ - -static void -create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr) -{ - struct graph_edge *e; - int va, vb; - data_reference_p dra = DDR_A (ddr); - data_reference_p drb = DDR_B (ddr); - unsigned level = ddr_dependence_level (ddr); - - /* For non scalar dependences, when the dependence is REVERSED, - statement B has to be executed before statement A. */ - if (level > 0 - && !DDR_REVERSED_P (ddr)) - { - data_reference_p tmp = dra; - dra = drb; - drb = tmp; - } - - va = rdg_vertex_for_stmt (rdg, DR_STMT (dra)); - vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb)); - - if (va < 0 || vb < 0) - return; - - e = add_edge (rdg, va, vb); - e->data = XNEW (struct rdg_edge); - - RDGE_LEVEL (e) = level; - RDGE_RELATION (e) = ddr; - - /* Determines the type of the data dependence. */ - if (DR_IS_READ (dra) && DR_IS_READ (drb)) - RDGE_TYPE (e) = input_dd; - else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)) - RDGE_TYPE (e) = output_dd; - else if (DR_IS_WRITE (dra) && DR_IS_READ (drb)) - RDGE_TYPE (e) = flow_dd; - else if (DR_IS_READ (dra) && DR_IS_WRITE (drb)) - RDGE_TYPE (e) = anti_dd; -} - /* Creates dependence edges in RDG for all the uses of DEF. IDEF is the index of DEF in RDG. */ @@ -339,7 +262,6 @@ create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) e = add_edge (rdg, idef, use); e->data = XNEW (struct rdg_edge); RDGE_TYPE (e) = flow_dd; - RDGE_RELATION (e) = NULL; } } @@ -366,7 +288,6 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb, e = add_edge (rdg, c, v); e->data = XNEW (struct rdg_edge); RDGE_TYPE (e) = control_dd; - RDGE_RELATION (e) = NULL; } } } @@ -374,38 +295,38 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb, /* Creates the edges of the reduced dependence graph RDG. */ static void -create_rdg_edges (struct graph *rdg, vec ddrs, control_dependences *cd) +create_rdg_flow_edges (struct graph *rdg) { int i; - struct data_dependence_relation *ddr; def_operand_p def_p; ssa_op_iter iter; - FOR_EACH_VEC_ELT (ddrs, i, ddr) - if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) - create_rdg_edge_for_ddr (rdg, ddr); - else - free_dependence_relation (ddr); - for (i = 0; i < rdg->n_vertices; i++) FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), iter, SSA_OP_DEF) create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); +} - if (cd) - for (i = 0; i < rdg->n_vertices; i++) - { - gimple stmt = RDG_STMT (rdg, i); - if (gimple_code (stmt) == GIMPLE_PHI) - { - edge_iterator ei; - edge e; - FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) +/* Creates the edges of the reduced dependence graph RDG. */ + +static void +create_rdg_cd_edges (struct graph *rdg, control_dependences *cd) +{ + int i; + + for (i = 0; i < rdg->n_vertices; i++) + { + gimple stmt = RDG_STMT (rdg, i); + if (gimple_code (stmt) == GIMPLE_PHI) + { + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) create_edge_for_control_dependence (rdg, e->src, i, cd); - } - else - create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); - } + } + else + create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); + } } /* Build the vertices of the reduced dependence graph RDG. Return false @@ -494,10 +415,7 @@ free_rdg (struct graph *rdg) struct graph_edge *e; for (e = v->succ; e; e = e->succ_next) - { - free_dependence_relation (RDGE_RELATION (e)); - free (e->data); - } + free (e->data); if (v->data) { @@ -520,7 +438,6 @@ build_rdg (vec loop_nest, control_dependences *cd) struct graph *rdg; vec stmts; vec datarefs; - vec dependence_relations; /* Create the RDG vertices from the stmts of the loop nest. */ stmts.create (10); @@ -536,19 +453,10 @@ build_rdg (vec loop_nest, control_dependences *cd) } stmts.release (); - /* Create the RDG edges from the data dependences in the loop nest. */ - dependence_relations.create (100); - if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest, - false) - || !known_dependences_p (dependence_relations)) - { - free_dependence_relations (dependence_relations); - datarefs.release (); - free_rdg (rdg); - return NULL; - } - create_rdg_edges (rdg, dependence_relations, cd); - dependence_relations.release (); + create_rdg_flow_edges (rdg); + if (cd) + create_rdg_cd_edges (rdg, cd); + datarefs.release (); return rdg; @@ -1405,6 +1313,70 @@ partition_contains_all_rw (struct graph *rdg, return false; } +/* Compute partition dependence created by the data references in DRS1 + and DRS2 and modify and return DIR according to that. */ + +static int +pg_add_dependence_edges (struct graph *rdg, vec loops, int dir, + vec drs1, + vec drs2) +{ + data_reference_p dr1, dr2; + + /* dependence direction - 0 is no dependence, -1 is back, + 1 is forth, 2 is both (we can stop then, merging will occur). */ + for (int ii = 0; drs1.iterate (ii, &dr1); ++ii) + for (int jj = 0; drs2.iterate (jj, &dr2); ++jj) + { + int this_dir = 1; + ddr_p ddr; + /* Re-shuffle data-refs to be in dominator order. */ + if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1)) + > rdg_vertex_for_stmt (rdg, DR_STMT (dr2))) + { + data_reference_p tem = dr1; + dr1 = dr2; + dr2 = tem; + this_dir = -this_dir; + } + ddr = initialize_data_dependence_relation (dr1, dr2, loops); + compute_affine_dependence (ddr, loops[0]); + if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + this_dir = 2; + else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) + { + if (DDR_REVERSED_P (ddr)) + { + data_reference_p tem = dr1; + dr1 = dr2; + dr2 = tem; + this_dir = -this_dir; + } + /* Known dependences can still be unordered througout the + iteration space, see gcc.dg/tree-ssa/ldist-16.c. */ + if (DDR_NUM_DIST_VECTS (ddr) == 0) + this_dir = 2; + } + else + this_dir = 0; + free_dependence_relation (ddr); + if (dir == 0) + dir = this_dir; + else if (dir != this_dir) + return 2; + } + return dir; +} + +/* Compare postorder number of the partition graph vertices V1 and V2. */ + +static int +pgcmp (const void *v1_, const void *v2_) +{ + const vertex *v1 = (const vertex *)v1_; + const vertex *v2 = (const vertex *)v2_; + return v2->post - v1->post; +} /* Distributes the code from LOOP in such a way that producer statements are placed before consumer statements. Tries to separate @@ -1421,6 +1393,8 @@ distribute_loop (struct loop *loop, vec stmts, partition_t partition; bool any_builtin; int i, nbp; + graph *pg = NULL; + int num_sccs = 1; *nb_calls = 0; loop_nest.create (3); @@ -1455,8 +1429,8 @@ distribute_loop (struct loop *loop, vec stmts, any_builtin |= partition_builtin_p (partition); } - /* If we did not detect any builtin but are not asked to apply - regular loop distribution simply bail out. */ + /* If we are only distributing patterns but did not detect any, + simply bail out. */ if (!flag_tree_loop_distribution && !any_builtin) { @@ -1464,9 +1438,56 @@ distribute_loop (struct loop *loop, vec stmts, goto ldist_done; } + /* If we are only distributing patterns fuse all partitions that + were not classified as builtins. This also avoids chopping + a loop into pieces, separated by builtin calls. That is, we + only want no or a single loop body remaining. */ + partition_t into; + if (!flag_tree_loop_distribution) + { + for (i = 0; partitions.iterate (i, &into); ++i) + if (!partition_builtin_p (into)) + break; + for (++i; partitions.iterate (i, &partition); ++i) + if (!partition_builtin_p (partition)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "fusing non-builtin partitions\n"); + dump_bitmap (dump_file, into->stmts); + dump_bitmap (dump_file, partition->stmts); + } + partition_merge_into (into, partition); + partitions.unordered_remove (i); + partition_free (partition); + i--; + } + } + + /* Due to limitations in the transform phase we have to fuse all + reduction partitions into the last partition so the existing + loop will contain all loop-closed PHI nodes. */ + for (i = 0; partitions.iterate (i, &into); ++i) + if (partition_reduction_p (into)) + break; + for (i = i + 1; partitions.iterate (i, &partition); ++i) + if (partition_reduction_p (partition)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "fusing partitions\n"); + dump_bitmap (dump_file, into->stmts); + dump_bitmap (dump_file, partition->stmts); + fprintf (dump_file, "because they have reductions\n"); + } + partition_merge_into (into, partition); + partitions.unordered_remove (i); + partition_free (partition); + i--; + } + /* Apply our simple cost model - fuse partitions with similar memory accesses. */ - partition_t into; for (i = 0; partitions.iterate (i, &into); ++i) { if (partition_builtin_p (into)) @@ -1486,61 +1507,119 @@ distribute_loop (struct loop *loop, vec stmts, "memory accesses\n"); } partition_merge_into (into, partition); - partitions.ordered_remove (j); + partitions.unordered_remove (j); partition_free (partition); j--; } } } - /* If we are only distributing patterns fuse all partitions that - were not properly classified as builtins. */ - if (!flag_tree_loop_distribution) + /* Build the partition dependency graph. */ + if (partitions.length () > 1) { - partition_t into; - /* Only fuse adjacent non-builtin partitions, see PR53616. - ??? Use dependence information to improve partition ordering. */ - i = 0; - do + pg = new_graph (partitions.length ()); + struct pgdata { + partition_t partition; + vec writes; + vec reads; + }; +#define PGDATA(i) ((pgdata *)(pg->vertices[i].data)) + for (i = 0; partitions.iterate (i, &partition); ++i) + { + vertex *v = &pg->vertices[i]; + pgdata *data = new pgdata; + data_reference_p dr; + /* FIXME - leaks. */ + v->data = data; + bitmap_iterator bi; + unsigned j; + data->partition = partition; + data->reads = vNULL; + data->writes = vNULL; + EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi) + for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k) + if (DR_IS_READ (dr)) + data->reads.safe_push (dr); + else + data->writes.safe_push (dr); + } + partition_t partition1, partition2; + for (i = 0; partitions.iterate (i, &partition1); ++i) + for (int j = i + 1; partitions.iterate (j, &partition2); ++j) + { + /* dependence direction - 0 is no dependence, -1 is back, + 1 is forth, 2 is both (we can stop then, merging will occur). */ + int dir = 0; + dir = pg_add_dependence_edges (rdg, loop_nest, dir, + PGDATA(i)->writes, + PGDATA(j)->reads); + if (dir != 2) + dir = pg_add_dependence_edges (rdg, loop_nest, dir, + PGDATA(i)->reads, + PGDATA(j)->writes); + if (dir != 2) + dir = pg_add_dependence_edges (rdg, loop_nest, dir, + PGDATA(i)->writes, + PGDATA(j)->writes); + if (dir == 1 || dir == 2) + add_edge (pg, i, j); + if (dir == -1 || dir == 2) + add_edge (pg, j, i); + } + + /* Add edges to the reduction partition (if any) to force it last. */ + unsigned j; + for (j = 0; partitions.iterate (j, &partition); ++j) + if (partition_reduction_p (partition)) + break; + if (j < partitions.length ()) { - for (; partitions.iterate (i, &into); ++i) - if (!partition_builtin_p (into)) + for (unsigned i = 0; partitions.iterate (i, &partition); ++i) + if (i != j) + add_edge (pg, i, j); + } + + /* Compute partitions we cannot separate and fuse them. */ + num_sccs = graphds_scc (pg, NULL); + for (i = 0; i < num_sccs; ++i) + { + partition_t first; + int j; + for (j = 0; partitions.iterate (j, &first); ++j) + if (pg->vertices[j].component == i) break; - for (++i; partitions.iterate (i, &partition); ++i) - if (!partition_builtin_p (partition)) + for (j = j + 1; partitions.iterate (j, &partition); ++j) + if (pg->vertices[j].component == i) { - partition_merge_into (into, partition); - partitions.ordered_remove (i); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "fusing partitions\n"); + dump_bitmap (dump_file, first->stmts); + dump_bitmap (dump_file, partition->stmts); + fprintf (dump_file, "because they are in the same " + "dependence SCC\n"); + } + partition_merge_into (first, partition); + partitions[j] = NULL; partition_free (partition); - i--; + PGDATA (j)->partition = NULL; } - else - break; } - while ((unsigned) i < partitions.length ()); - } - /* Fuse all reduction partitions into the last. */ - if (partitions.length () > 1) - { - partition_t into = partitions.last (); - for (i = partitions.length () - 2; i >= 0; --i) + /* Now order the remaining nodes in postorder. */ + qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp); + partitions.truncate (0); + for (i = 0; i < pg->n_vertices; ++i) { - partition_t what = partitions[i]; - if (partition_reduction_p (what)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "fusing partitions\n"); - dump_bitmap (dump_file, into->stmts); - dump_bitmap (dump_file, what->stmts); - fprintf (dump_file, "because the latter has reductions\n"); - } - partition_merge_into (into, what); - partitions.ordered_remove (i); - partition_free (what); - } + pgdata *data = PGDATA (i); + if (data->partition) + partitions.safe_push (data->partition); + data->reads.release (); + data->writes.release (); + delete data; } + gcc_assert (partitions.length () == (unsigned)num_sccs); + free_graph (pg); } nbp = partitions.length (); -- 2.11.4.GIT