gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to
  18 the Free Software Foundation, 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "tree.h"
  26 #include "flags.h"
  27 #include "rtl.h"
  28 #include "tm_p.h"
  29 #include "ggc.h"
  30 #include "basic-block.h"
  31 #include "output.h"
  32 #include "errors.h"
  33 #include "expr.h"
  34 #include "function.h"
  35 #include "diagnostic.h"
  36 #include "tree-flow.h"
  37 #include "tree-dump.h"
  38 #include "tree-pass.h"
  39
  40 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  41    one or more in-edges to B to instead reach the destination of an
  42    out-edge from B while preserving any side effects in B.
  43
  44    ie, given A->B and B->C, change A->B to be A->C yet still preserve the
  45    side effects of executing B.
  46
  47      1. Make a copy of B (including its outgoing edges and statements).  Call
  48         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  49
  50      2. Remove the control statement at the end of B' and all outgoing edges
  51         except B'->C.
  52
  53      3. Add a new argument to each PHI in C with the same value as the existing
  54         argument associated with edge B->C.  Associate the new PHI arguments
  55         with the edge B'->C.
  56
  57      4. For each PHI in B, find or create a PHI in B' with an identical
  58         PHI_RESULT.  Add an argument to the PHI in B' which as the same
  59         value as the PHI in B associated with the edge A->B.  Associate
  60         the new argument in the PHI in B' with the edge A->B.
  61
  62      5. Change the edge A->B to A->B'.
  63
  64         5a. This automatically deletes any PHI arguments associated with the
  65             edge A->B in B.
  66
  67         5b. This automatically associates each new argument added in step 4
  68             with the edge A->B'.
  69
  70      6. Repeat for other incoming edges into B.
  71
  72      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  73
  74    Note that block duplication can be minimized by first collecting the
  75    the set of unique destination blocks that the incoming edges should
  76    be threaded to.  Block duplication can be further minimized by using
  77    B instead of creating B' for one destination if all edges into B are
  78    going to be threaded to a successor of B.  */
  79
  80
  81 /* Main data structure recording information regarding B's duplicate
  82    blocks.  */
  83
  84 struct redirection_data
  85 {
  86   /* A duplicate of B with the trailing control statement removed and which
  87      targets a single successor of B.  */
  88   basic_block dup_block;
  89
  90   /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
  91      its single successor.  */
  92   edge outgoing_edge;
  93 };
  94
  95 /* Main data structure to hold information for duplicates of BB.  */
  96 static varray_type redirection_data;
  97
  98 /* For each PHI node in BB, find or create a PHI node in NEW_BB for the
  99    same PHI_RESULT.  Add an argument to the PHI node in NEW_BB which
 100    corresponds to the same PHI argument associated with edge E in BB.  */
 101
 102 static void
 103 copy_phis_to_block (basic_block new_bb, basic_block bb, edge e)
 104 {
 105   tree phi, arg;
 106
 107   /* Walk over every PHI in BB.  */
 108   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 109     {
 110       tree new_phi;
 111
 112       /* First try to find a PHI node in NEW_BB which has the same
 113          PHI_RESULT as the PHI from BB we are currently processing.  */
 114       for (new_phi = phi_nodes (new_bb); new_phi;
 115            new_phi = PHI_CHAIN (new_phi))
 116         if (PHI_RESULT (new_phi) == PHI_RESULT (phi))
 117           break;
 118
 119       /* If we did not find a suitable PHI in NEW_BB, create one.  */
 120       if (!new_phi)
 121         new_phi = create_phi_node (PHI_RESULT (phi), new_bb);
 122
 123       /* Extract the argument corresponding to E from the current PHI
 124          node in BB.  */
 125       arg = PHI_ARG_DEF_TREE (phi, phi_arg_from_edge (phi, e));
 126
 127       /* Now add that same argument to the new PHI node in block NEW_BB.  */
 128       add_phi_arg (&new_phi, arg, e);
 129     }
 130 }
 131
 132 /* Remove the last statement in block BB which must be a COND_EXPR or
 133    SWITCH_EXPR.  Also remove all outgoing edges except the edge which
 134    reaches DEST_BB.
 135
 136    This is only used by jump threading which knows the last statement in
 137    BB should be a COND_EXPR or SWITCH_EXPR.  If the block ends with any other
 138    statement, then we abort.  */
 139
 140 static void
 141 remove_last_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 142 {
 143   block_stmt_iterator bsi;
 144   edge e;
 145   unsigned ix;
 146
 147   bsi = bsi_last (bb);
 148
 149   gcc_assert (TREE_CODE (bsi_stmt (bsi)) == COND_EXPR
 150               || TREE_CODE (bsi_stmt (bsi)) == SWITCH_EXPR);
 151
 152   bsi_remove (&bsi);
 153
 154   for (ix = 0; VEC_iterate (edge, bb->succs, ix, e); )
 155     {
 156       if (e->dest != dest_bb)
 157         ssa_remove_edge (e);
 158       else
 159         ix++;
 160     }
 161
 162   /* BB now has a single outgoing edge. We need to update the flags for
 163      that single outgoing edge.  */
 164   EDGE_SUCC (bb, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
 165   EDGE_SUCC (bb, 0)->flags |= EDGE_FALLTHRU;
 166 }
 167
 168 /* Create a duplicate of BB which only reaches the destination of the edge
 169    stored in RD.  Record the duplicate block in RD.  */
 170
 171 static void
 172 create_block_for_threading (basic_block bb, struct redirection_data *rd)
 173 {
 174   tree phi;
 175
 176   /* We can use the generic block duplication code and simply remove
 177      the stuff we do not need.  */
 178   rd->dup_block = duplicate_block (bb, NULL);
 179
 180   /* The call to duplicate_block will copy everything, including the
 181      useless COND_EXPR or SWITCH_EXPR at the end of the block.  We just remove
 182      the useless COND_EXPR or SWITCH_EXPR here rather than having a
 183      specialized block copier.  */
 184   remove_last_stmt_and_useless_edges (rd->dup_block, rd->outgoing_edge->dest);
 185
 186   /* If there are any PHI nodes at the destination of the outgoing edge
 187      from the duplicate block, then we will need to add a new argument
 188      to them.  The argument should have the same value as the argument
 189      associated with the outgoing edge stored in RD.  */
 190   for (phi = phi_nodes (EDGE_SUCC (rd->dup_block, 0)->dest); phi;
 191        phi = PHI_CHAIN (phi))
 192     {
 193       int indx = phi_arg_from_edge (phi, rd->outgoing_edge);
 194       add_phi_arg (&phi, PHI_ARG_DEF_TREE (phi, indx),
 195                    EDGE_SUCC (rd->dup_block, 0));
 196     }
 197 }
 198
 199 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
 200    is reached via one or more specific incoming edges, we know which
 201    outgoing edge from BB will be traversed.
 202
 203    We want to redirect those incoming edges to the target of the
 204    appropriate outgoing edge.  Doing so avoids a conditional branch
 205    and may expose new optimization opportunities.  Note that we have
 206    to update dominator tree and SSA graph after such changes.
 207
 208    The key to keeping the SSA graph update manageable is to duplicate
 209    the side effects occurring in BB so that those side effects still
 210    occur on the paths which bypass BB after redirecting edges.
 211
 212    We accomplish this by creating duplicates of BB and arranging for
 213    the duplicates to unconditionally pass control to one specific
 214    successor of BB.  We then revector the incoming edges into BB to
 215    the appropriate duplicate of BB.
 216
 217    BB and its duplicates will have assignments to the same set of
 218    SSA_NAMEs.  Right now, we just call into rewrite_ssa_into_ssa
 219    to update the SSA graph for those names.
 220
 221    We are also going to experiment with a true incremental update
 222    scheme for the duplicated resources.  Of of the interesting
 223    properties we can exploit here is that all the resources set
 224    in BB will have the same IDFS, so we have one IDFS computation
 225    per block with incoming threaded edges, which can lower the
 226    cost of the true incremental update algorithm.  */
 227
 228 static void
 229 thread_block (basic_block bb)
 230 {
 231   /* E is an incoming edge into BB that we may or may not want to
 232      redirect to a duplicate of BB.  */
 233   edge e;
 234   edge_iterator ei;
 235
 236   /* ALL indicates whether or not all incoming edges into BB should
 237      be threaded to a duplicate of BB.  */
 238   bool all = true;
 239
 240   unsigned int i, ix;
 241
 242   VARRAY_GENERIC_PTR_INIT (redirection_data, 2, "redirection data");
 243
 244   /* Look at each incoming edge into BB.  Record each unique outgoing
 245      edge that we want to thread an incoming edge to.  Also note if
 246      all incoming edges are threaded or not.  */
 247   FOR_EACH_EDGE (e, ei, bb->preds)
 248     {
 249       if (!e->aux)
 250         {
 251           all = false;
 252         }
 253       else
 254         {
 255           unsigned int i;
 256
 257           /* See if we can find an entry for the destination of this
 258              threaded edge that has already been recorded.  */
 259           for (i = 0; i < VARRAY_ACTIVE_SIZE (redirection_data); i++)
 260             {
 261               struct redirection_data *rd;
 262               edge e2;
 263
 264               rd = VARRAY_GENERIC_PTR (redirection_data, i);
 265               e2 = e->aux;
 266
 267               if (e2->dest == rd->outgoing_edge->dest)
 268                 break;
 269             }
 270
 271           /* If the loop did not terminate early, then we have a new
 272              destination for the incoming threaded edges.  Record it.  */
 273           if (i == VARRAY_ACTIVE_SIZE (redirection_data))
 274             {
 275               struct redirection_data *rd;
 276
 277               rd = ggc_alloc_cleared (sizeof (struct redirection_data));
 278               rd->outgoing_edge = e->aux;
 279               VARRAY_PUSH_GENERIC_PTR (redirection_data, rd);
 280             }
 281         }
 282     }
 283
 284   /* Now create duplicates of BB.  Note that if all incoming edges are
 285      threaded, then BB is going to become unreachable.  In that case
 286      we use BB for one of the duplicates rather than wasting memory
 287      duplicating BB.  Thus the odd starting condition for the loop.  */
 288   for (i = (all ? 1 : 0); i < VARRAY_ACTIVE_SIZE (redirection_data); i++)
 289     {
 290       struct redirection_data *rd = VARRAY_GENERIC_PTR (redirection_data, i);
 291       create_block_for_threading (bb, rd);
 292     }
 293
 294   /* The loop above created the duplicate blocks (and the statements
 295      within the duplicate blocks).  This loop creates PHI nodes for the
 296      duplicated blocks and redirects the incoming edges into BB to reach
 297      the duplicates of BB.
 298
 299      Note that redirecting the edge will change e->pred_next, so we have
 300      to hold e->pred_next in a temporary.
 301
 302      If this turns out to be a performance problem, then we could create
 303      a list of incoming edges associated with each entry in
 304      REDIRECTION_DATA and walk over that list of edges instead.  */
 305
 306   for (ix = 0; VEC_iterate (edge, bb->preds, ix, e); )
 307     {
 308       edge new_dest = e->aux;
 309
 310       /* E was not threaded, then there is nothing to do.  */
 311       if (!new_dest)
 312         {
 313           ix++;
 314           continue;
 315         }
 316
 317       /* Go ahead and clear E->aux.  It's not needed anymore and failure
 318          to clear it will cause all kinds of unpleasant problems later.  */
 319       e->aux = NULL;
 320
 321       /* We know E is an edge we want to thread.  Find the entry associated
 322          with E's new destination in the REDIRECTION_DATA array.  */
 323       for (i = 0; i < VARRAY_ACTIVE_SIZE (redirection_data); i++)
 324         {
 325           struct redirection_data *rd;
 326
 327           rd = VARRAY_GENERIC_PTR (redirection_data, i);
 328
 329           /* We have found the right entry if the outgoing edge in this
 330              entry matches E's new destination.  Note that if we have not
 331              created a duplicate block (rd->dup_block is NULL), then we
 332              are going to re-use BB as a duplicate and we do not need
 333              to create PHI nodes or redirect the edge.  */
 334           if (rd->outgoing_edge == new_dest && rd->dup_block)
 335             {
 336               edge e2;
 337               copy_phis_to_block (rd->dup_block, bb, e);
 338
 339               if (dump_file && (dump_flags & TDF_DETAILS))
 340                 fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 341                          e->src->index, e->dest->index, rd->dup_block->index);
 342
 343               e2 = redirect_edge_and_branch (e, rd->dup_block);
 344               PENDING_STMT (e2) = NULL;
 345
 346               if ((dump_file && (dump_flags & TDF_DETAILS))
 347                   && e->src != e2->src)
 348               fprintf (dump_file, "    basic block %d created\n",
 349                        e2->src->index);
 350               break;
 351             }
 352         }
 353     }
 354
 355   /* If all the incoming edges where threaded, then we used BB as one
 356      of the duplicate blocks.  We need to fixup BB in that case so that
 357      it no longer has a COND_EXPR or SWITCH_EXPR and reaches one destination
 358      unconditionally.  */
 359   if (all)
 360     {
 361       struct redirection_data *rd;
 362
 363       rd = VARRAY_GENERIC_PTR (redirection_data, 0);
 364
 365       if (dump_file && (dump_flags & TDF_DETAILS))
 366         fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 367                  EDGE_PRED (bb, 0)->src->index, bb->index,
 368                  EDGE_SUCC (bb, 0)->dest->index);
 369
 370       remove_last_stmt_and_useless_edges (bb, rd->outgoing_edge->dest);
 371     }
 372
 373   /* Done with this block.  Clear REDIRECTION_DATA.  */
 374   VARRAY_CLEAR (redirection_data);
 375 }
 376
 377 /* Walk through all blocks and thread incoming edges to the block's
 378    destinations as requested.  This is the only entry point into this
 379    file.
 380
 381    Blocks which have one or more incoming edges have INCOMING_EDGE_THREADED
 382    set in the block's annotation.
 383    this routine.
 384
 385    Each edge that should be threaded has the new destination edge stored in
 386    the original edge's AUX field.
 387
 388    This routine (or one of its callees) will clear INCOMING_EDGE_THREADED
 389    in the block annotations and the AUX field in the edges.
 390
 391    It is the caller's responsibility to fix the dominance information
 392    and rewrite duplicated SSA_NAMEs back into SSA form.
 393
 394    Returns true if one or more edges were threaded, false otherwise.   */
 395
 396 bool
 397 thread_through_all_blocks (void)
 398 {
 399   basic_block bb;
 400   bool retval = false;
 401
 402   FOR_EACH_BB (bb)
 403     {
 404       if (bb_ann (bb)->incoming_edge_threaded)
 405         {
 406           thread_block (bb);
 407           retval = true;
 408           bb_ann (bb)->incoming_edge_threaded = false;
 409         }
 410     }
 411   return retval;
 412 }